ftwin 0.8.10
ft_report_json.c
Go to the documentation of this file.
1
6/*
7 * Copyright (C) 2007 François Pesce : francois.pesce (at) gmail (dot) com
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 */
21
22#include "ft_report_json.h"
23
24#if HAVE_JANSSON
25
26#include <stdio.h>
27#include <string.h>
28
29#include <apr_strings.h>
30#include <apr_file_io.h>
31#include <apr_time.h>
32#include <jansson.h>
33
34#include "config.h"
35#include "debug.h"
36#include "ft_archive.h"
37#include "ft_config.h"
38#include "ft_file.h"
39#include "ft_report.h"
40#include "napr_hash.h"
41#include "napr_heap.h"
42
43/* Formats apr_time_t to ISO 8601 UTC string (YYYY-MM-DDTHH:MM:SSZ). */
44static const char *ft_format_time_iso8601_utc(apr_time_t t, apr_pool_t *pool)
45{
46 apr_time_exp_t exploded;
47 // Use apr_time_exp_gmt to get the time in UTC (GMT).
48 if (apr_time_exp_gmt(&exploded, t) != APR_SUCCESS) {
49 return apr_pstrdup(pool, "UNKNOWN_TIME");
50 }
51 return apr_psprintf(pool, "%04d-%02d-%02dT%02d:%02d:%02dZ",
52 exploded.tm_year + 1900, exploded.tm_mon + 1, exploded.tm_mday,
53 exploded.tm_hour, exploded.tm_min, exploded.tm_sec);
54}
55
56/* Converts XXH128 hash to a hex string. Assumes XXH128_hash_t has high64/low64 members. */
57static char *ft_hash_to_hex(ft_hash_t hash, apr_pool_t *pool)
58{
59 /* Use APR's format macro for 64-bit hex (expands to PRIx64) with zero-padding */
60 return apr_psprintf(pool, "%016" APR_UINT64_T_HEX_FMT "%016" APR_UINT64_T_HEX_FMT, hash.high64, hash.low64);
61}
62
63/* Helper to create a JSON object for a file entry */
64static json_t *create_file_json_object(ft_file_t *file, ft_conf_t *conf)
65{
66 json_t *obj = json_object();
67 const char *mtime_str = ft_format_time_iso8601_utc(file->mtime, conf->pool);
68
69 json_object_set_new(obj, "path", json_string(file->path));
70 if (is_option_set(conf->mask, OPTION_UNTAR)) {
71 json_object_set_new(obj, "archive_subpath", file->subpath ? json_string(file->subpath) : json_null());
72 }
73 json_object_set_new(obj, "mtime_utc", json_string(mtime_str));
74 json_object_set_new(obj, "prioritized", json_boolean(file->prioritized));
75 return obj;
76}
77
78apr_status_t ft_report_json(ft_conf_t *conf)
79{
80 // Variable declarations (mirroring ft_conf_twin_report)
81 char errbuf[128];
82 apr_off_t old_size = -1;
83 ft_file_t *file;
84 ft_fsize_t *fsize;
85 apr_uint32_t hash_value;
86 apr_size_t i, j;
87 int rv;
88 apr_status_t status;
89 apr_uint32_t chksum_array_sz = 0U;
90
91 json_t *root_array = json_array();
92 if (!root_array)
93 return APR_ENOMEM;
94
95 // Iterate through the heap (logic adapted from ft_conf_twin_report)
96 while (NULL != (file = napr_heap_extract(conf->heap))) {
97 if (file->size == old_size)
98 continue;
99 old_size = file->size;
100
101 if (NULL != (fsize = napr_hash_search(conf->sizes, &file->size, sizeof(apr_off_t), &hash_value))) {
102 chksum_array_sz = FTWIN_MIN(fsize->nb_files, fsize->nb_checksumed);
103 qsort(fsize->chksum_array, chksum_array_sz, sizeof(ft_chksum_t), ft_chksum_cmp);
104
105 for (i = 0; i < fsize->nb_files; i++) {
106 if (NULL == fsize->chksum_array[i].file)
107 continue;
108
109 json_t *current_set_obj = NULL;
110 json_t *duplicates_array = NULL;
111
112 for (j = i + 1; j < fsize->nb_files; j++) {
113 if (0 ==
114 memcmp(&fsize->chksum_array[i].hash_value, &fsize->chksum_array[j].hash_value, sizeof(ft_hash_t))) {
115
116 // --- Comparison Logic (Replicate exactly from ft_conf_twin_report) ---
117 char *fpathi, *fpathj;
118 if (is_option_set(conf->mask, OPTION_UNTAR)) {
119 if (NULL != fsize->chksum_array[i].file->subpath) {
120 fpathi = ft_archive_untar_file(fsize->chksum_array[i].file, conf->pool);
121 if (NULL == fpathi) {
122 DEBUG_ERR("error calling ft_archive_untar_file");
123 return APR_EGENERAL;
124 }
125 }
126 else {
127 fpathi = fsize->chksum_array[i].file->path;
128 }
129 if (NULL != fsize->chksum_array[j].file->subpath) {
130 fpathj = ft_archive_untar_file(fsize->chksum_array[j].file, conf->pool);
131 if (NULL == fpathj) {
132 DEBUG_ERR("error calling ft_archive_untar_file");
133 return APR_EGENERAL;
134 }
135 }
136 else {
137 fpathj = fsize->chksum_array[j].file->path;
138 }
139 }
140 else {
141 fpathi = fsize->chksum_array[i].file->path;
142 fpathj = fsize->chksum_array[j].file->path;
143 }
144 status = filecmp(conf->pool, fpathi, fpathj, fsize->val, conf->excess_size, &rv);
145
146 if (is_option_set(conf->mask, OPTION_UNTAR)) {
147 if (NULL != fsize->chksum_array[i].file->subpath)
148 (void) apr_file_remove(fpathi, conf->pool);
149 if (NULL != fsize->chksum_array[j].file->subpath)
150 (void) apr_file_remove(fpathj, conf->pool);
151 }
152 if (APR_SUCCESS != status) {
153 if (is_option_set(conf->mask, OPTION_VERBO))
154 fprintf(stderr, "\nskipping %s and %s comparison because: %s\n",
155 fsize->chksum_array[i].file->path, fsize->chksum_array[j].file->path,
156 apr_strerror(status, errbuf, 128));
157 rv = 1;
158 }
159 // -------------------------------------------------------------
160
161 if (0 == rv) {
162 if (is_option_set(conf->mask, OPTION_DRY_RUN)) {
163 fprintf(stderr, "Dry run: would perform action on %s and %s\n",
164 fsize->chksum_array[i].file->path, fsize->chksum_array[j].file->path);
165 }
166
167 // Initialize JSON set if first match for file[i]
168 if (NULL == current_set_obj) {
169 current_set_obj = json_object();
170 duplicates_array = json_array();
171
172 // Add metadata
173 json_object_set_new(current_set_obj, "size_bytes", json_integer(fsize->val));
174 char *hex_hash = ft_hash_to_hex(fsize->chksum_array[i].hash_value, conf->pool);
175 json_object_set_new(current_set_obj, "hash_xxh128", json_string(hex_hash));
176 json_object_set_new(current_set_obj, "duplicates", duplicates_array);
177
178 // Add file[i] details
179 json_array_append_new(duplicates_array,
180 create_file_json_object(fsize->chksum_array[i].file, conf));
181 }
182
183 // Add file[j] details
184 json_array_append_new(duplicates_array,
185 create_file_json_object(fsize->chksum_array[j].file, conf));
186
187 fsize->chksum_array[j].file = NULL; // Mark as processed
188 }
189 }
190 else {
191 break; // Hashes differ
192 }
193 }
194 // If a set was created, append it to the root array
195 if (NULL != current_set_obj) {
196 json_array_append_new(root_array, current_set_obj);
197 }
198 }
199 }
200 else {
201 DEBUG_ERR("inconsistency error found, no size[%" APR_OFF_T_FMT "] in hash for file %s", file->size, file->path);
202 return APR_EGENERAL;
203 }
204 }
205
206 // Dump the JSON output to stdout
207 json_dumpf(root_array, stdout, JSON_INDENT(2) | JSON_ENSURE_ASCII);
208 printf("\n");
209 fflush(stdout);
210 // Free the JSON structure
211 json_decref(root_array);
212
213 return APR_SUCCESS;
214}
215
216#endif /* HAVE_JANSSON */
UTIL debug output macros.
#define DEBUG_ERR(str, arg...)
Displays an error message at the error level.
Definition debug.h:31
apr_status_t filecmp(apr_pool_t *pool, const char *fname1, const char *fname2, apr_off_t size, apr_off_t excess_size, int *result_out)
Compares two files byte-by-byte to determine if they are identical.
Definition ft_file.c:285
Interface for file comparison and checksum calculation.
int ft_chksum_cmp(const void *chksum1, const void *chksum2)
Comparison function for sorting checksums.
Definition ft_report.c:40
apr_status_t ft_report_json(ft_conf_t *conf)
Reports duplicate files in JSON format to stdout.
void * napr_hash_search(napr_hash_t *hash, const void *key, apr_size_t key_len, apr_uint32_t *hash_value)
Searches the hash table for an item.
Definition napr_hash.c:145
A high-performance hash table implementation built on APR.
void * napr_heap_extract(napr_heap_t *heap)
Removes and returns the element at the top of the heap (the min or max element).
Definition napr_heap.c:122
A generic binary heap implementation (min-heap or max-heap).
The return value from 128-bit hashes.
Definition xxhash.h:1239
XXH64_hash_t low64
Definition xxhash.h:1240
XXH64_hash_t high64
Definition xxhash.h:1241
Main configuration structure for the ftwin application.
Definition ft_config.h:94