Line data Source code
1 : /**
2 : * @file ft_report_json.c
3 : * @brief JSON-based duplicate reporting functions.
4 : * @ingroup Reporting
5 : */
6 : /*
7 : * Copyright (C) 2007 François Pesce : francois.pesce (at) gmail (dot) com
8 : *
9 : * Licensed under the Apache License, Version 2.0 (the "License");
10 : * you may not use this file except in compliance with the License.
11 : * You may obtain a copy of the License at
12 : *
13 : * http://www.apache.org/licenses/LICENSE-2.0
14 : *
15 : * Unless required by applicable law or agreed to in writing, software
16 : * distributed under the License is distributed on an "AS IS" BASIS,
17 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 : * See the License for the specific language governing permissions and
19 : * limitations under the License.
20 : */
21 :
22 : #include "ft_report_json.h"
23 :
24 : #if HAVE_JANSSON
25 :
26 : #include <stdio.h>
27 : #include <string.h>
28 :
29 : #include <apr_strings.h>
30 : #include <apr_file_io.h>
31 : #include <apr_time.h>
32 : #include <jansson.h>
33 :
34 : #include "config.h"
35 : #include "debug.h"
36 : #include "ft_archive.h"
37 : #include "ft_config.h"
38 : #include "ft_file.h"
39 : #include "ft_report.h"
40 : #include "napr_hash.h"
41 : #include "napr_heap.h"
42 :
43 : /* Formats apr_time_t to ISO 8601 UTC string (YYYY-MM-DDTHH:MM:SSZ). */
44 2 : static const char *ft_format_time_iso8601_utc(apr_time_t t, apr_pool_t *pool)
45 : {
46 : apr_time_exp_t exploded;
47 : // Use apr_time_exp_gmt to get the time in UTC (GMT).
48 2 : if (apr_time_exp_gmt(&exploded, t) != APR_SUCCESS) {
49 0 : return apr_pstrdup(pool, "UNKNOWN_TIME");
50 : }
51 2 : return apr_psprintf(pool, "%04d-%02d-%02dT%02d:%02d:%02dZ",
52 2 : exploded.tm_year + 1900, exploded.tm_mon + 1, exploded.tm_mday,
53 : exploded.tm_hour, exploded.tm_min, exploded.tm_sec);
54 : }
55 :
56 : /* Converts XXH128 hash to a hex string. Assumes XXH128_hash_t has high64/low64 members. */
57 1 : static char *ft_hash_to_hex(ft_hash_t hash, apr_pool_t *pool)
58 : {
59 : /* Use APR's format macro for 64-bit hex (expands to PRIx64) with zero-padding */
60 1 : return apr_psprintf(pool, "%016" APR_UINT64_T_HEX_FMT "%016" APR_UINT64_T_HEX_FMT, hash.high64, hash.low64);
61 : }
62 :
63 : /* Helper to create a JSON object for a file entry */
64 2 : static json_t *create_file_json_object(ft_file_t *file, ft_conf_t *conf)
65 : {
66 2 : json_t *obj = json_object();
67 2 : const char *mtime_str = ft_format_time_iso8601_utc(file->mtime, conf->pool);
68 :
69 2 : json_object_set_new(obj, "path", json_string(file->path));
70 2 : if (is_option_set(conf->mask, OPTION_UNTAR)) {
71 0 : json_object_set_new(obj, "archive_subpath", file->subpath ? json_string(file->subpath) : json_null());
72 : }
73 2 : json_object_set_new(obj, "mtime_utc", json_string(mtime_str));
74 2 : json_object_set_new(obj, "prioritized", json_boolean(file->prioritized));
75 2 : return obj;
76 : }
77 :
78 1 : apr_status_t ft_report_json(ft_conf_t *conf)
79 : {
80 : // Variable declarations (mirroring ft_conf_twin_report)
81 : char errbuf[128];
82 1 : apr_off_t old_size = -1;
83 : ft_file_t *file;
84 : ft_fsize_t *fsize;
85 : apr_uint32_t hash_value;
86 : apr_size_t i, j;
87 : int rv;
88 : apr_status_t status;
89 1 : apr_uint32_t chksum_array_sz = 0U;
90 :
91 1 : json_t *root_array = json_array();
92 1 : if (!root_array)
93 0 : return APR_ENOMEM;
94 :
95 : // Iterate through the heap (logic adapted from ft_conf_twin_report)
96 3 : while (NULL != (file = napr_heap_extract(conf->heap))) {
97 2 : if (file->size == old_size)
98 1 : continue;
99 1 : old_size = file->size;
100 :
101 1 : if (NULL != (fsize = napr_hash_search(conf->sizes, &file->size, sizeof(apr_off_t), &hash_value))) {
102 1 : chksum_array_sz = FTWIN_MIN(fsize->nb_files, fsize->nb_checksumed);
103 1 : qsort(fsize->chksum_array, chksum_array_sz, sizeof(ft_chksum_t), ft_chksum_cmp);
104 :
105 3 : for (i = 0; i < fsize->nb_files; i++) {
106 2 : if (NULL == fsize->chksum_array[i].file)
107 1 : continue;
108 :
109 1 : json_t *current_set_obj = NULL;
110 1 : json_t *duplicates_array = NULL;
111 :
112 2 : for (j = i + 1; j < fsize->nb_files; j++) {
113 1 : if (0 ==
114 1 : memcmp(&fsize->chksum_array[i].hash_value, &fsize->chksum_array[j].hash_value, sizeof(ft_hash_t))) {
115 :
116 : // --- Comparison Logic (Replicate exactly from ft_conf_twin_report) ---
117 : char *fpathi, *fpathj;
118 1 : if (is_option_set(conf->mask, OPTION_UNTAR)) {
119 0 : if (NULL != fsize->chksum_array[i].file->subpath) {
120 0 : fpathi = ft_archive_untar_file(fsize->chksum_array[i].file, conf->pool);
121 0 : if (NULL == fpathi) {
122 0 : DEBUG_ERR("error calling ft_archive_untar_file");
123 0 : return APR_EGENERAL;
124 : }
125 : }
126 : else {
127 0 : fpathi = fsize->chksum_array[i].file->path;
128 : }
129 0 : if (NULL != fsize->chksum_array[j].file->subpath) {
130 0 : fpathj = ft_archive_untar_file(fsize->chksum_array[j].file, conf->pool);
131 0 : if (NULL == fpathj) {
132 0 : DEBUG_ERR("error calling ft_archive_untar_file");
133 0 : return APR_EGENERAL;
134 : }
135 : }
136 : else {
137 0 : fpathj = fsize->chksum_array[j].file->path;
138 : }
139 : }
140 : else {
141 1 : fpathi = fsize->chksum_array[i].file->path;
142 1 : fpathj = fsize->chksum_array[j].file->path;
143 : }
144 1 : status = filecmp(conf->pool, fpathi, fpathj, fsize->val, conf->excess_size, &rv);
145 :
146 1 : if (is_option_set(conf->mask, OPTION_UNTAR)) {
147 0 : if (NULL != fsize->chksum_array[i].file->subpath)
148 0 : (void) apr_file_remove(fpathi, conf->pool);
149 0 : if (NULL != fsize->chksum_array[j].file->subpath)
150 0 : (void) apr_file_remove(fpathj, conf->pool);
151 : }
152 1 : if (APR_SUCCESS != status) {
153 0 : if (is_option_set(conf->mask, OPTION_VERBO))
154 0 : fprintf(stderr, "\nskipping %s and %s comparison because: %s\n",
155 0 : fsize->chksum_array[i].file->path, fsize->chksum_array[j].file->path,
156 : apr_strerror(status, errbuf, 128));
157 0 : rv = 1;
158 : }
159 : // -------------------------------------------------------------
160 :
161 1 : if (0 == rv) {
162 1 : if (is_option_set(conf->mask, OPTION_DRY_RUN)) {
163 0 : fprintf(stderr, "Dry run: would perform action on %s and %s\n",
164 0 : fsize->chksum_array[i].file->path, fsize->chksum_array[j].file->path);
165 : }
166 :
167 : // Initialize JSON set if first match for file[i]
168 1 : if (NULL == current_set_obj) {
169 1 : current_set_obj = json_object();
170 1 : duplicates_array = json_array();
171 :
172 : // Add metadata
173 1 : json_object_set_new(current_set_obj, "size_bytes", json_integer(fsize->val));
174 1 : char *hex_hash = ft_hash_to_hex(fsize->chksum_array[i].hash_value, conf->pool);
175 1 : json_object_set_new(current_set_obj, "hash_xxh128", json_string(hex_hash));
176 1 : json_object_set_new(current_set_obj, "duplicates", duplicates_array);
177 :
178 : // Add file[i] details
179 1 : json_array_append_new(duplicates_array,
180 1 : create_file_json_object(fsize->chksum_array[i].file, conf));
181 : }
182 :
183 : // Add file[j] details
184 1 : json_array_append_new(duplicates_array,
185 1 : create_file_json_object(fsize->chksum_array[j].file, conf));
186 :
187 1 : fsize->chksum_array[j].file = NULL; // Mark as processed
188 : }
189 : }
190 : else {
191 0 : break; // Hashes differ
192 : }
193 : }
194 : // If a set was created, append it to the root array
195 1 : if (NULL != current_set_obj) {
196 1 : json_array_append_new(root_array, current_set_obj);
197 : }
198 : }
199 : }
200 : else {
201 0 : DEBUG_ERR("inconsistency error found, no size[%" APR_OFF_T_FMT "] in hash for file %s", file->size, file->path);
202 0 : return APR_EGENERAL;
203 : }
204 : }
205 :
206 : // Dump the JSON output to stdout
207 1 : json_dumpf(root_array, stdout, JSON_INDENT(2) | JSON_ENSURE_ASCII);
208 1 : printf("\n");
209 1 : fflush(stdout);
210 : // Free the JSON structure
211 1 : json_decref(root_array);
212 :
213 1 : return APR_SUCCESS;
214 : }
215 :
216 : #endif /* HAVE_JANSSON */
|