Line data Source code
1 : /**
2 : * @file ft_report.c
3 : * @brief Text-based duplicate reporting functions.
4 : * @ingroup Reporting
5 : */
6 : /*
7 : * Copyright (C) 2007 François Pesce : francois.pesce (at) gmail (dot) com
8 : *
9 : * Licensed under the Apache License, Version 2.0 (the "License");
10 : * you may not use this file except in compliance with the License.
11 : * You may obtain a copy of the License at
12 : *
13 : * http://www.apache.org/licenses/LICENSE-2.0
14 : *
15 : * Unless required by applicable law or agreed to in writing, software
16 : * distributed under the License is distributed on an "AS IS" BASIS,
17 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 : * See the License for the specific language governing permissions and
19 : * limitations under the License.
20 : */
21 :
22 : #include "ft_report.h"
23 :
24 : #include <stdio.h>
25 : #include <string.h>
26 : #include <unistd.h>
27 :
28 : #include <apr_strings.h>
29 : #include <apr_file_io.h>
30 :
31 : #include "config.h"
32 : #include "debug.h"
33 : #include "ft_archive.h"
34 : #include "ft_config.h"
35 : #include "ft_file.h"
36 : #include "human_size.h"
37 : #include "napr_hash.h"
38 : #include "napr_heap.h"
39 :
40 72 : int ft_chksum_cmp(const void *chksum1, const void *chksum2)
41 : {
42 72 : const ft_chksum_t *chk1 = chksum1;
43 72 : const ft_chksum_t *chk2 = chksum2;
44 : int i;
45 :
46 72 : i = memcmp(&chk1->hash_value, &chk2->hash_value, sizeof(ft_hash_t));
47 :
48 72 : if (0 == i) {
49 72 : return chk1->file->prioritized - chk2->file->prioritized;
50 : }
51 :
52 0 : return i;
53 : }
54 :
55 : /* Forward declaration for helper function */
56 : static apr_status_t compare_and_report_pair(ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, apr_size_t j,
57 : unsigned char *already_printed, const reporting_colors_t *colors);
58 :
59 : /**
60 : * @brief Processes a single file to find and report its duplicates.
61 : *
62 : * This helper function is called by ft_report_duplicates. It iterates through
63 : * the checksum array for a given file size, compares subsequent files with
64 : * the same hash, and calls compare_and_report_pair to verify and report
65 : * true duplicates. This simplifies the main reporting loop by encapsulating
66 : * the logic for handling a single file's potential duplicates.
67 : *
68 : * @return APR_SUCCESS on success, or an error status if reporting fails.
69 : */
70 : static apr_status_t
71 104 : process_and_report_duplicates_for_file(ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, const reporting_colors_t *colors)
72 : {
73 104 : unsigned char already_printed = 0;
74 : apr_size_t j;
75 :
76 104 : if (NULL == fsize->chksum_array[i].file) {
77 69 : return APR_SUCCESS; /* Already processed as a duplicate */
78 : }
79 :
80 104 : for (j = i + 1; j < fsize->nb_files; j++) {
81 : /* If hashes match, perform a full comparison */
82 69 : if (0 == memcmp(&fsize->chksum_array[i].hash_value, &fsize->chksum_array[j].hash_value, sizeof(ft_hash_t))) {
83 69 : if (compare_and_report_pair(conf, fsize, i, j, &already_printed, colors) != APR_SUCCESS) {
84 0 : return APR_EGENERAL;
85 : }
86 : }
87 : else {
88 : /* Hashes are ordered, so we can break early */
89 0 : break;
90 : }
91 : }
92 :
93 35 : if (already_printed) {
94 35 : printf("\n\n");
95 : }
96 :
97 35 : return APR_SUCCESS;
98 : }
99 :
100 16 : apr_status_t ft_report_duplicates(ft_conf_t *conf)
101 : {
102 16 : apr_off_t old_size = -1;
103 16 : ft_file_t *file = NULL;
104 16 : ft_fsize_t *fsize = NULL;
105 16 : apr_uint32_t hash_value = 0;
106 16 : apr_size_t i = 0;
107 16 : apr_uint32_t chksum_array_sz = 0U;
108 16 : int use_color = isatty(STDOUT_FILENO);
109 48 : const reporting_colors_t colors = {
110 16 : use_color ? ANSI_COLOR_CYAN ANSI_COLOR_BOLD : "",
111 16 : use_color ? ANSI_COLOR_BLUE ANSI_COLOR_BOLD : "",
112 16 : use_color ? ANSI_COLOR_RESET : ""
113 : };
114 :
115 16 : if (is_option_set(conf->mask, OPTION_VERBO)) {
116 0 : (void) fprintf(stderr, "Reporting duplicate files:\n");
117 : }
118 :
119 120 : while (NULL != (file = napr_heap_extract(conf->heap))) {
120 104 : if (file->size == old_size) {
121 69 : continue;
122 : }
123 35 : old_size = file->size;
124 :
125 35 : fsize = napr_hash_search(conf->sizes, &file->size, sizeof(apr_off_t), &hash_value);
126 35 : if (NULL != fsize) {
127 35 : chksum_array_sz = FTWIN_MIN(fsize->nb_files, fsize->nb_checksumed);
128 35 : qsort(fsize->chksum_array, chksum_array_sz, sizeof(ft_chksum_t), ft_chksum_cmp);
129 :
130 139 : for (i = 0; i < fsize->nb_files; i++) {
131 104 : if (process_and_report_duplicates_for_file(conf, fsize, i, &colors) != APR_SUCCESS) {
132 0 : return APR_EGENERAL;
133 : }
134 : }
135 : }
136 : else {
137 0 : DEBUG_ERR("inconsistency error found, no size[%" APR_OFF_T_FMT "] in hash for file %s", file->size, file->path);
138 0 : return APR_EGENERAL;
139 : }
140 : }
141 :
142 16 : return APR_SUCCESS;
143 : }
144 :
145 : /**
146 : * @brief Gets the file paths for comparison, handling archive extraction if needed.
147 : * @return APR_SUCCESS on success, or an error status if extraction fails.
148 : */
149 69 : static apr_status_t get_comparison_paths(ft_conf_t *conf, ft_file_t *file_i, ft_file_t *file_j, char **fpathi, char **fpathj)
150 : {
151 69 : if (is_option_set(conf->mask, OPTION_UNTAR)) {
152 0 : if (file_i->subpath) {
153 0 : *fpathi = ft_archive_untar_file(file_i, conf->pool);
154 0 : if (!*fpathi) {
155 0 : return APR_EGENERAL;
156 : }
157 : }
158 : else {
159 0 : *fpathi = file_i->path;
160 : }
161 0 : if (file_j->subpath) {
162 0 : *fpathj = ft_archive_untar_file(file_j, conf->pool);
163 0 : if (!*fpathj) {
164 0 : if (file_i->subpath) {
165 0 : (void) apr_file_remove(*fpathi, conf->pool);
166 : }
167 0 : return APR_EGENERAL;
168 : }
169 : }
170 : else {
171 0 : *fpathj = file_j->path;
172 : }
173 : }
174 : else {
175 69 : *fpathi = file_i->path;
176 69 : *fpathj = file_j->path;
177 : }
178 69 : return APR_SUCCESS;
179 : }
180 :
181 : /**
182 : * @brief Cleans up temporary files created during archive extraction.
183 : */
184 69 : static void cleanup_comparison_paths(ft_conf_t *conf, ft_file_t *file_i, ft_file_t *file_j, char *fpathi, char *fpathj)
185 : {
186 69 : if (is_option_set(conf->mask, OPTION_UNTAR)) {
187 0 : if (file_i->subpath) {
188 0 : (void) apr_file_remove(fpathi, conf->pool);
189 : }
190 0 : if (file_j->subpath) {
191 0 : (void) apr_file_remove(fpathj, conf->pool);
192 : }
193 : }
194 69 : }
195 :
196 : /**
197 : * @brief Formats and prints the output for a duplicate file entry.
198 : */
199 104 : static void format_and_print_duplicate(ft_conf_t *conf, const ft_file_t *file, const reporting_colors_t *colors)
200 : {
201 104 : if (is_option_set(conf->mask, OPTION_UNTAR) && file->subpath) {
202 0 : printf("%s%s%c%s%s", colors->path, file->path, (':' != conf->sep) ? ':' : '|', file->subpath, colors->reset);
203 : }
204 : else {
205 104 : printf("%s%s%s", colors->path, file->path, colors->reset);
206 : }
207 104 : }
208 :
209 69 : static apr_status_t compare_and_report_pair(ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, apr_size_t j,
210 : unsigned char *already_printed, const reporting_colors_t *colors)
211 : {
212 69 : char *fpathi = NULL;
213 69 : char *fpathj = NULL;
214 69 : int rv = 0;
215 : apr_status_t status;
216 :
217 69 : ft_file_t *file_i = fsize->chksum_array[i].file;
218 69 : ft_file_t *file_j = fsize->chksum_array[j].file;
219 :
220 69 : if (get_comparison_paths(conf, file_i, file_j, &fpathi, &fpathj) != APR_SUCCESS) {
221 0 : DEBUG_ERR("Failed to get comparison paths for %s and %s", file_i->path, file_j->path);
222 0 : return APR_EGENERAL;
223 : }
224 :
225 69 : status = filecmp(conf->pool, fpathi, fpathj, fsize->val, conf->excess_size, &rv);
226 69 : cleanup_comparison_paths(conf, file_i, file_j, fpathi, fpathj);
227 :
228 69 : if (status != APR_SUCCESS) {
229 0 : if (is_option_set(conf->mask, OPTION_VERBO)) {
230 0 : char errbuf[ERROR_BUFFER_SIZE];
231 0 : (void) fprintf(stderr, "\nskipping %s and %s comparison because: %s\n", file_i->path, file_j->path,
232 0 : apr_strerror(status, errbuf, sizeof(errbuf)));
233 : }
234 0 : return APR_SUCCESS; /* Continue processing other pairs */
235 : }
236 :
237 69 : if (rv == 0) {
238 69 : if (is_option_set(conf->mask, OPTION_DRY_RUN)) {
239 0 : fprintf(stderr, "Dry run: would report %s and %s as duplicates.\n", file_i->path, file_j->path);
240 : }
241 :
242 69 : if (!*already_printed) {
243 35 : if (is_option_set(conf->mask, OPTION_SIZED)) {
244 0 : const char *human_size = format_human_size(fsize->val, conf->pool);
245 0 : printf("%sSize: %s%s\n", colors->size, human_size, colors->reset);
246 : }
247 35 : format_and_print_duplicate(conf, file_i, colors);
248 35 : *already_printed = 1;
249 : }
250 :
251 69 : printf("%c", conf->sep);
252 69 : format_and_print_duplicate(conf, file_j, colors);
253 :
254 69 : fsize->chksum_array[j].file = NULL; /* Mark as a twin */
255 69 : fflush(stdout);
256 : }
257 :
258 69 : return APR_SUCCESS;
259 : }
|