ftwin 0.8.10
ft_report.c
Go to the documentation of this file.
1
6/*
7 * Copyright (C) 2007 François Pesce : francois.pesce (at) gmail (dot) com
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 */
21
22#include "ft_report.h"
23
24#include <stdio.h>
25#include <string.h>
26#include <unistd.h>
27
28#include <apr_strings.h>
29#include <apr_file_io.h>
30
31#include "config.h"
32#include "debug.h"
33#include "ft_archive.h"
34#include "ft_config.h"
35#include "ft_file.h"
36#include "human_size.h"
37#include "napr_hash.h"
38#include "napr_heap.h"
39
40int ft_chksum_cmp(const void *chksum1, const void *chksum2)
41{
42 const ft_chksum_t *chk1 = chksum1;
43 const ft_chksum_t *chk2 = chksum2;
44 int i;
45
46 i = memcmp(&chk1->hash_value, &chk2->hash_value, sizeof(ft_hash_t));
47
48 if (0 == i) {
49 return chk1->file->prioritized - chk2->file->prioritized;
50 }
51
52 return i;
53}
54
55/* Forward declaration for helper function */
56static apr_status_t compare_and_report_pair(ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, apr_size_t j,
57 unsigned char *already_printed, const reporting_colors_t *colors);
58
70static apr_status_t
71process_and_report_duplicates_for_file(ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, const reporting_colors_t *colors)
72{
73 unsigned char already_printed = 0;
74 apr_size_t j;
75
76 if (NULL == fsize->chksum_array[i].file) {
77 return APR_SUCCESS; /* Already processed as a duplicate */
78 }
79
80 for (j = i + 1; j < fsize->nb_files; j++) {
81 /* If hashes match, perform a full comparison */
82 if (0 == memcmp(&fsize->chksum_array[i].hash_value, &fsize->chksum_array[j].hash_value, sizeof(ft_hash_t))) {
83 if (compare_and_report_pair(conf, fsize, i, j, &already_printed, colors) != APR_SUCCESS) {
84 return APR_EGENERAL;
85 }
86 }
87 else {
88 /* Hashes are ordered, so we can break early */
89 break;
90 }
91 }
92
93 if (already_printed) {
94 printf("\n\n");
95 }
96
97 return APR_SUCCESS;
98}
99
101{
102 apr_off_t old_size = -1;
103 ft_file_t *file = NULL;
104 ft_fsize_t *fsize = NULL;
105 apr_uint32_t hash_value = 0;
106 apr_size_t i = 0;
107 apr_uint32_t chksum_array_sz = 0U;
108 int use_color = isatty(STDOUT_FILENO);
109 const reporting_colors_t colors = {
110 use_color ? ANSI_COLOR_CYAN ANSI_COLOR_BOLD : "",
111 use_color ? ANSI_COLOR_BLUE ANSI_COLOR_BOLD : "",
112 use_color ? ANSI_COLOR_RESET : ""
113 };
114
115 if (is_option_set(conf->mask, OPTION_VERBO)) {
116 (void) fprintf(stderr, "Reporting duplicate files:\n");
117 }
118
119 while (NULL != (file = napr_heap_extract(conf->heap))) {
120 if (file->size == old_size) {
121 continue;
122 }
123 old_size = file->size;
124
125 fsize = napr_hash_search(conf->sizes, &file->size, sizeof(apr_off_t), &hash_value);
126 if (NULL != fsize) {
127 chksum_array_sz = FTWIN_MIN(fsize->nb_files, fsize->nb_checksumed);
128 qsort(fsize->chksum_array, chksum_array_sz, sizeof(ft_chksum_t), ft_chksum_cmp);
129
130 for (i = 0; i < fsize->nb_files; i++) {
131 if (process_and_report_duplicates_for_file(conf, fsize, i, &colors) != APR_SUCCESS) {
132 return APR_EGENERAL;
133 }
134 }
135 }
136 else {
137 DEBUG_ERR("inconsistency error found, no size[%" APR_OFF_T_FMT "] in hash for file %s", file->size, file->path);
138 return APR_EGENERAL;
139 }
140 }
141
142 return APR_SUCCESS;
143}
144
149static apr_status_t get_comparison_paths(ft_conf_t *conf, ft_file_t *file_i, ft_file_t *file_j, char **fpathi, char **fpathj)
150{
151 if (is_option_set(conf->mask, OPTION_UNTAR)) {
152 if (file_i->subpath) {
153 *fpathi = ft_archive_untar_file(file_i, conf->pool);
154 if (!*fpathi) {
155 return APR_EGENERAL;
156 }
157 }
158 else {
159 *fpathi = file_i->path;
160 }
161 if (file_j->subpath) {
162 *fpathj = ft_archive_untar_file(file_j, conf->pool);
163 if (!*fpathj) {
164 if (file_i->subpath) {
165 (void) apr_file_remove(*fpathi, conf->pool);
166 }
167 return APR_EGENERAL;
168 }
169 }
170 else {
171 *fpathj = file_j->path;
172 }
173 }
174 else {
175 *fpathi = file_i->path;
176 *fpathj = file_j->path;
177 }
178 return APR_SUCCESS;
179}
180
184static void cleanup_comparison_paths(ft_conf_t *conf, ft_file_t *file_i, ft_file_t *file_j, char *fpathi, char *fpathj)
185{
186 if (is_option_set(conf->mask, OPTION_UNTAR)) {
187 if (file_i->subpath) {
188 (void) apr_file_remove(fpathi, conf->pool);
189 }
190 if (file_j->subpath) {
191 (void) apr_file_remove(fpathj, conf->pool);
192 }
193 }
194}
195
199static void format_and_print_duplicate(ft_conf_t *conf, const ft_file_t *file, const reporting_colors_t *colors)
200{
201 if (is_option_set(conf->mask, OPTION_UNTAR) && file->subpath) {
202 printf("%s%s%c%s%s", colors->path, file->path, (':' != conf->sep) ? ':' : '|', file->subpath, colors->reset);
203 }
204 else {
205 printf("%s%s%s", colors->path, file->path, colors->reset);
206 }
207}
208
209static apr_status_t compare_and_report_pair(ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, apr_size_t j,
210 unsigned char *already_printed, const reporting_colors_t *colors)
211{
212 char *fpathi = NULL;
213 char *fpathj = NULL;
214 int rv = 0;
215 apr_status_t status;
216
217 ft_file_t *file_i = fsize->chksum_array[i].file;
218 ft_file_t *file_j = fsize->chksum_array[j].file;
219
220 if (get_comparison_paths(conf, file_i, file_j, &fpathi, &fpathj) != APR_SUCCESS) {
221 DEBUG_ERR("Failed to get comparison paths for %s and %s", file_i->path, file_j->path);
222 return APR_EGENERAL;
223 }
224
225 status = filecmp(conf->pool, fpathi, fpathj, fsize->val, conf->excess_size, &rv);
226 cleanup_comparison_paths(conf, file_i, file_j, fpathi, fpathj);
227
228 if (status != APR_SUCCESS) {
229 if (is_option_set(conf->mask, OPTION_VERBO)) {
230 char errbuf[ERROR_BUFFER_SIZE];
231 (void) fprintf(stderr, "\nskipping %s and %s comparison because: %s\n", file_i->path, file_j->path,
232 apr_strerror(status, errbuf, sizeof(errbuf)));
233 }
234 return APR_SUCCESS; /* Continue processing other pairs */
235 }
236
237 if (rv == 0) {
238 if (is_option_set(conf->mask, OPTION_DRY_RUN)) {
239 fprintf(stderr, "Dry run: would report %s and %s as duplicates.\n", file_i->path, file_j->path);
240 }
241
242 if (!*already_printed) {
243 if (is_option_set(conf->mask, OPTION_SIZED)) {
244 const char *human_size = format_human_size(fsize->val, conf->pool);
245 printf("%sSize: %s%s\n", colors->size, human_size, colors->reset);
246 }
247 format_and_print_duplicate(conf, file_i, colors);
248 *already_printed = 1;
249 }
250
251 printf("%c", conf->sep);
252 format_and_print_duplicate(conf, file_j, colors);
253
254 fsize->chksum_array[j].file = NULL; /* Mark as a twin */
255 fflush(stdout);
256 }
257
258 return APR_SUCCESS;
259}
UTIL debug output macros.
#define DEBUG_ERR(str, arg...)
Display error message at the level error.
Definition debug.h:31
apr_status_t filecmp(apr_pool_t *pool, const char *fname1, const char *fname2, apr_off_t size, apr_off_t excess_size, int *result_out)
Compares two files byte-by-byte to determine if they are identical.
Definition ft_file.c:285
Interface for file comparison and checksum calculation.
apr_status_t ft_report_duplicates(ft_conf_t *conf)
Reports duplicate files in text format to stdout.
Definition ft_report.c:100
int ft_chksum_cmp(const void *chksum1, const void *chksum2)
Comparison function for sorting checksums.
Definition ft_report.c:40
static void format_and_print_duplicate(ft_conf_t *conf, const ft_file_t *file, const reporting_colors_t *colors)
Formats and prints the output for a duplicate file entry.
Definition ft_report.c:199
static void cleanup_comparison_paths(ft_conf_t *conf, ft_file_t *file_i, ft_file_t *file_j, char *fpathi, char *fpathj)
Cleans up temporary files created during archive extraction.
Definition ft_report.c:184
static apr_status_t process_and_report_duplicates_for_file(ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, const reporting_colors_t *colors)
Processes a single file to find and report its duplicates.
Definition ft_report.c:71
static apr_status_t get_comparison_paths(ft_conf_t *conf, ft_file_t *file_i, ft_file_t *file_j, char **fpathi, char **fpathj)
Gets the file paths for comparison, handling archive extraction if needed.
Definition ft_report.c:149
const char * format_human_size(apr_off_t size, apr_pool_t *pool)
Formats a size in bytes into a human-readable string.
Definition human_size.c:13
Utilities for parsing and formatting human-readable file sizes.
void * napr_hash_search(napr_hash_t *hash, const void *key, apr_size_t key_len, apr_uint32_t *hash_value)
Searches the hash table for an item.
Definition napr_hash.c:145
A high-performance hash table implementation built on APR.
void * napr_heap_extract(napr_heap_t *heap)
Removes and returns the element at the top of the heap (the min or max element).
Definition napr_heap.c:122
A generic binary heap implementation (min-heap or max-heap).
The return value from 128-bit hashes.
Definition xxhash.h:1239
Main configuration structure for the ftwin application.
Definition ft_config.h:94
A struct to hold the ANSI color codes for reporting.
Definition ft_report.h:20