28#include <apr_strings.h>
29#include <apr_file_io.h>
33#include "ft_archive.h"
42 const ft_chksum_t *chk1 = chksum1;
43 const ft_chksum_t *chk2 = chksum2;
46 i = memcmp(&chk1->hash_value, &chk2->hash_value,
sizeof(
ft_hash_t));
49 return chk1->file->prioritized - chk2->file->prioritized;
56static apr_status_t compare_and_report_pair(
ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, apr_size_t j,
73 unsigned char already_printed = 0;
76 if (NULL == fsize->chksum_array[i].file) {
80 for (j = i + 1; j < fsize->nb_files; j++) {
82 if (0 == memcmp(&fsize->chksum_array[i].hash_value, &fsize->chksum_array[j].hash_value,
sizeof(
ft_hash_t))) {
83 if (compare_and_report_pair(conf, fsize, i, j, &already_printed, colors) != APR_SUCCESS) {
93 if (already_printed) {
102 apr_off_t old_size = -1;
103 ft_file_t *file = NULL;
104 ft_fsize_t *fsize = NULL;
105 apr_uint32_t hash_value = 0;
107 apr_uint32_t chksum_array_sz = 0U;
108 int use_color = isatty(STDOUT_FILENO);
110 use_color ? ANSI_COLOR_CYAN ANSI_COLOR_BOLD :
"",
111 use_color ? ANSI_COLOR_BLUE ANSI_COLOR_BOLD :
"",
112 use_color ? ANSI_COLOR_RESET :
""
115 if (is_option_set(conf->mask, OPTION_VERBO)) {
116 (void) fprintf(stderr,
"Reporting duplicate files:\n");
120 if (file->size == old_size) {
123 old_size = file->size;
125 fsize =
napr_hash_search(conf->sizes, &file->size,
sizeof(apr_off_t), &hash_value);
127 chksum_array_sz = FTWIN_MIN(fsize->nb_files, fsize->nb_checksumed);
128 qsort(fsize->chksum_array, chksum_array_sz,
sizeof(ft_chksum_t),
ft_chksum_cmp);
130 for (i = 0; i < fsize->nb_files; i++) {
137 DEBUG_ERR(
"inconsistency error found, no size[%" APR_OFF_T_FMT
"] in hash for file %s", file->size, file->path);
151 if (is_option_set(conf->mask, OPTION_UNTAR)) {
152 if (file_i->subpath) {
153 *fpathi = ft_archive_untar_file(file_i, conf->pool);
159 *fpathi = file_i->path;
161 if (file_j->subpath) {
162 *fpathj = ft_archive_untar_file(file_j, conf->pool);
164 if (file_i->subpath) {
165 (void) apr_file_remove(*fpathi, conf->pool);
171 *fpathj = file_j->path;
175 *fpathi = file_i->path;
176 *fpathj = file_j->path;
186 if (is_option_set(conf->mask, OPTION_UNTAR)) {
187 if (file_i->subpath) {
188 (void) apr_file_remove(fpathi, conf->pool);
190 if (file_j->subpath) {
191 (void) apr_file_remove(fpathj, conf->pool);
201 if (is_option_set(conf->mask, OPTION_UNTAR) && file->subpath) {
202 printf(
"%s%s%c%s%s", colors->path, file->path, (
':' != conf->sep) ?
':' :
'|', file->subpath, colors->reset);
205 printf(
"%s%s%s", colors->path, file->path, colors->reset);
209static apr_status_t compare_and_report_pair(
ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, apr_size_t j,
217 ft_file_t *file_i = fsize->chksum_array[i].file;
218 ft_file_t *file_j = fsize->chksum_array[j].file;
221 DEBUG_ERR(
"Failed to get comparison paths for %s and %s", file_i->path, file_j->path);
225 status =
filecmp(conf->pool, fpathi, fpathj, fsize->val, conf->excess_size, &rv);
228 if (status != APR_SUCCESS) {
229 if (is_option_set(conf->mask, OPTION_VERBO)) {
230 char errbuf[ERROR_BUFFER_SIZE];
231 (void) fprintf(stderr,
"\nskipping %s and %s comparison because: %s\n", file_i->path, file_j->path,
232 apr_strerror(status, errbuf,
sizeof(errbuf)));
238 if (is_option_set(conf->mask, OPTION_DRY_RUN)) {
239 fprintf(stderr,
"Dry run: would report %s and %s as duplicates.\n", file_i->path, file_j->path);
242 if (!*already_printed) {
243 if (is_option_set(conf->mask, OPTION_SIZED)) {
245 printf(
"%sSize: %s%s\n", colors->size, human_size, colors->reset);
248 *already_printed = 1;
251 printf(
"%c", conf->sep);
254 fsize->chksum_array[j].file = NULL;
UTIL debug output macros.
#define DEBUG_ERR(str, arg...)
Display error message at the level error.
apr_status_t filecmp(apr_pool_t *pool, const char *fname1, const char *fname2, apr_off_t size, apr_off_t excess_size, int *result_out)
Compares two files byte-by-byte to determine if they are identical.
Interface for file comparison and checksum calculation.
apr_status_t ft_report_duplicates(ft_conf_t *conf)
Reports duplicate files in text format to stdout.
int ft_chksum_cmp(const void *chksum1, const void *chksum2)
Comparison function for sorting checksums.
static void format_and_print_duplicate(ft_conf_t *conf, const ft_file_t *file, const reporting_colors_t *colors)
Formats and prints the output for a duplicate file entry.
static void cleanup_comparison_paths(ft_conf_t *conf, ft_file_t *file_i, ft_file_t *file_j, char *fpathi, char *fpathj)
Cleans up temporary files created during archive extraction.
static apr_status_t process_and_report_duplicates_for_file(ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, const reporting_colors_t *colors)
Processes a single file to find and report its duplicates.
static apr_status_t get_comparison_paths(ft_conf_t *conf, ft_file_t *file_i, ft_file_t *file_j, char **fpathi, char **fpathj)
Gets the file paths for comparison, handling archive extraction if needed.
const char * format_human_size(apr_off_t size, apr_pool_t *pool)
Formats a size in bytes into a human-readable string.
Utilities for parsing and formatting human-readable file sizes.
void * napr_hash_search(napr_hash_t *hash, const void *key, apr_size_t key_len, apr_uint32_t *hash_value)
Searches the hash table for an item.
A high-performance hash table implementation built on APR.
void * napr_heap_extract(napr_heap_t *heap)
Removes and returns the element at the top of the heap (the min or max element).
A generic binary heap implementation (min-heap or max-heap).
The return value from 128-bit hashes.
Main configuration structure for the ftwin application.
A struct to hold the ANSI color codes for reporting.