LCOV - code coverage report
Current view: top level - src - ft_report.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 68.2 % 107 73
Test Date: 2025-10-15 21:43:52 Functions: 100.0 % 7 7

            Line data    Source code
       1              : /**
       2              :  * @file ft_report.c
       3              :  * @brief Text-based duplicate reporting functions.
       4              :  * @ingroup Reporting
       5              :  */
       6              : /*
       7              :  * Copyright (C) 2007 François Pesce : francois.pesce (at) gmail (dot) com
       8              :  *
       9              :  * Licensed under the Apache License, Version 2.0 (the "License");
      10              :  * you may not use this file except in compliance with the License.
      11              :  * You may obtain a copy of the License at
      12              :  *
      13              :  *      http://www.apache.org/licenses/LICENSE-2.0
      14              :  *
      15              :  * Unless required by applicable law or agreed to in writing, software
      16              :  * distributed under the License is distributed on an "AS IS" BASIS,
      17              :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      18              :  * See the License for the specific language governing permissions and
      19              :  * limitations under the License.
      20              :  */
      21              : 
      22              : #include "ft_report.h"
      23              : 
      24              : #include <stdio.h>
      25              : #include <string.h>
      26              : #include <unistd.h>
      27              : 
      28              : #include <apr_strings.h>
      29              : #include <apr_file_io.h>
      30              : 
      31              : #include "config.h"
      32              : #include "debug.h"
      33              : #include "ft_archive.h"
      34              : #include "ft_config.h"
      35              : #include "ft_file.h"
      36              : #include "human_size.h"
      37              : #include "napr_hash.h"
      38              : #include "napr_heap.h"
      39              : 
      40           72 : int ft_chksum_cmp(const void *chksum1, const void *chksum2)
      41              : {
      42           72 :     const ft_chksum_t *chk1 = chksum1;
      43           72 :     const ft_chksum_t *chk2 = chksum2;
      44              :     int i;
      45              : 
      46           72 :     i = memcmp(&chk1->hash_value, &chk2->hash_value, sizeof(ft_hash_t));
      47              : 
      48           72 :     if (0 == i) {
      49           72 :         return chk1->file->prioritized - chk2->file->prioritized;
      50              :     }
      51              : 
      52            0 :     return i;
      53              : }
      54              : 
      55              : /* Forward declaration for helper function */
      56              : static apr_status_t compare_and_report_pair(ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, apr_size_t j,
      57              :                                             unsigned char *already_printed, const reporting_colors_t *colors);
      58              : 
      59              : /**
      60              :  * @brief Processes a single file to find and report its duplicates.
      61              :  *
      62              :  * This helper function is called by ft_report_duplicates. It iterates through
      63              :  * the checksum array for a given file size, compares subsequent files with
      64              :  * the same hash, and calls compare_and_report_pair to verify and report
      65              :  * true duplicates. This simplifies the main reporting loop by encapsulating
      66              :  * the logic for handling a single file's potential duplicates.
      67              :  *
      68              :  * @return APR_SUCCESS on success, or an error status if reporting fails.
      69              :  */
      70              : static apr_status_t
      71          104 : process_and_report_duplicates_for_file(ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, const reporting_colors_t *colors)
      72              : {
      73          104 :     unsigned char already_printed = 0;
      74              :     apr_size_t j;
      75              : 
      76          104 :     if (NULL == fsize->chksum_array[i].file) {
      77           69 :         return APR_SUCCESS;     /* Already processed as a duplicate */
      78              :     }
      79              : 
      80          104 :     for (j = i + 1; j < fsize->nb_files; j++) {
      81              :         /* If hashes match, perform a full comparison */
      82           69 :         if (0 == memcmp(&fsize->chksum_array[i].hash_value, &fsize->chksum_array[j].hash_value, sizeof(ft_hash_t))) {
      83           69 :             if (compare_and_report_pair(conf, fsize, i, j, &already_printed, colors) != APR_SUCCESS) {
      84            0 :                 return APR_EGENERAL;
      85              :             }
      86              :         }
      87              :         else {
      88              :             /* Hashes are ordered, so we can break early */
      89            0 :             break;
      90              :         }
      91              :     }
      92              : 
      93           35 :     if (already_printed) {
      94           35 :         printf("\n\n");
      95              :     }
      96              : 
      97           35 :     return APR_SUCCESS;
      98              : }
      99              : 
     100           16 : apr_status_t ft_report_duplicates(ft_conf_t *conf)
     101              : {
     102           16 :     apr_off_t old_size = -1;
     103           16 :     ft_file_t *file = NULL;
     104           16 :     ft_fsize_t *fsize = NULL;
     105           16 :     apr_uint32_t hash_value = 0;
     106           16 :     apr_size_t i = 0;
     107           16 :     apr_uint32_t chksum_array_sz = 0U;
     108           16 :     int use_color = isatty(STDOUT_FILENO);
     109           48 :     const reporting_colors_t colors = {
     110           16 :         use_color ? ANSI_COLOR_CYAN ANSI_COLOR_BOLD : "",
     111           16 :         use_color ? ANSI_COLOR_BLUE ANSI_COLOR_BOLD : "",
     112           16 :         use_color ? ANSI_COLOR_RESET : ""
     113              :     };
     114              : 
     115           16 :     if (is_option_set(conf->mask, OPTION_VERBO)) {
     116            0 :         (void) fprintf(stderr, "Reporting duplicate files:\n");
     117              :     }
     118              : 
     119          120 :     while (NULL != (file = napr_heap_extract(conf->heap))) {
     120          104 :         if (file->size == old_size) {
     121           69 :             continue;
     122              :         }
     123           35 :         old_size = file->size;
     124              : 
     125           35 :         fsize = napr_hash_search(conf->sizes, &file->size, sizeof(apr_off_t), &hash_value);
     126           35 :         if (NULL != fsize) {
     127           35 :             chksum_array_sz = FTWIN_MIN(fsize->nb_files, fsize->nb_checksumed);
     128           35 :             qsort(fsize->chksum_array, chksum_array_sz, sizeof(ft_chksum_t), ft_chksum_cmp);
     129              : 
     130          139 :             for (i = 0; i < fsize->nb_files; i++) {
     131          104 :                 if (process_and_report_duplicates_for_file(conf, fsize, i, &colors) != APR_SUCCESS) {
     132            0 :                     return APR_EGENERAL;
     133              :                 }
     134              :             }
     135              :         }
     136              :         else {
     137            0 :             DEBUG_ERR("inconsistency error found, no size[%" APR_OFF_T_FMT "] in hash for file %s", file->size, file->path);
     138            0 :             return APR_EGENERAL;
     139              :         }
     140              :     }
     141              : 
     142           16 :     return APR_SUCCESS;
     143              : }
     144              : 
     145              : /**
     146              :  * @brief Gets the file paths for comparison, handling archive extraction if needed.
     147              :  * @return APR_SUCCESS on success, or an error status if extraction fails.
     148              :  */
     149           69 : static apr_status_t get_comparison_paths(ft_conf_t *conf, ft_file_t *file_i, ft_file_t *file_j, char **fpathi, char **fpathj)
     150              : {
     151           69 :     if (is_option_set(conf->mask, OPTION_UNTAR)) {
     152            0 :         if (file_i->subpath) {
     153            0 :             *fpathi = ft_archive_untar_file(file_i, conf->pool);
     154            0 :             if (!*fpathi) {
     155            0 :                 return APR_EGENERAL;
     156              :             }
     157              :         }
     158              :         else {
     159            0 :             *fpathi = file_i->path;
     160              :         }
     161            0 :         if (file_j->subpath) {
     162            0 :             *fpathj = ft_archive_untar_file(file_j, conf->pool);
     163            0 :             if (!*fpathj) {
     164            0 :                 if (file_i->subpath) {
     165            0 :                     (void) apr_file_remove(*fpathi, conf->pool);
     166              :                 }
     167            0 :                 return APR_EGENERAL;
     168              :             }
     169              :         }
     170              :         else {
     171            0 :             *fpathj = file_j->path;
     172              :         }
     173              :     }
     174              :     else {
     175           69 :         *fpathi = file_i->path;
     176           69 :         *fpathj = file_j->path;
     177              :     }
     178           69 :     return APR_SUCCESS;
     179              : }
     180              : 
     181              : /**
     182              :  * @brief Cleans up temporary files created during archive extraction.
     183              :  */
     184           69 : static void cleanup_comparison_paths(ft_conf_t *conf, ft_file_t *file_i, ft_file_t *file_j, char *fpathi, char *fpathj)
     185              : {
     186           69 :     if (is_option_set(conf->mask, OPTION_UNTAR)) {
     187            0 :         if (file_i->subpath) {
     188            0 :             (void) apr_file_remove(fpathi, conf->pool);
     189              :         }
     190            0 :         if (file_j->subpath) {
     191            0 :             (void) apr_file_remove(fpathj, conf->pool);
     192              :         }
     193              :     }
     194           69 : }
     195              : 
     196              : /**
     197              :  * @brief Formats and prints the output for a duplicate file entry.
     198              :  */
     199          104 : static void format_and_print_duplicate(ft_conf_t *conf, const ft_file_t *file, const reporting_colors_t *colors)
     200              : {
     201          104 :     if (is_option_set(conf->mask, OPTION_UNTAR) && file->subpath) {
     202            0 :         printf("%s%s%c%s%s", colors->path, file->path, (':' != conf->sep) ? ':' : '|', file->subpath, colors->reset);
     203              :     }
     204              :     else {
     205          104 :         printf("%s%s%s", colors->path, file->path, colors->reset);
     206              :     }
     207          104 : }
     208              : 
     209           69 : static apr_status_t compare_and_report_pair(ft_conf_t *conf, ft_fsize_t *fsize, apr_size_t i, apr_size_t j,
     210              :                                             unsigned char *already_printed, const reporting_colors_t *colors)
     211              : {
     212           69 :     char *fpathi = NULL;
     213           69 :     char *fpathj = NULL;
     214           69 :     int rv = 0;
     215              :     apr_status_t status;
     216              : 
     217           69 :     ft_file_t *file_i = fsize->chksum_array[i].file;
     218           69 :     ft_file_t *file_j = fsize->chksum_array[j].file;
     219              : 
     220           69 :     if (get_comparison_paths(conf, file_i, file_j, &fpathi, &fpathj) != APR_SUCCESS) {
     221            0 :         DEBUG_ERR("Failed to get comparison paths for %s and %s", file_i->path, file_j->path);
     222            0 :         return APR_EGENERAL;
     223              :     }
     224              : 
     225           69 :     status = filecmp(conf->pool, fpathi, fpathj, fsize->val, conf->excess_size, &rv);
     226           69 :     cleanup_comparison_paths(conf, file_i, file_j, fpathi, fpathj);
     227              : 
     228           69 :     if (status != APR_SUCCESS) {
     229            0 :         if (is_option_set(conf->mask, OPTION_VERBO)) {
     230            0 :             char errbuf[ERROR_BUFFER_SIZE];
     231            0 :             (void) fprintf(stderr, "\nskipping %s and %s comparison because: %s\n", file_i->path, file_j->path,
     232            0 :                            apr_strerror(status, errbuf, sizeof(errbuf)));
     233              :         }
     234            0 :         return APR_SUCCESS;     /* Continue processing other pairs */
     235              :     }
     236              : 
     237           69 :     if (rv == 0) {
     238           69 :         if (is_option_set(conf->mask, OPTION_DRY_RUN)) {
     239            0 :             fprintf(stderr, "Dry run: would report %s and %s as duplicates.\n", file_i->path, file_j->path);
     240              :         }
     241              : 
     242           69 :         if (!*already_printed) {
     243           35 :             if (is_option_set(conf->mask, OPTION_SIZED)) {
     244            0 :                 const char *human_size = format_human_size(fsize->val, conf->pool);
     245            0 :                 printf("%sSize: %s%s\n", colors->size, human_size, colors->reset);
     246              :             }
     247           35 :             format_and_print_duplicate(conf, file_i, colors);
     248           35 :             *already_printed = 1;
     249              :         }
     250              : 
     251           69 :         printf("%c", conf->sep);
     252           69 :         format_and_print_duplicate(conf, file_j, colors);
     253              : 
     254           69 :         fsize->chksum_array[j].file = NULL;  /* Mark as a twin */
     255           69 :         fflush(stdout);
     256              :     }
     257              : 
     258           69 :     return APR_SUCCESS;
     259              : }
        

Generated by: LCOV version 2.0-1