LCOV - code coverage report
Current view: top level - src - ft_ignore.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 73.8 % 145 107
Test Date: 2025-10-15 21:43:52 Functions: 100.0 % 5 5

            Line data    Source code
       1              : /**
       2              :  * @file ft_ignore.c
       3              :  * @brief Implementation of the hierarchical .gitignore-style pattern matching logic.
       4              :  * @ingroup Utilities
       5              :  */
       6              : /*
       7              :  * Copyright (C) 2025 François Pesce : francois.pesce (at) gmail (dot) com
       8              :  *
       9              :  * Licensed under the Apache License, Version 2.0 (the "License");
      10              :  * you may not use this file except in compliance with the License.
      11              :  * You may obtain a copy of the License at
      12              :  *
      13              :  *      http://www.apache.org/licenses/LICENSE-2.0
      14              :  *
      15              :  * Unless required by applicable law or agreed to in writing, software
      16              :  * distributed under the License is distributed on an "AS IS" BASIS,
      17              :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      18              :  * See the License for the specific language governing permissions and
      19              :  * limitations under the License.
      20              :  */
      21              : 
      22              : #include "ft_ignore.h"
      23              : #include "debug.h"
      24              : #include <apr_file_io.h>
      25              : #include <apr_strings.h>
      26              : #include <string.h>
      27              : #include <ctype.h>
      28              : 
      /**
       * @brief The maximum length of a pattern string.
       *
       * This constant defines the maximum buffer size, in bytes and including the
       * terminating NUL, for reading and processing a single pattern from a
       * .gitignore file.
       *
       * It is an enumeration constant rather than a `static const` object so that
       * it is an integer constant expression: an array declared with this size is
       * an ordinary fixed-size array, not a variable length array.
       */
      enum { MAX_PATTERN_LEN = 4096 };
      36              : 
      37              : /**
      38              :  * Convert Git glob pattern to PCRE regex
      39              :  * Handles: *, **, ?, [abc], !, /, escapes
      40              :  */
      41          374 : static char *ft_glob_to_pcre(const char *pattern, apr_pool_t *pool, unsigned int *flags)
      42              : {
      43          374 :     char *result = apr_pcalloc(pool, MAX_PATTERN_LEN);
      44          374 :     const char *pattern_ptr = pattern;
      45          374 :     char *result_ptr = result;
      46          374 :     int starts_with_slash = 0;
      47              : 
      48          374 :     *flags = 0;
      49              : 
      50              :     /* Skip negation marker */
      51          374 :     if (*pattern_ptr == '!') {
      52            2 :         *flags |= FT_IGNORE_NEGATE;
      53            2 :         pattern_ptr++;
      54            2 :         while (isspace(*pattern_ptr)) {
      55            0 :             pattern_ptr++;
      56              :         }
      57              :     }
      58              : 
      59              :     /* Check if pattern starts with / */
      60          374 :     if (*pattern_ptr == '/') {
      61            1 :         starts_with_slash = 1;
      62            1 :         pattern_ptr++;
      63              :     }
      64              : 
      65              :     /* Check if pattern ends with / (directory only) */
      66          374 :     const char *end = pattern_ptr + strlen(pattern_ptr) - 1;
      67          374 :     while (end > pattern_ptr && isspace(*end)) {
      68            0 :         end--;
      69              :     }
      70          374 :     if (*end == '/') {
      71          225 :         *flags |= FT_IGNORE_DIR_ONLY;
      72              :     }
      73              : 
      74              :     /* Build regex - start anchor */
      75          374 :     if (starts_with_slash) {
      76            1 :         *result_ptr++ = '^';
      77              :     }
      78              :     else {
      79              :         /* Pattern can match at any level */
      80          373 :         *result_ptr++ = '(';
      81          373 :         *result_ptr++ = '^';
      82          373 :         *result_ptr++ = '|';
      83          373 :         *result_ptr++ = '/';
      84          373 :         *result_ptr++ = ')';
      85              :     }
      86              : 
      87              :     /* Convert pattern to regex */
      88         2375 :     while (*pattern_ptr) {
      89              :         /* Skip trailing slash for directory-only patterns */
      90         2226 :         if (*pattern_ptr == '/' && *flags & FT_IGNORE_DIR_ONLY && *(pattern_ptr + 1) == '\0') {
      91          225 :             break;
      92              :         }
      93              : 
      94         2001 :         if (*pattern_ptr == '\\' && *(pattern_ptr + 1)) {
      95              :             /* Escaped character */
      96            0 :             pattern_ptr++;
      97            0 :             *result_ptr++ = '\\';
      98            0 :             *result_ptr++ = *pattern_ptr++;
      99              :         }
     100         2001 :         else if (*pattern_ptr == '*') {
     101          113 :             if (*(pattern_ptr + 1) == '*') {
     102              :                 /* Double star matches any number of directories */
     103            1 :                 if (*(pattern_ptr + 2) == '/') {
     104              :                     /* Double star with slash pattern */
     105            1 :                     const char *pcre_pattern = "(.*/)?";
     106            1 :                     strcpy(result_ptr, pcre_pattern);
     107            1 :                     result_ptr += strlen(pcre_pattern);
     108            1 :                     pattern_ptr += 3;
     109              :                 }
     110            0 :                 else if (*(pattern_ptr - 1) == '/' || pattern_ptr == pattern) {
     111              :                     /* Slash double star at end or beginning */
     112            0 :                     const char *pcre_pattern = ".*";
     113            0 :                     strcpy(result_ptr, pcre_pattern);
     114            0 :                     result_ptr += strlen(pcre_pattern);
     115            0 :                     pattern_ptr += 2;
     116              :                 }
     117              :                 else {
     118              :                     /* Treat as single * */
     119            0 :                     const char *pcre_pattern = "[^/]*";
     120            0 :                     strcpy(result_ptr, pcre_pattern);
     121            0 :                     result_ptr += strlen(pcre_pattern);
     122            0 :                     pattern_ptr++;
     123              :                 }
     124              :             }
     125              :             else {
     126              :                 /* * matches anything except / */
     127          112 :                 const char *pcre_pattern = "[^/]*";
     128          112 :                 strcpy(result_ptr, pcre_pattern);
     129          112 :                 result_ptr += strlen(pcre_pattern);
     130          112 :                 pattern_ptr++;
     131              :             }
     132              :         }
     133         1888 :         else if (*pattern_ptr == '?') {
     134              :             /* ? matches any single character except / */
     135            0 :             const char *pcre_pattern = "[^/]";
     136            0 :             strcpy(result_ptr, pcre_pattern);
     137            0 :             result_ptr += strlen(pcre_pattern);
     138            0 :             pattern_ptr++;
     139              :         }
     140         1888 :         else if (*pattern_ptr == '[') {
     141              :             /* Character class */
     142            0 :             *result_ptr++ = '[';
     143            0 :             pattern_ptr++;
     144            0 :             if (*pattern_ptr == '!') {
     145            0 :                 *result_ptr++ = '^';
     146            0 :                 pattern_ptr++;
     147              :             }
     148            0 :             while (*pattern_ptr && *pattern_ptr != ']') {
     149            0 :                 if (*pattern_ptr == '\\' && *(pattern_ptr + 1)) {
     150            0 :                     pattern_ptr++;
     151            0 :                     *result_ptr++ = '\\';
     152              :                 }
     153            0 :                 *result_ptr++ = *pattern_ptr++;
     154              :             }
     155            0 :             if (*pattern_ptr == ']')
     156            0 :                 *result_ptr++ = *pattern_ptr++;
     157              :         }
     158         1888 :         else if (*pattern_ptr == '/') {
     159            0 :             *result_ptr++ = '/';
     160            0 :             pattern_ptr++;
     161              :         }
     162         1888 :         else if (strchr(".^$+{}()|", *pattern_ptr)) {
     163              :             /* Escape regex metacharacters */
     164          235 :             *result_ptr++ = '\\';
     165          235 :             *result_ptr++ = *pattern_ptr++;
     166              :         }
     167              :         else {
     168         1653 :             *result_ptr++ = *pattern_ptr++;
     169              :         }
     170              :     }
     171              : 
     172              :     /* End anchor - match end of path or trailing / for directories */
     173          374 :     if (*flags & FT_IGNORE_DIR_ONLY) {
     174          225 :         strcpy(result_ptr, "/?$");
     175              :     }
     176              :     else {
     177          149 :         *result_ptr++ = '$';
     178          149 :         *result_ptr = '\0';
     179              :     }
     180              : 
     181          374 :     return result;
     182              : }
     183              : 
     184           28 : ft_ignore_context_t *ft_ignore_context_create(apr_pool_t *pool, ft_ignore_context_t * parent, const char *base_dir)
     185              : {
     186           28 :     ft_ignore_context_t *ctx = apr_pcalloc(pool, sizeof(ft_ignore_context_t));
     187              : 
     188           28 :     ctx->pool = pool;
     189           28 :     ctx->parent = parent;
     190           28 :     ctx->base_dir = apr_pstrdup(pool, base_dir);
     191           28 :     ctx->base_dir_len = strlen(base_dir);
     192           28 :     ctx->patterns = apr_array_make(pool, 16, sizeof(ft_ignore_pattern_t *));
     193              : 
     194           28 :     return ctx;
     195              : }
     196              : 
     197          376 : apr_status_t ft_ignore_add_pattern_str(ft_ignore_context_t * ctx, const char *pattern_str)
     198              : {
     199              :     const char *trimmed;
     200          376 :     unsigned int flags = 0;
     201              :     char *regex_str;
     202              :     pcre *regex;
     203              :     const char *error;
     204              :     int erroffset;
     205              :     ft_ignore_pattern_t *pattern;
     206              : 
     207              :     /* Trim whitespace */
     208          376 :     trimmed = pattern_str;
     209          376 :     while (isspace(*trimmed))
     210            0 :         trimmed++;
     211              : 
     212              :     /* Skip empty lines and comments */
     213          376 :     if (*trimmed == '\0' || *trimmed == '#') {
     214            2 :         return APR_SUCCESS;
     215              :     }
     216              : 
     217              :     /* Convert glob to regex */
     218          374 :     regex_str = ft_glob_to_pcre(trimmed, ctx->pool, &flags);
     219              : 
     220              :     /* Compile regex */
     221          374 :     regex = pcre_compile(regex_str, 0, &error, &erroffset, NULL);
     222          374 :     if (!regex) {
     223            0 :         DEBUG_ERR("Failed to compile pattern '%s': %s", trimmed, error);
     224            0 :         return APR_EGENERAL;
     225              :     }
     226              : 
     227              :     /* Create pattern struct */
     228          374 :     pattern = apr_pcalloc(ctx->pool, sizeof(ft_ignore_pattern_t));
     229          374 :     pattern->regex = regex;
     230          374 :     pattern->pattern_str = apr_pstrdup(ctx->pool, trimmed);
     231          374 :     pattern->flags = flags;
     232              : 
     233              :     /* Add to context */
     234          374 :     APR_ARRAY_PUSH(ctx->patterns, ft_ignore_pattern_t *) = pattern;
     235              : 
     236          374 :     return APR_SUCCESS;
     237              : }
     238              : 
     239            1 : apr_status_t ft_ignore_load_file(ft_ignore_context_t * ctx, const char *filepath)
     240            1 : {
     241              :     apr_file_t *file;
     242              :     apr_status_t status;
     243            1 :     char line[MAX_PATTERN_LEN];
     244              : 
     245            1 :     status = apr_file_open(&file, filepath, APR_READ, APR_OS_DEFAULT, ctx->pool);
     246            1 :     if (status != APR_SUCCESS) {
     247            0 :         return status;
     248              :     }
     249              : 
     250            6 :     while (apr_file_gets(line, sizeof(line), file) == APR_SUCCESS) {
     251              :         /* Remove newline */
     252            5 :         apr_size_t len = strlen(line);
     253            5 :         if (len > 0 && line[len - 1] == '\n') {
     254            5 :             line[len - 1] = '\0';
     255            5 :             len--;
     256              :         }
     257            5 :         if (len > 0 && line[len - 1] == '\r') {
     258            0 :             line[len - 1] = '\0';
     259              :         }
     260              : 
     261            5 :         ft_ignore_add_pattern_str(ctx, line);
     262              :     }
     263              : 
     264            1 :     (void) apr_file_close(file);
     265            1 :     return APR_SUCCESS;
     266              : }
     267              : 
     268          153 : ft_ignore_match_result_t ft_ignore_match(ft_ignore_context_t * ctx, const char *fullpath, int is_dir)
     269              : {
     270              :     ft_ignore_context_t *current_ctx;
     271          153 :     ft_ignore_match_result_t result = FT_IGNORE_MATCH_NONE;
     272              :     const char *relative_path;
     273              : 
     274          153 :     if (!ctx || !fullpath) {
     275            0 :         return FT_IGNORE_MATCH_NONE;
     276              :     }
     277              : 
     278              :     /* Walk up the context hierarchy */
     279          309 :     for (current_ctx = ctx; current_ctx != NULL; current_ctx = current_ctx->parent) {
     280              :         int i;
     281              : 
     282              :         /* Calculate relative path from this context's base_dir */
     283          156 :         if (strncmp(fullpath, current_ctx->base_dir, current_ctx->base_dir_len) == 0) {
     284           31 :             relative_path = fullpath + current_ctx->base_dir_len;
     285              :             /* Skip leading slash */
     286           59 :             while (*relative_path == '/')
     287           28 :                 relative_path++;
     288              :         }
     289              :         else {
     290              :             /* Path not under this context's base, try parent */
     291          125 :             continue;
     292              :         }
     293              : 
     294              :         /* Check patterns in order (last match wins) */
     295          135 :         for (i = 0; i < current_ctx->patterns->nelts; i++) {
     296          104 :             ft_ignore_pattern_t *pattern = APR_ARRAY_IDX(current_ctx->patterns, i, ft_ignore_pattern_t *);
     297              :             int match;
     298              : 
     299              :             /* Skip directory-only patterns if this is not a directory */
     300          104 :             if ((pattern->flags & FT_IGNORE_DIR_ONLY) && !is_dir) {
     301           42 :                 continue;
     302              :             }
     303              : 
     304              :             /* Try to match */
     305           62 :             match = pcre_exec(pattern->regex, NULL, relative_path, strlen(relative_path), 0, 0, NULL, 0);
     306              : 
     307           62 :             if (match >= 0) {
     308              :                 /* Pattern matched */
     309           21 :                 if (pattern->flags & FT_IGNORE_NEGATE) {
     310            2 :                     result = FT_IGNORE_MATCH_WHITELISTED;
     311              :                 }
     312              :                 else {
     313           19 :                     result = FT_IGNORE_MATCH_IGNORED;
     314              :                 }
     315              :                 /* Don't break - continue checking for later patterns */
     316              :             }
     317              :         }
     318              :     }
     319              : 
     320          153 :     return result;
     321              : }
        

Generated by: LCOV version 2.0-1