ftwin 0.8.10
ft_ignore.c
Go to the documentation of this file.
1
6/*
7 * Copyright (C) 2025 François Pesce : francois.pesce (at) gmail (dot) com
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 */
21
22#include "ft_ignore.h"
23#include "debug.h"
24#include <apr_file_io.h>
25#include <apr_strings.h>
26#include <string.h>
27#include <ctype.h>
28
/**
 * The maximum length of a pattern string.
 *
 * Declared as an enumeration constant rather than a `static const` object so
 * that it is an integer constant expression: arrays dimensioned with it are
 * ordinary fixed-size arrays instead of variable-length arrays.
 */
enum { MAX_PATTERN_LEN = 4096 };
36
41static char *ft_glob_to_pcre(const char *pattern, apr_pool_t *pool, unsigned int *flags)
42{
43 char *result = apr_pcalloc(pool, MAX_PATTERN_LEN);
44 const char *pattern_ptr = pattern;
45 char *result_ptr = result;
46 int starts_with_slash = 0;
47
48 *flags = 0;
49
50 /* Skip negation marker */
51 if (*pattern_ptr == '!') {
52 *flags |= FT_IGNORE_NEGATE;
53 pattern_ptr++;
54 while (isspace(*pattern_ptr)) {
55 pattern_ptr++;
56 }
57 }
58
59 /* Check if pattern starts with / */
60 if (*pattern_ptr == '/') {
61 starts_with_slash = 1;
62 pattern_ptr++;
63 }
64
65 /* Check if pattern ends with / (directory only) */
66 const char *end = pattern_ptr + strlen(pattern_ptr) - 1;
67 while (end > pattern_ptr && isspace(*end)) {
68 end--;
69 }
70 if (*end == '/') {
71 *flags |= FT_IGNORE_DIR_ONLY;
72 }
73
74 /* Build regex - start anchor */
75 if (starts_with_slash) {
76 *result_ptr++ = '^';
77 }
78 else {
79 /* Pattern can match at any level */
80 *result_ptr++ = '(';
81 *result_ptr++ = '^';
82 *result_ptr++ = '|';
83 *result_ptr++ = '/';
84 *result_ptr++ = ')';
85 }
86
87 /* Convert pattern to regex */
88 while (*pattern_ptr) {
89 /* Skip trailing slash for directory-only patterns */
90 if (*pattern_ptr == '/' && *flags & FT_IGNORE_DIR_ONLY && *(pattern_ptr + 1) == '\0') {
91 break;
92 }
93
94 if (*pattern_ptr == '\\' && *(pattern_ptr + 1)) {
95 /* Escaped character */
96 pattern_ptr++;
97 *result_ptr++ = '\\';
98 *result_ptr++ = *pattern_ptr++;
99 }
100 else if (*pattern_ptr == '*') {
101 if (*(pattern_ptr + 1) == '*') {
102 /* Double star matches any number of directories */
103 if (*(pattern_ptr + 2) == '/') {
104 /* Double star with slash pattern */
105 const char *pcre_pattern = "(.*/)?";
106 strcpy(result_ptr, pcre_pattern);
107 result_ptr += strlen(pcre_pattern);
108 pattern_ptr += 3;
109 }
110 else if (*(pattern_ptr - 1) == '/' || pattern_ptr == pattern) {
111 /* Slash double star at end or beginning */
112 const char *pcre_pattern = ".*";
113 strcpy(result_ptr, pcre_pattern);
114 result_ptr += strlen(pcre_pattern);
115 pattern_ptr += 2;
116 }
117 else {
118 /* Treat as single * */
119 const char *pcre_pattern = "[^/]*";
120 strcpy(result_ptr, pcre_pattern);
121 result_ptr += strlen(pcre_pattern);
122 pattern_ptr++;
123 }
124 }
125 else {
126 /* * matches anything except / */
127 const char *pcre_pattern = "[^/]*";
128 strcpy(result_ptr, pcre_pattern);
129 result_ptr += strlen(pcre_pattern);
130 pattern_ptr++;
131 }
132 }
133 else if (*pattern_ptr == '?') {
134 /* ? matches any single character except / */
135 const char *pcre_pattern = "[^/]";
136 strcpy(result_ptr, pcre_pattern);
137 result_ptr += strlen(pcre_pattern);
138 pattern_ptr++;
139 }
140 else if (*pattern_ptr == '[') {
141 /* Character class */
142 *result_ptr++ = '[';
143 pattern_ptr++;
144 if (*pattern_ptr == '!') {
145 *result_ptr++ = '^';
146 pattern_ptr++;
147 }
148 while (*pattern_ptr && *pattern_ptr != ']') {
149 if (*pattern_ptr == '\\' && *(pattern_ptr + 1)) {
150 pattern_ptr++;
151 *result_ptr++ = '\\';
152 }
153 *result_ptr++ = *pattern_ptr++;
154 }
155 if (*pattern_ptr == ']')
156 *result_ptr++ = *pattern_ptr++;
157 }
158 else if (*pattern_ptr == '/') {
159 *result_ptr++ = '/';
160 pattern_ptr++;
161 }
162 else if (strchr(".^$+{}()|", *pattern_ptr)) {
163 /* Escape regex metacharacters */
164 *result_ptr++ = '\\';
165 *result_ptr++ = *pattern_ptr++;
166 }
167 else {
168 *result_ptr++ = *pattern_ptr++;
169 }
170 }
171
172 /* End anchor - match end of path or trailing / for directories */
173 if (*flags & FT_IGNORE_DIR_ONLY) {
174 strcpy(result_ptr, "/?$");
175 }
176 else {
177 *result_ptr++ = '$';
178 *result_ptr = '\0';
179 }
180
181 return result;
182}
183
184ft_ignore_context_t *ft_ignore_context_create(apr_pool_t *pool, ft_ignore_context_t * parent, const char *base_dir)
185{
186 ft_ignore_context_t *ctx = apr_pcalloc(pool, sizeof(ft_ignore_context_t));
187
188 ctx->pool = pool;
189 ctx->parent = parent;
190 ctx->base_dir = apr_pstrdup(pool, base_dir);
191 ctx->base_dir_len = strlen(base_dir);
192 ctx->patterns = apr_array_make(pool, 16, sizeof(ft_ignore_pattern_t *));
193
194 return ctx;
195}
196
197apr_status_t ft_ignore_add_pattern_str(ft_ignore_context_t * ctx, const char *pattern_str)
198{
199 const char *trimmed;
200 unsigned int flags = 0;
201 char *regex_str;
202 pcre *regex;
203 const char *error;
204 int erroffset;
205 ft_ignore_pattern_t *pattern;
206
207 /* Trim whitespace */
208 trimmed = pattern_str;
209 while (isspace(*trimmed))
210 trimmed++;
211
212 /* Skip empty lines and comments */
213 if (*trimmed == '\0' || *trimmed == '#') {
214 return APR_SUCCESS;
215 }
216
217 /* Convert glob to regex */
218 regex_str = ft_glob_to_pcre(trimmed, ctx->pool, &flags);
219
220 /* Compile regex */
221 regex = pcre_compile(regex_str, 0, &error, &erroffset, NULL);
222 if (!regex) {
223 DEBUG_ERR("Failed to compile pattern '%s': %s", trimmed, error);
224 return APR_EGENERAL;
225 }
226
227 /* Create pattern struct */
228 pattern = apr_pcalloc(ctx->pool, sizeof(ft_ignore_pattern_t));
229 pattern->regex = regex;
230 pattern->pattern_str = apr_pstrdup(ctx->pool, trimmed);
231 pattern->flags = flags;
232
233 /* Add to context */
234 APR_ARRAY_PUSH(ctx->patterns, ft_ignore_pattern_t *) = pattern;
235
236 return APR_SUCCESS;
237}
238
239apr_status_t ft_ignore_load_file(ft_ignore_context_t * ctx, const char *filepath)
240{
241 apr_file_t *file;
242 apr_status_t status;
243 char line[MAX_PATTERN_LEN];
244
245 status = apr_file_open(&file, filepath, APR_READ, APR_OS_DEFAULT, ctx->pool);
246 if (status != APR_SUCCESS) {
247 return status;
248 }
249
250 while (apr_file_gets(line, sizeof(line), file) == APR_SUCCESS) {
251 /* Remove newline */
252 apr_size_t len = strlen(line);
253 if (len > 0 && line[len - 1] == '\n') {
254 line[len - 1] = '\0';
255 len--;
256 }
257 if (len > 0 && line[len - 1] == '\r') {
258 line[len - 1] = '\0';
259 }
260
261 ft_ignore_add_pattern_str(ctx, line);
262 }
263
264 (void) apr_file_close(file);
265 return APR_SUCCESS;
266}
267
268ft_ignore_match_result_t ft_ignore_match(ft_ignore_context_t * ctx, const char *fullpath, int is_dir)
269{
270 ft_ignore_context_t *current_ctx;
272 const char *relative_path;
273
274 if (!ctx || !fullpath) {
276 }
277
278 /* Walk up the context hierarchy */
279 for (current_ctx = ctx; current_ctx != NULL; current_ctx = current_ctx->parent) {
280 int i;
281
282 /* Calculate relative path from this context's base_dir */
283 if (strncmp(fullpath, current_ctx->base_dir, current_ctx->base_dir_len) == 0) {
284 relative_path = fullpath + current_ctx->base_dir_len;
285 /* Skip leading slash */
286 while (*relative_path == '/')
287 relative_path++;
288 }
289 else {
290 /* Path not under this context's base, try parent */
291 continue;
292 }
293
294 /* Check patterns in order (last match wins) */
295 for (i = 0; i < current_ctx->patterns->nelts; i++) {
296 ft_ignore_pattern_t *pattern = APR_ARRAY_IDX(current_ctx->patterns, i, ft_ignore_pattern_t *);
297 int match;
298
299 /* Skip directory-only patterns if this is not a directory */
300 if ((pattern->flags & FT_IGNORE_DIR_ONLY) && !is_dir) {
301 continue;
302 }
303
304 /* Try to match */
305 match = pcre_exec(pattern->regex, NULL, relative_path, strlen(relative_path), 0, 0, NULL, 0);
306
307 if (match >= 0) {
308 /* Pattern matched */
309 if (pattern->flags & FT_IGNORE_NEGATE) {
311 }
312 else {
314 }
315 /* Don't break - continue checking for later patterns */
316 }
317 }
318 }
319
320 return result;
321}
UTIL debug output macros.
#define DEBUG_ERR(str, arg...)
Display error message at the level error.
Definition debug.h:31
apr_status_t ft_ignore_add_pattern_str(ft_ignore_context_t *ctx, const char *pattern_str)
Adds a single pattern string to a context.
Definition ft_ignore.c:197
ft_ignore_context_t * ft_ignore_context_create(apr_pool_t *pool, ft_ignore_context_t *parent, const char *base_dir)
Creates a new ignore context.
Definition ft_ignore.c:184
apr_status_t ft_ignore_load_file(ft_ignore_context_t *ctx, const char *filepath)
Loads and parses an ignore file (like .gitignore) into a context.
Definition ft_ignore.c:239
static char * ft_glob_to_pcre(const char *pattern, apr_pool_t *pool, unsigned int *flags)
Convert Git glob pattern to PCRE regex. Handles: *, **, ?, [abc], !, /, escapes.
Definition ft_ignore.c:41
static const size_t MAX_PATTERN_LEN
The maximum length of a pattern string.
Definition ft_ignore.c:35
ft_ignore_match_result_t ft_ignore_match(ft_ignore_context_t *ctx, const char *fullpath, int is_dir)
Checks if a given path should be ignored based on the hierarchical context.
Definition ft_ignore.c:268
Interface for handling hierarchical ignore patterns, similar to .gitignore.
ft_ignore_match_result_t
Result codes for an ignore match operation.
Definition ft_ignore.h:63
@ FT_IGNORE_MATCH_NONE
The path is not matched by any pattern.
Definition ft_ignore.h:64
@ FT_IGNORE_MATCH_WHITELISTED
The path is matched by a negation (whitelist) pattern.
Definition ft_ignore.h:66
@ FT_IGNORE_MATCH_IGNORED
The path is matched by an ignore pattern.
Definition ft_ignore.h:65
Represents the ignore rules for a specific directory and its descendants.
Definition ft_ignore.h:51
apr_array_header_t * patterns
Array of ft_ignore_pattern_t pointers defined at this level.
Definition ft_ignore.h:53
const char * base_dir
The absolute path to the directory this context is anchored to.
Definition ft_ignore.h:54
struct ft_ignore_context_t * parent
Pointer to the parent directory's context, or NULL if root.
Definition ft_ignore.h:52
apr_size_t base_dir_len
The length of the base directory path.
Definition ft_ignore.h:55
apr_pool_t * pool
The memory pool used for allocations within this context.
Definition ft_ignore.h:56
Represents a single compiled ignore pattern.
Definition ft_ignore.h:37
unsigned int flags
Flags for the pattern (e.g., FT_IGNORE_NEGATE).
Definition ft_ignore.h:40
const char * pattern_str
The original, uncompiled pattern string for debugging.
Definition ft_ignore.h:39
pcre * regex
The compiled PCRE pattern.
Definition ft_ignore.h:38