Line data Source code
1 : /**
2 : * @file ft_file.c
3 : * @brief Functions for file comparison, checksum calculation (XXH128), and I/O handling.
4 : * @ingroup CoreLogic
5 : */
6 : /*
7 : *
8 : * Copyright (C) 2007 François Pesce : francois.pesce (at) gmail (dot) com
9 : *
10 : * Licensed under the Apache License, Version 2.0 (the "License");
11 : * you may not use this file except in compliance with the License.
12 : * You may obtain a copy of the License at
13 : *
14 : * http://www.apache.org/licenses/LICENSE-2.0
15 : *
16 : * Unless required by applicable law or agreed to in writing, software
17 : * distributed under the License is distributed on an "AS IS" BASIS,
18 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 : * See the License for the specific language governing permissions and
20 : * limitations under the License.
21 : */
22 :
23 : #include <apr_file_io.h>
24 : #include <apr_mmap.h>
25 :
26 : #include "checksum.h"
27 : #include "debug.h"
28 : #include "ft_file.h"
29 : #include "ft_config.h"
30 :
31 : static apr_status_t checksum_big_file(const char *filename, apr_off_t size, ft_hash_t *hash_out, apr_pool_t *gc_pool);
32 : static apr_status_t big_filecmp(apr_pool_t *pool, const char *fname1, const char *fname2, apr_off_t size, int *result_out);
33 :
/**
 * @brief The chunk size for processing large files.
 *
 * Size, in bytes, of each buffer used when reading and comparing large
 * files chunk by chunk. Using a larger chunk size can improve I/O
 * performance but will increase memory usage.
 *
 * This is an enumeration constant rather than a `static const` object so
 * that it is an integer constant expression: arrays declared with it are
 * ordinary fixed-size arrays instead of variable-length arrays.
 */
enum { HUGE_LEN = 64 * 1024 };
42 :
43 99 : static apr_status_t checksum_small_file(const char *filename, apr_off_t size, ft_hash_t *hash_out, apr_pool_t *gc_pool)
44 99 : {
45 99 : char errbuf[CHAR_MAX_VAL];
46 99 : apr_file_t *file_descriptor = NULL;
47 99 : apr_mmap_t *memory_map = NULL;
48 99 : memset(errbuf, 0, sizeof(errbuf));
49 99 : apr_status_t status = APR_SUCCESS;
50 :
51 99 : status = apr_file_open(&file_descriptor, filename, APR_READ | APR_BINARY, APR_OS_DEFAULT, gc_pool);
52 99 : if (APR_SUCCESS != status) {
53 0 : return status;
54 : }
55 :
56 99 : status = apr_mmap_create(&memory_map, file_descriptor, 0, (apr_size_t) size, APR_MMAP_READ, gc_pool);
57 99 : if (APR_SUCCESS != status) {
58 1 : (void) apr_file_close(file_descriptor);
59 1 : return checksum_big_file(filename, size, hash_out, gc_pool);
60 : }
61 :
62 98 : *hash_out = XXH3_128bits(memory_map->mm, (size_t) size);
63 :
64 98 : status = apr_mmap_delete(memory_map);
65 98 : if (APR_SUCCESS != status) {
66 0 : DEBUG_ERR("error calling apr_mmap_delete: %s", apr_strerror(status, errbuf, sizeof(errbuf)));
67 0 : (void) apr_file_close(file_descriptor);
68 0 : return status;
69 : }
70 98 : status = apr_file_close(file_descriptor);
71 98 : if (APR_SUCCESS != status) {
72 0 : DEBUG_ERR("error calling apr_file_close: %s", apr_strerror(status, errbuf, sizeof(errbuf)));
73 0 : return status;
74 : }
75 :
76 98 : return APR_SUCCESS;
77 : }
78 :
79 7 : static apr_status_t checksum_big_file(const char *filename, apr_off_t size, ft_hash_t *hash_out, apr_pool_t *gc_pool)
80 7 : {
81 7 : unsigned char data_chunk[HUGE_LEN];
82 7 : char errbuf[CHAR_MAX_VAL];
83 : // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
84 : apr_size_t rbytes;
85 7 : apr_file_t *file_descriptor = NULL;
86 7 : memset(data_chunk, 0, sizeof(data_chunk));
87 7 : memset(errbuf, 0, sizeof(errbuf));
88 7 : apr_status_t status = APR_SUCCESS;
89 7 : XXH3_state_t *const state = XXH3_createState();
90 :
91 7 : if (state == NULL) {
92 0 : return APR_ENOMEM;
93 : }
94 7 : XXH3_128bits_reset(state);
95 :
96 7 : status = apr_file_open(&file_descriptor, filename, APR_READ | APR_BINARY, APR_OS_DEFAULT, gc_pool);
97 7 : if (APR_SUCCESS != status) {
98 0 : XXH3_freeState(state);
99 0 : return status;
100 : }
101 :
102 : do {
103 12 : rbytes = HUGE_LEN;
104 12 : status = apr_file_read(file_descriptor, data_chunk, &rbytes);
105 12 : if ((APR_SUCCESS == status || (APR_EOF == status && rbytes > 0))) {
106 5 : if (XXH3_128bits_update(state, data_chunk, rbytes) == XXH_ERROR) {
107 0 : DEBUG_ERR("Error during hash update for file: %s", filename);
108 0 : XXH3_freeState(state);
109 0 : (void) apr_file_close(file_descriptor);
110 0 : return APR_EGENERAL;
111 : }
112 : }
113 12 : } while (APR_SUCCESS == status);
114 :
115 7 : if (APR_EOF != status) {
116 0 : DEBUG_ERR("unable to read(%s, O_RDONLY), skipping: %s", filename, apr_strerror(status, errbuf, sizeof(errbuf)));
117 0 : XXH3_freeState(state);
118 0 : (void) apr_file_close(file_descriptor);
119 0 : return status;
120 : }
121 :
122 7 : *hash_out = XXH3_128bits_digest(state);
123 7 : XXH3_freeState(state);
124 :
125 7 : status = apr_file_close(file_descriptor);
126 7 : if (APR_SUCCESS != status) {
127 0 : DEBUG_ERR("error calling apr_file_close: %s", apr_strerror(status, errbuf, sizeof(errbuf)));
128 0 : return status;
129 : }
130 :
131 7 : return APR_SUCCESS;
132 : }
133 :
134 105 : extern apr_status_t checksum_file(const char *filename, apr_off_t size, apr_off_t excess_size, ft_hash_t *hash_out,
135 : apr_pool_t *gc_pool)
136 : {
137 105 : if (size < excess_size) {
138 99 : return checksum_small_file(filename, size, hash_out, gc_pool);
139 : }
140 :
141 6 : return checksum_big_file(filename, size, hash_out, gc_pool);
142 : }
143 :
144 74 : static apr_status_t small_filecmp(apr_pool_t *pool, const char *fname1, const char *fname2, apr_off_t size, int *result_out)
145 74 : {
146 74 : char errbuf[CHAR_MAX_VAL];
147 74 : apr_file_t *fd1 = NULL;
148 74 : apr_file_t *fd2 = NULL;
149 74 : apr_mmap_t *mm1 = NULL;
150 74 : apr_mmap_t *mm2 = NULL;
151 74 : memset(errbuf, 0, sizeof(errbuf));
152 74 : apr_status_t status = APR_SUCCESS;
153 :
154 74 : if (0 == size) {
155 4 : *result_out = 0;
156 4 : return APR_SUCCESS;
157 : }
158 :
159 70 : status = apr_file_open(&fd1, fname1, APR_READ | APR_BINARY, APR_OS_DEFAULT, pool);
160 70 : if (APR_SUCCESS != status) {
161 0 : return status;
162 : }
163 :
164 70 : status = apr_mmap_create(&mm1, fd1, 0, (apr_size_t) size, APR_MMAP_READ, pool);
165 70 : if (APR_SUCCESS != status) {
166 0 : (void) apr_file_close(fd1);
167 0 : return big_filecmp(pool, fname1, fname2, size, result_out);
168 : }
169 :
170 70 : status = apr_file_open(&fd2, fname2, APR_READ | APR_BINARY, APR_OS_DEFAULT, pool);
171 70 : if (APR_SUCCESS != status) {
172 0 : apr_mmap_delete(mm1);
173 0 : (void) apr_file_close(fd1);
174 0 : return status;
175 : }
176 :
177 70 : status = apr_mmap_create(&mm2, fd2, 0, size, APR_MMAP_READ, pool);
178 70 : if (APR_SUCCESS != status) {
179 0 : (void) apr_file_close(fd2);
180 0 : (void) apr_file_close(fd2);
181 0 : (void) apr_file_close(fd1);
182 0 : return big_filecmp(pool, fname1, fname2, size, result_out);
183 : }
184 :
185 70 : *result_out = memcmp(mm1->mm, mm2->mm, size);
186 :
187 70 : status = apr_mmap_delete(mm2);
188 70 : if (APR_SUCCESS != status) {
189 0 : DEBUG_ERR("error calling apr_mmap_delete: %s", apr_strerror(status, errbuf, sizeof(errbuf)));
190 0 : (void) apr_file_close(fd2);
191 0 : (void) apr_mmap_delete(mm1);
192 0 : (void) apr_file_close(fd1);
193 0 : return status;
194 : }
195 70 : status = apr_file_close(fd2);
196 70 : if (APR_SUCCESS != status) {
197 0 : DEBUG_ERR("error calling apr_file_close: %s", apr_strerror(status, errbuf, sizeof(errbuf)));
198 0 : (void) apr_mmap_delete(mm1);
199 0 : (void) apr_file_close(fd1);
200 0 : return status;
201 : }
202 :
203 70 : status = apr_mmap_delete(mm1);
204 70 : if (APR_SUCCESS != status) {
205 0 : DEBUG_ERR("error calling apr_mmap_delete: %s", apr_strerror(status, errbuf, sizeof(errbuf)));
206 0 : (void) apr_file_close(fd1);
207 0 : return status;
208 : }
209 70 : status = apr_file_close(fd1);
210 70 : if (APR_SUCCESS != status) {
211 0 : DEBUG_ERR("error calling apr_file_close: %s", apr_strerror(status, errbuf, sizeof(errbuf)));
212 0 : return status;
213 : }
214 :
215 70 : return APR_SUCCESS;
216 : }
217 :
218 3 : static apr_status_t big_filecmp(apr_pool_t *pool, const char *fname1, const char *fname2, apr_off_t size, int *result_out)
219 3 : {
220 3 : unsigned char data_chunk1[HUGE_LEN];
221 3 : unsigned char data_chunk2[HUGE_LEN];
222 3 : char errbuf[CHAR_MAX_VAL];
223 : // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
224 : apr_size_t rbytes1;
225 : // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
226 : apr_size_t rbytes2;
227 3 : apr_file_t *fd1 = NULL;
228 3 : apr_file_t *fd2 = NULL;
229 3 : memset(data_chunk1, 0, sizeof(data_chunk1));
230 3 : memset(data_chunk2, 0, sizeof(data_chunk2));
231 3 : memset(errbuf, 0, sizeof(errbuf));
232 3 : apr_status_t status1 = APR_SUCCESS;
233 3 : apr_status_t status2 = APR_SUCCESS;
234 :
235 3 : if (0 == size) {
236 1 : *result_out = 0;
237 1 : return APR_SUCCESS;
238 : }
239 :
240 2 : status1 = apr_file_open(&fd1, fname1, APR_READ | APR_BINARY, APR_OS_DEFAULT, pool);
241 2 : if (APR_SUCCESS != status1) {
242 0 : return status1;
243 : }
244 :
245 2 : status1 = apr_file_open(&fd2, fname2, APR_READ | APR_BINARY, APR_OS_DEFAULT, pool);
246 2 : if (APR_SUCCESS != status1) {
247 0 : (void) apr_file_close(fd1);
248 0 : return status1;
249 : }
250 :
251 : do {
252 3 : rbytes1 = HUGE_LEN;
253 3 : status1 = apr_file_read(fd1, data_chunk1, &rbytes1);
254 3 : rbytes2 = rbytes1;
255 3 : status2 = apr_file_read(fd2, data_chunk2, &rbytes2);
256 3 : if ((APR_SUCCESS == status1) && (APR_SUCCESS == status2) && (rbytes2 == rbytes1)) {
257 2 : *result_out = memcmp(data_chunk1, data_chunk2, rbytes1);
258 : }
259 3 : } while ((APR_SUCCESS == status1) && (APR_SUCCESS == status2) && (0 == *result_out) && (rbytes2 == rbytes1));
260 :
261 2 : if ((APR_EOF != status1) && (APR_EOF != status2) && (0 == *result_out)) {
262 0 : DEBUG_ERR("1:unable to read %s (%" APR_SIZE_T_FMT "): %s", fname1, rbytes1,
263 : apr_strerror(status1, errbuf, sizeof(errbuf)));
264 0 : DEBUG_ERR("2:unable to read %s (%" APR_SIZE_T_FMT "): %s", fname2, rbytes2,
265 : apr_strerror(status2, errbuf, sizeof(errbuf)));
266 0 : return status1;
267 : }
268 :
269 2 : status1 = apr_file_close(fd2);
270 2 : if (APR_SUCCESS != status1) {
271 0 : DEBUG_ERR("error calling apr_file_close: %s", apr_strerror(status1, errbuf, sizeof(errbuf)));
272 0 : (void) apr_file_close(fd1);
273 0 : return status1;
274 : }
275 :
276 2 : status1 = apr_file_close(fd1);
277 2 : if (APR_SUCCESS != status1) {
278 0 : DEBUG_ERR("error calling apr_file_close: %s", apr_strerror(status1, errbuf, sizeof(errbuf)));
279 0 : return status1;
280 : }
281 :
282 2 : return APR_SUCCESS;
283 : }
284 :
285 77 : extern apr_status_t filecmp(apr_pool_t *pool, const char *fname1, const char *fname2, apr_off_t size, apr_off_t excess_size,
286 : int *result_out)
287 : {
288 77 : if (size < excess_size) {
289 74 : return small_filecmp(pool, fname1, fname2, size, result_out);
290 : }
291 :
292 3 : return big_filecmp(pool, fname1, fname2, size, result_out);
293 : }
|