ftwin 0.8.10
ft_image.c
Go to the documentation of this file.
1
/*
 * Copyright (C) 2007 François Pesce : francois.pesce (at) gmail (dot) com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
21
#include "ft_image.h"

#include <stdio.h>
#include <string.h>
#include <puzzle.h>

#include <apr_thread_mutex.h>

#include "config.h"
#include "debug.h"
#include "ft_config.h"
#include "ft_file.h"
#include "napr_threadpool.h"
#include "napr_heap.h"
35
/* Number of worker threads requested from the thread pool that computes image vectors. */
static const int NB_WORKER = 4;
37
38struct compute_vector_ctx_t
39{
40 apr_thread_mutex_t *mutex;
41 PuzzleContext *contextp;
42 ft_conf_t *conf;
43 unsigned int heap_size;
44 unsigned int nb_processed;
45};
46typedef struct compute_vector_ctx_t compute_vector_ctx_t;
47
48struct compute_vector_task_t
49{
50 compute_vector_ctx_t *cv_ctx;
51 ft_file_t *file;
52};
53typedef struct compute_vector_task_t compute_vector_task_t;
54
55static apr_status_t compute_vector(void *ctx, void *data)
56{
57 char errbuf[ERROR_BUFFER_SIZE];
58 compute_vector_ctx_t *cv_ctx = (compute_vector_ctx_t *) ctx;
59 compute_vector_task_t *task = (compute_vector_task_t *) data;
60 ft_file_t *file = task->file;
61 apr_status_t status = APR_SUCCESS;
62
63 memset(errbuf, 0, sizeof(errbuf));
64 puzzle_init_cvec(cv_ctx->contextp, &(file->cvec));
65 if (0 == puzzle_fill_cvec_from_file(cv_ctx->contextp, &(file->cvec), file->path)) {
66 file->cvec_ok |= 0x1;
67 }
68 else {
69 DEBUG_ERR("error calling puzzle_fill_cvec_from_file, ignoring file: %s", file->path);
70 }
71
72 status = apr_thread_mutex_lock(cv_ctx->mutex);
73 if (APR_SUCCESS != status) {
74 DEBUG_ERR("error calling apr_thread_mutex_lock: %s", apr_strerror(status, errbuf, ERROR_BUFFER_SIZE));
75 return status;
76 }
77 if (is_option_set(cv_ctx->conf->mask, OPTION_VERBO)) {
78 (void) fprintf(stderr, "\rProgress [%u/%u] %d%% ", cv_ctx->nb_processed, cv_ctx->heap_size,
79 (int) ((float) cv_ctx->nb_processed / (float) cv_ctx->heap_size * 100.0F));
80 }
81 cv_ctx->nb_processed += 1;
82 status = apr_thread_mutex_unlock(cv_ctx->mutex);
83 if (APR_SUCCESS != status) {
84 DEBUG_ERR("error calling apr_thread_mutex_unlock: %s", apr_strerror(status, errbuf, ERROR_BUFFER_SIZE));
85 return status;
86 }
87
88 return APR_SUCCESS;
89}
90
/* Values passed to puzzle_set_max_width() / puzzle_set_max_height(). */
static const int MAX_PUZZLE_WIDTH = 5000;
static const int MAX_PUZZLE_HEIGHT = 5000;
/* Value passed to puzzle_set_lambdas(). */
static const int PUZZLE_LAMBDAS = 13;
/* Last argument of puzzle_vector_normalized_distance(). */
static const int FIX_FOR_CLUSTERING = 0;
/* Percentage shown on the final progress line. */
static const int MAX_PERCENTAGE = 100;
96
97static void initialize_puzzle_context(PuzzleContext * context);
98static apr_status_t compute_image_vectors(ft_conf_t *conf, PuzzleContext * context);
99static void compare_image_vectors(ft_conf_t *conf, PuzzleContext * context);
100
102{
103 PuzzleContext context;
104 apr_status_t status = APR_SUCCESS;
105
106 initialize_puzzle_context(&context);
107
108 status = compute_image_vectors(conf, &context);
109 if (status != APR_SUCCESS) {
110 puzzle_free_context(&context);
111 return status;
112 }
113
114 compare_image_vectors(conf, &context);
115
116 puzzle_free_context(&context);
117
118 return APR_SUCCESS;
119}
120
121static void initialize_puzzle_context(PuzzleContext * context)
122{
123 puzzle_init_context(context);
124 puzzle_set_max_width(context, MAX_PUZZLE_WIDTH);
125 puzzle_set_max_height(context, MAX_PUZZLE_HEIGHT);
126 puzzle_set_lambdas(context, PUZZLE_LAMBDAS);
127}
128
129static apr_status_t compute_image_vectors(ft_conf_t *conf, PuzzleContext * context)
130{
131 char errbuf[ERROR_BUFFER_SIZE];
132 apr_status_t status = APR_SUCCESS;
133 napr_threadpool_t *threadpool = NULL;
134 compute_vector_ctx_t cv_ctx;
135 unsigned int heap_size = napr_heap_size(conf->heap);
136
137 memset(errbuf, 0, sizeof(errbuf));
138 cv_ctx.contextp = context;
139 cv_ctx.heap_size = heap_size;
140 cv_ctx.nb_processed = 0;
141 cv_ctx.conf = conf;
142
143 status = apr_thread_mutex_create(&cv_ctx.mutex, APR_THREAD_MUTEX_DEFAULT, conf->pool);
144 if (APR_SUCCESS != status) {
145 DEBUG_ERR("error calling apr_thread_mutex_create: %s", apr_strerror(status, errbuf, ERROR_BUFFER_SIZE));
146 return status;
147 }
148
149 status = napr_threadpool_init(&threadpool, &cv_ctx, NB_WORKER, compute_vector, conf->pool);
150 if (APR_SUCCESS != status) {
151 DEBUG_ERR("error calling napr_threadpool_init: %s", apr_strerror(status, errbuf, ERROR_BUFFER_SIZE));
152 return status;
153 }
154
155 for (unsigned int idx = 0; idx < heap_size; idx++) {
156 compute_vector_task_t *task = apr_palloc(conf->pool, sizeof(compute_vector_task_t));
157 task->cv_ctx = &cv_ctx;
158 task->file = napr_heap_get_nth(conf->heap, idx);
159 status = napr_threadpool_add(threadpool, task);
160 if (APR_SUCCESS != status) {
161 DEBUG_ERR("error calling napr_threadpool_add: %s", apr_strerror(status, errbuf, ERROR_BUFFER_SIZE));
162 return status;
163 }
164 }
165
166 napr_threadpool_wait(threadpool);
167 status = apr_thread_mutex_destroy(cv_ctx.mutex);
168 if (APR_SUCCESS != status) {
169 DEBUG_ERR("error calling apr_thread_mutex_destroy: %s", apr_strerror(status, errbuf, ERROR_BUFFER_SIZE));
170 return status;
171 }
172
173 if (is_option_set(conf->mask, OPTION_VERBO)) {
174 (void) fprintf(stderr, "\rProgress [%u/%u] %d%% ", heap_size, heap_size, MAX_PERCENTAGE);
175 (void) fprintf(stderr, "\n");
176 }
177
178 return APR_SUCCESS;
179}
180
181static void compare_image_vectors(ft_conf_t *conf, PuzzleContext * context)
182{
183 unsigned long nb_cmp = napr_heap_size(conf->heap) * (napr_heap_size(conf->heap) - 1) / 2;
184 unsigned long cnt_cmp = 0;
185 ft_file_t *file = NULL;
186
187 while (NULL != (file = napr_heap_extract(conf->heap))) {
188 if (!(file->cvec_ok & 0x1)) {
189 continue;
190 }
191
192 unsigned char already_printed = 0;
193 unsigned int heap_size = napr_heap_size(conf->heap);
194 for (unsigned int idx = 0; idx < heap_size; idx++) {
195 ft_file_t *file_cmp = napr_heap_get_nth(conf->heap, idx);
196 if (!(file_cmp->cvec_ok & 0x1)) {
197 continue;
198 }
199
200 double distance =
201 puzzle_vector_normalized_distance(context, &(file->cvec), &(file_cmp->cvec), FIX_FOR_CLUSTERING);
202 if (distance < conf->threshold) {
203 if (!already_printed) {
204 (void) printf("%s%c", file->path, conf->sep);
205 already_printed = 1;
206 }
207 else {
208 (void) printf("%c", conf->sep);
209 }
210 (void) printf("%s", file_cmp->path);
211 }
212 if (is_option_set(conf->mask, OPTION_VERBO)) {
213 (void) fprintf(stderr, "\rCompare progress [%10lu/%10lu] %02.2f%% ", cnt_cmp, nb_cmp,
214 (double) ((double) cnt_cmp / (double) nb_cmp * 100.0F));
215 }
216 cnt_cmp++;
217 }
218
219 if (already_printed) {
220 (void) printf("\n\n");
221 }
222
223 puzzle_free_cvec(context, &(file->cvec));
224 }
225
226 if (is_option_set(conf->mask, OPTION_VERBO)) {
227 (void) fprintf(stderr, "\rCompare progress [%10lu/%10lu] %02.2f%% ", cnt_cmp, nb_cmp, 100.0);
228 (void) fprintf(stderr, "\n");
229 }
230}
UTIL debug output macros.
#define DEBUG_ERR(str, arg...)
Display error message at the level error.
Definition debug.h:31
Interface for file comparison and checksum calculation.
apr_status_t ft_image_twin_report(ft_conf_t *conf)
Compares images using libpuzzle and reports similar images.
Definition ft_image.c:101
void * napr_heap_extract(napr_heap_t *heap)
Removes and returns the element at the top of the heap (the min or max element).
Definition napr_heap.c:122
unsigned int napr_heap_size(const napr_heap_t *heap)
Gets the current number of elements in the heap.
Definition napr_heap.c:181
void * napr_heap_get_nth(const napr_heap_t *heap, unsigned int n)
Gets the element at a specific index in the heap's internal array.
Definition napr_heap.c:173
A generic binary heap implementation (min-heap or max-heap).
apr_status_t napr_threadpool_wait(napr_threadpool_t *threadpool)
Waits until all tasks currently in the queue have been processed.
apr_status_t napr_threadpool_init(napr_threadpool_t **threadpool, void *ctx, unsigned long nb_thread, threadpool_process_data_callback_fn_t *process_data, apr_pool_t *pool)
Initializes a thread pool.
apr_status_t napr_threadpool_add(napr_threadpool_t *threadpool, void *data)
Adds a task (a data item) to the thread pool's processing queue.
A simple fixed-size thread pool for concurrent task processing.
struct napr_threadpool_t napr_threadpool_t
Opaque thread pool structure.
Main configuration structure for the ftwin application.
Definition ft_config.h:94