1 : /*
2 : * xxHash - Extremely Fast Hash algorithm
3 : * Header File
4 : * Copyright (C) 2012-2021 Yann Collet
5 : *
6 : * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
7 : *
8 : * Redistribution and use in source and binary forms, with or without
9 : * modification, are permitted provided that the following conditions are
10 : * met:
11 : *
12 : * * Redistributions of source code must retain the above copyright
13 : * notice, this list of conditions and the following disclaimer.
14 : * * Redistributions in binary form must reproduce the above
15 : * copyright notice, this list of conditions and the following disclaimer
16 : * in the documentation and/or other materials provided with the
17 : * distribution.
18 : *
19 : * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 : * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 : * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 : * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 : * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 : * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 : * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 : * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 : * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 : * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 : * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 : *
31 : * You can contact the author at:
32 : * - xxHash homepage: https://www.xxhash.com
33 : * - xxHash source repository: https://github.com/Cyan4973/xxHash
34 : */
35 :
36 : /*!
37 : * @mainpage xxHash
38 : *
39 : * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed
40 : * limits.
41 : *
42 : * It is proposed in four flavors, in three families:
43 : * 1. @ref XXH32_family
44 : * - Classic 32-bit hash function. Simple, compact, and runs on almost all
45 : * 32-bit and 64-bit systems.
46 : * 2. @ref XXH64_family
47 : * - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
48 : * 64-bit systems (but _not_ 32-bit systems).
49 : * 3. @ref XXH3_family
50 : * - Modern 64-bit and 128-bit hash function family which features improved
51 : * strength and performance across the board, especially on smaller data.
52 : * It benefits greatly from SIMD and 64-bit without requiring it.
53 : *
54 : * Benchmarks
55 : * ---
56 : * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
57 : * The open source benchmark program is compiled with clang v10.0 using the -O3 flag.
58 : *
59 : * | Hash Name | ISA ext | Width | Large Data Speed | Small Data Velocity |
60 : * | -------------------- | ------- | ----: | ---------------: | ------------------: |
61 : * | XXH3_64bits() | @b AVX2 | 64 | 59.4 GB/s | 133.1 |
62 : * | MeowHash | AES-NI | 128 | 58.2 GB/s | 52.5 |
63 : * | XXH3_128bits() | @b AVX2 | 128 | 57.9 GB/s | 118.1 |
64 : * | CLHash | PCLMUL | 64 | 37.1 GB/s | 58.1 |
65 : * | XXH3_64bits() | @b SSE2 | 64 | 31.5 GB/s | 133.1 |
66 : * | XXH3_128bits() | @b SSE2 | 128 | 29.6 GB/s | 118.1 |
67 : * | RAM sequential read | | N/A | 28.0 GB/s | N/A |
68 : * | ahash | AES-NI | 64 | 22.5 GB/s | 107.2 |
69 : * | City64 | | 64 | 22.0 GB/s | 76.6 |
70 : * | T1ha2 | | 64 | 22.0 GB/s | 99.0 |
71 : * | City128 | | 128 | 21.7 GB/s | 57.7 |
72 : * | FarmHash | AES-NI | 64 | 21.3 GB/s | 71.9 |
73 : * | XXH64() | | 64 | 19.4 GB/s | 71.0 |
74 : * | SpookyHash | | 64 | 19.3 GB/s | 53.2 |
75 : * | Mum | | 64 | 18.0 GB/s | 67.0 |
76 : * | CRC32C | SSE4.2 | 32 | 13.0 GB/s | 57.9 |
77 : * | XXH32() | | 32 | 9.7 GB/s | 71.9 |
78 : * | City32 | | 32 | 9.1 GB/s | 66.0 |
79 : * | Blake3* | @b AVX2 | 256 | 4.4 GB/s | 8.1 |
80 : * | Murmur3 | | 32 | 3.9 GB/s | 56.1 |
81 : * | SipHash* | | 64 | 3.0 GB/s | 43.2 |
82 : * | Blake3* | @b SSE2 | 256 | 2.4 GB/s | 8.1 |
83 : * | HighwayHash | | 64 | 1.4 GB/s | 6.0 |
84 : * | FNV64 | | 64 | 1.2 GB/s | 62.7 |
85 : * | Blake2* | | 256 | 1.1 GB/s | 5.1 |
86 : * | SHA1* | | 160 | 0.8 GB/s | 5.6 |
87 : * | MD5* | | 128 | 0.6 GB/s | 7.8 |
88 : * @note
89 : * - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
90 : * even though it is mandatory on x64.
91 : * - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
92 : * by modern standards.
93 : * - Small data velocity is a rough average of algorithm's efficiency for small
94 : * data. For more accurate information, see the wiki.
95 : * - More benchmarks and strength tests are found on the wiki:
96 : * https://github.com/Cyan4973/xxHash/wiki
97 : *
98 : * Usage
99 : * ------
100 : * All xxHash variants use a similar API. Changing the algorithm is a trivial
101 : * substitution.
102 : *
103 : * @pre
104 : * For functions which take an input and length parameter, the following
105 : * requirements are assumed:
106 : * - The range from [`input`, `input + length`) is valid, readable memory.
107 : * - The only exception is when `length` is `0`; in that case, `input` may be `NULL`.
108 : * - For C++, the objects must have the *TriviallyCopyable* property, as the
109 : * functions access bytes directly as if they were an array of `unsigned char`.
110 : *
111 : * @anchor single_shot_example
112 : * **Single Shot**
113 : *
114 : * These functions are stateless functions which hash a contiguous block of memory,
115 : * immediately returning the result. They are the easiest and usually the fastest
116 : * option.
117 : *
118 : * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
119 : *
120 : * @code{.c}
121 : * #include <string.h>
122 : * #include "xxhash.h"
123 : *
124 : * // Example for a function which hashes a null terminated string with XXH32().
125 : * XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
126 : * {
127 : * // NULL pointers are only valid if the length is zero
128 : * size_t length = (string == NULL) ? 0 : strlen(string);
129 : * return XXH32(string, length, seed);
130 : * }
131 : * @endcode
132 : *
133 : * @anchor streaming_example
134 : * **Streaming**
135 : *
136 : * These functions allow incremental hashing of data of unknown total size,
137 : * even larger than what would fit in a `size_t`.
138 : *
139 : * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
140 : *
141 : * @code{.c}
142 : * #include <stdio.h>
143 : * #include <assert.h>
144 : * #include "xxhash.h"
145 : * // Example for a function which hashes a FILE incrementally with XXH3_64bits().
146 : * XXH64_hash_t hashFile(FILE* f)
147 : * {
148 : * // Allocate a state struct. Do not just use malloc() or new.
149 : * XXH3_state_t* state = XXH3_createState();
150 : * assert(state != NULL && "Out of memory!");
151 : * // Reset the state to start a new hashing session.
152 : * XXH3_64bits_reset(state);
153 : * char buffer[4096];
154 : * size_t count;
155 : * // Read the file in chunks
156 : * while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
157 : * // Run update() as many times as necessary to process the data
158 : * XXH3_64bits_update(state, buffer, count);
159 : * }
160 : * // Retrieve the finalized hash. This will not change the state.
161 : * XXH64_hash_t result = XXH3_64bits_digest(state);
162 : * // Free the state. Do not use free().
163 : * XXH3_freeState(state);
164 : * return result;
165 : * }
166 : * @endcode
167 : *
168 : * @file xxhash.h
169 : * xxHash prototypes and implementation
170 : */
171 :
172 : #if defined (__cplusplus)
173 : extern "C" {
174 : #endif
175 :
176 : /* ****************************
177 : * INLINE mode
178 : ******************************/
179 : /*!
180 : * @defgroup public Public API
181 : * Contains details on the public xxHash functions.
182 : * @{
183 : */
184 : #ifdef XXH_DOXYGEN
185 : /*!
186 : * @brief Gives access to internal state declaration, required for static allocation.
187 : *
188 : * Incompatible with dynamic linking, due to risks of ABI changes.
189 : *
190 : * Usage:
191 : * @code{.c}
192 : * #define XXH_STATIC_LINKING_ONLY
193 : * #include "xxhash.h"
194 : * @endcode
195 : */
196 : # define XXH_STATIC_LINKING_ONLY
197 : /* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */
198 :
199 : /*!
200 : * @brief Gives access to internal definitions.
201 : *
202 : * Usage:
203 : * @code{.c}
204 : * #define XXH_STATIC_LINKING_ONLY
205 : * #define XXH_IMPLEMENTATION
206 : * #include "xxhash.h"
207 : * @endcode
208 : */
209 : # define XXH_IMPLEMENTATION
210 : /* Do not undef XXH_IMPLEMENTATION for Doxygen */
211 :
212 : /*!
213 : * @brief Exposes the implementation and marks all functions as `inline`.
214 : *
215 : * Use these build macros to inline xxhash into the target unit.
216 : * Inlining improves performance on small inputs, especially when the length is
217 : * expressed as a compile-time constant:
218 : *
219 : * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
220 : *
221 : * It also keeps xxHash symbols private to the unit, so they are not exported.
222 : *
223 : * Usage:
224 : * @code{.c}
225 : * #define XXH_INLINE_ALL
226 : * #include "xxhash.h"
227 : * @endcode
228 : * Do not compile and link xxhash.o as a separate object, as it is not useful.
229 : */
230 : # define XXH_INLINE_ALL
231 : # undef XXH_INLINE_ALL
232 : /*!
233 : * @brief Exposes the implementation without marking functions as inline.
234 : */
235 : # define XXH_PRIVATE_API
236 : # undef XXH_PRIVATE_API
237 : /*!
238 : * @brief Emulate a namespace by transparently prefixing all symbols.
239 : *
240 : * If you want to include _and expose_ xxHash functions from within your own
241 : * library, but also want to avoid symbol collisions with other libraries which
242 : * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
243 : * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
244 : * (therefore, avoid empty or numeric values).
245 : *
246 : * Note that no change is required within the calling program as long as it
247 : * includes `xxhash.h`: Regular symbol names will be automatically translated
248 : * by this header.
249 : */
250 : # define XXH_NAMESPACE /* YOUR NAME HERE */
251 : # undef XXH_NAMESPACE
252 : #endif
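/*
 * For illustration, a minimal sketch of how @ref XXH_NAMESPACE is typically used.
 * The `MYLIB_` prefix is only an example value; any valid identifier prefix works.
 * @code{.c}
 * // Define the same value when compiling xxhash.c, e.g. -DXXH_NAMESPACE=MYLIB_
 * #define XXH_NAMESPACE MYLIB_
 * #include "xxhash.h"
 *
 * // Calling code keeps using the regular names: this header transparently
 * // redirects XXH64() to the exported symbol MYLIB_XXH64().
 * XXH64_hash_t hash_it(const void* p, size_t n) { return XXH64(p, n, 0); }
 * @endcode
 */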
253 :
254 : #if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
255 : && !defined(XXH_INLINE_ALL_31684351384)
256 : /* this section should be traversed only once */
257 : # define XXH_INLINE_ALL_31684351384
258 : /* give access to the advanced API, required to compile implementations */
259 : # undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */
260 : # define XXH_STATIC_LINKING_ONLY
261 : /* make all functions private */
262 : # undef XXH_PUBLIC_API
263 : # if defined(__GNUC__)
264 : # define XXH_PUBLIC_API static __inline __attribute__((unused))
265 : # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
266 : # define XXH_PUBLIC_API static inline
267 : # elif defined(_MSC_VER)
268 : # define XXH_PUBLIC_API static __inline
269 : # else
270 : /* note: this version may generate warnings for unused static functions */
271 : # define XXH_PUBLIC_API static
272 : # endif
273 :
274 : /*
275 : * This part deals with the special case where a unit wants to inline xxHash,
276 : * but "xxhash.h" has previously been included without XXH_INLINE_ALL,
277 : * such as part of some previously included *.h header file.
278 : * Without further action, the new include would just be ignored,
279 : * and functions would effectively _not_ be inlined (silent failure).
280 : * The following macros solve this situation by prefixing all inlined names,
281 : * avoiding naming collision with previous inclusions.
282 : */
283 : /* Before that, we unconditionally #undef all symbols,
284 : * in case they were already defined with XXH_NAMESPACE.
285 : * They will then be redefined for XXH_INLINE_ALL
286 : */
287 : # undef XXH_versionNumber
288 : /* XXH32 */
289 : # undef XXH32
290 : # undef XXH32_createState
291 : # undef XXH32_freeState
292 : # undef XXH32_reset
293 : # undef XXH32_update
294 : # undef XXH32_digest
295 : # undef XXH32_copyState
296 : # undef XXH32_canonicalFromHash
297 : # undef XXH32_hashFromCanonical
298 : /* XXH64 */
299 : # undef XXH64
300 : # undef XXH64_createState
301 : # undef XXH64_freeState
302 : # undef XXH64_reset
303 : # undef XXH64_update
304 : # undef XXH64_digest
305 : # undef XXH64_copyState
306 : # undef XXH64_canonicalFromHash
307 : # undef XXH64_hashFromCanonical
308 : /* XXH3_64bits */
309 : # undef XXH3_64bits
310 : # undef XXH3_64bits_withSecret
311 : # undef XXH3_64bits_withSeed
312 : # undef XXH3_64bits_withSecretandSeed
313 : # undef XXH3_createState
314 : # undef XXH3_freeState
315 : # undef XXH3_copyState
316 : # undef XXH3_64bits_reset
317 : # undef XXH3_64bits_reset_withSeed
318 : # undef XXH3_64bits_reset_withSecret
319 : # undef XXH3_64bits_update
320 : # undef XXH3_64bits_digest
321 : # undef XXH3_generateSecret
322 : /* XXH3_128bits */
323 : # undef XXH128
324 : # undef XXH3_128bits
325 : # undef XXH3_128bits_withSeed
326 : # undef XXH3_128bits_withSecret
327 : # undef XXH3_128bits_reset
328 : # undef XXH3_128bits_reset_withSeed
329 : # undef XXH3_128bits_reset_withSecret
330 : # undef XXH3_128bits_reset_withSecretandSeed
331 : # undef XXH3_128bits_update
332 : # undef XXH3_128bits_digest
333 : # undef XXH128_isEqual
334 : # undef XXH128_cmp
335 : # undef XXH128_canonicalFromHash
336 : # undef XXH128_hashFromCanonical
337 : /* Finally, free the namespace itself */
338 : # undef XXH_NAMESPACE
339 :
340 : /* employ the namespace for XXH_INLINE_ALL */
341 : # define XXH_NAMESPACE XXH_INLINE_
342 : /*
343 : * Some identifiers (enums, type names) are not symbols,
344 : * but they must nonetheless be renamed to avoid redeclaration.
345 : * Alternative solution: do not redeclare them.
346 : * However, this requires some #ifdefs, and has a more dispersed impact.
347 : * Meanwhile, renaming can be achieved in a single place.
348 : */
349 : # define XXH_IPREF(Id) XXH_NAMESPACE ## Id
350 : # define XXH_OK XXH_IPREF(XXH_OK)
351 : # define XXH_ERROR XXH_IPREF(XXH_ERROR)
352 : # define XXH_errorcode XXH_IPREF(XXH_errorcode)
353 : # define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t)
354 : # define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t)
355 : # define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
356 : # define XXH32_state_s XXH_IPREF(XXH32_state_s)
357 : # define XXH32_state_t XXH_IPREF(XXH32_state_t)
358 : # define XXH64_state_s XXH_IPREF(XXH64_state_s)
359 : # define XXH64_state_t XXH_IPREF(XXH64_state_t)
360 : # define XXH3_state_s XXH_IPREF(XXH3_state_s)
361 : # define XXH3_state_t XXH_IPREF(XXH3_state_t)
362 : # define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
363 : /* Ensure the header is parsed again, even if it was previously included */
364 : # undef XXHASH_H_5627135585666179
365 : # undef XXHASH_H_STATIC_13879238742
366 : #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
367 :
368 : /* ****************************************************************
369 : * Stable API
370 : *****************************************************************/
371 : #ifndef XXHASH_H_5627135585666179
372 : #define XXHASH_H_5627135585666179 1
373 :
374 : /*! @brief Marks a global symbol. */
375 : #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
376 : # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
377 : # ifdef XXH_EXPORT
378 : # define XXH_PUBLIC_API __declspec(dllexport)
379 : # elif XXH_IMPORT
380 : # define XXH_PUBLIC_API __declspec(dllimport)
381 : # endif
382 : # else
383 : # define XXH_PUBLIC_API /* do nothing */
384 : # endif
385 : #endif
386 :
387 : #ifdef XXH_NAMESPACE
388 : # define XXH_CAT(A,B) A##B
389 : # define XXH_NAME2(A,B) XXH_CAT(A,B)
390 : # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
391 : /* XXH32 */
392 : # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
393 : # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
394 : # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
395 : # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
396 : # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
397 : # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
398 : # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
399 : # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
400 : # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
401 : /* XXH64 */
402 : # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
403 : # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
404 : # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
405 : # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
406 : # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
407 : # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
408 : # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
409 : # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
410 : # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
411 : /* XXH3_64bits */
412 : # define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
413 : # define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
414 : # define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
415 : # define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
416 : # define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
417 : # define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
418 : # define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
419 : # define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
420 : # define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
421 : # define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
422 : # define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
423 : # define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
424 : # define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
425 : # define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
426 : # define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
427 : /* XXH3_128bits */
428 : # define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
429 : # define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
430 : # define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
431 : # define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
432 : # define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
433 : # define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
434 : # define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
435 : # define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
436 : # define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
437 : # define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
438 : # define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
439 : # define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
440 : # define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
441 : # define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
442 : # define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
443 : #endif
444 :
445 :
446 : /* *************************************
447 : * Compiler specifics
448 : ***************************************/
449 :
450 : /* specific declaration modes for Windows */
451 : #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
452 : # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
453 : # ifdef XXH_EXPORT
454 : # define XXH_PUBLIC_API __declspec(dllexport)
455 : # elif XXH_IMPORT
456 : # define XXH_PUBLIC_API __declspec(dllimport)
457 : # endif
458 : # else
459 : # define XXH_PUBLIC_API /* do nothing */
460 : # endif
461 : #endif
462 :
463 : #if defined (__GNUC__)
464 : # define XXH_CONSTF __attribute__((const))
465 : # define XXH_PUREF __attribute__((pure))
466 : # define XXH_MALLOCF __attribute__((malloc))
467 : #else
468 : # define XXH_CONSTF /* disable */
469 : # define XXH_PUREF
470 : # define XXH_MALLOCF
471 : #endif
472 :
473 : /* *************************************
474 : * Version
475 : ***************************************/
476 : #define XXH_VERSION_MAJOR 0
477 : #define XXH_VERSION_MINOR 8
478 : #define XXH_VERSION_RELEASE 2
479 : /*! @brief Version number, encoded as two digits each */
480 : #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
481 :
482 : /*!
483 : * @brief Obtains the xxHash version.
484 : *
485 : * This is mostly useful when xxHash is compiled as a shared library,
486 : * since the returned value comes from the library, as opposed to the header file.
487 : *
488 : * @return @ref XXH_VERSION_NUMBER of the invoked library.
489 : */
490 : XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
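/*
 * For illustration, a small sketch comparing the header's compile-time version
 * with the version reported by the linked library (useful with shared libraries):
 * @code{.c}
 * #include <stdio.h>
 * #include "xxhash.h"
 *
 * void check_xxhash_version(void)
 * {
 *     // XXH_VERSION_NUMBER encodes major/minor/release as two digits each,
 *     // e.g. v0.8.2 -> 802.
 *     if (XXH_versionNumber() != XXH_VERSION_NUMBER) {
 *         printf("warning: header v%u vs library v%u\n",
 *                (unsigned)XXH_VERSION_NUMBER, XXH_versionNumber());
 *     }
 * }
 * @endcode
 */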
491 :
492 :
493 : /* ****************************
494 : * Common basic types
495 : ******************************/
496 : #include <stddef.h> /* size_t */
497 : /*!
498 : * @brief Exit code for the streaming API.
499 : */
500 : typedef enum {
501 : XXH_OK = 0, /*!< OK */
502 : XXH_ERROR /*!< Error */
503 : } XXH_errorcode;
504 :
505 :
506 : /*-**********************************************************************
507 : * 32-bit hash
508 : ************************************************************************/
509 : #if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
510 : /*!
511 : * @brief An unsigned 32-bit integer.
512 : *
513 : * Not necessarily defined to `uint32_t` but functionally equivalent.
514 : */
515 : typedef uint32_t XXH32_hash_t;
516 :
517 : #elif !defined (__VMS) \
518 : && (defined (__cplusplus) \
519 : || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
520 : # include <stdint.h>
521 : typedef uint32_t XXH32_hash_t;
522 :
523 : #else
524 : # include <limits.h>
525 : # if UINT_MAX == 0xFFFFFFFFUL
526 : typedef unsigned int XXH32_hash_t;
527 : # elif ULONG_MAX == 0xFFFFFFFFUL
528 : typedef unsigned long XXH32_hash_t;
529 : # else
530 : # error "unsupported platform: need a 32-bit type"
531 : # endif
532 : #endif
533 :
534 : /*!
535 : * @}
536 : *
537 : * @defgroup XXH32_family XXH32 family
538 : * @ingroup public
539 : * Contains functions used in the classic 32-bit xxHash algorithm.
540 : *
541 : * @note
542 : * XXH32 is useful for older platforms, with no or poor 64-bit performance.
543 : * Note that the @ref XXH3_family provides competitive speed for both 32-bit
544 : * and 64-bit systems, and offers true 64/128 bit hash results.
545 : *
546 : * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
547 : * @see @ref XXH32_impl for implementation details
548 : * @{
549 : */
550 :
551 : /*!
552 : * @brief Calculates the 32-bit hash of @p input using xxHash32.
553 : *
554 : * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
555 : *
556 : * See @ref single_shot_example "Single Shot Example" for an example.
557 : *
558 : * @param input The block of data to be hashed, at least @p length bytes in size.
559 : * @param length The length of @p input, in bytes.
560 : * @param seed The 32-bit seed to alter the hash's output predictably.
561 : *
562 : * @pre
563 : * The memory between @p input and @p input + @p length must be valid,
564 : * readable, contiguous memory. However, if @p length is `0`, @p input may be
565 : * `NULL`. In C++, this also must be *TriviallyCopyable*.
566 : *
567 : * @return The calculated 32-bit hash value.
568 : *
569 : * @see
570 : * XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
571 : * Direct equivalents for the other variants of xxHash.
572 : * @see
573 : * XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
574 : */
575 : XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
576 :
577 : #ifndef XXH_NO_STREAM
578 : /*!
579 : * Streaming functions generate the xxHash value from an incremental input.
580 : * This method is slower than single-call functions, due to state management.
581 : * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
582 : *
583 : * An XXH state must first be allocated using `XXH*_createState()`.
584 : *
585 : * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
586 : *
587 : * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
588 : *
589 : * The function returns an error code, with 0 meaning OK, and any other value
590 : * meaning there is an error.
591 : *
592 : * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
593 : * This function returns the hash value as an unsigned 32-bit or 64-bit integer, depending on the variant.
594 : *
595 : * It's still possible to continue inserting input into the hash state after a
596 : * digest, and generate new hash values later on by invoking `XXH*_digest()`.
597 : *
598 : * When done, release the state using `XXH*_freeState()`.
599 : *
600 : * @see streaming_example at the top of @ref xxhash.h for an example.
601 : */
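/*
 * For illustration, a minimal sketch of the create/reset/update/digest/free
 * sequence described above, applied to XXH32 over two in-memory buffers:
 * @code{.c}
 * #include "xxhash.h"
 *
 * XXH32_hash_t hash_two_buffers(const void* a, size_t lenA,
 *                               const void* b, size_t lenB,
 *                               XXH32_hash_t seed)
 * {
 *     XXH32_hash_t result = 0;
 *     XXH32_state_t* const state = XXH32_createState();
 *     if (state == NULL) return 0;                 // allocation failure
 *     if (XXH32_reset(state, seed) == XXH_OK
 *      && XXH32_update(state, a, lenA) == XXH_OK
 *      && XXH32_update(state, b, lenB) == XXH_OK) {
 *         result = XXH32_digest(state);            // same value as hashing a then b in one shot
 *     }
 *     XXH32_freeState(state);
 *     return result;
 * }
 * @endcode
 */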
602 :
603 : /*!
604 : * @typedef struct XXH32_state_s XXH32_state_t
605 : * @brief The opaque state struct for the XXH32 streaming API.
606 : *
607 : * @see XXH32_state_s for details.
608 : */
609 : typedef struct XXH32_state_s XXH32_state_t;
610 :
611 : /*!
612 : * @brief Allocates an @ref XXH32_state_t.
613 : *
614 : * Must be freed with XXH32_freeState().
615 : * @return An allocated XXH32_state_t on success, `NULL` on failure.
616 : */
617 : XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
618 : /*!
619 : * @brief Frees an @ref XXH32_state_t.
620 : *
621 : * Must be allocated with XXH32_createState().
622 : * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
623 : * @return XXH_OK.
624 : */
625 : XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
626 : /*!
627 : * @brief Copies one @ref XXH32_state_t to another.
628 : *
629 : * @param dst_state The state to copy to.
630 : * @param src_state The state to copy from.
631 : * @pre
632 : * @p dst_state and @p src_state must not be `NULL` and must not overlap.
633 : */
634 : XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
635 :
636 : /*!
637 : * @brief Resets an @ref XXH32_state_t to begin a new hash.
638 : *
639 : * This function resets and seeds a state. Call it before @ref XXH32_update().
640 : *
641 : * @param statePtr The state struct to reset.
642 : * @param seed The 32-bit seed to alter the hash result predictably.
643 : *
644 : * @pre
645 : * @p statePtr must not be `NULL`.
646 : *
647 : * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
648 : */
649 : XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
650 :
651 : /*!
652 : * @brief Consumes a block of @p input to an @ref XXH32_state_t.
653 : *
654 : * Call this to incrementally consume blocks of data.
655 : *
656 : * @param statePtr The state struct to update.
657 : * @param input The block of data to be hashed, at least @p length bytes in size.
658 : * @param length The length of @p input, in bytes.
659 : *
660 : * @pre
661 : * @p statePtr must not be `NULL`.
662 : * @pre
663 : * The memory between @p input and @p input + @p length must be valid,
664 : * readable, contiguous memory. However, if @p length is `0`, @p input may be
665 : * `NULL`. In C++, this also must be *TriviallyCopyable*.
666 : *
667 : * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
668 : */
669 : XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
670 :
671 : /*!
672 : * @brief Returns the calculated hash value from an @ref XXH32_state_t.
673 : *
674 : * @note
675 : * Calling XXH32_digest() will not affect @p statePtr, so you can update,
676 : * digest, and update again.
677 : *
678 : * @param statePtr The state struct to calculate the hash from.
679 : *
680 : * @pre
681 : * @p statePtr must not be `NULL`.
682 : *
683 : * @return The calculated xxHash32 value from that state.
684 : */
685 : XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
686 : #endif /* !XXH_NO_STREAM */
687 :
688 : /******* Canonical representation *******/
689 :
690 : /*
691 : * The default return values from XXH functions are unsigned 32 and 64 bit
692 : * integers.
693 : * This is the simplest and fastest format for further post-processing.
694 : *
695 : * However, this leaves open the question of what is the order on the byte level,
696 : * since little and big endian conventions will store the same number differently.
697 : *
698 : * The canonical representation settles this issue by mandating big-endian
699 : * convention, the same convention as human-readable numbers (large digits first).
700 : *
701 : * When writing hash values to storage, sending them over a network, or printing
702 : * them, it's highly recommended to use the canonical representation to ensure
703 : * portability across a wider range of systems, present and future.
704 : *
705 : * The following functions allow transformation of hash values to and from
706 : * canonical format.
707 : */
708 :
709 : /*!
710 : * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
711 : */
712 : typedef struct {
713 : unsigned char digest[4]; /*!< Hash bytes, big endian */
714 : } XXH32_canonical_t;
715 :
716 : /*!
717 : * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
718 : *
719 : * @param dst The @ref XXH32_canonical_t pointer to be stored to.
720 : * @param hash The @ref XXH32_hash_t to be converted.
721 : *
722 : * @pre
723 : * @p dst must not be `NULL`.
724 : */
725 : XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
726 :
727 : /*!
728 : * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
729 : *
730 : * @param src The @ref XXH32_canonical_t to convert.
731 : *
732 : * @pre
733 : * @p src must not be `NULL`.
734 : *
735 : * @return The converted hash.
736 : */
737 : XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
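/*
 * For illustration, a short sketch of the canonical round-trip: the hash is
 * converted to a fixed big-endian byte layout before being written out, and
 * converted back to a native integer after being read:
 * @code{.c}
 * #include <stdio.h>
 * #include "xxhash.h"
 *
 * void store_hash(FILE* out, XXH32_hash_t hash)
 * {
 *     XXH32_canonical_t canonical;
 *     XXH32_canonicalFromHash(&canonical, hash);   // big endian, portable
 *     fwrite(canonical.digest, 1, sizeof(canonical.digest), out);
 * }
 *
 * XXH32_hash_t load_hash(FILE* in)
 * {
 *     XXH32_canonical_t canonical;
 *     if (fread(canonical.digest, 1, sizeof(canonical.digest), in) != sizeof(canonical.digest))
 *         return 0;  // read error (illustrative handling only)
 *     return XXH32_hashFromCanonical(&canonical);
 * }
 * @endcode
 */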
738 :
739 :
740 : /*! @cond Doxygen ignores this part */
741 : #ifdef __has_attribute
742 : # define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
743 : #else
744 : # define XXH_HAS_ATTRIBUTE(x) 0
745 : #endif
746 : /*! @endcond */
747 :
748 : /*! @cond Doxygen ignores this part */
749 : /*
750 : * C23 __STDC_VERSION__ number hasn't been specified yet. For now
751 : * leave as `201711L` (C17 + 1).
752 : * TODO: Update to the correct value when it's been specified.
753 : */
754 : #define XXH_C23_VN 201711L
755 : /*! @endcond */
756 :
757 : /*! @cond Doxygen ignores this part */
758 : /* C-language Attributes are added in C23. */
759 : #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
760 : # define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
761 : #else
762 : # define XXH_HAS_C_ATTRIBUTE(x) 0
763 : #endif
764 : /*! @endcond */
765 :
766 : /*! @cond Doxygen ignores this part */
767 : #if defined(__cplusplus) && defined(__has_cpp_attribute)
768 : # define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
769 : #else
770 : # define XXH_HAS_CPP_ATTRIBUTE(x) 0
771 : #endif
772 : /*! @endcond */
773 :
774 : /*! @cond Doxygen ignores this part */
775 : /*
776 : * Define the XXH_FALLTHROUGH macro for annotating switch cases with the 'fallthrough'
777 : * attribute introduced in C++17 and C23.
778 : * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
779 : * C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
780 : */
781 : #if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
782 : # define XXH_FALLTHROUGH [[fallthrough]]
783 : #elif XXH_HAS_ATTRIBUTE(__fallthrough__)
784 : # define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
785 : #else
786 : # define XXH_FALLTHROUGH /* fallthrough */
787 : #endif
788 : /*! @endcond */
789 :
790 : /*! @cond Doxygen ignores this part */
791 : /*
792 : * Define XXH_NOESCAPE for annotated pointers in public API.
793 : * https://clang.llvm.org/docs/AttributeReference.html#noescape
794 : * As of writing this, only supported by clang.
795 : */
796 : #if XXH_HAS_ATTRIBUTE(noescape)
797 : # define XXH_NOESCAPE __attribute__((noescape))
798 : #else
799 : # define XXH_NOESCAPE
800 : #endif
801 : /*! @endcond */
802 :
803 :
804 : /*!
805 : * @}
806 : * @ingroup public
807 : * @{
808 : */
809 :
810 : #ifndef XXH_NO_LONG_LONG
811 : /*-**********************************************************************
812 : * 64-bit hash
813 : ************************************************************************/
814 : #if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
815 : /*!
816 : * @brief An unsigned 64-bit integer.
817 : *
818 : * Not necessarily defined to `uint64_t` but functionally equivalent.
819 : */
820 : typedef uint64_t XXH64_hash_t;
821 : #elif !defined (__VMS) \
822 : && (defined (__cplusplus) \
823 : || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
824 : # include <stdint.h>
825 : typedef uint64_t XXH64_hash_t;
826 : #else
827 : # include <limits.h>
828 : # if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
829 : /* LP64 ABI says uint64_t is unsigned long */
830 : typedef unsigned long XXH64_hash_t;
831 : # else
832 : /* the following type must have a width of 64-bit */
833 : typedef unsigned long long XXH64_hash_t;
834 : # endif
835 : #endif
836 :
837 : /*!
838 : * @}
839 : *
840 : * @defgroup XXH64_family XXH64 family
841 : * @ingroup public
842 : * @{
843 : * Contains functions used in the classic 64-bit xxHash algorithm.
844 : *
845 : * @note
846 : * XXH3 provides competitive speed for both 32-bit and 64-bit systems,
847 : * and offers true 64/128 bit hash results.
848 : * It provides better speed for systems with vector processing capabilities.
849 : */
850 :
851 : /*!
852 : * @brief Calculates the 64-bit hash of @p input using xxHash64.
853 : *
854 : * This function usually runs faster on 64-bit systems, but slower on 32-bit
855 : * systems (see benchmark).
856 : *
857 : * @param input The block of data to be hashed, at least @p length bytes in size.
858 : * @param length The length of @p input, in bytes.
859 : * @param seed The 64-bit seed to alter the hash's output predictably.
860 : *
861 : * @pre
862 : * The memory between @p input and @p input + @p length must be valid,
863 : * readable, contiguous memory. However, if @p length is `0`, @p input may be
864 : * `NULL`. In C++, this also must be *TriviallyCopyable*.
865 : *
866 : * @return The calculated 64-bit hash.
867 : *
868 : * @see
869 : * XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
870 : * Direct equivalents for the other variants of xxHash.
871 : * @see
872 : * XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
873 : */
874 : XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
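/*
 * For illustration, a one-shot XXH64 call over a caller-provided buffer,
 * a sketch mirroring the single-shot example at the top of this header:
 * @code{.c}
 * #include "xxhash.h"
 *
 * XXH64_hash_t hash_buffer64(const void* data, size_t size)
 * {
 *     // A NULL pointer is only valid when size == 0.
 *     return XXH64(data, size, 0);
 * }
 * @endcode
 */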
875 :
876 : /******* Streaming *******/
877 : #ifndef XXH_NO_STREAM
878 : /*!
879 : * @brief The opaque state struct for the XXH64 streaming API.
880 : *
881 : * @see XXH64_state_s for details.
882 : */
883 : typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
884 :
885 : /*!
886 : * @brief Allocates an @ref XXH64_state_t.
887 : *
888 : * Must be freed with XXH64_freeState().
889 : * @return An allocated XXH64_state_t on success, `NULL` on failure.
890 : */
891 : XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
892 :
893 : /*!
894 : * @brief Frees an @ref XXH64_state_t.
895 : *
896 : * Must be allocated with XXH64_createState().
897 : * @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState().
898 : * @return XXH_OK.
899 : */
900 : XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
901 :
902 : /*!
903 : * @brief Copies one @ref XXH64_state_t to another.
904 : *
905 : * @param dst_state The state to copy to.
906 : * @param src_state The state to copy from.
907 : * @pre
908 : * @p dst_state and @p src_state must not be `NULL` and must not overlap.
909 : */
910 : XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state);
911 :
912 : /*!
913 : * @brief Resets an @ref XXH64_state_t to begin a new hash.
914 : *
915 : * This function resets and seeds a state. Call it before @ref XXH64_update().
916 : *
917 : * @param statePtr The state struct to reset.
918 : * @param seed The 64-bit seed to alter the hash result predictably.
919 : *
920 : * @pre
921 : * @p statePtr must not be `NULL`.
922 : *
923 : * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
924 : */
925 : XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);
926 :
927 : /*!
928 : * @brief Consumes a block of @p input to an @ref XXH64_state_t.
929 : *
930 : * Call this to incrementally consume blocks of data.
931 : *
932 : * @param statePtr The state struct to update.
933 : * @param input The block of data to be hashed, at least @p length bytes in size.
934 : * @param length The length of @p input, in bytes.
935 : *
936 : * @pre
937 : * @p statePtr must not be `NULL`.
938 : * @pre
939 : * The memory between @p input and @p input + @p length must be valid,
940 : * readable, contiguous memory. However, if @p length is `0`, @p input may be
941 : * `NULL`. In C++, this also must be *TriviallyCopyable*.
942 : *
943 : * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
944 : */
945 : XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
946 :
947 : /*!
948 : * @brief Returns the calculated hash value from an @ref XXH64_state_t.
949 : *
950 : * @note
951 : * Calling XXH64_digest() will not affect @p statePtr, so you can update,
952 : * digest, and update again.
953 : *
954 : * @param statePtr The state struct to calculate the hash from.
955 : *
956 : * @pre
957 : * @p statePtr must not be `NULL`.
958 : *
959 : * @return The calculated xxHash64 value from that state.
960 : */
961 : XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
962 : #endif /* !XXH_NO_STREAM */
963 : /******* Canonical representation *******/
964 :
965 : /*!
966 : * @brief Canonical (big endian) representation of @ref XXH64_hash_t.
967 : */
968 : typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
969 :
970 : /*!
971 : * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t.
972 : *
973 : * @param dst The @ref XXH64_canonical_t pointer to be stored to.
974 : * @param hash The @ref XXH64_hash_t to be converted.
975 : *
976 : * @pre
977 : * @p dst must not be `NULL`.
978 : */
979 : XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);
980 :
981 : /*!
982 : * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t.
983 : *
984 : * @param src The @ref XXH64_canonical_t to convert.
985 : *
986 : * @pre
987 : * @p src must not be `NULL`.
988 : *
989 : * @return The converted hash.
990 : */
991 : XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
992 :
993 : #ifndef XXH_NO_XXH3
994 :
995 : /*!
996 : * @}
997 : * ************************************************************************
998 : * @defgroup XXH3_family XXH3 family
999 : * @ingroup public
1000 : * @{
1001 : *
1002 : * XXH3 is a more recent hash algorithm featuring:
1003 : * - Improved speed for both small and large inputs
1004 : * - True 64-bit and 128-bit outputs
1005 : * - SIMD acceleration
1006 : * - Improved 32-bit viability
1007 : *
1008 : * Speed analysis methodology is explained here:
1009 : *
1010 : * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
1011 : *
1012 : * Compared to XXH64, expect XXH3 to run approximately
1013 : * ~2x faster on large inputs and >3x faster on small ones,
1014 : * though exact differences vary depending on the platform.
1015 : *
1016 : * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
1017 : * but does not require it.
1018 : * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3
1019 : * at competitive speeds, even without vector support. Further details are
1020 : * explained in the implementation.
1021 : *
1022 : * XXH3 has a fast scalar implementation, but it also includes accelerated SIMD
1023 : * implementations for many common platforms:
1024 : * - AVX512
1025 : * - AVX2
1026 : * - SSE2
1027 : * - ARM NEON
1028 : * - WebAssembly SIMD128
1029 : * - POWER8 VSX
1030 : * - s390x ZVector
1031 : * This can be controlled via the @ref XXH_VECTOR macro, but it automatically
1032 : * selects the best version according to predefined macros. For the x86 family, an
1033 : * automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c.
1034 : *
1035 : * XXH3 implementation is portable:
1036 : * it has a generic C90 formulation that can be compiled on any platform,
1037 : * all implementations generate exactly the same hash value on all platforms.
1038 : * Starting from v0.8.0, it's also labelled "stable", meaning that
1039 : * any future version will also generate the same hash value.
1040 : *
1041 : * XXH3 offers 2 variants, _64bits and _128bits.
1042 : *
1043 : * When only 64 bits are needed, prefer invoking the _64bits variant, as it
1044 : * reduces the amount of mixing, resulting in faster speed on small inputs.
1045 : * It's also generally simpler to manipulate a scalar return type than a struct.
1046 : *
1047 : * The API supports one-shot hashing, streaming mode, and custom secrets.
1048 : */
1049 : /*-**********************************************************************
1050 : * XXH3 64-bit variant
1051 : ************************************************************************/
1052 :
1053 : /*!
1054 : * @brief 64-bit unseeded variant of XXH3.
1055 : *
1056 : * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of 0, however
1057 : * it may have slightly better performance due to constant propagation of the
1058 : * defaults.
1059 : *
1060 : * @see
1061 : * XXH32(), XXH64(), XXH3_128bits(): equivalent for the other xxHash algorithms
1062 : * @see
1063 : * XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
1064 : * @see
1065 : * XXH3_64bits_reset(), XXH3_64bits_update(), XXH3_64bits_digest(): Streaming version.
1066 : */
1067 : XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
1068 :
1069 : /*!
1070 : * @brief 64-bit seeded variant of XXH3
1071 : *
1072 : * This variant generates a custom secret on the fly based on default secret
1073 : * altered using the `seed` value.
1074 : *
1075 : * While this operation is decently fast, note that it's not completely free.
1076 : *
1077 : * @note
1078 : * seed == 0 produces the same results as @ref XXH3_64bits().
1079 : *
1080 : * @param input The data to hash
1081 : * @param length The length
1082 : * @param seed The 64-bit seed to alter the state.
1083 : */
1084 : XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
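/*
 * For illustration, a sketch contrasting the unseeded and seeded one-shot calls.
 * Both produce a 64-bit value; a seed of 0 gives the same result as XXH3_64bits().
 * @code{.c}
 * #include "xxhash.h"
 *
 * XXH64_hash_t hash_unseeded(const void* p, size_t n)
 * {
 *     return XXH3_64bits(p, n);                    // fastest default
 * }
 *
 * XXH64_hash_t hash_seeded(const void* p, size_t n, XXH64_hash_t seed)
 * {
 *     return XXH3_64bits_withSeed(p, n, seed);     // seed alters the output predictably
 * }
 * @endcode
 */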
1085 :
1086 : /*!
1087 : * The bare minimum size for a custom secret.
1088 : *
1089 : * @see
1090 : * XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),
1091 : * XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().
1092 : */
1093 : #define XXH3_SECRET_SIZE_MIN 136
1094 :
1095 : /*!
1096 : * @brief 64-bit variant of XXH3 with a custom "secret".
1097 : *
1098 : * It's possible to provide any blob of bytes as a "secret" to generate the hash.
1099 : * This makes it more difficult for an external actor to prepare an intentional collision.
1100 : * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
1101 : * However, the quality of the secret impacts the dispersion of the hash algorithm.
1102 : * Therefore, the secret _must_ look like a bunch of random bytes.
1103 : * Avoid "trivial" or structured data such as repeated sequences or a text document.
1104 : * Whenever in doubt about the "randomness" of the blob of bytes,
1105 : * consider employing "XXH3_generateSecret()" instead (see below).
1106 : * It will generate a proper high entropy secret derived from the blob of bytes.
1107 : * Another advantage of using XXH3_generateSecret() is that
1108 : * it guarantees that all bits within the initial blob of bytes
1109 : * will impact every bit of the output.
1110 : * This is not necessarily the case when using the blob of bytes directly
1111 : * because, when hashing _small_ inputs, only a portion of the secret is employed.
1112 : */
1113 : XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
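/*
 * For illustration, a sketch of hashing with a custom secret. The secret buffer
 * below is a placeholder: in real use it should be filled with high-entropy bytes
 * (for instance derived via XXH3_generateSecret()) and must be at least
 * XXH3_SECRET_SIZE_MIN bytes long.
 * @code{.c}
 * #include "xxhash.h"
 *
 * // Placeholder storage; assumed to be initialized with random-looking bytes
 * // before the first call to hash_with_secret().
 * static unsigned char g_secret[XXH3_SECRET_SIZE_MIN];
 *
 * XXH64_hash_t hash_with_secret(const void* p, size_t n)
 * {
 *     return XXH3_64bits_withSecret(p, n, g_secret, sizeof(g_secret));
 * }
 * @endcode
 */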
1114 :
1115 :
1116 : /******* Streaming *******/
1117 : #ifndef XXH_NO_STREAM
1118 : /*
1119 : * Streaming requires state maintenance.
1120 : * This operation costs memory and CPU.
1121 : * As a consequence, streaming is slower than one-shot hashing.
1122 : * For better performance, prefer one-shot functions whenever applicable.
1123 : */
1124 :
1125 : /*!
1126 : * @brief The state struct for the XXH3 streaming API.
1127 : *
1128 : * @see XXH3_state_s for details.
1129 : */
1130 : typedef struct XXH3_state_s XXH3_state_t;
1131 : XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
1132 : XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
1133 :
1134 : /*!
1135 : * @brief Copies one @ref XXH3_state_t to another.
1136 : *
1137 : * @param dst_state The state to copy to.
1138 : * @param src_state The state to copy from.
1139 : * @pre
1140 : * @p dst_state and @p src_state must not be `NULL` and must not overlap.
1141 : */
1142 : XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);
1143 :
1144 : /*!
1145 : * @brief Resets an @ref XXH3_state_t to begin a new hash.
1146 : *
1147 : * This function resets `statePtr` and generates a secret with default parameters. Call it before @ref XXH3_64bits_update().
1148 : * Digest will be equivalent to `XXH3_64bits()`.
1149 : *
1150 : * @param statePtr The state struct to reset.
1151 : *
1152 : * @pre
1153 : * @p statePtr must not be `NULL`.
1154 : *
1155 : * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
1156 : *
1157 : */
1158 : XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
1159 :
1160 : /*!
1161 : * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
1162 : *
1163 : * This function resets `statePtr` and generates a secret from `seed`. Call it before @ref XXH3_64bits_update().
1164 : * Digest will be equivalent to `XXH3_64bits_withSeed()`.
1165 : *
1166 : * @param statePtr The state struct to reset.
1167 : * @param seed The 64-bit seed to alter the state.
1168 : *
1169 : * @pre
1170 : * @p statePtr must not be `NULL`.
1171 : *
1172 : * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
1173 : *
1174 : */
1175 : XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
1176 :
1177 : /*!
1178 : * XXH3_64bits_reset_withSecret():
1179 : * `secret` is referenced; it _must outlive_ the hash streaming session.
1180 : * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
1181 : * and the quality of produced hash values depends on secret's entropy
1182 : * (secret's content should look like a bunch of random bytes).
1183 : * When in doubt about the randomness of a candidate `secret`,
1184 : * consider employing `XXH3_generateSecret()` instead (see below).
1185 : */
1186 : XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
1187 :
1188 : /*!
1189 : * @brief Consumes a block of @p input to an @ref XXH3_state_t.
1190 : *
1191 : * Call this to incrementally consume blocks of data.
1192 : *
1193 : * @param statePtr The state struct to update.
1194 : * @param input The block of data to be hashed, at least @p length bytes in size.
1195 : * @param length The length of @p input, in bytes.
1196 : *
1197 : * @pre
1198 : * @p statePtr must not be `NULL`.
1199 : * @pre
1200 : * The memory between @p input and @p input + @p length must be valid,
1201 : * readable, contiguous memory. However, if @p length is `0`, @p input may be
1202 : * `NULL`. In C++, this also must be *TriviallyCopyable*.
1203 : *
1204 : * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
1205 : */
1206 : XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
1207 :
1208 : /*!
1209 : * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t.
1210 : *
1211 : * @note
1212 : * Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,
1213 : * digest, and update again.
1214 : *
1215 : * @param statePtr The state struct to calculate the hash from.
1216 : *
1217 : * @pre
1218 : * @p statePtr must not be `NULL`.
1219 : *
1220 : * @return The calculated XXH3 64-bit hash value from that state.
1221 : */
1222 : XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
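/*
 * For illustration, a sketch of a seeded streaming session with the XXH3 64-bit
 * variant, equivalent to a single XXH3_64bits_withSeed() call over the whole input:
 * @code{.c}
 * #include "xxhash.h"
 *
 * XXH64_hash_t hash_chunks_with_seed(const void* const* chunks, const size_t* sizes,
 *                                    size_t nbChunks, XXH64_hash_t seed)
 * {
 *     XXH64_hash_t result = 0;
 *     XXH3_state_t* const state = XXH3_createState();
 *     if (state == NULL) return 0;                    // allocation failure
 *     if (XXH3_64bits_reset_withSeed(state, seed) == XXH_OK) {
 *         size_t i;
 *         for (i = 0; i < nbChunks; i++) {
 *             if (XXH3_64bits_update(state, chunks[i], sizes[i]) != XXH_OK) break;
 *         }
 *         if (i == nbChunks) result = XXH3_64bits_digest(state);
 *     }
 *     XXH3_freeState(state);
 *     return result;
 * }
 * @endcode
 */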
1223 : #endif /* !XXH_NO_STREAM */
1224 :
1225 : /* note : canonical representation of XXH3 is the same as XXH64
1226 : * since they both produce XXH64_hash_t values */
1227 :
1228 :
1229 : /*-**********************************************************************
1230 : * XXH3 128-bit variant
1231 : ************************************************************************/
1232 :
1233 : /*!
1234 : * @brief The return value from 128-bit hashes.
1235 : *
1236 : * Stored in little endian order, although the fields themselves are in native
1237 : * endianness.
1238 : */
1239 : typedef struct {
1240 : XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */
1241 : XXH64_hash_t high64; /*!< `value >> 64` */
1242 : } XXH128_hash_t;
1243 :
1244 : /*!
1245 : * @brief Unseeded 128-bit variant of XXH3
1246 : *
1247 : * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
1248 : * for shorter inputs.
1249 : *
1250 : * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of 0, however
1251 : * it may have slightly better performance due to constant propagation of the
1252 : * defaults.
1253 : *
1254 : * @see
1255 : * XXH32(), XXH64(), XXH3_64bits(): equivalent for the other xxHash algorithms
1256 : * @see
1257 : * XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
1258 : * @see
1259 : * XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest(): Streaming version.
1260 : */
1261 : XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
1262 : /*! @brief Seeded 128-bit variant of XXH3. @see XXH3_64bits_withSeed(). */
1263 : XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
1264 : /*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_withSecret(). */
1265 : XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
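/*
 * For illustration, a sketch of a one-shot 128-bit hash and how the two 64-bit
 * halves of the result are accessed:
 * @code{.c}
 * #include <stdio.h>
 * #include "xxhash.h"
 *
 * void print_hash128(const void* p, size_t n)
 * {
 *     XXH128_hash_t const h = XXH3_128bits(p, n);
 *     // high64 holds the upper 64 bits, low64 the lower 64 bits.
 *     printf("%016llx%016llx\n",
 *            (unsigned long long)h.high64, (unsigned long long)h.low64);
 * }
 * @endcode
 */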
1266 :
1267 : /******* Streaming *******/
1268 : #ifndef XXH_NO_STREAM
1269 : /*
1270 : * Streaming requires state maintenance.
1271 : * This operation costs memory and CPU.
1272 : * As a consequence, streaming is slower than one-shot hashing.
1273 : * For better performance, prefer one-shot functions whenever applicable.
1274 : *
1275 : * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
1276 : * Use already declared XXH3_createState() and XXH3_freeState().
1277 : *
1278 : * All reset and streaming functions have same meaning as their 64-bit counterpart.
1279 : */
1280 :
1281 : /*!
1282 : * @brief Resets an @ref XXH3_state_t to begin a new hash.
1283 : *
1284 : * This function resets `statePtr` and generates a secret with default parameters. Call it before @ref XXH3_128bits_update().
1285 : * Digest will be equivalent to `XXH3_128bits()`.
1286 : *
1287 : * @param statePtr The state struct to reset.
1288 : *
1289 : * @pre
1290 : * @p statePtr must not be `NULL`.
1291 : *
1292 : * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
1293 : *
1294 : */
1295 : XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
1296 :
1297 : /*!
1298 : * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
1299 : *
1300 : * This function resets `statePtr` and generates a secret from `seed`. Call it before @ref XXH3_128bits_update().
1301 : * Digest will be equivalent to `XXH3_128bits_withSeed()`.
1302 : *
1303 : * @param statePtr The state struct to reset.
1304 : * @param seed The 64-bit seed to alter the state.
1305 : *
1306 : * @pre
1307 : * @p statePtr must not be `NULL`.
1308 : *
1309 : * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
1310 : *
1311 : */
1312 : XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
1313 : /*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_reset_withSecret(). */
1314 : XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
1315 :
1316 : /*!
1317 : * @brief Consumes a block of @p input to an @ref XXH3_state_t.
1318 : *
1319 : * Call this to incrementally consume blocks of data.
1320 : *
1321 : * @param statePtr The state struct to update.
1322 : * @param input The block of data to be hashed, at least @p length bytes in size.
1323 : * @param length The length of @p input, in bytes.
1324 : *
1325 : * @pre
1326 : * @p statePtr must not be `NULL`.
1327 : * @pre
1328 : * The memory between @p input and @p input + @p length must be valid,
1329 : * readable, contiguous memory. However, if @p length is `0`, @p input may be
1330 : * `NULL`. In C++, this also must be *TriviallyCopyable*.
1331 : *
1332 : * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
1333 : */
1334 : XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
1335 :
1336 : /*!
1337 : * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.
1338 : *
1339 : * @note
1340 : * Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
1341 : * digest, and update again.
1342 : *
1343 : * @param statePtr The state struct to calculate the hash from.
1344 : *
1345 : * @pre
1346 : * @p statePtr must not be `NULL`.
1347 : *
1348 : * @return The calculated XXH3 128-bit hash value from that state.
1349 : */
1350 : XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
1351 : #endif /* !XXH_NO_STREAM */
1352 :
1353 : /* The following helper functions make it possible to compare XXH128_hash_t values.
1354 : * Since XXH128_hash_t is a structure, this capability is not offered by the language.
1355 : * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
1356 :
1357 : /*!
1358 : * XXH128_isEqual():
1359 : * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
1360 : */
1361 : XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
1362 :
1363 : /*!
1364 : * @brief Compares two @ref XXH128_hash_t values.
1365 : * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
1366 : *
1367 : * @return: >0 if *h128_1 > *h128_2
1368 : * =0 if *h128_1 == *h128_2
1369 : * <0 if *h128_1 < *h128_2
1370 : */
1371 : XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
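/*
 * For illustration, a sketch showing that XXH128_cmp() has the comparator
 * signature expected by qsort(), so an array of hashes can be sorted directly:
 * @code{.c}
 * #include <stdlib.h>
 * #include "xxhash.h"
 *
 * void sort_hashes(XXH128_hash_t* hashes, size_t count)
 * {
 *     qsort(hashes, count, sizeof(XXH128_hash_t), XXH128_cmp);
 * }
 * @endcode
 */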
1372 :
1373 :
1374 : /******* Canonical representation *******/
1375 : typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
1376 :
1377 :
1378 : /*!
1379 : * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.
1380 : *
1381 : * @param dst The @ref XXH128_canonical_t pointer to be stored to.
1382 : * @param hash The @ref XXH128_hash_t to be converted.
1383 : *
1384 : * @pre
1385 : * @p dst must not be `NULL`.
1386 : */
1387 : XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
1388 :
1389 : /*!
1390 : * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t.
1391 : *
1392 : * @param src The @ref XXH128_canonical_t to convert.
1393 : *
1394 : * @pre
1395 : * @p src must not be `NULL`.
1396 : *
1397 : * @return The converted hash.
1398 : */
1399 : XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
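     :
     : /*!
     :  * As a sketch, a round trip through the canonical form, e.g. to store a hash
     :  * in a file or send it over a network (helper names are illustrative only):
     :  * @code{.c}
     :  *    #include <string.h>
     :  *    #include "xxhash.h"
     :  *    // Serialize `hash` into `dst`, which must hold sizeof(XXH128_canonical_t) bytes.
     :  *    void writeHash128(unsigned char* dst, XXH128_hash_t hash)
     :  *    {
     :  *        XXH128_canonical_t canonical;
     :  *        XXH128_canonicalFromHash(&canonical, hash);
     :  *        memcpy(dst, canonical.digest, sizeof(canonical.digest));
     :  *    }
     :  *    // Read a hash back from its big endian canonical bytes.
     :  *    XXH128_hash_t readHash128(const unsigned char* src)
     :  *    {
     :  *        XXH128_canonical_t canonical;
     :  *        memcpy(canonical.digest, src, sizeof(canonical.digest));
     :  *        return XXH128_hashFromCanonical(&canonical);
     :  *    }
     :  * @endcode
     :  */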
1400 :
1401 :
1402 : #endif /* !XXH_NO_XXH3 */
1403 : #endif /* XXH_NO_LONG_LONG */
1404 :
1405 : /*!
1406 : * @}
1407 : */
1408 : #endif /* XXHASH_H_5627135585666179 */
1409 :
1410 :
1411 :
1412 : #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
1413 : #define XXHASH_H_STATIC_13879238742
1414 : /* ****************************************************************************
1415 : * This section contains declarations which are not guaranteed to remain stable.
1416 : * They may change in future versions, becoming incompatible with a different
1417 : * version of the library.
1418 : * These declarations should only be used with static linking.
1419 : * Never use them in association with dynamic linking!
1420 : ***************************************************************************** */
1421 :
1422 : /*
1423 : * These definitions are only present to allow static allocation
1424 : * of XXH states, on stack or in a struct, for example.
1425 : * Never **ever** access their members directly.
1426 : */
1427 :
1428 : /*!
1429 : * @internal
1430 : * @brief Structure for XXH32 streaming API.
1431 : *
1432 : * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
1433 : * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
1434 : * an opaque type. This allows fields to safely be changed.
1435 : *
1436 : * Typedef'd to @ref XXH32_state_t.
1437 : * Do not access the members of this struct directly.
1438 : * @see XXH64_state_s, XXH3_state_s
1439 : */
1440 : struct XXH32_state_s {
1441 : XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
1442 :    XXH32_hash_t large_len;    /*!< Whether the input length is >= 16 (handles @ref total_len_32 overflow) */
1443 : XXH32_hash_t v[4]; /*!< Accumulator lanes */
1444 : XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
1445 : XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */
1446 : XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */
1447 : }; /* typedef'd to XXH32_state_t */
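     :
     : /*!
     :  * As a sketch of what exposing this definition enables: a state allocated on
     :  * the stack and used with the XXH32 streaming API, with no heap allocation
     :  * (the helper name is illustrative only):
     :  * @code{.c}
     :  *    #define XXH_STATIC_LINKING_ONLY   // make XXH32_state_t a complete type
     :  *    #include "xxhash.h"
     :  *    XXH32_hash_t hashTwoParts(const void* p1, size_t len1,
     :  *                              const void* p2, size_t len2)
     :  *    {
     :  *        XXH32_state_t state;              // on the stack, no XXH32_createState()
     :  *        (void)XXH32_reset(&state, 0);     // seed 0
     :  *        (void)XXH32_update(&state, p1, len1);
     :  *        (void)XXH32_update(&state, p2, len2);
     :  *        return XXH32_digest(&state);
     :  *    }
     :  * @endcode
     :  */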
1448 :
1449 :
1450 : #ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */
1451 :
1452 : /*!
1453 : * @internal
1454 : * @brief Structure for XXH64 streaming API.
1455 : *
1456 : * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
1457 : * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
1458 : * an opaque type. This allows fields to safely be changed.
1459 : *
1460 : * Typedef'd to @ref XXH64_state_t.
1461 : * Do not access the members of this struct directly.
1462 : * @see XXH32_state_s, XXH3_state_s
1463 : */
1464 : struct XXH64_state_s {
1465 : XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */
1466 : XXH64_hash_t v[4]; /*!< Accumulator lanes */
1467 : XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
1468 : XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */
1469 :    XXH32_hash_t reserved32;   /*!< Reserved field, needed for padding anyway. */
1470 : XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */
1471 : }; /* typedef'd to XXH64_state_t */
1472 :
1473 : #ifndef XXH_NO_XXH3
1474 :
1475 : #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
1476 : # include <stdalign.h>
1477 : # define XXH_ALIGN(n) alignas(n)
1478 : #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
1479 : /* In C++ alignas() is a keyword */
1480 : # define XXH_ALIGN(n) alignas(n)
1481 : #elif defined(__GNUC__)
1482 : # define XXH_ALIGN(n) __attribute__ ((aligned(n)))
1483 : #elif defined(_MSC_VER)
1484 : # define XXH_ALIGN(n) __declspec(align(n))
1485 : #else
1486 : # define XXH_ALIGN(n) /* disabled */
1487 : #endif
1488 :
1489 : /* Old GCC versions only accept the attribute after the type in structures. */
1490 : #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \
1491 : && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
1492 : && defined(__GNUC__)
1493 : # define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
1494 : #else
1495 : # define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
1496 : #endif
1497 :
1498 : /*!
1499 : * @brief The size of the internal XXH3 buffer.
1500 : *
1501 : * This is the optimal update size for incremental hashing.
1502 : *
1503 : * @see XXH3_64bits_update(), XXH3_128bits_update().
1504 : */
1505 : #define XXH3_INTERNALBUFFER_SIZE 256
1506 :
1507 : /*!
1508 : * @internal
1509 : * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
1510 : *
1511 : * This is the size used in @ref XXH3_kSecret and the seeded functions.
1512 : *
1513 : * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
1514 : */
1515 : #define XXH3_SECRET_DEFAULT_SIZE 192
1516 :
1517 : /*!
1518 : * @internal
1519 : * @brief Structure for XXH3 streaming API.
1520 : *
1521 : * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
1522 : * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
1523 : * Otherwise it is an opaque type.
1524 : * Never use this definition in combination with a dynamic library.
1525 : * This allows fields to safely be changed in the future.
1526 : *
1527 : * @note ** This structure has a strict alignment requirement of 64 bytes!! **
1528 : * Do not allocate this with `malloc()` or `new`,
1529 : * it will not be sufficiently aligned.
1530 : * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
1531 : *
1532 : * Typedef'd to @ref XXH3_state_t.
1533 : * Never access the members of this struct directly.
1534 : *
1535 : * @see XXH3_INITSTATE() for stack initialization.
1536 : * @see XXH3_createState(), XXH3_freeState().
1537 : * @see XXH32_state_s, XXH64_state_s
1538 : */
1539 : struct XXH3_state_s {
1540 : XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
1541 : /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
1542 : XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
1543 : /*!< Used to store a custom secret generated from a seed. */
1544 : XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
1545 : /*!< The internal buffer. @see XXH32_state_s::mem32 */
1546 : XXH32_hash_t bufferedSize;
1547 : /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
1548 : XXH32_hash_t useSeed;
1549 : /*!< Reserved field. Needed for padding on 64-bit. */
1550 : size_t nbStripesSoFar;
1551 :        /*!< Number of stripes processed. */
1552 : XXH64_hash_t totalLen;
1553 : /*!< Total length hashed. 64-bit even on 32-bit targets. */
1554 : size_t nbStripesPerBlock;
1555 : /*!< Number of stripes per block. */
1556 : size_t secretLimit;
1557 : /*!< Size of @ref customSecret or @ref extSecret */
1558 : XXH64_hash_t seed;
1559 :        /*!< Seed for _withSeed variants. Must be zero otherwise. @see XXH3_INITSTATE() */
1560 : XXH64_hash_t reserved64;
1561 : /*!< Reserved field. */
1562 : const unsigned char* extSecret;
1563 : /*!< Reference to an external secret for the _withSecret variants, NULL
1564 : * for other variants. */
1565 : /* note: there may be some padding at the end due to alignment on 64 bytes */
1566 : }; /* typedef'd to XXH3_state_t */
1567 :
1568 : #undef XXH_ALIGN_MEMBER
1569 :
1570 : /*!
1571 : * @brief Initializes a stack-allocated `XXH3_state_s`.
1572 : *
1573 : * When an @ref XXH3_state_t structure is simply declared on the stack,
1574 : * it should be initialized with XXH3_INITSTATE() or a memset()
1575 : * before its first reset with XXH3_NNbits_reset_withSeed().
1576 : * This init can be omitted if the first reset uses the default or _withSecret mode.
1577 : * It is also unnecessary when the state is created with XXH3_createState().
1578 : * Note that this doesn't prepare the state for a streaming operation:
1579 : * it is still necessary to call XXH3_NNbits_reset*() afterwards.
1580 : */
1581 : #define XXH3_INITSTATE(XXH3_state_ptr) \
1582 : do { \
1583 : XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
1584 : tmp_xxh3_state_ptr->seed = 0; \
1585 : tmp_xxh3_state_ptr->extSecret = NULL; \
1586 : } while(0)
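     :
     : /*!
     :  * A minimal sketch of the stack-allocation path described above, for a seeded
     :  * streaming session (the helper name is illustrative only):
     :  * @code{.c}
     :  *    #define XXH_STATIC_LINKING_ONLY   // make XXH3_state_t a complete type
     :  *    #include "xxhash.h"
     :  *    XXH64_hash_t hashWithSeed(const void* data, size_t len, XXH64_hash_t seed)
     :  *    {
     :  *        XXH3_state_t state;                // on the stack; the XXH_ALIGN_MEMBER
     :  *                                           // declarations provide the 64-byte alignment
     :  *        XXH3_INITSTATE(&state);            // required before _reset_withSeed()
     :  *        (void)XXH3_64bits_reset_withSeed(&state, seed);
     :  *        (void)XXH3_64bits_update(&state, data, len);
     :  *        return XXH3_64bits_digest(&state);
     :  *    }
     :  * @endcode
     :  */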
1587 :
1588 :
1589 : /*!
1590 : * simple alias to pre-selected XXH3_128bits variant
1591 : */
1592 : XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
1593 :
1594 :
1595 : /* === Experimental API === */
1596 : /* Symbols defined below must be considered tied to a specific library version. */
1597 :
1598 : /*!
1599 : * XXH3_generateSecret():
1600 : *
1601 : * Derive a high-entropy secret from any user-defined content, named customSeed.
1602 : * The generated secret can be used in combination with `*_withSecret()` functions.
1603 : * The `_withSecret()` variants are useful to provide a higher level of protection
1604 : * than a 64-bit seed, as it becomes much more difficult for an external actor to
1605 : * guess how to impact the calculation logic.
1606 : *
1607 : * The function accepts as input a custom seed of any length and any content,
1608 : * and derives from it a high-entropy secret of length @p secretSize into an
1609 : * already allocated buffer @p secretBuffer.
1610 : *
1611 : * The generated secret can then be used with any `*_withSecret()` variant.
1612 : * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
1613 : * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
1614 : * are part of this list. They all accept a `secret` parameter
1615 : * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
1616 : * _and_ feature very high entropy (consist of random-looking bytes).
1617 : * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
1618 : * be employed to ensure proper quality.
1619 : *
1620 : * @p customSeed can be anything. It can have any size, even small ones,
1621 : * and its content can be anything, even "poor entropy" sources such as a bunch
1622 : * of zeroes. The resulting `secret` will nonetheless provide all required qualities.
1623 : *
1624 : * @pre
1625 : * - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
1626 : * - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
1627 : *
1628 : * Example code:
1629 : * @code{.c}
1630 : * #include <stdio.h>
1631 : * #include <stdlib.h>
1632 : * #include <string.h>
1633 : * #define XXH_STATIC_LINKING_ONLY // expose unstable API
1634 : * #include "xxhash.h"
1635 : * // Hashes argv[2] using the entropy from argv[1].
1636 : * int main(int argc, char* argv[])
1637 : * {
1638 : * char secret[XXH3_SECRET_SIZE_MIN];
1639 : * if (argc != 3) { return 1; }
1640 : * XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
1641 : * XXH64_hash_t h = XXH3_64bits_withSecret(
1642 : * argv[2], strlen(argv[2]),
1643 : * secret, sizeof(secret)
1644 : * );
1645 : * printf("%016llx\n", (unsigned long long) h);
1646 : * }
1647 : * @endcode
1648 : */
1649 : XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
1650 :
1651 : /*!
1652 : * @brief Generate the same secret as the _withSeed() variants.
1653 : *
1654 : * The generated secret can be used in combination with
1655 : * `*_withSecret()` and `_withSecretandSeed()` variants.
1656 : *
1657 : * Example C++ `std::string` hash class:
1658 : * @code{.cpp}
1659 : * #include <string>
1660 : * #define XXH_STATIC_LINKING_ONLY // expose unstable API
1661 : * #include "xxhash.h"
1662 : * // Slow, seeds each time
1663 : * class HashSlow {
1664 : * XXH64_hash_t seed;
1665 : * public:
1666 : * HashSlow(XXH64_hash_t s) : seed{s} {}
1667 : * size_t operator()(const std::string& x) const {
1668 : * return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
1669 : * }
1670 : * };
1671 : * // Fast, caches the seeded secret for future uses.
1672 : * class HashFast {
1673 : * unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
1674 : * public:
1675 : * HashFast(XXH64_hash_t s) {
1676 : * XXH3_generateSecret_fromSeed(secret, s);
1677 : * }
1678 : * size_t operator()(const std::string& x) const {
1679 : * return size_t{
1680 : * XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
1681 : * };
1682 : * }
1683 : * };
1684 : * @endcode
1685 : * @param secretBuffer A writable buffer of @ref XXH3_SECRET_DEFAULT_SIZE bytes
1686 : * @param seed The seed to seed the state.
1687 : */
1688 : XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
1689 :
1690 : /*!
1691 : * These variants generate hash values using either
1692 : * @p seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
1693 : * or @p secret for "large" keys (>= XXH3_MIDSIZE_MAX).
1694 : *
1695 : * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
1696 : * `_withSeed()` has to generate the secret on the fly for "large" keys.
1697 : * It's fast, but the cost can be perceptible for "not so large" keys (< 1 KB).
1698 : * `_withSecret()` has to generate the masks on the fly for "small" keys,
1699 : * which requires more instructions than the _withSeed() variants.
1700 : * Therefore, the _withSecretandSeed() variants combine the best of both worlds.
1701 : *
1702 : * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
1703 : * this variant produces *exactly* the same results as `_withSeed()` variant,
1704 : * hence offering only a pure speed benefit on "large" input,
1705 : * by skipping the need to regenerate the secret for every large input.
1706 : *
1707 : * Another usage scenario is to hash the secret to a 64-bit hash value,
1708 : * for example with XXH3_64bits(), which then becomes the seed,
1709 : * and then employ both the seed and the secret in _withSecretandSeed().
1710 : * On top of speed, an added benefit is that each bit in the secret
1711 : * has a 50% chance to flip each bit in the output, via its impact on the seed.
1712 : *
1713 : * This is not guaranteed when using the secret directly in "small data" scenarios,
1714 : * because only portions of the secret are employed for small data.
1715 : */
1716 : XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
1717 : XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
1718 : XXH_NOESCAPE const void* secret, size_t secretSize,
1719 : XXH64_hash_t seed);
1720 : /*! @copydoc XXH3_64bits_withSecretandSeed() */
1721 : XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
1722 : XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
1723 : XXH_NOESCAPE const void* secret, size_t secretSize,
1724 : XXH64_hash_t seed64);
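     :
     : /*!
     :  * A minimal sketch of the caching scenario described above: derive the secret
     :  * from a seed once, then pass both to every hash (the global and helper names
     :  * are illustrative only):
     :  * @code{.c}
     :  *    #define XXH_STATIC_LINKING_ONLY   // expose unstable API
     :  *    #include "xxhash.h"
     :  *    static unsigned char g_secret[XXH3_SECRET_DEFAULT_SIZE];
     :  *    static XXH64_hash_t  g_seed;
     :  *    void initHashing(XXH64_hash_t seed)
     :  *    {
     :  *        g_seed = seed;
     :  *        XXH3_generateSecret_fromSeed(g_secret, seed);   // done once
     :  *    }
     :  *    XXH64_hash_t hashItem(const void* data, size_t len)
     :  *    {
     :  *        // "Short" inputs use g_seed, "large" ones use g_secret: same results
     :  *        // as _withSeed(), without regenerating the secret for each large input.
     :  *        return XXH3_64bits_withSecretandSeed(data, len,
     :  *                                             g_secret, sizeof(g_secret), g_seed);
     :  *    }
     :  * @endcode
     :  */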
1725 : #ifndef XXH_NO_STREAM
1726 : /*! @copydoc XXH3_64bits_withSecretandSeed() */
1727 : XXH_PUBLIC_API XXH_errorcode
1728 : XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
1729 : XXH_NOESCAPE const void* secret, size_t secretSize,
1730 : XXH64_hash_t seed64);
1731 : /*! @copydoc XXH3_64bits_withSecretandSeed() */
1732 : XXH_PUBLIC_API XXH_errorcode
1733 : XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
1734 : XXH_NOESCAPE const void* secret, size_t secretSize,
1735 : XXH64_hash_t seed64);
1736 : #endif /* !XXH_NO_STREAM */
1737 :
1738 : #endif /* !XXH_NO_XXH3 */
1739 : #endif /* XXH_NO_LONG_LONG */
1740 : #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
1741 : # define XXH_IMPLEMENTATION
1742 : #endif
1743 :
1744 : #endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
1745 :
1746 :
1747 : /* ======================================================================== */
1748 : /* ======================================================================== */
1749 : /* ======================================================================== */
1750 :
1751 :
1752 : /*-**********************************************************************
1753 : * xxHash implementation
1754 : *-**********************************************************************
1755 : * xxHash's implementation used to be hosted inside xxhash.c.
1756 : *
1757 : * However, inlining requires implementation to be visible to the compiler,
1758 : * hence be included alongside the header.
1759 : * Previously, implementation was hosted inside xxhash.c,
1760 : * which was then #included when inlining was activated.
1761 : * This construction created issues with a few build and install systems,
1762 : * as it required xxhash.c to be stored in /include directory.
1763 : *
1764 : * xxHash implementation is now directly integrated within xxhash.h.
1765 : * As a consequence, xxhash.c is no longer needed in /include.
1766 : *
1767 : * xxhash.c is still available and is still useful.
1768 : * In a "normal" setup, when xxhash is not inlined,
1769 : * xxhash.h only exposes the prototypes and public symbols,
1770 : * while xxhash.c can be built into an object file xxhash.o
1771 : * which can then be linked into the final binary.
1772 : ************************************************************************/
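     :
     : /*
     :  * For illustration, the two setups above in sketch form:
     :  * - Normal setup: `#include "xxhash.h"` anywhere, compile xxhash.c separately
     :  *   (e.g. `cc -c xxhash.c`) and link the resulting xxhash.o into the binary.
     :  * - Inlined setup: no xxhash.c is needed; define the macro before the include
     :  *   in the translation unit(s) that should carry the implementation:
     :  *       #define XXH_INLINE_ALL
     :  *       #include "xxhash.h"
     :  */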
1773 :
1774 : #if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
1775 : || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
1776 : # define XXH_IMPLEM_13a8737387
1777 :
1778 : /* *************************************
1779 : * Tuning parameters
1780 : ***************************************/
1781 :
1782 : /*!
1783 : * @defgroup tuning Tuning parameters
1784 : * @{
1785 : *
1786 : * Various macros to control xxHash's behavior.
1787 : */
1788 : #ifdef XXH_DOXYGEN
1789 : /*!
1790 : * @brief Define this to disable 64-bit code.
1791 : *
1792 : * Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
1793 : */
1794 : # define XXH_NO_LONG_LONG
1795 : # undef XXH_NO_LONG_LONG /* don't actually */
1796 : /*!
1797 : * @brief Controls how unaligned memory is accessed.
1798 : *
1799 : * By default, access to unaligned memory is controlled by `memcpy()`, which is
1800 : * safe and portable.
1801 : *
1802 : * Unfortunately, on some target/compiler combinations, the generated assembly
1803 : * is sub-optimal.
1804 : *
1805 : * The switch below allows selection of a different access method
1806 : * in the search for improved performance.
1807 : *
1808 : * @par Possible options:
1809 : *
1810 : * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
1811 : * @par
1812 : * Use `memcpy()`. Safe and portable. Note that most modern compilers will
1813 : * eliminate the function call and treat it as an unaligned access.
1814 : *
1815 : * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
1816 : * @par
1817 : * Depends on compiler extensions and is therefore not portable.
1818 : * This method is safe _if_ your compiler supports it,
1819 : * and *generally* as fast or faster than `memcpy`.
1820 : *
1821 : * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast
1822 : * @par
1823 : * Casts directly and dereferences. This method doesn't depend on the
1824 : * compiler, but it violates the C standard as it directly dereferences an
1825 : * unaligned pointer. It can generate buggy code on targets which do not
1826 : * support unaligned memory accesses, but in some circumstances, it's the
1827 : * only known way to get the most performance.
1828 : *
1829 : * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift
1830 : * @par
1831 : * Also portable. This can generate the best code on old compilers which don't
1832 : * inline small `memcpy()` calls, and it might also be faster on big-endian
1833 : * systems which lack a native byteswap instruction. However, some compilers
1834 : * will emit literal byteshifts even if the target supports unaligned access.
1835 : *
1836 : *
1837 : * @warning
1838 : * Methods 1 and 2 rely on implementation-defined behavior. Use these with
1839 : * care, as what works on one compiler/platform/optimization level may cause
1840 : * another to read garbage data or even crash.
1841 : *
1842 : * See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
1843 : *
1844 : * Prefer these methods in priority order (0 > 3 > 1 > 2)
1845 : */
1846 : # define XXH_FORCE_MEMORY_ACCESS 0
1847 :
1848 : /*!
1849 : * @def XXH_SIZE_OPT
1850 : * @brief Controls how much xxHash optimizes for size.
1851 : *
1852 : * xxHash, when compiled, tends to result in a rather large binary size. This
1853 : * is mostly due to heavy usage of forced inlining and constant folding of the
1854 : * @ref XXH3_family to increase performance.
1855 : *
1856 : * However, some developers prefer size over speed. This option can
1857 : * significantly reduce the size of the generated code. When using the `-Os`
1858 : * or `-Oz` options on GCC or Clang, this is defined to 1 by default,
1859 : * otherwise it is defined to 0.
1860 : *
1861 : * Most of these size optimizations can be controlled manually.
1862 : *
1863 : * This is a number from 0-2.
1864 : * - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
1865 : * comes first.
1866 : * - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
1867 : * conservative and disables hacks that increase code size. It implies the
1868 : * options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
1869 : * and @ref XXH3_NEON_LANES == 8 if they are not already defined.
1870 : * - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
1871 : * Performance may cry. For example, the single shot functions just use the
1872 : * streaming API.
1873 : */
1874 : # define XXH_SIZE_OPT 0
1875 :
1876 : /*!
1877 : * @def XXH_FORCE_ALIGN_CHECK
1878 : * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
1879 : * and XXH64() only).
1880 : *
1881 : * This is an important performance trick for architectures without decent
1882 : * unaligned memory access performance.
1883 : *
1884 : * It checks for input alignment, and when conditions are met, uses a "fast
1885 : * path" employing direct 32-bit/64-bit reads, resulting in _dramatically
1886 : * faster_ read speed.
1887 : *
1888 : * The check costs one initial branch per hash, which is generally negligible,
1889 : * but not zero.
1890 : *
1891 : * Moreover, it's not useful to generate an additional code path if memory
1892 : * access uses the same instruction for both aligned and unaligned
1893 : * addresses (e.g. x86 and aarch64).
1894 : *
1895 : * In these cases, the alignment check can be removed by setting this macro to 0.
1896 : * Then the code will always use unaligned memory access.
1897 : * The alignment check is automatically disabled on x86, x64, ARM64, and some ARM chips,
1898 : * which are platforms known to offer good unaligned memory access performance.
1899 : *
1900 : * It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
1901 : *
1902 : * This option does not affect XXH3 (only XXH32 and XXH64).
1903 : */
1904 : # define XXH_FORCE_ALIGN_CHECK 0
1905 :
1906 : /*!
1907 : * @def XXH_NO_INLINE_HINTS
1908 : * @brief When non-zero, sets all functions to `static`.
1909 : *
1910 : * By default, xxHash tries to force the compiler to inline almost all internal
1911 : * functions.
1912 : *
1913 : * This can usually improve performance due to reduced jumping and improved
1914 : * constant folding, but significantly increases the size of the binary which
1915 : * might not be favorable.
1916 : *
1917 : * Additionally, sometimes the forced inlining can be detrimental to performance,
1918 : * depending on the architecture.
1919 : *
1920 : * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
1921 : * compiler full control on whether to inline or not.
1922 : *
1923 : * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
1924 : * @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
1925 : */
1926 : # define XXH_NO_INLINE_HINTS 0
1927 :
1928 : /*!
1929 : * @def XXH3_INLINE_SECRET
1930 : * @brief Determines whether to inline the XXH3 withSecret code.
1931 : *
1932 : * When the secret size is known, the compiler can improve the performance
1933 : * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret().
1934 : *
1935 : * However, if the secret size is not known, it doesn't have any benefit. This
1936 : * happens when xxHash is compiled into a global symbol. Therefore, if
1937 : * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0.
1938 : *
1939 : * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers
1940 : * that are *sometimes* force inline on -Og, and it is impossible to automatically
1941 : * detect this optimization level.
1942 : */
1943 : # define XXH3_INLINE_SECRET 0
1944 :
1945 : /*!
1946 : * @def XXH32_ENDJMP
1947 : * @brief Whether to use a jump for `XXH32_finalize`.
1948 : *
1949 : * For performance, `XXH32_finalize` uses multiple branches in the finalizer.
1950 : * This is generally the faster option,
1951 : * but depending on the exact architecture, a jmp may be preferable.
1952 : *
1953 : * This setting is only likely to make a difference for very small inputs.
1954 : */
1955 : # define XXH32_ENDJMP 0
1956 :
1957 : /*!
1958 : * @internal
1959 : * @brief Redefines old internal names.
1960 : *
1961 : * For compatibility with code that uses xxHash's internals before the names
1962 : * were changed to improve namespacing. There is no other reason to use this.
1963 : */
1964 : # define XXH_OLD_NAMES
1965 : # undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
1966 :
1967 : /*!
1968 : * @def XXH_NO_STREAM
1969 : * @brief Disables the streaming API.
1970 : *
1971 : * When xxHash is not inlined and the streaming functions are not used, disabling
1972 : * them can reduce code size significantly, especially with
1973 : * the @ref XXH3_family, which tends to make constant-folded copies of itself.
1974 : */
1975 : # define XXH_NO_STREAM
1976 : # undef XXH_NO_STREAM /* don't actually */
1977 : #endif /* XXH_DOXYGEN */
1978 : /*!
1979 : * @}
1980 : */
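     :
     : /*
     :  * For illustration, the tuning macros documented above are meant to be set
     :  * before this point, either on the compiler command line, e.g.
     :  *
     :  *     cc -DXXH_FORCE_MEMORY_ACCESS=3 -DXXH32_ENDJMP=1 -c xxhash.c
     :  *
     :  * or before the #include in the translation unit that builds the
     :  * implementation (values shown are examples only):
     :  *
     :  *     #define XXH_NO_INLINE_HINTS 1
     :  *     #define XXH_INLINE_ALL
     :  *     #include "xxhash.h"
     :  */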
1981 :
1982 : #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
1983 : /* prefer __packed__ structures (method 1) for GCC
1984 :  * except on targets < ARMv7 with unaligned access (e.g. Raspbian armhf), where __packed__
1985 :  * still compiles to byte shifting, so we fall back to memcpy, which for some reason does unaligned loads. */
1986 : # if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
1987 : # define XXH_FORCE_MEMORY_ACCESS 1
1988 : # endif
1989 : #endif
1990 :
1991 : #ifndef XXH_SIZE_OPT
1992 : /* default to 1 for -Os or -Oz */
1993 : # if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
1994 : # define XXH_SIZE_OPT 1
1995 : # else
1996 : # define XXH_SIZE_OPT 0
1997 : # endif
1998 : #endif
1999 :
2000 : #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
2001 : /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
2002 : # if XXH_SIZE_OPT >= 1 || \
2003 : defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
2004 : || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
2005 : # define XXH_FORCE_ALIGN_CHECK 0
2006 : # else
2007 : # define XXH_FORCE_ALIGN_CHECK 1
2008 : # endif
2009 : #endif
2010 :
2011 : #ifndef XXH_NO_INLINE_HINTS
2012 : # if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
2013 : # define XXH_NO_INLINE_HINTS 1
2014 : # else
2015 : # define XXH_NO_INLINE_HINTS 0
2016 : # endif
2017 : #endif
2018 :
2019 : #ifndef XXH3_INLINE_SECRET
2020 : # if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
2021 : || !defined(XXH_INLINE_ALL)
2022 : # define XXH3_INLINE_SECRET 0
2023 : # else
2024 : # define XXH3_INLINE_SECRET 1
2025 : # endif
2026 : #endif
2027 :
2028 : #ifndef XXH32_ENDJMP
2029 : /* generally preferable for performance */
2030 : # define XXH32_ENDJMP 0
2031 : #endif
2032 :
2033 : /*!
2034 : * @defgroup impl Implementation
2035 : * @{
2036 : */
2037 :
2038 :
2039 : /* *************************************
2040 : * Includes & Memory related functions
2041 : ***************************************/
2042 : #if defined(XXH_NO_STREAM)
2043 : /* nothing */
2044 : #elif defined(XXH_NO_STDLIB)
2045 :
2046 : /* When requesting to disable any mention of stdlib,
2047 :  * the library loses the ability to invoke malloc / free.
2048 : * In practice, it means that functions like `XXH*_createState()`
2049 : * will always fail, and return NULL.
2050 : * This flag is useful in situations where
2051 : * xxhash.h is integrated into some kernel, embedded or limited environment
2052 : * without access to dynamic allocation.
2053 : */
2054 :
2055 : static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
2056 : static void XXH_free(void* p) { (void)p; }
2057 :
2058 : #else
2059 :
2060 : /*
2061 : * Modify the local functions below should you wish to use
2062 : * different memory routines for malloc() and free()
2063 : */
2064 : #include <stdlib.h>
2065 :
2066 : /*!
2067 : * @internal
2068 : * @brief Modify this function to use a different routine than malloc().
2069 : */
2070 7 : static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
2071 :
2072 : /*!
2073 : * @internal
2074 : * @brief Modify this function to use a different routine than free().
2075 : */
2076 7 : static void XXH_free(void* p) { free(p); }
2077 :
2078 : #endif /* XXH_NO_STDLIB */
2079 :
2080 : #include <string.h>
2081 :
2082 : /*!
2083 : * @internal
2084 : * @brief Modify this function to use a different routine than memcpy().
2085 : */
2086 25 : static void* XXH_memcpy(void* dest, const void* src, size_t size)
2087 : {
2088 25 : return memcpy(dest,src,size);
2089 : }
2090 :
2091 : #include <limits.h> /* ULLONG_MAX */
2092 :
2093 :
2094 : /* *************************************
2095 : * Compiler Specific Options
2096 : ***************************************/
2097 : #ifdef _MSC_VER /* Visual Studio warning fix */
2098 : # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
2099 : #endif
2100 :
2101 : #if XXH_NO_INLINE_HINTS /* disable inlining hints */
2102 : # if defined(__GNUC__) || defined(__clang__)
2103 : # define XXH_FORCE_INLINE static __attribute__((unused))
2104 : # else
2105 : # define XXH_FORCE_INLINE static
2106 : # endif
2107 : # define XXH_NO_INLINE static
2108 : /* enable inlining hints */
2109 : #elif defined(__GNUC__) || defined(__clang__)
2110 : # define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
2111 : # define XXH_NO_INLINE static __attribute__((noinline))
2112 : #elif defined(_MSC_VER) /* Visual Studio */
2113 : # define XXH_FORCE_INLINE static __forceinline
2114 : # define XXH_NO_INLINE static __declspec(noinline)
2115 : #elif defined (__cplusplus) \
2116 : || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
2117 : # define XXH_FORCE_INLINE static inline
2118 : # define XXH_NO_INLINE static
2119 : #else
2120 : # define XXH_FORCE_INLINE static
2121 : # define XXH_NO_INLINE static
2122 : #endif
2123 :
2124 : #if XXH3_INLINE_SECRET
2125 : # define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
2126 : #else
2127 : # define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
2128 : #endif
2129 :
2130 :
2131 : /* *************************************
2132 : * Debug
2133 : ***************************************/
2134 : /*!
2135 : * @ingroup tuning
2136 : * @def XXH_DEBUGLEVEL
2137 : * @brief Sets the debugging level.
2138 : *
2139 : * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
2140 : * compiler's command line options. The value must be a number.
2141 : */
2142 : #ifndef XXH_DEBUGLEVEL
2143 : # ifdef DEBUGLEVEL /* backwards compat */
2144 : # define XXH_DEBUGLEVEL DEBUGLEVEL
2145 : # else
2146 : # define XXH_DEBUGLEVEL 0
2147 : # endif
2148 : #endif
2149 :
2150 : #if (XXH_DEBUGLEVEL>=1)
2151 : # include <assert.h> /* note: can still be disabled with NDEBUG */
2152 : # define XXH_ASSERT(c) assert(c)
2153 : #else
2154 : # if defined(__INTEL_COMPILER)
2155 : # define XXH_ASSERT(c) XXH_ASSUME((unsigned char) (c))
2156 : # else
2157 : # define XXH_ASSERT(c) XXH_ASSUME(c)
2158 : # endif
2159 : #endif
2160 :
2161 : /* note: use after variable declarations */
2162 : #ifndef XXH_STATIC_ASSERT
2163 : # if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
2164 : # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
2165 : # elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
2166 : # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
2167 : # else
2168 : # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
2169 : # endif
2170 : # define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
2171 : #endif
2172 :
2173 : /*!
2174 : * @internal
2175 : * @def XXH_COMPILER_GUARD(var)
2176 : * @brief Used to prevent unwanted optimizations for @p var.
2177 : *
2178 : * It uses an empty GCC inline assembly statement with a register constraint
2179 : * which forces @p var into a general purpose register (eg eax, ebx, ecx
2180 : * on x86) and marks it as modified.
2181 : *
2182 : * This is used in a few places to avoid unwanted autovectorization (e.g.
2183 : * XXH32_round()). All vectorization we want is explicit via intrinsics,
2184 : * and _usually_ isn't wanted elsewhere.
2185 : *
2186 : * We also use it to prevent unwanted constant folding for AArch64 in
2187 : * XXH3_initCustomSecret_scalar().
2188 : */
2189 : #if defined(__GNUC__) || defined(__clang__)
2190 : # define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
2191 : #else
2192 : # define XXH_COMPILER_GUARD(var) ((void)0)
2193 : #endif
2194 :
2195 : /* Specifically for NEON vectors which use the "w" constraint, on
2196 : * Clang. */
2197 : #if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__)
2198 : # define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w" (var))
2199 : #else
2200 : # define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0)
2201 : #endif
2202 :
2203 : /* *************************************
2204 : * Basic Types
2205 : ***************************************/
2206 : #if !defined (__VMS) \
2207 : && (defined (__cplusplus) \
2208 : || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
2209 : # include <stdint.h>
2210 : typedef uint8_t xxh_u8;
2211 : #else
2212 : typedef unsigned char xxh_u8;
2213 : #endif
2214 : typedef XXH32_hash_t xxh_u32;
2215 :
2216 : #ifdef XXH_OLD_NAMES
2217 : # warning "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly"
2218 : # define BYTE xxh_u8
2219 : # define U8 xxh_u8
2220 : # define U32 xxh_u32
2221 : #endif
2222 :
2223 : /* *** Memory access *** */
2224 :
2225 : /*!
2226 : * @internal
2227 : * @fn xxh_u32 XXH_read32(const void* ptr)
2228 : * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
2229 : *
2230 : * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
2231 : *
2232 : * @param ptr The pointer to read from.
2233 : * @return The 32-bit native endian integer from the bytes at @p ptr.
2234 : */
2235 :
2236 : /*!
2237 : * @internal
2238 : * @fn xxh_u32 XXH_readLE32(const void* ptr)
2239 : * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
2240 : *
2241 : * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
2242 : *
2243 : * @param ptr The pointer to read from.
2244 : * @return The 32-bit little endian integer from the bytes at @p ptr.
2245 : */
2246 :
2247 : /*!
2248 : * @internal
2249 : * @fn xxh_u32 XXH_readBE32(const void* ptr)
2250 : * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
2251 : *
2252 : * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
2253 : *
2254 : * @param ptr The pointer to read from.
2255 : * @return The 32-bit big endian integer from the bytes at @p ptr.
2256 : */
2257 :
2258 : /*!
2259 : * @internal
2260 : * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)
2261 : * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.
2262 : *
2263 : * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
2264 : * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is
2265 : * always @ref XXH_alignment::XXH_unaligned.
2266 : *
2267 : * @param ptr The pointer to read from.
2268 : * @param align Whether @p ptr is aligned.
2269 : * @pre
2270 : * If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte
2271 : * aligned.
2272 : * @return The 32-bit little endian integer from the bytes at @p ptr.
2273 : */
2274 :
2275 : #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2276 : /*
2277 : * Manual byteshift. Best for old compilers which don't inline memcpy.
2278 : * We actually directly use XXH_readLE32 and XXH_readBE32.
2279 : */
2280 : #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
2281 :
2282 : /*
2283 : * Force direct memory access. Only works on CPU which support unaligned memory
2284 : * access in hardware.
2285 : */
2286 : static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
2287 :
2288 : #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
2289 :
2290 : /*
2291 : * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
2292 : * documentation claimed that it only increased the alignment, but actually it
2293 : * can decrease it on gcc, clang, and icc:
2294 : * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
2295 : * https://gcc.godbolt.org/z/xYez1j67Y.
2296 : */
2297 : #ifdef XXH_OLD_NAMES
2298 : typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
2299 : #endif
2300 1018 : static xxh_u32 XXH_read32(const void* ptr)
2301 : {
2302 : typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
2303 1018 : return *((const xxh_unalign32*)ptr);
2304 : }
2305 :
2306 : #else
2307 :
2308 : /*
2309 : * Portable and safe solution. Generally efficient.
2310 : * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
2311 : */
2312 : static xxh_u32 XXH_read32(const void* memPtr)
2313 : {
2314 : xxh_u32 val;
2315 : XXH_memcpy(&val, memPtr, sizeof(val));
2316 : return val;
2317 : }
2318 :
2319 : #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
2320 :
2321 :
2322 : /* *** Endianness *** */
2323 :
2324 : /*!
2325 : * @ingroup tuning
2326 : * @def XXH_CPU_LITTLE_ENDIAN
2327 : * @brief Whether the target is little endian.
2328 : *
2329 : * Defined to 1 if the target is little endian, or 0 if it is big endian.
2330 : * It can be defined externally, for example on the compiler command line.
2331 : *
2332 : * If it is not defined,
2333 : * a runtime check (which is usually constant folded) is used instead.
2334 : *
2335 : * @note
2336 : * This is not necessarily defined to an integer constant.
2337 : *
2338 : * @see XXH_isLittleEndian() for the runtime check.
2339 : */
2340 : #ifndef XXH_CPU_LITTLE_ENDIAN
2341 : /*
2342 : * Try to detect endianness automatically, to avoid the nonstandard behavior
2343 : * in `XXH_isLittleEndian()`
2344 : */
2345 : # if defined(_WIN32) /* Windows is always little endian */ \
2346 : || defined(__LITTLE_ENDIAN__) \
2347 : || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
2348 : # define XXH_CPU_LITTLE_ENDIAN 1
2349 : # elif defined(__BIG_ENDIAN__) \
2350 : || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
2351 : # define XXH_CPU_LITTLE_ENDIAN 0
2352 : # else
2353 : /*!
2354 : * @internal
2355 : * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN.
2356 : *
2357 : * Most compilers will constant fold this.
2358 : */
2359 : static int XXH_isLittleEndian(void)
2360 : {
2361 : /*
2362 : * Portable and well-defined behavior.
2363 : * Don't use static: it is detrimental to performance.
2364 : */
2365 : const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
2366 : return one.c[0];
2367 : }
2368 : # define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
2369 : # endif
2370 : #endif
2371 :
2372 :
2373 :
2374 :
2375 : /* ****************************************
2376 : * Compiler-specific Functions and Macros
2377 : ******************************************/
2378 : #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
2379 :
2380 : #ifdef __has_builtin
2381 : # define XXH_HAS_BUILTIN(x) __has_builtin(x)
2382 : #else
2383 : # define XXH_HAS_BUILTIN(x) 0
2384 : #endif
2385 :
2386 :
2387 :
2388 : /*
2389 : * C23 and future versions have standard "unreachable()".
2390 : * Once it has been implemented reliably we can add it as an
2391 : * additional case:
2392 : *
2393 : * ```
2394 : * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
2395 : * # include <stddef.h>
2396 : * # ifdef unreachable
2397 : * # define XXH_UNREACHABLE() unreachable()
2398 : * # endif
2399 : * #endif
2400 : * ```
2401 : *
2402 : * Note C++23 also has std::unreachable() which can be detected
2403 : * as follows:
2404 : * ```
2405 : * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
2406 : * # include <utility>
2407 : * # define XXH_UNREACHABLE() std::unreachable()
2408 : * #endif
2409 : * ```
2410 : * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
2411 : * We don't use that as including `<utility>` in `extern "C"` blocks
2412 : * doesn't work on GCC12
2413 : */
2414 :
2415 : #if XXH_HAS_BUILTIN(__builtin_unreachable)
2416 : # define XXH_UNREACHABLE() __builtin_unreachable()
2417 :
2418 : #elif defined(_MSC_VER)
2419 : # define XXH_UNREACHABLE() __assume(0)
2420 :
2421 : #else
2422 : # define XXH_UNREACHABLE()
2423 : #endif
2424 :
2425 : #if XXH_HAS_BUILTIN(__builtin_assume)
2426 : # define XXH_ASSUME(c) __builtin_assume(c)
2427 : #else
2428 : # define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
2429 : #endif
2430 :
2431 : /*!
2432 : * @internal
2433 : * @def XXH_rotl32(x,r)
2434 : * @brief 32-bit rotate left.
2435 : *
2436 : * @param x The 32-bit integer to be rotated.
2437 : * @param r The number of bits to rotate.
2438 : * @pre
2439 : * @p r > 0 && @p r < 32
2440 : * @note
2441 : * @p x and @p r may be evaluated multiple times.
2442 : * @return The rotated result.
2443 : */
2444 : #if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
2445 : && XXH_HAS_BUILTIN(__builtin_rotateleft64)
2446 : # define XXH_rotl32 __builtin_rotateleft32
2447 : # define XXH_rotl64 __builtin_rotateleft64
2448 : /* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
2449 : #elif defined(_MSC_VER)
2450 : # define XXH_rotl32(x,r) _rotl(x,r)
2451 : # define XXH_rotl64(x,r) _rotl64(x,r)
2452 : #else
2453 : # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
2454 : # define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
2455 : #endif
2456 :
2457 : /*!
2458 : * @internal
2459 : * @fn xxh_u32 XXH_swap32(xxh_u32 x)
2460 : * @brief A 32-bit byteswap.
2461 : *
2462 : * @param x The 32-bit integer to byteswap.
2463 : * @return @p x, byteswapped.
2464 : */
2465 : #if defined(_MSC_VER) /* Visual Studio */
2466 : # define XXH_swap32 _byteswap_ulong
2467 : #elif XXH_GCC_VERSION >= 403
2468 : # define XXH_swap32 __builtin_bswap32
2469 : #else
2470 : static xxh_u32 XXH_swap32 (xxh_u32 x)
2471 : {
2472 : return ((x << 24) & 0xff000000 ) |
2473 : ((x << 8) & 0x00ff0000 ) |
2474 : ((x >> 8) & 0x0000ff00 ) |
2475 : ((x >> 24) & 0x000000ff );
2476 : }
2477 : #endif
2478 :
2479 :
2480 : /* ***************************
2481 : * Memory reads
2482 : *****************************/
2483 :
2484 : /*!
2485 : * @internal
2486 : * @brief Enum to indicate whether a pointer is aligned.
2487 : */
2488 : typedef enum {
2489 : XXH_aligned, /*!< Aligned */
2490 : XXH_unaligned /*!< Possibly unaligned */
2491 : } XXH_alignment;
2492 :
2493 : /*
2494 : * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
2495 : *
2496 : * This is ideal for older compilers which don't inline memcpy.
2497 : */
2498 : #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2499 :
2500 : XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
2501 : {
2502 : const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2503 : return bytePtr[0]
2504 : | ((xxh_u32)bytePtr[1] << 8)
2505 : | ((xxh_u32)bytePtr[2] << 16)
2506 : | ((xxh_u32)bytePtr[3] << 24);
2507 : }
2508 :
2509 : XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
2510 : {
2511 : const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
2512 : return bytePtr[3]
2513 : | ((xxh_u32)bytePtr[2] << 8)
2514 : | ((xxh_u32)bytePtr[1] << 16)
2515 : | ((xxh_u32)bytePtr[0] << 24);
2516 : }
2517 :
2518 : #else
2519 1018 : XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
2520 : {
2521 1018 : return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
2522 : }
2523 :
2524 0 : static xxh_u32 XXH_readBE32(const void* ptr)
2525 : {
2526 0 : return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
2527 : }
2528 : #endif
2529 :
2530 : XXH_FORCE_INLINE xxh_u32
2531 1018 : XXH_readLE32_align(const void* ptr, XXH_alignment align)
2532 : {
2533 1018 : if (align==XXH_unaligned) {
2534 1018 : return XXH_readLE32(ptr);
2535 : } else {
2536 0 : return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
2537 : }
2538 : }
2539 :
2540 :
2541 : /* *************************************
2542 : * Misc
2543 : ***************************************/
2544 : /*! @ingroup public */
2545 0 : XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
2546 :
2547 :
2548 : /* *******************************************************************
2549 : * 32-bit hash functions
2550 : *********************************************************************/
2551 : /*!
2552 : * @}
2553 : * @defgroup XXH32_impl XXH32 implementation
2554 : * @ingroup impl
2555 : *
2556 : * Details on the XXH32 implementation.
2557 : * @{
2558 : */
2559 : /* #define instead of static const, to be used as initializers */
2560 : #define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */
2561 : #define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */
2562 : #define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */
2563 : #define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */
2564 : #define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */
2565 :
2566 : #ifdef XXH_OLD_NAMES
2567 : # define PRIME32_1 XXH_PRIME32_1
2568 : # define PRIME32_2 XXH_PRIME32_2
2569 : # define PRIME32_3 XXH_PRIME32_3
2570 : # define PRIME32_4 XXH_PRIME32_4
2571 : # define PRIME32_5 XXH_PRIME32_5
2572 : #endif
2573 :
2574 : /*!
2575 : * @internal
2576 : * @brief Normal stripe processing routine.
2577 : *
2578 : * This shuffles the bits so that any bit from @p input impacts several bits in
2579 : * @p acc.
2580 : *
2581 : * @param acc The accumulator lane.
2582 : * @param input The stripe of input to mix.
2583 : * @return The mixed accumulator lane.
2584 : */
2585 0 : static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
2586 : {
2587 0 : acc += input * XXH_PRIME32_2;
2588 0 : acc = XXH_rotl32(acc, 13);
2589 0 : acc *= XXH_PRIME32_1;
2590 : #if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
2591 : /*
2592 : * UGLY HACK:
2593 : * A compiler fence is the only thing that prevents GCC and Clang from
2594 : * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
2595 : * reason) without globally disabling SSE4.1.
2596 : *
2597 : * The reason we want to avoid vectorization is because despite working on
2598 : * 4 integers at a time, there are multiple factors slowing XXH32 down on
2599 : * SSE4:
2600 : * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
2601 : * newer chips!) making it slightly slower to multiply four integers at
2602 : * once compared to four integers independently. Even when pmulld was
2603 : * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE
2604 : * just to multiply unless doing a long operation.
2605 : *
2606 : * - Four instructions are required to rotate,
2607 : * movdqa tmp, v // not required with VEX encoding
2608 : * pslld tmp, 13 // tmp <<= 13
2609 : * psrld v, 19 // x >>= 19
2610 : * por v, tmp // x |= tmp
2611 : * compared to one for scalar:
2612 : * roll v, 13 // reliably fast across the board
2613 : * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason
2614 : *
2615 : * - Instruction level parallelism is actually more beneficial here because
2616 : * the SIMD actually serializes this operation: While v1 is rotating, v2
2617 : * can load data, while v3 can multiply. SSE forces them to operate
2618 : * together.
2619 : *
2620 : * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
2621 : * the loop. NEON is only faster on the A53, and with the newer cores, it is less
2622 : * than half the speed.
2623 : *
2624 : * Additionally, this is used on WASM SIMD128 because it JITs to the same
2625 : * SIMD instructions and has the same issue.
2626 : */
2627 : XXH_COMPILER_GUARD(acc);
2628 : #endif
2629 0 : return acc;
2630 : }
2631 :
2632 : /*!
2633 : * @internal
2634 : * @brief Mixes all bits to finalize the hash.
2635 : *
2636 : * The final mix ensures that all input bits have a chance to impact any bit in
2637 : * the output digest, resulting in an unbiased distribution.
2638 : *
2639 : * @param hash The hash to avalanche.
2640 : * @return The avalanched hash.
2641 : */
2642 746 : static xxh_u32 XXH32_avalanche(xxh_u32 hash)
2643 : {
2644 746 : hash ^= hash >> 15;
2645 746 : hash *= XXH_PRIME32_2;
2646 746 : hash ^= hash >> 13;
2647 746 : hash *= XXH_PRIME32_3;
2648 746 : hash ^= hash >> 16;
2649 746 : return hash;
2650 : }
2651 :
2652 : #define XXH_get32bits(p) XXH_readLE32_align(p, align)
2653 :
2654 : /*!
2655 : * @internal
2656 : * @brief Processes the last 0-15 bytes of @p ptr.
2657 : *
2658 : * There may be up to 15 bytes remaining to consume from the input.
2659 : * This final stage will digest them to ensure that all input bytes are present
2660 : * in the final mix.
2661 : *
2662 : * @param hash The hash to finalize.
2663 : * @param ptr The pointer to the remaining input.
2664 : * @param len The remaining length, modulo 16.
2665 : * @param align Whether @p ptr is aligned.
2666 : * @return The finalized hash.
2667 : * @see XXH64_finalize().
2668 : */
2669 : static XXH_PUREF xxh_u32
2670 746 : XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
2671 : {
2672 : #define XXH_PROCESS1 do { \
2673 : hash += (*ptr++) * XXH_PRIME32_5; \
2674 : hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \
2675 : } while (0)
2676 :
2677 : #define XXH_PROCESS4 do { \
2678 : hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \
2679 : ptr += 4; \
2680 : hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \
2681 : } while (0)
2682 :
2683 746 : if (ptr==NULL) XXH_ASSERT(len == 0);
2684 :
2685 : /* Compact rerolled version; generally faster */
2686 : if (!XXH32_ENDJMP) {
2687 746 : len &= 15;
2688 1764 : while (len >= 4) {
2689 1018 : XXH_PROCESS4;
2690 1018 : len -= 4;
2691 : }
2692 1345 : while (len > 0) {
2693 599 : XXH_PROCESS1;
2694 599 : --len;
2695 : }
2696 746 : return XXH32_avalanche(hash);
2697 : } else {
2698 : switch(len&15) /* or switch(bEnd - p) */ {
2699 : case 12: XXH_PROCESS4;
2700 : XXH_FALLTHROUGH; /* fallthrough */
2701 : case 8: XXH_PROCESS4;
2702 : XXH_FALLTHROUGH; /* fallthrough */
2703 : case 4: XXH_PROCESS4;
2704 : return XXH32_avalanche(hash);
2705 :
2706 : case 13: XXH_PROCESS4;
2707 : XXH_FALLTHROUGH; /* fallthrough */
2708 : case 9: XXH_PROCESS4;
2709 : XXH_FALLTHROUGH; /* fallthrough */
2710 : case 5: XXH_PROCESS4;
2711 : XXH_PROCESS1;
2712 : return XXH32_avalanche(hash);
2713 :
2714 : case 14: XXH_PROCESS4;
2715 : XXH_FALLTHROUGH; /* fallthrough */
2716 : case 10: XXH_PROCESS4;
2717 : XXH_FALLTHROUGH; /* fallthrough */
2718 : case 6: XXH_PROCESS4;
2719 : XXH_PROCESS1;
2720 : XXH_PROCESS1;
2721 : return XXH32_avalanche(hash);
2722 :
2723 : case 15: XXH_PROCESS4;
2724 : XXH_FALLTHROUGH; /* fallthrough */
2725 : case 11: XXH_PROCESS4;
2726 : XXH_FALLTHROUGH; /* fallthrough */
2727 : case 7: XXH_PROCESS4;
2728 : XXH_FALLTHROUGH; /* fallthrough */
2729 : case 3: XXH_PROCESS1;
2730 : XXH_FALLTHROUGH; /* fallthrough */
2731 : case 2: XXH_PROCESS1;
2732 : XXH_FALLTHROUGH; /* fallthrough */
2733 : case 1: XXH_PROCESS1;
2734 : XXH_FALLTHROUGH; /* fallthrough */
2735 : case 0: return XXH32_avalanche(hash);
2736 : }
2737 : XXH_ASSERT(0);
2738 : return hash; /* reaching this point is deemed impossible */
2739 : }
2740 : }
2741 :
2742 : #ifdef XXH_OLD_NAMES
2743 : # define PROCESS1 XXH_PROCESS1
2744 : # define PROCESS4 XXH_PROCESS4
2745 : #else
2746 : # undef XXH_PROCESS1
2747 : # undef XXH_PROCESS4
2748 : #endif
2749 :
2750 : /*!
2751 : * @internal
2752 : * @brief The implementation for @ref XXH32().
2753 : *
2754 : * @param input , len , seed Directly passed from @ref XXH32().
2755 : * @param align Whether @p input is aligned.
2756 : * @return The calculated hash.
2757 : */
2758 : XXH_FORCE_INLINE XXH_PUREF xxh_u32
2759 746 : XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
2760 : {
2761 : xxh_u32 h32;
2762 :
2763 746 : if (input==NULL) XXH_ASSERT(len == 0);
2764 :
2765 746 : if (len>=16) {
2766 0 : const xxh_u8* const bEnd = input + len;
2767 0 : const xxh_u8* const limit = bEnd - 15;
2768 0 : xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2769 0 : xxh_u32 v2 = seed + XXH_PRIME32_2;
2770 0 : xxh_u32 v3 = seed + 0;
2771 0 : xxh_u32 v4 = seed - XXH_PRIME32_1;
2772 :
2773 : do {
2774 0 : v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4;
2775 0 : v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4;
2776 0 : v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4;
2777 0 : v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4;
2778 0 : } while (input < limit);
2779 :
2780 0 : h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7)
2781 0 : + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
2782 : } else {
2783 746 : h32 = seed + XXH_PRIME32_5;
2784 : }
2785 :
2786 746 : h32 += (xxh_u32)len;
2787 :
2788 746 : return XXH32_finalize(h32, input, len&15, align);
2789 : }
2790 :
2791 : /*! @ingroup XXH32_family */
2792 746 : XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
2793 : {
2794 : #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
2795 : /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
2796 : XXH32_state_t state;
2797 : XXH32_reset(&state, seed);
2798 : XXH32_update(&state, (const xxh_u8*)input, len);
2799 : return XXH32_digest(&state);
2800 : #else
2801 : if (XXH_FORCE_ALIGN_CHECK) {
2802 : if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
2803 : return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
2804 : } }
2805 :
2806 746 : return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
2807 : #endif
2808 : }
2809 :
2810 :
2811 :
2812 : /******* Hash streaming *******/
2813 : #ifndef XXH_NO_STREAM
2814 : /*! @ingroup XXH32_family */
2815 0 : XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
2816 : {
2817 0 : return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
2818 : }
2819 : /*! @ingroup XXH32_family */
2820 0 : XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
2821 : {
2822 0 : XXH_free(statePtr);
2823 0 : return XXH_OK;
2824 : }
2825 :
2826 : /*! @ingroup XXH32_family */
2827 0 : XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
2828 : {
2829 0 : XXH_memcpy(dstState, srcState, sizeof(*dstState));
2830 0 : }
2831 :
2832 : /*! @ingroup XXH32_family */
2833 0 : XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
2834 : {
2835 0 : XXH_ASSERT(statePtr != NULL);
2836 0 : memset(statePtr, 0, sizeof(*statePtr));
2837 0 : statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2838 0 : statePtr->v[1] = seed + XXH_PRIME32_2;
2839 0 : statePtr->v[2] = seed + 0;
2840 0 : statePtr->v[3] = seed - XXH_PRIME32_1;
2841 0 : return XXH_OK;
2842 : }
2843 :
2844 :
2845 : /*! @ingroup XXH32_family */
2846 : XXH_PUBLIC_API XXH_errorcode
2847 0 : XXH32_update(XXH32_state_t* state, const void* input, size_t len)
2848 : {
2849 0 : if (input==NULL) {
2850 0 : XXH_ASSERT(len == 0);
2851 0 : return XXH_OK;
2852 : }
2853 :
2854 0 : { const xxh_u8* p = (const xxh_u8*)input;
2855 0 : const xxh_u8* const bEnd = p + len;
2856 :
2857 0 : state->total_len_32 += (XXH32_hash_t)len;
2858 0 : state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
2859 :
2860 0 : if (state->memsize + len < 16) { /* fill in tmp buffer */
2861 0 : XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len);
2862 0 : state->memsize += (XXH32_hash_t)len;
2863 0 : return XXH_OK;
2864 : }
2865 :
2866 0 : if (state->memsize) { /* some data left from previous update */
2867 0 : XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
2868 0 : { const xxh_u32* p32 = state->mem32;
2869 0 : state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
2870 0 : state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
2871 0 : state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
2872 0 : state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
2873 : }
2874 0 : p += 16-state->memsize;
2875 0 : state->memsize = 0;
2876 : }
2877 :
2878 0 : if (p <= bEnd-16) {
2879 0 : const xxh_u8* const limit = bEnd - 16;
2880 :
2881 : do {
2882 0 : state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
2883 0 : state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
2884 0 : state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
2885 0 : state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
2886 0 : } while (p<=limit);
2887 :
2888 : }
2889 :
2890 0 : if (p < bEnd) {
2891 0 : XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
2892 0 : state->memsize = (unsigned)(bEnd-p);
2893 : }
2894 : }
2895 :
2896 0 : return XXH_OK;
2897 : }
2898 :
2899 :
2900 : /*! @ingroup XXH32_family */
2901 0 : XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
2902 : {
2903 : xxh_u32 h32;
2904 :
2905 0 : if (state->large_len) {
2906 0 : h32 = XXH_rotl32(state->v[0], 1)
2907 0 : + XXH_rotl32(state->v[1], 7)
2908 0 : + XXH_rotl32(state->v[2], 12)
2909 0 : + XXH_rotl32(state->v[3], 18);
2910 : } else {
2911 0 : h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
2912 : }
2913 :
2914 0 : h32 += state->total_len_32;
2915 :
2916 0 : return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
2917 : }
2918 : #endif /* !XXH_NO_STREAM */
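/*
 * Illustrative streaming sketch (not library code): the streaming API above
 * produces the same value as a one-shot XXH32() over the concatenation of all
 * updates. The chunk array below is a hypothetical input source; error
 * handling is reduced to early exits.
 *
 *     static XXH32_hash_t
 *     hash_chunks(const void* const* chunks, const size_t* sizes, size_t n, XXH32_hash_t seed)
 *     {
 *         XXH32_state_t* const state = XXH32_createState();
 *         XXH32_hash_t h;
 *         size_t i;
 *         if (state == NULL) return 0;
 *         (void)XXH32_reset(state, seed);
 *         for (i = 0; i < n; i++) {
 *             if (XXH32_update(state, chunks[i], sizes[i]) != XXH_OK) break;
 *         }
 *         h = XXH32_digest(state);
 *         (void)XXH32_freeState(state);
 *         return h;
 *     }
 */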
2919 :
2920 : /******* Canonical representation *******/
2921 :
2922 : /*!
2923 : * @ingroup XXH32_family
2924 : * The default return values from XXH functions are unsigned 32 and 64 bit
2925 : * integers.
2926 : *
2927 : * The canonical representation uses big endian convention, the same convention
2928 : * as human-readable numbers (large digits first).
2929 : *
2930 : * This way, hash values can be written into a file or buffer, remaining
2931 : * comparable across different systems.
2932 : *
2933 : * The following functions allow transformation of hash values to and from their
2934 : * canonical format.
2935 : */
2936 0 : XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
2937 : {
2938 : XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
2939 0 : if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
2940 0 : XXH_memcpy(dst, &hash, sizeof(*dst));
2941 0 : }
2942 : /*! @ingroup XXH32_family */
2943 0 : XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
2944 : {
2945 0 : return XXH_readBE32(src);
2946 : }
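/*
 * Illustrative round trip (sketch, not library code): a hash is converted to
 * its canonical big endian byte layout before being stored, then read back.
 * The stored bytes are identical regardless of the host's endianness, which
 * is the point of the canonical form. `buf` and `len` stand for any input.
 *
 *     XXH32_hash_t const h = XXH32(buf, len, 0);
 *     XXH32_canonical_t canon;
 *     XXH32_canonicalFromHash(&canon, h);
 *     // ... write or transmit the sizeof(canon) bytes of canon ...
 *     XXH32_hash_t const h2 = XXH32_hashFromCanonical(&canon);
 *     XXH_ASSERT(h2 == h);   // lossless round trip
 */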
2947 :
2948 :
2949 : #ifndef XXH_NO_LONG_LONG
2950 :
2951 : /* *******************************************************************
2952 : * 64-bit hash functions
2953 : *********************************************************************/
2954 : /*!
2955 : * @}
2956 : * @ingroup impl
2957 : * @{
2958 : */
2959 : /******* Memory access *******/
2960 :
2961 : typedef XXH64_hash_t xxh_u64;
2962 :
2963 : #ifdef XXH_OLD_NAMES
2964 : # define U64 xxh_u64
2965 : #endif
2966 :
2967 : #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
2968 : /*
2969 : * Manual byteshift. Best for old compilers which don't inline memcpy.
2970 : * We actually directly use XXH_readLE64 and XXH_readBE64.
2971 : */
2972 : #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
2973 :
2974 : /* Force direct memory access. Only works on CPUs which support unaligned memory access in hardware */
2975 : static xxh_u64 XXH_read64(const void* memPtr)
2976 : {
2977 : return *(const xxh_u64*) memPtr;
2978 : }
2979 :
2980 : #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
2981 :
2982 : /*
2983 : * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
2984 : * documentation claimed that it only increased the alignment, but actually it
2985 : * can decrease it on gcc, clang, and icc:
2986 : * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
2987 : * https://gcc.godbolt.org/z/xYez1j67Y.
2988 : */
2989 : #ifdef XXH_OLD_NAMES
2990 : typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
2991 : #endif
2992 1656 : static xxh_u64 XXH_read64(const void* ptr)
2993 : {
2994 : typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
2995 1656 : return *((const xxh_unalign64*)ptr);
2996 : }
2997 :
2998 : #else
2999 :
3000 : /*
3001 : * Portable and safe solution. Generally efficient.
3002 : * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
3003 : */
3004 : static xxh_u64 XXH_read64(const void* memPtr)
3005 : {
3006 : xxh_u64 val;
3007 : XXH_memcpy(&val, memPtr, sizeof(val));
3008 : return val;
3009 : }
3010 :
3011 : #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
3012 :
3013 : #if defined(_MSC_VER) /* Visual Studio */
3014 : # define XXH_swap64 _byteswap_uint64
3015 : #elif XXH_GCC_VERSION >= 403
3016 : # define XXH_swap64 __builtin_bswap64
3017 : #else
3018 : static xxh_u64 XXH_swap64(xxh_u64 x)
3019 : {
3020 : return ((x << 56) & 0xff00000000000000ULL) |
3021 : ((x << 40) & 0x00ff000000000000ULL) |
3022 : ((x << 24) & 0x0000ff0000000000ULL) |
3023 : ((x << 8) & 0x000000ff00000000ULL) |
3024 : ((x >> 8) & 0x00000000ff000000ULL) |
3025 : ((x >> 24) & 0x0000000000ff0000ULL) |
3026 : ((x >> 40) & 0x000000000000ff00ULL) |
3027 : ((x >> 56) & 0x00000000000000ffULL);
3028 : }
3029 : #endif
3030 :
3031 :
3032 : /* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
3033 : #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
3034 :
3035 : XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
3036 : {
3037 : const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
3038 : return bytePtr[0]
3039 : | ((xxh_u64)bytePtr[1] << 8)
3040 : | ((xxh_u64)bytePtr[2] << 16)
3041 : | ((xxh_u64)bytePtr[3] << 24)
3042 : | ((xxh_u64)bytePtr[4] << 32)
3043 : | ((xxh_u64)bytePtr[5] << 40)
3044 : | ((xxh_u64)bytePtr[6] << 48)
3045 : | ((xxh_u64)bytePtr[7] << 56);
3046 : }
3047 :
3048 : XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
3049 : {
3050 : const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
3051 : return bytePtr[7]
3052 : | ((xxh_u64)bytePtr[6] << 8)
3053 : | ((xxh_u64)bytePtr[5] << 16)
3054 : | ((xxh_u64)bytePtr[4] << 24)
3055 : | ((xxh_u64)bytePtr[3] << 32)
3056 : | ((xxh_u64)bytePtr[2] << 40)
3057 : | ((xxh_u64)bytePtr[1] << 48)
3058 : | ((xxh_u64)bytePtr[0] << 56);
3059 : }
3060 :
3061 : #else
3062 1656 : XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
3063 : {
3064 1656 : return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
3065 : }
3066 :
3067 0 : static xxh_u64 XXH_readBE64(const void* ptr)
3068 : {
3069 0 : return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
3070 : }
3071 : #endif
3072 :
3073 : XXH_FORCE_INLINE xxh_u64
3074 0 : XXH_readLE64_align(const void* ptr, XXH_alignment align)
3075 : {
3076 0 : if (align==XXH_unaligned)
3077 0 : return XXH_readLE64(ptr);
3078 : else
3079 0 : return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
3080 : }
3081 :
3082 :
3083 : /******* xxh64 *******/
3084 : /*!
3085 : * @}
3086 : * @defgroup XXH64_impl XXH64 implementation
3087 : * @ingroup impl
3088 : *
3089 : * Details on the XXH64 implementation.
3090 : * @{
3091 : */
3092 : /* #define rather than static const, to be used as initializers */
3093 : #define XXH_PRIME64_1 0x9E3779B185EBCA87ULL /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
3094 : #define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
3095 : #define XXH_PRIME64_3 0x165667B19E3779F9ULL /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
3096 : #define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
3097 : #define XXH_PRIME64_5 0x27D4EB2F165667C5ULL /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
3098 :
3099 : #ifdef XXH_OLD_NAMES
3100 : # define PRIME64_1 XXH_PRIME64_1
3101 : # define PRIME64_2 XXH_PRIME64_2
3102 : # define PRIME64_3 XXH_PRIME64_3
3103 : # define PRIME64_4 XXH_PRIME64_4
3104 : # define PRIME64_5 XXH_PRIME64_5
3105 : #endif
3106 :
3107 : /*! @copydoc XXH32_round */
3108 0 : static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
3109 : {
3110 0 : acc += input * XXH_PRIME64_2;
3111 0 : acc = XXH_rotl64(acc, 31);
3112 0 : acc *= XXH_PRIME64_1;
3113 0 : return acc;
3114 : }
3115 :
3116 0 : static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
3117 : {
3118 0 : val = XXH64_round(0, val);
3119 0 : acc ^= val;
3120 0 : acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
3121 0 : return acc;
3122 : }
3123 :
3124 : /*! @copydoc XXH32_avalanche */
3125 4 : static xxh_u64 XXH64_avalanche(xxh_u64 hash)
3126 : {
3127 4 : hash ^= hash >> 33;
3128 4 : hash *= XXH_PRIME64_2;
3129 4 : hash ^= hash >> 29;
3130 4 : hash *= XXH_PRIME64_3;
3131 4 : hash ^= hash >> 32;
3132 4 : return hash;
3133 : }
3134 :
3135 :
3136 : #define XXH_get64bits(p) XXH_readLE64_align(p, align)
3137 :
3138 : /*!
3139 : * @internal
3140 : * @brief Processes the last 0-31 bytes of @p ptr.
3141 : *
3142 : * There may be up to 31 bytes remaining to consume from the input.
3143 : * This final stage will digest them to ensure that all input bytes are present
3144 : * in the final mix.
3145 : *
3146 : * @param hash The hash to finalize.
3147 : * @param ptr The pointer to the remaining input.
3148 : * @param len The remaining length, modulo 32.
3149 : * @param align Whether @p ptr is aligned.
3150 :  * @return The finalized hash.
3151 : * @see XXH32_finalize().
3152 : */
3153 : static XXH_PUREF xxh_u64
3154 0 : XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
3155 : {
3156 0 : if (ptr==NULL) XXH_ASSERT(len == 0);
3157 0 : len &= 31;
3158 0 : while (len >= 8) {
3159 0 : xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
3160 0 : ptr += 8;
3161 0 : hash ^= k1;
3162 0 : hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
3163 0 : len -= 8;
3164 : }
3165 0 : if (len >= 4) {
3166 0 : hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
3167 0 : ptr += 4;
3168 0 : hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
3169 0 : len -= 4;
3170 : }
3171 0 : while (len > 0) {
3172 0 : hash ^= (*ptr++) * XXH_PRIME64_5;
3173 0 : hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
3174 0 : --len;
3175 : }
3176 0 : return XXH64_avalanche(hash);
3177 : }
3178 :
3179 : #ifdef XXH_OLD_NAMES
3180 : # define PROCESS1_64 XXH_PROCESS1_64
3181 : # define PROCESS4_64 XXH_PROCESS4_64
3182 : # define PROCESS8_64 XXH_PROCESS8_64
3183 : #else
3184 : # undef XXH_PROCESS1_64
3185 : # undef XXH_PROCESS4_64
3186 : # undef XXH_PROCESS8_64
3187 : #endif
3188 :
3189 : /*!
3190 : * @internal
3191 : * @brief The implementation for @ref XXH64().
3192 : *
3193 : * @param input , len , seed Directly passed from @ref XXH64().
3194 : * @param align Whether @p input is aligned.
3195 : * @return The calculated hash.
3196 : */
3197 : XXH_FORCE_INLINE XXH_PUREF xxh_u64
3198 0 : XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
3199 : {
3200 : xxh_u64 h64;
3201 0 : if (input==NULL) XXH_ASSERT(len == 0);
3202 :
3203 0 : if (len>=32) {
3204 0 : const xxh_u8* const bEnd = input + len;
3205 0 : const xxh_u8* const limit = bEnd - 31;
3206 0 : xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
3207 0 : xxh_u64 v2 = seed + XXH_PRIME64_2;
3208 0 : xxh_u64 v3 = seed + 0;
3209 0 : xxh_u64 v4 = seed - XXH_PRIME64_1;
3210 :
3211 : do {
3212 0 : v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
3213 0 : v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
3214 0 : v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
3215 0 : v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
3216 0 : } while (input<limit);
3217 :
3218 0 : h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
3219 0 : h64 = XXH64_mergeRound(h64, v1);
3220 0 : h64 = XXH64_mergeRound(h64, v2);
3221 0 : h64 = XXH64_mergeRound(h64, v3);
3222 0 : h64 = XXH64_mergeRound(h64, v4);
3223 :
3224 : } else {
3225 0 : h64 = seed + XXH_PRIME64_5;
3226 : }
3227 :
3228 0 : h64 += (xxh_u64) len;
3229 :
3230 0 : return XXH64_finalize(h64, input, len, align);
3231 : }
3232 :
3233 :
3234 : /*! @ingroup XXH64_family */
3235 0 : XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
3236 : {
3237 : #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
3238 : /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
3239 : XXH64_state_t state;
3240 : XXH64_reset(&state, seed);
3241 : XXH64_update(&state, (const xxh_u8*)input, len);
3242 : return XXH64_digest(&state);
3243 : #else
3244 : if (XXH_FORCE_ALIGN_CHECK) {
3245 : if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
3246 : return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
3247 : } }
3248 :
3249 0 : return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
3250 :
3251 : #endif
3252 : }
3253 :
3254 : /******* Hash Streaming *******/
3255 : #ifndef XXH_NO_STREAM
3256 : /*! @ingroup XXH64_family*/
3257 0 : XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
3258 : {
3259 0 : return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
3260 : }
3261 : /*! @ingroup XXH64_family */
3262 0 : XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
3263 : {
3264 0 : XXH_free(statePtr);
3265 0 : return XXH_OK;
3266 : }
3267 :
3268 : /*! @ingroup XXH64_family */
3269 0 : XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
3270 : {
3271 0 : XXH_memcpy(dstState, srcState, sizeof(*dstState));
3272 0 : }
3273 :
3274 : /*! @ingroup XXH64_family */
3275 0 : XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
3276 : {
3277 0 : XXH_ASSERT(statePtr != NULL);
3278 0 : memset(statePtr, 0, sizeof(*statePtr));
3279 0 : statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
3280 0 : statePtr->v[1] = seed + XXH_PRIME64_2;
3281 0 : statePtr->v[2] = seed + 0;
3282 0 : statePtr->v[3] = seed - XXH_PRIME64_1;
3283 0 : return XXH_OK;
3284 : }
3285 :
3286 : /*! @ingroup XXH64_family */
3287 : XXH_PUBLIC_API XXH_errorcode
3288 0 : XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
3289 : {
3290 0 : if (input==NULL) {
3291 0 : XXH_ASSERT(len == 0);
3292 0 : return XXH_OK;
3293 : }
3294 :
3295 0 : { const xxh_u8* p = (const xxh_u8*)input;
3296 0 : const xxh_u8* const bEnd = p + len;
3297 :
3298 0 : state->total_len += len;
3299 :
3300 0 : if (state->memsize + len < 32) { /* fill in tmp buffer */
3301 0 : XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len);
3302 0 : state->memsize += (xxh_u32)len;
3303 0 : return XXH_OK;
3304 : }
3305 :
3306 0 : if (state->memsize) { /* tmp buffer is full */
3307 0 : XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
3308 0 : state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
3309 0 : state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
3310 0 : state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
3311 0 : state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
3312 0 : p += 32 - state->memsize;
3313 0 : state->memsize = 0;
3314 : }
3315 :
3316 0 : if (p+32 <= bEnd) {
3317 0 : const xxh_u8* const limit = bEnd - 32;
3318 :
3319 : do {
3320 0 : state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
3321 0 : state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
3322 0 : state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
3323 0 : state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
3324 0 : } while (p<=limit);
3325 :
3326 : }
3327 :
3328 0 : if (p < bEnd) {
3329 0 : XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
3330 0 : state->memsize = (unsigned)(bEnd-p);
3331 : }
3332 : }
3333 :
3334 0 : return XXH_OK;
3335 : }
3336 :
3337 :
3338 : /*! @ingroup XXH64_family */
3339 0 : XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
3340 : {
3341 : xxh_u64 h64;
3342 :
3343 0 : if (state->total_len >= 32) {
3344 0 : h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
3345 0 : h64 = XXH64_mergeRound(h64, state->v[0]);
3346 0 : h64 = XXH64_mergeRound(h64, state->v[1]);
3347 0 : h64 = XXH64_mergeRound(h64, state->v[2]);
3348 0 : h64 = XXH64_mergeRound(h64, state->v[3]);
3349 : } else {
3350 0 : h64 = state->v[2] /*seed*/ + XXH_PRIME64_5;
3351 : }
3352 :
3353 0 : h64 += (xxh_u64) state->total_len;
3354 :
3355 0 : return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
3356 : }
3357 : #endif /* !XXH_NO_STREAM */
3358 :
3359 : /******* Canonical representation *******/
3360 :
3361 : /*! @ingroup XXH64_family */
3362 0 : XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
3363 : {
3364 : XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
3365 0 : if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
3366 0 : XXH_memcpy(dst, &hash, sizeof(*dst));
3367 0 : }
3368 :
3369 : /*! @ingroup XXH64_family */
3370 0 : XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
3371 : {
3372 0 : return XXH_readBE64(src);
3373 : }
3374 :
3375 : #ifndef XXH_NO_XXH3
3376 :
3377 : /* *********************************************************************
3378 : * XXH3
3379 : * New generation hash designed for speed on small keys and vectorization
3380 : ************************************************************************ */
3381 : /*!
3382 : * @}
3383 : * @defgroup XXH3_impl XXH3 implementation
3384 : * @ingroup impl
3385 : * @{
3386 : */
3387 :
3388 : /* === Compiler specifics === */
3389 :
3390 : #if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
3391 : # define XXH_RESTRICT /* disable */
3392 : #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
3393 : # define XXH_RESTRICT restrict
3394 : #elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
3395 : || (defined (__clang__)) \
3396 : || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
3397 : || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
3398 : /*
3399 : * There are a LOT more compilers that recognize __restrict but this
3400 : * covers the major ones.
3401 : */
3402 : # define XXH_RESTRICT __restrict
3403 : #else
3404 : # define XXH_RESTRICT /* disable */
3405 : #endif
3406 :
3407 : #if (defined(__GNUC__) && (__GNUC__ >= 3)) \
3408 : || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
3409 : || defined(__clang__)
3410 : # define XXH_likely(x) __builtin_expect(x, 1)
3411 : # define XXH_unlikely(x) __builtin_expect(x, 0)
3412 : #else
3413 : # define XXH_likely(x) (x)
3414 : # define XXH_unlikely(x) (x)
3415 : #endif
3416 :
3417 : #ifndef XXH_HAS_INCLUDE
3418 : # ifdef __has_include
3419 : # define XXH_HAS_INCLUDE(x) __has_include(x)
3420 : # else
3421 : # define XXH_HAS_INCLUDE(x) 0
3422 : # endif
3423 : #endif
3424 :
3425 : #if defined(__GNUC__) || defined(__clang__)
3426 : # if defined(__ARM_FEATURE_SVE)
3427 : # include <arm_sve.h>
3428 : # endif
3429 : # if defined(__ARM_NEON__) || defined(__ARM_NEON) \
3430 : || (defined(_M_ARM) && _M_ARM >= 7) \
3431 : || defined(_M_ARM64) || defined(_M_ARM64EC) \
3432 : || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* WASM SIMD128 via SIMDe */
3433 : # define inline __inline__ /* circumvent a clang bug */
3434 : # include <arm_neon.h>
3435 : # undef inline
3436 : # elif defined(__AVX2__)
3437 : # include <immintrin.h>
3438 : # elif defined(__SSE2__)
3439 : # include <emmintrin.h>
3440 : # endif
3441 : #endif
3442 :
3443 : #if defined(_MSC_VER)
3444 : # include <intrin.h>
3445 : #endif
3446 :
3447 : /*
3448 : * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
3449 : * remaining a true 64-bit/128-bit hash function.
3450 : *
3451 : * This is done by prioritizing a subset of 64-bit operations that can be
3452 : * emulated without too many steps on the average 32-bit machine.
3453 : *
3454 : * For example, these two lines seem similar, and run equally fast on 64-bit:
3455 : *
3456 : * xxh_u64 x;
3457 : * x ^= (x >> 47); // good
3458 : * x ^= (x >> 13); // bad
3459 : *
3460 : * However, to a 32-bit machine, there is a major difference.
3461 : *
3462 : * x ^= (x >> 47) looks like this:
3463 : *
3464 : * x.lo ^= (x.hi >> (47 - 32));
3465 : *
3466 : * while x ^= (x >> 13) looks like this:
3467 : *
3468 : * // note: funnel shifts are not usually cheap.
3469 : * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
3470 : * x.hi ^= (x.hi >> 13);
3471 : *
3472 : * The first one is significantly faster than the second, simply because the
3473 : * shift is larger than 32. This means:
3474 : * - All the bits we need are in the upper 32 bits, so we can ignore the lower
3475 : * 32 bits in the shift.
3476 : * - The shift result will always fit in the lower 32 bits, and therefore,
3477 : * we can ignore the upper 32 bits in the xor.
3478 : *
3479 : * Thanks to this optimization, XXH3 only requires these features to be efficient:
3480 : *
3481 : * - Usable unaligned access
3482 : * - A 32-bit or 64-bit ALU
3483 : * - If 32-bit, a decent ADC instruction
3484 : * - A 32 or 64-bit multiply with a 64-bit result
3485 : * - For the 128-bit variant, a decent byteswap helps short inputs.
3486 : *
3487 : * The first two are already required by XXH32, and almost all 32-bit and 64-bit
3488 : * platforms which can run XXH32 can run XXH3 efficiently.
3489 : *
3490 : * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
3491 : * notable exception.
3492 : *
3493 : * First of all, Thumb-1 lacks support for the UMULL instruction which
3494 : * performs the important long multiply. This means numerous __aeabi_lmul
3495 : * calls.
3496 : *
3497 : * Second of all, the 8 functional registers are just not enough.
3498 : * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
3499 : * Lo registers, and this shuffling results in thousands more MOVs than A32.
3500 : *
3501 : * A32 and T32 don't have this limitation. They can access all 14 registers,
3502 : * do a 32->64 multiply with UMULL, and the flexible operand allowing free
3503 : * shifts is helpful, too.
3504 : *
3505 : * Therefore, we do a quick sanity check.
3506 : *
3507 : * If compiling Thumb-1 for a target which supports ARM instructions, we will
3508 : * emit a warning, as it is not a "sane" platform to compile for.
3509 : *
3510 : * Usually, if this happens, it is because of an accident and you probably need
3511 : * to specify -march, as you likely meant to compile for a newer architecture.
3512 : *
3513 : * Credit: large sections of the vectorial and asm source code paths
3514 : * have been contributed by @easyaspi314
3515 : */
3516 : #if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
3517 : # warning "XXH3 is highly inefficient without ARM or Thumb-2."
3518 : #endif
3519 :
3520 : /* ==========================================
3521 : * Vectorization detection
3522 : * ========================================== */
3523 :
3524 : #ifdef XXH_DOXYGEN
3525 : /*!
3526 : * @ingroup tuning
3527 : * @brief Overrides the vectorization implementation chosen for XXH3.
3528 : *
3529 : * Can be defined to 0 to disable SIMD or any of the values mentioned in
3530 : * @ref XXH_VECTOR_TYPE.
3531 : *
3532 : * If this is not defined, it uses predefined macros to determine the best
3533 : * implementation.
3534 : */
3535 : # define XXH_VECTOR XXH_SCALAR
3536 : /*!
3537 : * @ingroup tuning
3538 : * @brief Possible values for @ref XXH_VECTOR.
3539 : *
3540 : * Note that these are actually implemented as macros.
3541 : *
3542 : * If this is not defined, it is detected automatically.
3543 :  * The internal macro XXH_X86DISPATCH overrides this.
3544 : */
3545 : enum XXH_VECTOR_TYPE /* fake enum */ {
3546 : XXH_SCALAR = 0, /*!< Portable scalar version */
3547 : XXH_SSE2 = 1, /*!<
3548 : * SSE2 for Pentium 4, Opteron, all x86_64.
3549 : *
3550 : * @note SSE2 is also guaranteed on Windows 10, macOS, and
3551 : * Android x86.
3552 : */
3553 : XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */
3554 : XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */
3555 : XXH_NEON = 4, /*!<
3556 : * NEON for most ARMv7-A, all AArch64, and WASM SIMD128
3557 : * via the SIMDeverywhere polyfill provided with the
3558 : * Emscripten SDK.
3559 : */
3560 : XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */
3561 : XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */
3562 : };
3563 : /*!
3564 : * @ingroup tuning
3565 : * @brief Selects the minimum alignment for XXH3's accumulators.
3566 : *
3567 : * When using SIMD, this should match the alignment required for said vector
3568 : * type, so, for example, 32 for AVX2.
3569 : *
3570 : * Default: Auto detected.
3571 : */
3572 : # define XXH_ACC_ALIGN 8
3573 : #endif
3574 :
3575 : /* Actual definition */
3576 : #ifndef XXH_DOXYGEN
3577 : # define XXH_SCALAR 0
3578 : # define XXH_SSE2 1
3579 : # define XXH_AVX2 2
3580 : # define XXH_AVX512 3
3581 : # define XXH_NEON 4
3582 : # define XXH_VSX 5
3583 : # define XXH_SVE 6
3584 : #endif
3585 :
3586 : #ifndef XXH_VECTOR /* can be defined on command line */
3587 : # if defined(__ARM_FEATURE_SVE)
3588 : # define XXH_VECTOR XXH_SVE
3589 : # elif ( \
3590 : defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
3591 : || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
3592 : || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* wasm simd128 via SIMDe */ \
3593 : ) && ( \
3594 : defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
3595 : || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
3596 : )
3597 : # define XXH_VECTOR XXH_NEON
3598 : # elif defined(__AVX512F__)
3599 : # define XXH_VECTOR XXH_AVX512
3600 : # elif defined(__AVX2__)
3601 : # define XXH_VECTOR XXH_AVX2
3602 : # elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
3603 : # define XXH_VECTOR XXH_SSE2
3604 : # elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
3605 : || (defined(__s390x__) && defined(__VEC__)) \
3606 : && defined(__GNUC__) /* TODO: IBM XL */
3607 : # define XXH_VECTOR XXH_VSX
3608 : # else
3609 : # define XXH_VECTOR XXH_SCALAR
3610 : # endif
3611 : #endif
3612 :
3613 : /* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
3614 : #if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
3615 : # ifdef _MSC_VER
3616 : # pragma warning(once : 4606)
3617 : # else
3618 : # warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
3619 : # endif
3620 : # undef XXH_VECTOR
3621 : # define XXH_VECTOR XXH_SCALAR
3622 : #endif
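/*
 * Illustrative build-time override (sketch): instead of relying on the
 * detection above, XXH_VECTOR can be pinned from the compiler command line,
 * e.g. to benchmark the scalar code path on a SIMD-capable machine:
 *
 *     cc -O3 -DXXH_VECTOR=XXH_SCALAR -c xxhash.c
 *     cc -O3 -DXXH_VECTOR=0          -c xxhash.c    (equivalent, numeric form)
 *
 * The value should be one of the XXH_SCALAR/XXH_SSE2/... constants defined
 * above; picking an ISA the target does not support (e.g. XXH_AVX2 without
 * -mavx2) will typically fail at compile time.
 */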
3623 :
3624 : /*
3625 : * Controls the alignment of the accumulator,
3626 : * for compatibility with aligned vector loads, which are usually faster.
3627 : */
3628 : #ifndef XXH_ACC_ALIGN
3629 : # if defined(XXH_X86DISPATCH)
3630 : # define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */
3631 : # elif XXH_VECTOR == XXH_SCALAR /* scalar */
3632 : # define XXH_ACC_ALIGN 8
3633 : # elif XXH_VECTOR == XXH_SSE2 /* sse2 */
3634 : # define XXH_ACC_ALIGN 16
3635 : # elif XXH_VECTOR == XXH_AVX2 /* avx2 */
3636 : # define XXH_ACC_ALIGN 32
3637 : # elif XXH_VECTOR == XXH_NEON /* neon */
3638 : # define XXH_ACC_ALIGN 16
3639 : # elif XXH_VECTOR == XXH_VSX /* vsx */
3640 : # define XXH_ACC_ALIGN 16
3641 : # elif XXH_VECTOR == XXH_AVX512 /* avx512 */
3642 : # define XXH_ACC_ALIGN 64
3643 : # elif XXH_VECTOR == XXH_SVE /* sve */
3644 : # define XXH_ACC_ALIGN 64
3645 : # endif
3646 : #endif
3647 :
3648 : #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
3649 : || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
3650 : # define XXH_SEC_ALIGN XXH_ACC_ALIGN
3651 : #elif XXH_VECTOR == XXH_SVE
3652 : # define XXH_SEC_ALIGN XXH_ACC_ALIGN
3653 : #else
3654 : # define XXH_SEC_ALIGN 8
3655 : #endif
3656 :
3657 : #if defined(__GNUC__) || defined(__clang__)
3658 : # define XXH_ALIASING __attribute__((may_alias))
3659 : #else
3660 : # define XXH_ALIASING /* nothing */
3661 : #endif
3662 :
3663 : /*
3664 : * UGLY HACK:
3665 : * GCC usually generates the best code with -O3 for xxHash.
3666 : *
3667 : * However, when targeting AVX2, it is overzealous in its unrolling resulting
3668 : * in code roughly 3/4 the speed of Clang.
3669 : *
3670 : * There are other issues, such as GCC splitting _mm256_loadu_si256 into
3671 : * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
3672 : * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
3673 : *
3674 : * That is why when compiling the AVX2 version, it is recommended to use either
3675 : * -O2 -mavx2 -march=haswell
3676 : * or
3677 : * -O2 -mavx2 -mno-avx256-split-unaligned-load
3678 : * for decent performance, or to use Clang instead.
3679 : *
3680 : * Fortunately, we can control the first one with a pragma that forces GCC into
3681 : * -O2, but the other one we can't control without "failed to inline always
3682 : * inline function due to target mismatch" warnings.
3683 : */
3684 : #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
3685 : && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
3686 : && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
3687 : # pragma GCC push_options
3688 : # pragma GCC optimize("-O2")
3689 : #endif
3690 :
3691 : #if XXH_VECTOR == XXH_NEON
3692 :
3693 : /*
3694 : * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
3695 : * optimizes out the entire hashLong loop because of the aliasing violation.
3696 : *
3697 : * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
3698 : * so the only option is to mark it as aliasing.
3699 : */
3700 : typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
3701 :
3702 : /*!
3703 : * @internal
3704 : * @brief `vld1q_u64` but faster and alignment-safe.
3705 : *
3706 : * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
3707 : * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
3708 : *
3709 : * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
3710 : * prohibits load-store optimizations. Therefore, a direct dereference is used.
3711 : *
3712 : * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
3713 : * unaligned load.
3714 : */
3715 : #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
3716 : XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
3717 : {
3718 : return *(xxh_aliasing_uint64x2_t const *)ptr;
3719 : }
3720 : #else
3721 : XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
3722 : {
3723 : return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
3724 : }
3725 : #endif
3726 :
3727 : /*!
3728 : * @internal
3729 : * @brief `vmlal_u32` on low and high halves of a vector.
3730 : *
3731 : * This is a workaround for AArch64 GCC < 11 which implemented arm_neon.h with
3732 :  * inline assembly and was therefore incapable of merging the `vget_{low, high}_u32`
3733 : * with `vmlal_u32`.
3734 : */
3735 : #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
3736 : XXH_FORCE_INLINE uint64x2_t
3737 : XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3738 : {
3739 : /* Inline assembly is the only way */
3740 : __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
3741 : return acc;
3742 : }
3743 : XXH_FORCE_INLINE uint64x2_t
3744 : XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3745 : {
3746 : /* This intrinsic works as expected */
3747 : return vmlal_high_u32(acc, lhs, rhs);
3748 : }
3749 : #else
3750 : /* Portable intrinsic versions */
3751 : XXH_FORCE_INLINE uint64x2_t
3752 : XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3753 : {
3754 : return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
3755 : }
3756 : /*! @copydoc XXH_vmlal_low_u32
3757 : * Assume the compiler converts this to vmlal_high_u32 on aarch64 */
3758 : XXH_FORCE_INLINE uint64x2_t
3759 : XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
3760 : {
3761 : return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
3762 : }
3763 : #endif
3764 :
3765 : /*!
3766 : * @ingroup tuning
3767 : * @brief Controls the NEON to scalar ratio for XXH3
3768 : *
3769 : * This can be set to 2, 4, 6, or 8.
3770 : *
3771 : * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used.
3772 : *
3773 : * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those
3774 : * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU
3775 : * bandwidth.
3776 : *
3777 : * This is even more noticeable on the more advanced cores like the Cortex-A76 which
3778 : * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
3779 : *
3780 : * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
3781 : * and 2 scalar lanes, which is chosen by default.
3782 : *
3783 : * This does not apply to Apple processors or 32-bit processors, which run better with
3784 : * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
3785 : *
3786 : * This change benefits CPUs with large micro-op buffers without negatively affecting
3787 : * most other CPUs:
3788 : *
3789 : * | Chipset | Dispatch type | NEON only | 6:2 hybrid | Diff. |
3790 : * |:----------------------|:--------------------|----------:|-----------:|------:|
3791 : * | Snapdragon 730 (A76) | 2 NEON/8 micro-ops | 8.8 GB/s | 10.1 GB/s | ~16% |
3792 : * | Snapdragon 835 (A73) | 2 NEON/3 micro-ops | 5.1 GB/s | 5.3 GB/s | ~5% |
3793 : * | Marvell PXA1928 (A53) | In-order dual-issue | 1.9 GB/s | 1.9 GB/s | 0% |
3794 : * | Apple M1 | 4 NEON/8 micro-ops | 37.3 GB/s | 36.1 GB/s | ~-3% |
3795 : *
3796 : * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
3797 : *
3798 : * When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes meaning
3799 :  * it effectively becomes a worse version of 4.
3800 : *
3801 : * @see XXH3_accumulate_512_neon()
3802 : */
3803 : # ifndef XXH3_NEON_LANES
3804 : # if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
3805 : && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
3806 : # define XXH3_NEON_LANES 6
3807 : # else
3808 : # define XXH3_NEON_LANES XXH_ACC_NB
3809 : # endif
3810 : # endif
3811 : #endif /* XXH_VECTOR == XXH_NEON */
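/*
 * Illustrative override (sketch): like XXH_VECTOR, XXH3_NEON_LANES can be set
 * from the build command, for example to force full-NEON operation on a core
 * where the default 6:2 hybrid split is not a win:
 *
 *     cc -O3 -DXXH3_NEON_LANES=8 -c xxhash.c
 *
 * Valid values are 2, 4, 6, or 8, as documented above.
 */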
3812 :
3813 : /*
3814 : * VSX and Z Vector helpers.
3815 : *
3816 : * This is very messy, and any pull requests to clean this up are welcome.
3817 : *
3818 : * There are a lot of problems with supporting VSX and s390x, due to
3819 : * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
3820 : */
3821 : #if XXH_VECTOR == XXH_VSX
3822 : /* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
3823 : * and `pixel`. This is a problem for obvious reasons.
3824 : *
3825 : * These keywords are unnecessary; the spec literally says they are
3826 : * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
3827 : * after including the header.
3828 : *
3829 : * We use pragma push_macro/pop_macro to keep the namespace clean. */
3830 : # pragma push_macro("bool")
3831 : # pragma push_macro("vector")
3832 : # pragma push_macro("pixel")
3833 : /* silence potential macro redefined warnings */
3834 : # undef bool
3835 : # undef vector
3836 : # undef pixel
3837 :
3838 : # if defined(__s390x__)
3839 : # include <s390intrin.h>
3840 : # else
3841 : # include <altivec.h>
3842 : # endif
3843 :
3844 : /* Restore the original macro values, if applicable. */
3845 : # pragma pop_macro("pixel")
3846 : # pragma pop_macro("vector")
3847 : # pragma pop_macro("bool")
3848 :
3849 : typedef __vector unsigned long long xxh_u64x2;
3850 : typedef __vector unsigned char xxh_u8x16;
3851 : typedef __vector unsigned xxh_u32x4;
3852 :
3853 : /*
3854 : * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
3855 : */
3856 : typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
3857 :
3858 : # ifndef XXH_VSX_BE
3859 : # if defined(__BIG_ENDIAN__) \
3860 : || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
3861 : # define XXH_VSX_BE 1
3862 : # elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
3863 : # warning "-maltivec=be is not recommended. Please use native endianness."
3864 : # define XXH_VSX_BE 1
3865 : # else
3866 : # define XXH_VSX_BE 0
3867 : # endif
3868 : # endif /* !defined(XXH_VSX_BE) */
3869 :
3870 : # if XXH_VSX_BE
3871 : # if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
3872 : # define XXH_vec_revb vec_revb
3873 : # else
3874 : /*!
3875 : * A polyfill for POWER9's vec_revb().
3876 : */
3877 : XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
3878 : {
3879 : xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
3880 : 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
3881 : return vec_perm(val, val, vByteSwap);
3882 : }
3883 : # endif
3884 : # endif /* XXH_VSX_BE */
3885 :
3886 : /*!
3887 : * Performs an unaligned vector load and byte swaps it on big endian.
3888 : */
3889 : XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
3890 : {
3891 : xxh_u64x2 ret;
3892 : XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
3893 : # if XXH_VSX_BE
3894 : ret = XXH_vec_revb(ret);
3895 : # endif
3896 : return ret;
3897 : }
3898 :
3899 : /*
3900 : * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
3901 : *
3902 : * These intrinsics weren't added until GCC 8, despite existing for a while,
3903 :  * and they are endian dependent. Also, their meanings swap depending on the version.
3904 : * */
3905 : # if defined(__s390x__)
3906 : /* s390x is always big endian, no issue on this platform */
3907 : # define XXH_vec_mulo vec_mulo
3908 : # define XXH_vec_mule vec_mule
3909 : # elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
3910 : /* Clang has a better way to control this: we can just use the builtin, which doesn't swap. */
3911 : /* The IBM XL Compiler (which defines __clang__) only implements the vec_* operations */
3912 : # define XXH_vec_mulo __builtin_altivec_vmulouw
3913 : # define XXH_vec_mule __builtin_altivec_vmuleuw
3914 : # else
3915 : /* gcc needs inline assembly */
3916 : /* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
3917 : XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
3918 : {
3919 : xxh_u64x2 result;
3920 : __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
3921 : return result;
3922 : }
3923 : XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
3924 : {
3925 : xxh_u64x2 result;
3926 : __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
3927 : return result;
3928 : }
3929 : # endif /* XXH_vec_mulo, XXH_vec_mule */
3930 : #endif /* XXH_VECTOR == XXH_VSX */
3931 :
3932 : #if XXH_VECTOR == XXH_SVE
3933 : #define ACCRND(acc, offset) \
3934 : do { \
3935 : svuint64_t input_vec = svld1_u64(mask, xinput + offset); \
3936 : svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \
3937 : svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \
3938 : svuint64_t swapped = svtbl_u64(input_vec, kSwap); \
3939 : svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \
3940 : svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \
3941 : svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
3942 : acc = svadd_u64_x(mask, acc, mul); \
3943 : } while (0)
3944 : #endif /* XXH_VECTOR == XXH_SVE */
3945 :
3946 : /* prefetch
3947 :  * can be disabled by defining the XXH_NO_PREFETCH build macro */
3948 : #if defined(XXH_NO_PREFETCH)
3949 : # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
3950 : #else
3951 : # if XXH_SIZE_OPT >= 1
3952 : # define XXH_PREFETCH(ptr) (void)(ptr)
3953 : # elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
3954 : # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
3955 : # define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
3956 : # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
3957 : # define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
3958 : # else
3959 : # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
3960 : # endif
3961 : #endif /* XXH_NO_PREFETCH */
3962 :
3963 :
3964 : /* ==========================================
3965 : * XXH3 default settings
3966 : * ========================================== */
3967 :
3968 : #define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */
3969 :
3970 : #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
3971 : # error "default keyset is not large enough"
3972 : #endif
3973 :
3974 : /*! Pseudorandom secret taken directly from FARSH. */
3975 : XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
3976 : 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
3977 : 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
3978 : 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
3979 : 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
3980 : 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
3981 : 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
3982 : 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
3983 : 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
3984 : 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
3985 : 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
3986 : 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
3987 : 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
3988 : };
3989 :
3990 : static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL; /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */
3991 : static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL; /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */
3992 :
3993 : #ifdef XXH_OLD_NAMES
3994 : # define kSecret XXH3_kSecret
3995 : #endif
3996 :
3997 : #ifdef XXH_DOXYGEN
3998 : /*!
3999 : * @brief Calculates a 32-bit to 64-bit long multiply.
4000 : *
4001 : * Implemented as a macro.
4002 : *
4003 : * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't
4004 : * need to (but it shouldn't need to anyways, it is about 7 instructions to do
4005 : * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we
4006 : * use that instead of the normal method.
4007 : *
4008 : * If you are compiling for platforms like Thumb-1 and don't have a better option,
4009 : * you may also want to write your own long multiply routine here.
4010 : *
4011 : * @param x, y Numbers to be multiplied
4012 : * @return 64-bit product of the low 32 bits of @p x and @p y.
4013 : */
4014 : XXH_FORCE_INLINE xxh_u64
4015 : XXH_mult32to64(xxh_u64 x, xxh_u64 y)
4016 : {
4017 : return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
4018 : }
4019 : #elif defined(_MSC_VER) && defined(_M_IX86)
4020 : # define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
4021 : #else
4022 : /*
4023 : * Downcast + upcast is usually better than masking on older compilers like
4024 : * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
4025 : *
4026 : * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
4027 : * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
4028 : */
4029 : # define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
4030 : #endif
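/*
 * Illustrative check (sketch, not library code): every XXH_mult32to64 variant
 * above uses only the low 32 bits of each operand, so high bits of a 64-bit
 * argument are discarded and the 32x32->64 product can never overflow:
 *
 *     XXH_ASSERT(XXH_mult32to64(0x100000002ULL, 0x200000003ULL) == 6);
 *     XXH_ASSERT(XXH_mult32to64(0xFFFFFFFFULL, 0xFFFFFFFFULL)
 *                == 0xFFFFFFFE00000001ULL);
 */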
4031 :
4032 : /*!
4033 : * @brief Calculates a 64->128-bit long multiply.
4034 : *
4035 : * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
4036 : * version.
4037 : *
4038 : * @param lhs , rhs The 64-bit integers to be multiplied
4039 : * @return The 128-bit result represented in an @ref XXH128_hash_t.
4040 : */
4041 : static XXH128_hash_t
4042 824 : XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
4043 : {
4044 : /*
4045 : * GCC/Clang __uint128_t method.
4046 : *
4047 : * On most 64-bit targets, GCC and Clang define a __uint128_t type.
4048 : * This is usually the best way as it usually uses a native long 64-bit
4049 : * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
4050 : *
4051 : * Usually.
4052 : *
4053 :  * Even on 32-bit platforms, Clang (and Emscripten) define this type
4054 :  * despite not having the native arithmetic for it. This results in a slow
4055 : * compiler builtin call which calculates a full 128-bit multiply.
4056 : * In that case it is best to use the portable one.
4057 : * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
4058 : */
4059 : #if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
4060 : && defined(__SIZEOF_INT128__) \
4061 : || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
4062 :
4063 824 : __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
4064 : XXH128_hash_t r128;
4065 824 : r128.low64 = (xxh_u64)(product);
4066 824 : r128.high64 = (xxh_u64)(product >> 64);
4067 824 : return r128;
4068 :
4069 : /*
4070 : * MSVC for x64's _umul128 method.
4071 : *
4072 : * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
4073 : *
4074 : * This compiles to single operand MUL on x64.
4075 : */
4076 : #elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
4077 :
4078 : #ifndef _MSC_VER
4079 : # pragma intrinsic(_umul128)
4080 : #endif
4081 : xxh_u64 product_high;
4082 : xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
4083 : XXH128_hash_t r128;
4084 : r128.low64 = product_low;
4085 : r128.high64 = product_high;
4086 : return r128;
4087 :
4088 : /*
4089 : * MSVC for ARM64's __umulh method.
4090 : *
4091 : * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
4092 : */
4093 : #elif defined(_M_ARM64) || defined(_M_ARM64EC)
4094 :
4095 : #ifndef _MSC_VER
4096 : # pragma intrinsic(__umulh)
4097 : #endif
4098 : XXH128_hash_t r128;
4099 : r128.low64 = lhs * rhs;
4100 : r128.high64 = __umulh(lhs, rhs);
4101 : return r128;
4102 :
4103 : #else
4104 : /*
4105 : * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
4106 : *
4107 : * This is a fast and simple grade school multiply, which is shown below
4108 : * with base 10 arithmetic instead of base 0x100000000.
4109 : *
4110 : * 9 3 // D2 lhs = 93
4111 : * x 7 5 // D2 rhs = 75
4112 : * ----------
4113 : * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
4114 : * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
4115 : * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
4116 : * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
4117 : * ---------
4118 : * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
4119 : * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
4120 : * ---------
4121 : * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
4122 : *
4123 : * The reasons for adding the products like this are:
4124 : * 1. It avoids manual carry tracking. Just like how
4125 : * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
4126 : * This avoids a lot of complexity.
4127 : *
4128 : * 2. It hints for, and on Clang, compiles to, the powerful UMAAL
4129 : * instruction available in ARM's Digital Signal Processing extension
4130 : * in 32-bit ARMv6 and later, which is shown below:
4131 : *
4132 : * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
4133 : * {
4134 : * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;
4135 : * *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
4136 : * *RdHi = (xxh_u32)(product >> 32);
4137 : * }
4138 : *
4139 : * This instruction was designed for efficient long multiplication, and
4140 : * allows this to be calculated in only 4 instructions at speeds
4141 : * comparable to some 64-bit ALUs.
4142 : *
4143 : * 3. It isn't terrible on other platforms. Usually this will be a couple
4144 : * of 32-bit ADD/ADCs.
4145 : */
4146 :
4147 : /* First calculate all of the cross products. */
4148 : xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
4149 : xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF);
4150 : xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
4151 : xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32);
4152 :
4153 : /* Now add the products together. These will never overflow. */
4154 : xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
4155 : xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
4156 : xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
4157 :
4158 : XXH128_hash_t r128;
4159 : r128.low64 = lower;
4160 : r128.high64 = upper;
4161 : return r128;
4162 : #endif
4163 : }
4164 :
4165 : /*!
4166 : * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it.
4167 : *
4168 : * The reason for the separate function is to prevent passing too many structs
4169 : * around by value. This will hopefully inline the multiply, but we don't force it.
4170 : *
4171 : * @param lhs , rhs The 64-bit integers to multiply
4172 : * @return The low 64 bits of the product XOR'd by the high 64 bits.
4173 : * @see XXH_mult64to128()
4174 : */
4175 : static xxh_u64
4176 824 : XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
4177 : {
4178 824 : XXH128_hash_t product = XXH_mult64to128(lhs, rhs);
4179 824 : return product.low64 ^ product.high64;
4180 : }
4181 :
4182 : /*! Seems to produce slightly better code on GCC for some reason. */
4183 412 : XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
4184 : {
4185 412 : XXH_ASSERT(0 <= shift && shift < 64);
4186 412 : return v64 ^ (v64 >> shift);
4187 : }
4188 :
4189 : /*
4190 : * This is a fast avalanche stage,
4191 : * suitable when input bits are already partially mixed
4192 : */
4193 206 : static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
4194 : {
4195 206 : h64 = XXH_xorshift64(h64, 37);
4196 206 : h64 *= PRIME_MX1;
4197 206 : h64 = XXH_xorshift64(h64, 32);
4198 206 : return h64;
4199 : }
4200 :
4201 : /*
4202 : * This is a stronger avalanche,
4203 : * inspired by Pelle Evensen's rrmxmx
4204 : * preferable when input has not been previously mixed
4205 : */
4206 0 : static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
4207 : {
4208 : /* this mix is inspired by Pelle Evensen's rrmxmx */
4209 0 : h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
4210 0 : h64 *= PRIME_MX2;
4211 0 : h64 ^= (h64 >> 35) + len ;
4212 0 : h64 *= PRIME_MX2;
4213 0 : return XXH_xorshift64(h64, 28);
4214 : }
4215 :
4216 :
4217 : /* ==========================================
4218 : * Short keys
4219 : * ==========================================
4220 : * One of the shortcomings of XXH32 and XXH64 was that their performance was
4221 :  * sub-optimal on short lengths. They used an iterative algorithm which strongly
4222 : * favored lengths that were a multiple of 4 or 8.
4223 : *
4224 : * Instead of iterating over individual inputs, we use a set of single shot
4225 : * functions which piece together a range of lengths and operate in constant time.
4226 : *
4227 : * Additionally, the number of multiplies has been significantly reduced. This
4228 : * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
4229 : *
4230 : * Depending on the platform, this may or may not be faster than XXH32, but it
4231 : * is almost guaranteed to be faster than XXH64.
4232 : */
4233 :
4234 : /*
4235 : * At very short lengths, there isn't enough input to fully hide secrets, or use
4236 : * the entire secret.
4237 : *
4238 : * There is also only a limited amount of mixing we can do before significantly
4239 : * impacting performance.
4240 : *
4241 : * Therefore, we use different sections of the secret and always mix two secret
4242 : * samples with an XOR. This should have no effect on performance on the
4243 : * seedless or withSeed variants because everything _should_ be constant folded
4244 : * by modern compilers.
4245 : *
4246 : * The XOR mixing hides individual parts of the secret and increases entropy.
4247 : *
4248 : * This adds an extra layer of strength for custom secrets.
4249 : */
4250 : XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
4251 0 : XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
4252 : {
4253 0 : XXH_ASSERT(input != NULL);
4254 0 : XXH_ASSERT(1 <= len && len <= 3);
4255 0 : XXH_ASSERT(secret != NULL);
4256 : /*
4257 : * len = 1: combined = { input[0], 0x01, input[0], input[0] }
4258 : * len = 2: combined = { input[1], 0x02, input[0], input[1] }
4259 : * len = 3: combined = { input[2], 0x03, input[0], input[1] }
4260 : */
4261 0 : { xxh_u8 const c1 = input[0];
4262 0 : xxh_u8 const c2 = input[len >> 1];
4263 0 : xxh_u8 const c3 = input[len - 1];
4264 0 : xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24)
4265 0 : | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
4266 0 : xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
4267 0 : xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;
4268 0 : return XXH64_avalanche(keyed);
4269 : }
4270 : }
4271 :
4272 : XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
4273 0 : XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
4274 : {
4275 0 : XXH_ASSERT(input != NULL);
4276 0 : XXH_ASSERT(secret != NULL);
4277 0 : XXH_ASSERT(4 <= len && len <= 8);
4278 0 : seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
4279 0 : { xxh_u32 const input1 = XXH_readLE32(input);
4280 0 : xxh_u32 const input2 = XXH_readLE32(input + len - 4);
4281 0 : xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
4282 0 : xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);
4283 0 : xxh_u64 const keyed = input64 ^ bitflip;
4284 0 : return XXH3_rrmxmx(keyed, len);
4285 : }
4286 : }
4287 :
4288 : XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
4289 0 : XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
4290 : {
4291 0 : XXH_ASSERT(input != NULL);
4292 0 : XXH_ASSERT(secret != NULL);
4293 0 : XXH_ASSERT(9 <= len && len <= 16);
4294 0 : { xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
4295 0 : xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
4296 0 : xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1;
4297 0 : xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;
4298 0 : xxh_u64 const acc = len
4299 0 : + XXH_swap64(input_lo) + input_hi
4300 0 : + XXH3_mul128_fold64(input_lo, input_hi);
4301 0 : return XXH3_avalanche(acc);
4302 : }
4303 : }
4304 :
4305 : XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
4306 0 : XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
4307 : {
4308 0 : XXH_ASSERT(len <= 16);
4309 0 : { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed);
4310 0 : if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
4311 0 : if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
4312 0 : return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));
4313 : }
4314 : }
4315 :
4316 : /*
4317 : * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
4318 : * multiplication by zero, affecting hashes of lengths 17 to 240.
4319 : *
4320 : * However, they are very unlikely.
4321 : *
4322 : * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
4323 : * unseeded non-cryptographic hashes, it does not attempt to defend itself
4324 : * against specially crafted inputs, only random inputs.
4325 : *
4326 : * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
4327 : * cancelling out the secret is taken an arbitrary number of times (addressed
4328 : * in XXH3_accumulate_512), this collision is very unlikely with random inputs
4329 : * and/or proper seeding:
4330 : *
4331 : * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
4332 : * function that is only called up to 16 times per hash with up to 240 bytes of
4333 : * input.
4334 : *
4335 : * This is not too bad for a non-cryptographic hash function, especially with
4336 : * only 64 bit outputs.
4337 : *
4338 : * The 128-bit variant (which trades some speed for strength) is NOT affected
4339 : * by this, although it is always a good idea to use a proper seed if you care
4340 : * about strength.
4341 : */
4342 0 : XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
4343 : const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
4344 : {
4345 : #if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
4346 : && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \
4347 : && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */
4348 : /*
4349 : * UGLY HACK:
4350 : * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in
4351 : * slower code.
4352 : *
4353 : * By forcing seed64 into a register, we disrupt the cost model and
4354 :  * cause it to scalarize. See `XXH32_round()`.
4355 : *
4356 : * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,
4357 : * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on
4358 : * GCC 9.2, despite both emitting scalar code.
4359 : *
4360 : * GCC generates much better scalar code than Clang for the rest of XXH3,
4361 : * which is why finding a more optimal codepath is an interest.
4362 : */
4363 : XXH_COMPILER_GUARD(seed64);
4364 : #endif
4365 0 : { xxh_u64 const input_lo = XXH_readLE64(input);
4366 0 : xxh_u64 const input_hi = XXH_readLE64(input+8);
4367 0 : return XXH3_mul128_fold64(
4368 0 : input_lo ^ (XXH_readLE64(secret) + seed64),
4369 0 : input_hi ^ (XXH_readLE64(secret+8) - seed64)
4370 : );
4371 : }
4372 : }
4373 :
4374 : /* For mid range keys, XXH3 uses a Mum-hash variant. */
4375 : XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
4376 0 : XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
4377 : const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
4378 : XXH64_hash_t seed)
4379 : {
4380 0 : XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
4381 0 : XXH_ASSERT(16 < len && len <= 128);
4382 :
4383 0 : { xxh_u64 acc = len * XXH_PRIME64_1;
4384 : #if XXH_SIZE_OPT >= 1
4385 : /* Smaller and cleaner, but slightly slower. */
4386 : unsigned int i = (unsigned int)(len - 1) / 32;
4387 : do {
4388 : acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
4389 : acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
4390 : } while (i-- != 0);
4391 : #else
4392 0 : if (len > 32) {
4393 0 : if (len > 64) {
4394 0 : if (len > 96) {
4395 0 : acc += XXH3_mix16B(input+48, secret+96, seed);
4396 0 : acc += XXH3_mix16B(input+len-64, secret+112, seed);
4397 : }
4398 0 : acc += XXH3_mix16B(input+32, secret+64, seed);
4399 0 : acc += XXH3_mix16B(input+len-48, secret+80, seed);
4400 : }
4401 0 : acc += XXH3_mix16B(input+16, secret+32, seed);
4402 0 : acc += XXH3_mix16B(input+len-32, secret+48, seed);
4403 : }
4404 0 : acc += XXH3_mix16B(input+0, secret+0, seed);
4405 0 : acc += XXH3_mix16B(input+len-16, secret+16, seed);
4406 : #endif
4407 0 : return XXH3_avalanche(acc);
4408 : }
4409 : }
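/*
 * Illustration of the pairing pattern above: for len = 100, the #else branch
 * issues eight XXH3_mix16B() calls over the 16-byte windows starting at
 *   front: input+0,  input+16, input+32, input+48
 *   back : input+36, input+52, input+68, input+84   (len-64, len-48, len-32, len-16)
 * so the windows overlap in the middle, every input byte is mixed at least
 * once, and each call consumes a distinct 16-byte slice of the secret
 * (offsets 0 through 112).
 */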
4410 :
4411 : #define XXH3_MIDSIZE_MAX 240
4412 :
4413 : XXH_NO_INLINE XXH_PUREF XXH64_hash_t
4414 0 : XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
4415 : const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
4416 : XXH64_hash_t seed)
4417 : {
4418 0 : XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
4419 0 : XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
4420 :
4421 : #define XXH3_MIDSIZE_STARTOFFSET 3
4422 : #define XXH3_MIDSIZE_LASTOFFSET 17
4423 :
4424 0 : { xxh_u64 acc = len * XXH_PRIME64_1;
4425 : xxh_u64 acc_end;
4426 0 : unsigned int const nbRounds = (unsigned int)len / 16;
4427 : unsigned int i;
4428 0 : XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
4429 0 : for (i=0; i<8; i++) {
4430 0 : acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
4431 : }
4432 : /* last bytes */
4433 0 : acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
4434 0 : XXH_ASSERT(nbRounds >= 8);
4435 0 : acc = XXH3_avalanche(acc);
4436 : #if defined(__clang__) /* Clang */ \
4437 : && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
4438 : && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
4439 : /*
4440 : * UGLY HACK:
4441 : * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
4442 :  * Everywhere else, it uses scalar code.
4443 : *
4444 : * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
4445 : * would still be slower than UMAAL (see XXH_mult64to128).
4446 : *
4447 : * Unfortunately, Clang doesn't handle the long multiplies properly and
4448 : * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
4449 : * scalarized into an ugly mess of VMOV.32 instructions.
4450 : *
4451 : * This mess is difficult to avoid without turning autovectorization
4452 :  * off completely, but these cases are usually relatively minor and/or
4453 :  * not worth fixing.
4454 : *
4455 : * This loop is the easiest to fix, as unlike XXH32, this pragma
4456 : * _actually works_ because it is a loop vectorization instead of an
4457 : * SLP vectorization.
4458 : */
4459 : #pragma clang loop vectorize(disable)
4460 : #endif
4461 0 : for (i=8 ; i < nbRounds; i++) {
4462 : /*
4463 :  * Prevents clang from unrolling the acc loop and interleaving it with this one.
4464 : */
4465 0 : XXH_COMPILER_GUARD(acc);
4466 0 : acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
4467 : }
4468 0 : return XXH3_avalanche(acc + acc_end);
4469 : }
4470 : }
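/*
 * Shape of the 129-240 byte path above: the first 128 bytes go through eight
 * fixed XXH3_mix16B() rounds, the accumulator is avalanched once as a
 * checkpoint, up to seven additional 16-byte rounds then use the secret
 * shifted by XXH3_MIDSIZE_STARTOFFSET, and the last 16 input bytes are always
 * re-mixed with the secret slice starting at
 * XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET.
 */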
4471 :
4472 :
4473 : /* ======= Long Keys ======= */
4474 :
4475 : #define XXH_STRIPE_LEN 64
4476 : #define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */
4477 : #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
4478 :
4479 : #ifdef XXH_OLD_NAMES
4480 : # define STRIPE_LEN XXH_STRIPE_LEN
4481 : # define ACC_NB XXH_ACC_NB
4482 : #endif
4483 :
4484 : #ifndef XXH_PREFETCH_DIST
4485 : # ifdef __clang__
4486 : # define XXH_PREFETCH_DIST 320
4487 : # else
4488 : # if (XXH_VECTOR == XXH_AVX512)
4489 : # define XXH_PREFETCH_DIST 512
4490 : # else
4491 : # define XXH_PREFETCH_DIST 384
4492 : # endif
4493 : # endif /* __clang__ */
4494 : #endif /* XXH_PREFETCH_DIST */
4495 :
4496 : /*
4497 : * These macros are to generate an XXH3_accumulate() function.
4498 : * The two arguments select the name suffix and target attribute.
4499 : *
4500 : * The name of this symbol is XXH3_accumulate_<name>() and it calls
4501 : * XXH3_accumulate_512_<name>().
4502 : *
4503 : * It may be useful to hand implement this function if the compiler fails to
4504 : * optimize the inline function.
4505 : */
4506 : #define XXH3_ACCUMULATE_TEMPLATE(name) \
4507 : void \
4508 : XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \
4509 : const xxh_u8* XXH_RESTRICT input, \
4510 : const xxh_u8* XXH_RESTRICT secret, \
4511 : size_t nbStripes) \
4512 : { \
4513 : size_t n; \
4514 : for (n = 0; n < nbStripes; n++ ) { \
4515 : const xxh_u8* const in = input + n*XXH_STRIPE_LEN; \
4516 : XXH_PREFETCH(in + XXH_PREFETCH_DIST); \
4517 : XXH3_accumulate_512_##name( \
4518 : acc, \
4519 : in, \
4520 : secret + n*XXH_SECRET_CONSUME_RATE); \
4521 : } \
4522 : }
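/*
 * For reference, XXH3_ACCUMULATE_TEMPLATE(scalar) expands to roughly the
 * following (minus whatever inline/target qualifiers precede the macro):
 */
#if 0 /* expansion shown for illustration only */
void
XXH3_accumulate_scalar(xxh_u64* XXH_RESTRICT acc,
                       const xxh_u8* XXH_RESTRICT input,
                       const xxh_u8* XXH_RESTRICT secret,
                       size_t nbStripes)
{
    size_t n;
    for (n = 0; n < nbStripes; n++ ) {
        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;
        XXH_PREFETCH(in + XXH_PREFETCH_DIST);
        XXH3_accumulate_512_scalar(acc, in, secret + n*XXH_SECRET_CONSUME_RATE);
    }
}
#endif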
4523 :
4524 :
4525 0 : XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
4526 : {
4527 : if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
4528 0 : XXH_memcpy(dst, &v64, sizeof(v64));
4529 0 : }
4530 :
4531 : /* Several intrinsic functions below are supposed to accept __int64 as argument,
4532 : * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
4533 : * However, several environments do not define __int64 type,
4534 : * requiring a workaround.
4535 : */
4536 : #if !defined (__VMS) \
4537 : && (defined (__cplusplus) \
4538 : || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
4539 : typedef int64_t xxh_i64;
4540 : #else
4541 : /* the following type must have a width of 64-bit */
4542 : typedef long long xxh_i64;
4543 : #endif
4544 :
4545 :
4546 : /*
4547 : * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
4548 : *
4549 : * It is a hardened version of UMAC, based off of FARSH's implementation.
4550 : *
4551 : * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
4552 : * implementations, and it is ridiculously fast.
4553 : *
4554 : * We harden it by mixing the original input to the accumulators as well as the product.
4555 : *
4556 : * This means that in the (relatively likely) case of a multiply by zero, the
4557 : * original input is preserved.
4558 : *
4559 : * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
4560 : * cross-pollination, as otherwise the upper and lower halves would be
4561 : * essentially independent.
4562 : *
4563 : * This doesn't matter on 64-bit hashes since they all get merged together in
4564 : * the end, so we skip the extra step.
4565 : *
4566 : * Both XXH3_64bits and XXH3_128bits use this subroutine.
4567 : */
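/*
 * Expressed as scalar operations, a single accumulation round does the
 * following per 64-bit lane (this is what XXH3_scalarRound() below implements):
 *
 *   data_key       = input64[lane] ^ secret64[lane];
 *   acc[lane ^ 1] += input64[lane];                  (the raw input is kept)
 *   acc[lane]     += (data_key & 0xFFFFFFFF) * (data_key >> 32);
 *
 * The SIMD variants below apply the same operation to 2, 4 or 8 lanes at once.
 */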
4568 :
4569 : #if (XXH_VECTOR == XXH_AVX512) \
4570 : || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
4571 :
4572 : #ifndef XXH_TARGET_AVX512
4573 : # define XXH_TARGET_AVX512 /* disable attribute target */
4574 : #endif
4575 :
4576 : XXH_FORCE_INLINE XXH_TARGET_AVX512 void
4577 : XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
4578 : const void* XXH_RESTRICT input,
4579 : const void* XXH_RESTRICT secret)
4580 : {
4581 : __m512i* const xacc = (__m512i *) acc;
4582 : XXH_ASSERT((((size_t)acc) & 63) == 0);
4583 : XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
4584 :
4585 : {
4586 : /* data_vec = input[0]; */
4587 : __m512i const data_vec = _mm512_loadu_si512 (input);
4588 : /* key_vec = secret[0]; */
4589 : __m512i const key_vec = _mm512_loadu_si512 (secret);
4590 : /* data_key = data_vec ^ key_vec; */
4591 : __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec);
4592 : /* data_key_lo = data_key >> 32; */
4593 : __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
4594 : /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
4595 : __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo);
4596 : /* xacc[0] += swap(data_vec); */
4597 : __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
4598 : __m512i const sum = _mm512_add_epi64(*xacc, data_swap);
4599 : /* xacc[0] += product; */
4600 : *xacc = _mm512_add_epi64(product, sum);
4601 : }
4602 : }
4603 : XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
4604 :
4605 : /*
4606 : * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
4607 : *
4608 : * Multiplication isn't perfect, as explained by Google in HighwayHash:
4609 : *
4610 : * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
4611 : * // varying degrees. In descending order of goodness, bytes
4612 : * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.
4613 : * // As expected, the upper and lower bytes are much worse.
4614 : *
4615 : * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291
4616 : *
4617 : * Since our algorithm uses a pseudorandom secret to add some variance into the
4618 : * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.
4619 : *
4620 : * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid
4621 : * extraction.
4622 : *
4623 : * Both XXH3_64bits and XXH3_128bits use this subroutine.
4624 : */
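/*
 * Per 64-bit lane, the scramble amounts to (see XXH3_scalarScrambleRound() below):
 *
 *   acc[lane] = (XXH_xorshift64(acc[lane], 47) ^ secret64[lane]) * XXH_PRIME32_1;
 *
 * i.e. an xorshift, a secret XOR, and a multiply by a 32-bit prime, truncated
 * to 64 bits.
 */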
4625 :
4626 : XXH_FORCE_INLINE XXH_TARGET_AVX512 void
4627 : XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4628 : {
4629 : XXH_ASSERT((((size_t)acc) & 63) == 0);
4630 : XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
4631 : { __m512i* const xacc = (__m512i*) acc;
4632 : const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
4633 :
4634 : /* xacc[0] ^= (xacc[0] >> 47) */
4635 : __m512i const acc_vec = *xacc;
4636 : __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47);
4637 : /* xacc[0] ^= secret; */
4638 : __m512i const key_vec = _mm512_loadu_si512 (secret);
4639 : __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
4640 :
4641 : /* xacc[0] *= XXH_PRIME32_1; */
4642 : __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
4643 : __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32);
4644 : __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32);
4645 : *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
4646 : }
4647 : }
4648 :
4649 : XXH_FORCE_INLINE XXH_TARGET_AVX512 void
4650 : XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4651 : {
4652 : XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
4653 : XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
4654 : XXH_ASSERT(((size_t)customSecret & 63) == 0);
4655 : (void)(&XXH_writeLE64);
4656 : { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
4657 : __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
4658 : __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
4659 :
4660 : const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret);
4661 : __m512i* const dest = ( __m512i*) customSecret;
4662 : int i;
4663 : XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
4664 : XXH_ASSERT(((size_t)dest & 63) == 0);
4665 : for (i=0; i < nbRounds; ++i) {
4666 : dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
4667 : } }
4668 : }
4669 :
4670 : #endif
4671 :
4672 : #if (XXH_VECTOR == XXH_AVX2) \
4673 : || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
4674 :
4675 : #ifndef XXH_TARGET_AVX2
4676 : # define XXH_TARGET_AVX2 /* disable attribute target */
4677 : #endif
4678 :
4679 : XXH_FORCE_INLINE XXH_TARGET_AVX2 void
4680 : XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
4681 : const void* XXH_RESTRICT input,
4682 : const void* XXH_RESTRICT secret)
4683 : {
4684 : XXH_ASSERT((((size_t)acc) & 31) == 0);
4685 : { __m256i* const xacc = (__m256i *) acc;
4686 : /* Unaligned. This is mainly for pointer arithmetic, and because
4687 : * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
4688 : const __m256i* const xinput = (const __m256i *) input;
4689 : /* Unaligned. This is mainly for pointer arithmetic, and because
4690 : * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
4691 : const __m256i* const xsecret = (const __m256i *) secret;
4692 :
4693 : size_t i;
4694 : for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
4695 : /* data_vec = xinput[i]; */
4696 : __m256i const data_vec = _mm256_loadu_si256 (xinput+i);
4697 : /* key_vec = xsecret[i]; */
4698 : __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
4699 : /* data_key = data_vec ^ key_vec; */
4700 : __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
4701 : /* data_key_lo = data_key >> 32; */
4702 : __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
4703 : /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
4704 : __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo);
4705 : /* xacc[i] += swap(data_vec); */
4706 : __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
4707 : __m256i const sum = _mm256_add_epi64(xacc[i], data_swap);
4708 : /* xacc[i] += product; */
4709 : xacc[i] = _mm256_add_epi64(product, sum);
4710 : } }
4711 : }
4712 : XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
4713 :
4714 : XXH_FORCE_INLINE XXH_TARGET_AVX2 void
4715 : XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4716 : {
4717 : XXH_ASSERT((((size_t)acc) & 31) == 0);
4718 : { __m256i* const xacc = (__m256i*) acc;
4719 : /* Unaligned. This is mainly for pointer arithmetic, and because
4720 : * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
4721 : const __m256i* const xsecret = (const __m256i *) secret;
4722 : const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
4723 :
4724 : size_t i;
4725 : for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
4726 : /* xacc[i] ^= (xacc[i] >> 47) */
4727 : __m256i const acc_vec = xacc[i];
4728 : __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47);
4729 : __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted);
4730 : /* xacc[i] ^= xsecret; */
4731 : __m256i const key_vec = _mm256_loadu_si256 (xsecret+i);
4732 : __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
4733 :
4734 : /* xacc[i] *= XXH_PRIME32_1; */
4735 : __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
4736 : __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32);
4737 : __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32);
4738 : xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
4739 : }
4740 : }
4741 : }
4742 :
4743 : XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4744 : {
4745 : XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
4746 : XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
4747 : XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
4748 : (void)(&XXH_writeLE64);
4749 : XXH_PREFETCH(customSecret);
4750 : { __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
4751 :
4752 : const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret);
4753 : __m256i* dest = ( __m256i*) customSecret;
4754 :
4755 : # if defined(__GNUC__) || defined(__clang__)
4756 : /*
4757 :  * On GCC & Clang, marking 'dest' as modified causes the compiler to:
4758 :  * - not extract the secret from SSE registers in the internal loop
4759 :  * - use fewer registers, and avoid pushing them onto the stack
4760 : */
4761 : XXH_COMPILER_GUARD(dest);
4762 : # endif
4763 : XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
4764 : XXH_ASSERT(((size_t)dest & 31) == 0);
4765 :
4766 :  /* GCC -O2 needs the loop unrolled manually */
4767 : dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
4768 : dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
4769 : dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
4770 : dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
4771 : dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
4772 : dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
4773 : }
4774 : }
4775 :
4776 : #endif
4777 :
4778 : /* x86dispatch always generates SSE2 */
4779 : #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
4780 :
4781 : #ifndef XXH_TARGET_SSE2
4782 : # define XXH_TARGET_SSE2 /* disable attribute target */
4783 : #endif
4784 :
4785 : XXH_FORCE_INLINE XXH_TARGET_SSE2 void
4786 7046 : XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
4787 : const void* XXH_RESTRICT input,
4788 : const void* XXH_RESTRICT secret)
4789 : {
4790 : /* SSE2 is just a half-scale version of the AVX2 version. */
4791 7046 : XXH_ASSERT((((size_t)acc) & 15) == 0);
4792 7046 : { __m128i* const xacc = (__m128i *) acc;
4793 : /* Unaligned. This is mainly for pointer arithmetic, and because
4794 : * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
4795 7046 : const __m128i* const xinput = (const __m128i *) input;
4796 : /* Unaligned. This is mainly for pointer arithmetic, and because
4797 : * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
4798 7046 : const __m128i* const xsecret = (const __m128i *) secret;
4799 :
4800 : size_t i;
4801 35230 : for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
4802 : /* data_vec = xinput[i]; */
4803 28184 : __m128i const data_vec = _mm_loadu_si128 (xinput+i);
4804 : /* key_vec = xsecret[i]; */
4805 56368 : __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
4806 : /* data_key = data_vec ^ key_vec; */
4807 28184 : __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
4808 : /* data_key_lo = data_key >> 32; */
4809 28184 : __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
4810 : /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
4811 28184 : __m128i const product = _mm_mul_epu32 (data_key, data_key_lo);
4812 : /* xacc[i] += swap(data_vec); */
4813 28184 : __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
4814 28184 : __m128i const sum = _mm_add_epi64(xacc[i], data_swap);
4815 : /* xacc[i] += product; */
4816 56368 : xacc[i] = _mm_add_epi64(product, sum);
4817 : } }
4818 7046 : }
4819 7410 : XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
4820 :
4821 : XXH_FORCE_INLINE XXH_TARGET_SSE2 void
4822 364 : XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
4823 : {
4824 364 : XXH_ASSERT((((size_t)acc) & 15) == 0);
4825 364 : { __m128i* const xacc = (__m128i*) acc;
4826 : /* Unaligned. This is mainly for pointer arithmetic, and because
4827 : * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
4828 364 : const __m128i* const xsecret = (const __m128i *) secret;
4829 364 : const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);
4830 :
4831 : size_t i;
4832 1820 : for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
4833 : /* xacc[i] ^= (xacc[i] >> 47) */
4834 1456 : __m128i const acc_vec = xacc[i];
4835 1456 : __m128i const shifted = _mm_srli_epi64 (acc_vec, 47);
4836 1456 : __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted);
4837 : /* xacc[i] ^= xsecret[i]; */
4838 2912 : __m128i const key_vec = _mm_loadu_si128 (xsecret+i);
4839 1456 : __m128i const data_key = _mm_xor_si128 (data_vec, key_vec);
4840 :
4841 : /* xacc[i] *= XXH_PRIME32_1; */
4842 1456 : __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
4843 1456 : __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32);
4844 1456 : __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32);
4845 2912 : xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
4846 : }
4847 : }
4848 364 : }
4849 :
4850 0 : XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
4851 : {
4852 : XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
4853 : (void)(&XXH_writeLE64);
4854 0 : { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
4855 :
4856 : # if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
4857 : /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
4858 : XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
4859 : __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
4860 : # else
4861 0 : __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
4862 : # endif
4863 : int i;
4864 :
4865 0 : const void* const src16 = XXH3_kSecret;
4866 0 : __m128i* dst16 = (__m128i*) customSecret;
4867 : # if defined(__GNUC__) || defined(__clang__)
4868 : /*
4869 :  * On GCC & Clang, marking 'dest' as modified causes the compiler to:
4870 :  * - not extract the secret from SSE registers in the internal loop
4871 :  * - use fewer registers, and avoid pushing them onto the stack
4872 : */
4873 0 : XXH_COMPILER_GUARD(dst16);
4874 : # endif
4875 0 : XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
4876 0 : XXH_ASSERT(((size_t)dst16 & 15) == 0);
4877 :
4878 0 : for (i=0; i < nbRounds; ++i) {
4879 0 : dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
4880 : } }
4881 0 : }
4882 :
4883 : #endif
4884 :
4885 : #if (XXH_VECTOR == XXH_NEON)
4886 :
4887 : /* forward declarations for the scalar routines */
4888 : XXH_FORCE_INLINE void
4889 : XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
4890 : void const* XXH_RESTRICT secret, size_t lane);
4891 :
4892 : XXH_FORCE_INLINE void
4893 : XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
4894 : void const* XXH_RESTRICT secret, size_t lane);
4895 :
4896 : /*!
4897 : * @internal
4898 : * @brief The bulk processing loop for NEON and WASM SIMD128.
4899 : *
4900 : * The NEON code path is actually partially scalar when running on AArch64. This
4901 : * is to optimize the pipelining and can have up to 15% speedup depending on the
4902 : * CPU, and it also mitigates some GCC codegen issues.
4903 : *
4904 : * @see XXH3_NEON_LANES for configuring this and details about this optimization.
4905 : *
4906 : * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit
4907 :  * integers, unlike the other platforms which mask full 64-bit vectors,
4908 : * so the setup is more complicated than just shifting right.
4909 : *
4910 : * Additionally, there is an optimization for 4 lanes at once noted below.
4911 : *
4912 :  * Since, as stated, the optimal number of lanes for Cortexes is 6, three
4913 :  * versions of the accumulate operation end up being used: the 4-lane loop,
4914 :  * a 2-lane loop for the remaining NEON lanes, and the scalar rounds.
4915 : *
4916 : * WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap
4917 : * nearly perfectly.
4918 : */
4919 :
4920 : XXH_FORCE_INLINE void
4921 : XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
4922 : const void* XXH_RESTRICT input,
4923 : const void* XXH_RESTRICT secret)
4924 : {
4925 : XXH_ASSERT((((size_t)acc) & 15) == 0);
4926 : XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
4927 : { /* GCC for darwin arm64 does not like aliasing here */
4928 : xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
4929 : /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
4930 : uint8_t const* xinput = (const uint8_t *) input;
4931 : uint8_t const* xsecret = (const uint8_t *) secret;
4932 :
4933 : size_t i;
4934 : #ifdef __wasm_simd128__
4935 : /*
4936 : * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret
4937 :  * is constant propagated, which results in it being converted to this
4938 : * inside the loop:
4939 : *
4940 : * a = v128.load(XXH3_kSecret + 0 + $secret_offset, offset = 0)
4941 : * b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0)
4942 : * ...
4943 : *
4944 : * This requires a full 32-bit address immediate (and therefore a 6 byte
4945 : * instruction) as well as an add for each offset.
4946 : *
4947 : * Putting an asm guard prevents it from folding (at the cost of losing
4948 : * the alignment hint), and uses the free offset in `v128.load` instead
4949 : * of adding secret_offset each time which overall reduces code size by
4950 : * about a kilobyte and improves performance.
4951 : */
4952 : XXH_COMPILER_GUARD(xsecret);
4953 : #endif
4954 : /* Scalar lanes use the normal scalarRound routine */
4955 : for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
4956 : XXH3_scalarRound(acc, input, secret, i);
4957 : }
4958 : i = 0;
4959 : /* 4 NEON lanes at a time. */
4960 : for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
4961 : /* data_vec = xinput[i]; */
4962 : uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16));
4963 : uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16));
4964 : /* key_vec = xsecret[i]; */
4965 : uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16));
4966 : uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i+1) * 16));
4967 : /* data_swap = swap(data_vec) */
4968 : uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
4969 : uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
4970 : /* data_key = data_vec ^ key_vec; */
4971 : uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
4972 : uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
4973 :
4974 : /*
4975 : * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
4976 : * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
4977 : * get one vector with the low 32 bits of each lane, and one vector
4978 : * with the high 32 bits of each lane.
4979 : *
4980 : * The intrinsic returns a double vector because the original ARMv7-a
4981 : * instruction modified both arguments in place. AArch64 and SIMD128 emit
4982 : * two instructions from this intrinsic.
4983 : *
4984 : * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
4985 : * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
4986 : */
4987 : uint32x4x2_t unzipped = vuzpq_u32(
4988 : vreinterpretq_u32_u64(data_key_1),
4989 : vreinterpretq_u32_u64(data_key_2)
4990 : );
4991 : /* data_key_lo = data_key & 0xFFFFFFFF */
4992 : uint32x4_t data_key_lo = unzipped.val[0];
4993 : /* data_key_hi = data_key >> 32 */
4994 : uint32x4_t data_key_hi = unzipped.val[1];
4995 : /*
4996 :  * Then, we can split the vectors horizontally and multiply; like most
4997 :  * widening intrinsics, the multiply has a variant that works on the high
4998 :  * half vectors for free on AArch64. A similar instruction is available on SIMD128.
4999 : *
5000 : * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
5001 : */
5002 : uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
5003 : uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
5004 : /*
5005 : * Clang reorders
5006 : * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s
5007 : * c += a; // add acc.2d, acc.2d, swap.2d
5008 : * to
5009 : * c += a; // add acc.2d, acc.2d, swap.2d
5010 : * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s
5011 : *
5012 : * While it would make sense in theory since the addition is faster,
5013 : * for reasons likely related to umlal being limited to certain NEON
5014 : * pipelines, this is worse. A compiler guard fixes this.
5015 : */
5016 : XXH_COMPILER_GUARD_CLANG_NEON(sum_1);
5017 : XXH_COMPILER_GUARD_CLANG_NEON(sum_2);
5018 : /* xacc[i] = acc_vec + sum; */
5019 : xacc[i] = vaddq_u64(xacc[i], sum_1);
5020 : xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
5021 : }
5022 : /* Operate on the remaining NEON lanes 2 at a time. */
5023 : for (; i < XXH3_NEON_LANES / 2; i++) {
5024 : /* data_vec = xinput[i]; */
5025 : uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));
5026 : /* key_vec = xsecret[i]; */
5027 : uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
5028 : /* acc_vec_2 = swap(data_vec) */
5029 : uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
5030 : /* data_key = data_vec ^ key_vec; */
5031 : uint64x2_t data_key = veorq_u64(data_vec, key_vec);
5032 : /* For two lanes, just use VMOVN and VSHRN. */
5033 : /* data_key_lo = data_key & 0xFFFFFFFF; */
5034 : uint32x2_t data_key_lo = vmovn_u64(data_key);
5035 : /* data_key_hi = data_key >> 32; */
5036 : uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
5037 : /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */
5038 : uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
5039 : /* Same Clang workaround as before */
5040 : XXH_COMPILER_GUARD_CLANG_NEON(sum);
5041 : /* xacc[i] = acc_vec + sum; */
5042 : xacc[i] = vaddq_u64 (xacc[i], sum);
5043 : }
5044 : }
5045 : }
5046 : XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
5047 :
5048 : XXH_FORCE_INLINE void
5049 : XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
5050 : {
5051 : XXH_ASSERT((((size_t)acc) & 15) == 0);
5052 :
5053 : { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc;
5054 : uint8_t const* xsecret = (uint8_t const*) secret;
5055 :
5056 : size_t i;
5057 : /* WASM uses operator overloads and doesn't need these. */
5058 : #ifndef __wasm_simd128__
5059 : /* { prime32_1, prime32_1 } */
5060 : uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1);
5061 : /* { 0, prime32_1, 0, prime32_1 } */
5062 : uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32));
5063 : #endif
5064 :
5065 : /* AArch64 uses both scalar and neon at the same time */
5066 : for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
5067 : XXH3_scalarScrambleRound(acc, secret, i);
5068 : }
5069 : for (i=0; i < XXH3_NEON_LANES / 2; i++) {
5070 : /* xacc[i] ^= (xacc[i] >> 47); */
5071 : uint64x2_t acc_vec = xacc[i];
5072 : uint64x2_t shifted = vshrq_n_u64(acc_vec, 47);
5073 : uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
5074 :
5075 : /* xacc[i] ^= xsecret[i]; */
5076 : uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
5077 : uint64x2_t data_key = veorq_u64(data_vec, key_vec);
5078 : /* xacc[i] *= XXH_PRIME32_1 */
5079 : #ifdef __wasm_simd128__
5080 : /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */
5081 : xacc[i] = data_key * XXH_PRIME32_1;
5082 : #else
5083 : /*
5084 : * Expanded version with portable NEON intrinsics
5085 : *
5086 : * lo(x) * lo(y) + (hi(x) * lo(y) << 32)
5087 : *
5088 : * prod_hi = hi(data_key) * lo(prime) << 32
5089 : *
5090 :  * Since we only need 32 bits of this multiply, a trick can be used: reinterpreting the vector
5091 : * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits
5092 : * and avoid the shift.
5093 : */
5094 : uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi);
5095 : /* Extract low bits for vmlal_u32 */
5096 : uint32x2_t data_key_lo = vmovn_u64(data_key);
5097 : /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */
5098 : xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo);
5099 : #endif
5100 : }
5101 : }
5102 : }
5103 : #endif
5104 :
5105 : #if (XXH_VECTOR == XXH_VSX)
5106 :
5107 : XXH_FORCE_INLINE void
5108 : XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
5109 : const void* XXH_RESTRICT input,
5110 : const void* XXH_RESTRICT secret)
5111 : {
5112 : /* presumed aligned */
5113 : xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
5114 : xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */
5115 : xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */
5116 : xxh_u64x2 const v32 = { 32, 32 };
5117 : size_t i;
5118 : for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
5119 : /* data_vec = xinput[i]; */
5120 : xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
5121 : /* key_vec = xsecret[i]; */
5122 : xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
5123 : xxh_u64x2 const data_key = data_vec ^ key_vec;
5124 : /* shuffled = (data_key << 32) | (data_key >> 32); */
5125 : xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
5126 : /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
5127 : xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
5128 : /* acc_vec = xacc[i]; */
5129 : xxh_u64x2 acc_vec = xacc[i];
5130 : acc_vec += product;
5131 :
5132 : /* swap high and low halves */
5133 : #ifdef __s390x__
5134 : acc_vec += vec_permi(data_vec, data_vec, 2);
5135 : #else
5136 : acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
5137 : #endif
5138 : xacc[i] = acc_vec;
5139 : }
5140 : }
5141 : XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
5142 :
5143 : XXH_FORCE_INLINE void
5144 : XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
5145 : {
5146 : XXH_ASSERT((((size_t)acc) & 15) == 0);
5147 :
5148 : { xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
5149 : const xxh_u8* const xsecret = (const xxh_u8*) secret;
5150 : /* constants */
5151 : xxh_u64x2 const v32 = { 32, 32 };
5152 : xxh_u64x2 const v47 = { 47, 47 };
5153 : xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
5154 : size_t i;
5155 : for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
5156 : /* xacc[i] ^= (xacc[i] >> 47); */
5157 : xxh_u64x2 const acc_vec = xacc[i];
5158 : xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
5159 :
5160 : /* xacc[i] ^= xsecret[i]; */
5161 : xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i);
5162 : xxh_u64x2 const data_key = data_vec ^ key_vec;
5163 :
5164 : /* xacc[i] *= XXH_PRIME32_1 */
5165 : /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */
5166 : xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime);
5167 : /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */
5168 : xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime);
5169 : xacc[i] = prod_odd + (prod_even << v32);
5170 : } }
5171 : }
5172 :
5173 : #endif
5174 :
5175 : #if (XXH_VECTOR == XXH_SVE)
5176 :
5177 : XXH_FORCE_INLINE void
5178 : XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
5179 : const void* XXH_RESTRICT input,
5180 : const void* XXH_RESTRICT secret)
5181 : {
5182 : uint64_t *xacc = (uint64_t *)acc;
5183 : const uint64_t *xinput = (const uint64_t *)(const void *)input;
5184 : const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
5185 : svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
5186 : uint64_t element_count = svcntd();
5187 : if (element_count >= 8) {
5188 : svbool_t mask = svptrue_pat_b64(SV_VL8);
5189 : svuint64_t vacc = svld1_u64(mask, xacc);
5190 : ACCRND(vacc, 0);
5191 : svst1_u64(mask, xacc, vacc);
5192 : } else if (element_count == 2) { /* sve128 */
5193 : svbool_t mask = svptrue_pat_b64(SV_VL2);
5194 : svuint64_t acc0 = svld1_u64(mask, xacc + 0);
5195 : svuint64_t acc1 = svld1_u64(mask, xacc + 2);
5196 : svuint64_t acc2 = svld1_u64(mask, xacc + 4);
5197 : svuint64_t acc3 = svld1_u64(mask, xacc + 6);
5198 : ACCRND(acc0, 0);
5199 : ACCRND(acc1, 2);
5200 : ACCRND(acc2, 4);
5201 : ACCRND(acc3, 6);
5202 : svst1_u64(mask, xacc + 0, acc0);
5203 : svst1_u64(mask, xacc + 2, acc1);
5204 : svst1_u64(mask, xacc + 4, acc2);
5205 : svst1_u64(mask, xacc + 6, acc3);
5206 : } else {
5207 : svbool_t mask = svptrue_pat_b64(SV_VL4);
5208 : svuint64_t acc0 = svld1_u64(mask, xacc + 0);
5209 : svuint64_t acc1 = svld1_u64(mask, xacc + 4);
5210 : ACCRND(acc0, 0);
5211 : ACCRND(acc1, 4);
5212 : svst1_u64(mask, xacc + 0, acc0);
5213 : svst1_u64(mask, xacc + 4, acc1);
5214 : }
5215 : }
5216 :
5217 : XXH_FORCE_INLINE void
5218 : XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
5219 : const xxh_u8* XXH_RESTRICT input,
5220 : const xxh_u8* XXH_RESTRICT secret,
5221 : size_t nbStripes)
5222 : {
5223 : if (nbStripes != 0) {
5224 : uint64_t *xacc = (uint64_t *)acc;
5225 : const uint64_t *xinput = (const uint64_t *)(const void *)input;
5226 : const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
5227 : svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
5228 : uint64_t element_count = svcntd();
5229 : if (element_count >= 8) {
5230 : svbool_t mask = svptrue_pat_b64(SV_VL8);
5231 : svuint64_t vacc = svld1_u64(mask, xacc + 0);
5232 : do {
5233 : /* svprfd(svbool_t, void *, enum svfprop); */
5234 : svprfd(mask, xinput + 128, SV_PLDL1STRM);
5235 : ACCRND(vacc, 0);
5236 : xinput += 8;
5237 : xsecret += 1;
5238 : nbStripes--;
5239 : } while (nbStripes != 0);
5240 :
5241 : svst1_u64(mask, xacc + 0, vacc);
5242 : } else if (element_count == 2) { /* sve128 */
5243 : svbool_t mask = svptrue_pat_b64(SV_VL2);
5244 : svuint64_t acc0 = svld1_u64(mask, xacc + 0);
5245 : svuint64_t acc1 = svld1_u64(mask, xacc + 2);
5246 : svuint64_t acc2 = svld1_u64(mask, xacc + 4);
5247 : svuint64_t acc3 = svld1_u64(mask, xacc + 6);
5248 : do {
5249 : svprfd(mask, xinput + 128, SV_PLDL1STRM);
5250 : ACCRND(acc0, 0);
5251 : ACCRND(acc1, 2);
5252 : ACCRND(acc2, 4);
5253 : ACCRND(acc3, 6);
5254 : xinput += 8;
5255 : xsecret += 1;
5256 : nbStripes--;
5257 : } while (nbStripes != 0);
5258 :
5259 : svst1_u64(mask, xacc + 0, acc0);
5260 : svst1_u64(mask, xacc + 2, acc1);
5261 : svst1_u64(mask, xacc + 4, acc2);
5262 : svst1_u64(mask, xacc + 6, acc3);
5263 : } else {
5264 : svbool_t mask = svptrue_pat_b64(SV_VL4);
5265 : svuint64_t acc0 = svld1_u64(mask, xacc + 0);
5266 : svuint64_t acc1 = svld1_u64(mask, xacc + 4);
5267 : do {
5268 : svprfd(mask, xinput + 128, SV_PLDL1STRM);
5269 : ACCRND(acc0, 0);
5270 : ACCRND(acc1, 4);
5271 : xinput += 8;
5272 : xsecret += 1;
5273 : nbStripes--;
5274 : } while (nbStripes != 0);
5275 :
5276 : svst1_u64(mask, xacc + 0, acc0);
5277 : svst1_u64(mask, xacc + 4, acc1);
5278 : }
5279 : }
5280 : }
5281 :
5282 : #endif
5283 :
5284 : /* scalar variants - universal */
5285 :
5286 : #if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
5287 : /*
5288 : * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
5289 : * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
5290 : *
5291 : * While this might not seem like much, as AArch64 is a 64-bit architecture, only
5292 : * big Cortex designs have a full 64-bit multiplier.
5293 : *
5294 : * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
5295 : * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
5296 : * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
5297 : *
5298 : * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
5299 : * not have this penalty and does the mask automatically.
5300 : */
5301 : XXH_FORCE_INLINE xxh_u64
5302 : XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
5303 : {
5304 : xxh_u64 ret;
5305 : /* note: %x = 64-bit register, %w = 32-bit register */
5306 : __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
5307 : return ret;
5308 : }
5309 : #else
5310 : XXH_FORCE_INLINE xxh_u64
5311 0 : XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
5312 : {
5313 0 : return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
5314 : }
5315 : #endif
5316 :
5317 : /*!
5318 : * @internal
5319 : * @brief Scalar round for @ref XXH3_accumulate_512_scalar().
5320 : *
5321 : * This is extracted to its own function because the NEON path uses a combination
5322 : * of NEON and scalar.
5323 : */
5324 : XXH_FORCE_INLINE void
5325 0 : XXH3_scalarRound(void* XXH_RESTRICT acc,
5326 : void const* XXH_RESTRICT input,
5327 : void const* XXH_RESTRICT secret,
5328 : size_t lane)
5329 : {
5330 0 : xxh_u64* xacc = (xxh_u64*) acc;
5331 0 : xxh_u8 const* xinput = (xxh_u8 const*) input;
5332 0 : xxh_u8 const* xsecret = (xxh_u8 const*) secret;
5333 0 : XXH_ASSERT(lane < XXH_ACC_NB);
5334 0 : XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
5335 : {
5336 0 : xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
5337 0 : xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
5338 0 : xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
5339 0 : xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
5340 : }
5341 0 : }
5342 :
5343 : /*!
5344 : * @internal
5345 : * @brief Processes a 64 byte block of data using the scalar path.
5346 : */
5347 : XXH_FORCE_INLINE void
5348 0 : XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
5349 : const void* XXH_RESTRICT input,
5350 : const void* XXH_RESTRICT secret)
5351 : {
5352 : size_t i;
5353 : /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
5354 : #if defined(__GNUC__) && !defined(__clang__) \
5355 : && (defined(__arm__) || defined(__thumb2__)) \
5356 : && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
5357 : && XXH_SIZE_OPT <= 0
5358 : # pragma GCC unroll 8
5359 : #endif
5360 0 : for (i=0; i < XXH_ACC_NB; i++) {
5361 0 : XXH3_scalarRound(acc, input, secret, i);
5362 : }
5363 0 : }
5364 0 : XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
5365 :
5366 : /*!
5367 : * @internal
5368 : * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar().
5369 : *
5370 : * This is extracted to its own function because the NEON path uses a combination
5371 : * of NEON and scalar.
5372 : */
5373 : XXH_FORCE_INLINE void
5374 0 : XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
5375 : void const* XXH_RESTRICT secret,
5376 : size_t lane)
5377 : {
5378 0 : xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
5379 0 : const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */
5380 0 : XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
5381 0 : XXH_ASSERT(lane < XXH_ACC_NB);
5382 : {
5383 0 : xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
5384 0 : xxh_u64 acc64 = xacc[lane];
5385 0 : acc64 = XXH_xorshift64(acc64, 47);
5386 0 : acc64 ^= key64;
5387 0 : acc64 *= XXH_PRIME32_1;
5388 0 : xacc[lane] = acc64;
5389 : }
5390 0 : }
5391 :
5392 : /*!
5393 : * @internal
5394 : * @brief Scrambles the accumulators after a large chunk has been read
5395 : */
5396 : XXH_FORCE_INLINE void
5397 0 : XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
5398 : {
5399 : size_t i;
5400 0 : for (i=0; i < XXH_ACC_NB; i++) {
5401 0 : XXH3_scalarScrambleRound(acc, secret, i);
5402 : }
5403 0 : }
5404 :
5405 : XXH_FORCE_INLINE void
5406 0 : XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
5407 : {
5408 : /*
5409 : * We need a separate pointer for the hack below,
5410 : * which requires a non-const pointer.
5411 : * Any decent compiler will optimize this out otherwise.
5412 : */
5413 0 : const xxh_u8* kSecretPtr = XXH3_kSecret;
5414 : XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
5415 :
5416 : #if defined(__GNUC__) && defined(__aarch64__)
5417 : /*
5418 : * UGLY HACK:
5419 : * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
5420 : * placed sequentially, in order, at the top of the unrolled loop.
5421 : *
5422 : * While MOVK is great for generating constants (2 cycles for a 64-bit
5423 : * constant compared to 4 cycles for LDR), it fights for bandwidth with
5424 : * the arithmetic instructions.
5425 : *
5426 : * I L S
5427 : * MOVK
5428 : * MOVK
5429 : * MOVK
5430 : * MOVK
5431 : * ADD
5432 : * SUB STR
5433 : * STR
5434 : * By forcing loads from memory (as the asm line causes the compiler to assume
5435 :  * that kSecretPtr has been changed), the pipelines are used more
5436 : * efficiently:
5437 : * I L S
5438 : * LDR
5439 : * ADD LDR
5440 : * SUB STR
5441 : * STR
5442 : *
5443 :  * See XXH3_NEON_LANES for details on the pipelines.
5444 : *
5445 : * XXH3_64bits_withSeed, len == 256, Snapdragon 835
5446 : * without hack: 2654.4 MB/s
5447 : * with hack: 3202.9 MB/s
5448 : */
5449 : XXH_COMPILER_GUARD(kSecretPtr);
5450 : #endif
5451 0 : { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
5452 : int i;
5453 0 : for (i=0; i < nbRounds; i++) {
5454 : /*
5455 : * The asm hack causes the compiler to assume that kSecretPtr aliases with
5456 : * customSecret, and on aarch64, this prevented LDP from merging two
5457 : * loads together for free. Putting the loads together before the stores
5458 : * properly generates LDP.
5459 : */
5460 0 : xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64;
5461 0 : xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
5462 0 : XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo);
5463 0 : XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
5464 : } }
5465 0 : }
5466 :
5467 :
5468 : typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
5469 : typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
5470 : typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
5471 :
5472 :
5473 : #if (XXH_VECTOR == XXH_AVX512)
5474 :
5475 : #define XXH3_accumulate_512 XXH3_accumulate_512_avx512
5476 : #define XXH3_accumulate XXH3_accumulate_avx512
5477 : #define XXH3_scrambleAcc XXH3_scrambleAcc_avx512
5478 : #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
5479 :
5480 : #elif (XXH_VECTOR == XXH_AVX2)
5481 :
5482 : #define XXH3_accumulate_512 XXH3_accumulate_512_avx2
5483 : #define XXH3_accumulate XXH3_accumulate_avx2
5484 : #define XXH3_scrambleAcc XXH3_scrambleAcc_avx2
5485 : #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
5486 :
5487 : #elif (XXH_VECTOR == XXH_SSE2)
5488 :
5489 : #define XXH3_accumulate_512 XXH3_accumulate_512_sse2
5490 : #define XXH3_accumulate XXH3_accumulate_sse2
5491 : #define XXH3_scrambleAcc XXH3_scrambleAcc_sse2
5492 : #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
5493 :
5494 : #elif (XXH_VECTOR == XXH_NEON)
5495 :
5496 : #define XXH3_accumulate_512 XXH3_accumulate_512_neon
5497 : #define XXH3_accumulate XXH3_accumulate_neon
5498 : #define XXH3_scrambleAcc XXH3_scrambleAcc_neon
5499 : #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5500 :
5501 : #elif (XXH_VECTOR == XXH_VSX)
5502 :
5503 : #define XXH3_accumulate_512 XXH3_accumulate_512_vsx
5504 : #define XXH3_accumulate XXH3_accumulate_vsx
5505 : #define XXH3_scrambleAcc XXH3_scrambleAcc_vsx
5506 : #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5507 :
5508 : #elif (XXH_VECTOR == XXH_SVE)
5509 : #define XXH3_accumulate_512 XXH3_accumulate_512_sve
5510 : #define XXH3_accumulate XXH3_accumulate_sve
5511 : #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
5512 : #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5513 :
5514 : #else /* scalar */
5515 :
5516 : #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
5517 : #define XXH3_accumulate XXH3_accumulate_scalar
5518 : #define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
5519 : #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5520 :
5521 : #endif
5522 :
5523 : #if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
5524 : # undef XXH3_initCustomSecret
5525 : # define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
5526 : #endif
5527 :
5528 : XXH_FORCE_INLINE void
5529 98 : XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
5530 : const xxh_u8* XXH_RESTRICT input, size_t len,
5531 : const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
5532 : XXH3_f_accumulate f_acc,
5533 : XXH3_f_scrambleAcc f_scramble)
5534 : {
5535 98 : size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
5536 98 : size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
5537 98 : size_t const nb_blocks = (len - 1) / block_len;
5538 :
5539 : size_t n;
5540 :
5541 98 : XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
5542 :
5543 413 : for (n = 0; n < nb_blocks; n++) {
5544 315 : f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
5545 315 : f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
5546 : }
5547 :
5548 : /* last partial block */
5549 98 : XXH_ASSERT(len > XXH_STRIPE_LEN);
5550 98 : { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
5551 98 : XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
5552 98 : f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
5553 :
5554 : /* last stripe */
5555 98 : { const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
5556 : #define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */
5557 98 : XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
5558 : } }
5559 98 : }
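/*
 * Example figures: with the default 192-byte secret (XXH_SECRET_DEFAULT_SIZE),
 * nbStripesPerBlock = (192 - 64) / 8 = 16 and block_len = 16 * 64 = 1024, so
 * the accumulators are scrambled once per KiB of input; the last partial block
 * and the final (possibly overlapping) stripe are handled separately above.
 */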
5560 :
5561 : XXH_FORCE_INLINE xxh_u64
5562 824 : XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
5563 : {
5564 824 : return XXH3_mul128_fold64(
5565 824 : acc[0] ^ XXH_readLE64(secret),
5566 824 : acc[1] ^ XXH_readLE64(secret+8) );
5567 : }
5568 :
5569 : static XXH64_hash_t
5570 206 : XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
5571 : {
5572 206 : xxh_u64 result64 = start;
5573 206 : size_t i = 0;
5574 :
5575 1030 : for (i = 0; i < 4; i++) {
5576 824 : result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
5577 : #if defined(__clang__) /* Clang */ \
5578 : && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \
5579 : && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
5580 : && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
5581 : /*
5582 : * UGLY HACK:
5583 : * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
5584 : * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
5585 : * XXH3_64bits, len == 256, Snapdragon 835:
5586 : * without hack: 2063.7 MB/s
5587 : * with hack: 2560.7 MB/s
5588 : */
5589 : XXH_COMPILER_GUARD(result64);
5590 : #endif
5591 : }
5592 :
5593 206 : return XXH3_avalanche(result64);
5594 : }
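/*
 * Summary: the merge above folds the 8 accumulators pairwise against 64 bytes
 * of secret (4 calls to XXH3_mul128_fold64 via XXH3_mix2Accs), adds them to a
 * caller-provided start value, and avalanches the total into the final hash.
 */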
5595 :
5596 : #define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
5597 : XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
5598 :
5599 : XXH_FORCE_INLINE XXH64_hash_t
5600 0 : XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
5601 : const void* XXH_RESTRICT secret, size_t secretSize,
5602 : XXH3_f_accumulate f_acc,
5603 : XXH3_f_scrambleAcc f_scramble)
5604 : {
5605 0 : XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
5606 :
5607 0 : XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
5608 :
5609 : /* converge into final hash */
5610 : XXH_STATIC_ASSERT(sizeof(acc) == 64);
5611 : /* do not align on 8, so that the secret is different from the accumulator */
5612 : #define XXH_SECRET_MERGEACCS_START 11
5613 0 : XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
5614 0 : return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
5615 : }
5616 :
5617 : /*
5618 :  * It's important for performance to transmit the secret's size (when it's static)
5619 : * so that the compiler can properly optimize the vectorized loop.
5620 :  * This makes a big performance difference for "medium" keys (<1 KB) when using the AVX instruction set.
5621 : * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
5622 : * breaks -Og, this is XXH_NO_INLINE.
5623 : */
5624 : XXH3_WITH_SECRET_INLINE XXH64_hash_t
5625 0 : XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
5626 : XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
5627 : {
5628 : (void)seed64;
5629 0 : return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
5630 : }
5631 :
5632 : /*
5633 : * It's preferable for performance that XXH3_hashLong is not inlined,
5634 :  * as it results in a smaller function for small data, which is easier on the instruction cache.
5635 : * Note that inside this no_inline function, we do inline the internal loop,
5636 : * and provide a statically defined secret size to allow optimization of vector loop.
5637 : */
5638 : XXH_NO_INLINE XXH_PUREF XXH64_hash_t
5639 0 : XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
5640 : XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
5641 : {
5642 : (void)seed64; (void)secret; (void)secretLen;
5643 0 : return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
5644 : }
5645 :
5646 : /*
5647 : * XXH3_hashLong_64b_withSeed():
5648 :  * Generate a custom key by altering the default XXH3_kSecret with the seed,
5649 : * and then use this key for long mode hashing.
5650 : *
5651 : * This operation is decently fast but nonetheless costs a little bit of time.
5652 : * Try to avoid it whenever possible (typically when seed==0).
5653 : *
5654 : * It's important for performance that XXH3_hashLong is not inlined. Not sure
5655 : * why (uop cache maybe?), but the difference is large and easily measurable.
5656 : */
5657 : XXH_FORCE_INLINE XXH64_hash_t
5658 0 : XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
5659 : XXH64_hash_t seed,
5660 : XXH3_f_accumulate f_acc,
5661 : XXH3_f_scrambleAcc f_scramble,
5662 : XXH3_f_initCustomSecret f_initSec)
5663 : {
5664 : #if XXH_SIZE_OPT <= 0
5665 0 : if (seed == 0)
5666 0 : return XXH3_hashLong_64b_internal(input, len,
5667 : XXH3_kSecret, sizeof(XXH3_kSecret),
5668 : f_acc, f_scramble);
5669 : #endif
5670 : { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
5671 0 : f_initSec(secret, seed);
5672 0 : return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
5673 : f_acc, f_scramble);
5674 : }
5675 : }
5676 :
5677 : /*
5678 : * It's important for performance that XXH3_hashLong is not inlined.
5679 : */
5680 : XXH_NO_INLINE XXH64_hash_t
5681 0 : XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
5682 : XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
5683 : {
5684 : (void)secret; (void)secretLen;
5685 0 : return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
5686 : XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
5687 : }
5688 :
5689 :
5690 : typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
5691 : XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
5692 :
5693 : XXH_FORCE_INLINE XXH64_hash_t
5694 0 : XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
5695 : XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
5696 : XXH3_hashLong64_f f_hashLong)
5697 : {
5698 0 : XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
5699 : /*
5700 :      * If an action should be taken when the `secretLen` condition is not respected,
5701 : * it should be done here.
5702 : * For now, it's a contract pre-condition.
5703 : * Adding a check and a branch here would cost performance at every hash.
5704 : * Also, note that function signature doesn't offer room to return an error.
5705 : */
5706 0 : if (len <= 16)
5707 0 : return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
5708 0 : if (len <= 128)
5709 0 : return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
5710 0 : if (len <= XXH3_MIDSIZE_MAX)
5711 0 : return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
5712 0 : return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
5713 : }
5714 :
5715 :
5716 : /* === Public entry point === */
5717 :
5718 : /*! @ingroup XXH3_family */
5719 0 : XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
5720 : {
5721 0 : return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
5722 : }
5723 :
5724 : /*! @ingroup XXH3_family */
5725 : XXH_PUBLIC_API XXH64_hash_t
5726 0 : XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
5727 : {
5728 0 : return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
5729 : }
5730 :
5731 : /*! @ingroup XXH3_family */
5732 : XXH_PUBLIC_API XXH64_hash_t
5733 0 : XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
5734 : {
5735 0 : return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
5736 : }
5737 :
5738 : XXH_PUBLIC_API XXH64_hash_t
5739 0 : XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
5740 : {
5741 0 : if (length <= XXH3_MIDSIZE_MAX)
5742 0 : return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
5743 0 : return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
5744 : }
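/*
 * Illustrative usage sketch for the one-shot 64-bit entry points above.
 * This is editorial, not part of the original source; hash_message() and its
 * parameters are hypothetical names.
 *
 *     #include "xxhash.h"
 *
 *     static XXH64_hash_t hash_message(const void* data, size_t size, XXH64_hash_t seed)
 *     {
 *         if (seed == 0)
 *             return XXH3_64bits(data, size);            // default secret, fastest path
 *         return XXH3_64bits_withSeed(data, size, seed); // derives a custom secret from the seed
 *     }
 *
 * Note that XXH3_64bits_withSeed(data, size, 0) is expected to return the same
 * value as XXH3_64bits(data, size), since a zero seed selects the default secret.
 */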
5745 :
5746 :
5747 : /* === XXH3 streaming === */
5748 : #ifndef XXH_NO_STREAM
5749 : /*
5750 : * Malloc's a pointer that is always aligned to align.
5751 : *
5752 : * This must be freed with `XXH_alignedFree()`.
5753 : *
5754 :  * malloc typically guarantees 16-byte alignment on 64-bit systems and 8-byte
5755 :  * alignment on 32-bit. This isn't enough for the 32-byte aligned loads in AVX2,
5756 :  * nor, on 32-bit, for the 16-byte aligned loads in SSE2 and NEON.
5757 : *
5758 : * This underalignment previously caused a rather obvious crash which went
5759 : * completely unnoticed due to XXH3_createState() not actually being tested.
5760 : * Credit to RedSpah for noticing this bug.
5761 : *
5762 :  * The alignment is done manually: functions like posix_memalign or _mm_malloc
5763 :  * are avoided. To maintain portability, we would have to write a fallback
5764 :  * like this anyway, and besides, testing for the existence of library
5765 :  * functions without relying on external build tools is impossible.
5766 : *
5767 : * The method is simple: Overallocate, manually align, and store the offset
5768 : * to the original behind the returned pointer.
5769 : *
5770 : * Align must be a power of 2 and 8 <= align <= 128.
5771 : */
5772 7 : static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
5773 : {
5774 7 : XXH_ASSERT(align <= 128 && align >= 8); /* range check */
5775 7 : XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */
5776 7 : XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */
5777 : { /* Overallocate to make room for manual realignment and an offset byte */
5778 7 : xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);
5779 7 : if (base != NULL) {
5780 : /*
5781 : * Get the offset needed to align this pointer.
5782 : *
5783 : * Even if the returned pointer is aligned, there will always be
5784 : * at least one byte to store the offset to the original pointer.
5785 : */
5786 7 : size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
5787 : /* Add the offset for the now-aligned pointer */
5788 7 : xxh_u8* ptr = base + offset;
5789 :
5790 7 : XXH_ASSERT((size_t)ptr % align == 0);
5791 :
5792 : /* Store the offset immediately before the returned pointer. */
5793 7 : ptr[-1] = (xxh_u8)offset;
5794 7 : return ptr;
5795 : }
5796 0 : return NULL;
5797 : }
5798 : }
5799 : /*
5800 : * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
5801 :  * normal malloc'd pointers; XXH_alignedMalloc has a specific data layout.
5802 : */
5803 7 : static void XXH_alignedFree(void* p)
5804 : {
5805 7 : if (p != NULL) {
5806 7 : xxh_u8* ptr = (xxh_u8*)p;
5807 :         /* Get the offset byte we added in XXH_alignedMalloc(). */
5808 7 : xxh_u8 offset = ptr[-1];
5809 : /* Free the original malloc'd pointer */
5810 7 : xxh_u8* base = ptr - offset;
5811 7 : XXH_free(base);
5812 : }
5813 7 : }
5814 : /*! @ingroup XXH3_family */
5815 : /*!
5816 : * @brief Allocate an @ref XXH3_state_t.
5817 : *
5818 : * Must be freed with XXH3_freeState().
5819 : * @return An allocated XXH3_state_t on success, `NULL` on failure.
5820 : */
5821 7 : XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
5822 : {
5823 7 : XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
5824 7 : if (state==NULL) return NULL;
5825 7 : XXH3_INITSTATE(state);
5826 7 : return state;
5827 : }
5828 :
5829 : /*! @ingroup XXH3_family */
5830 : /*!
5831 : * @brief Frees an @ref XXH3_state_t.
5832 : *
5833 : * Must be allocated with XXH3_createState().
5834 : * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
5835 : * @return XXH_OK.
5836 : */
5837 7 : XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
5838 : {
5839 7 : XXH_alignedFree(statePtr);
5840 7 : return XXH_OK;
5841 : }
5842 :
5843 : /*! @ingroup XXH3_family */
5844 : XXH_PUBLIC_API void
5845 0 : XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
5846 : {
5847 0 : XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
5848 0 : }
5849 :
5850 : static void
5851 7 : XXH3_reset_internal(XXH3_state_t* statePtr,
5852 : XXH64_hash_t seed,
5853 : const void* secret, size_t secretSize)
5854 : {
5855 7 : size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
5856 7 : size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
5857 7 : XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
5858 7 : XXH_ASSERT(statePtr != NULL);
5859 : /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
5860 7 : memset((char*)statePtr + initStart, 0, initLength);
5861 7 : statePtr->acc[0] = XXH_PRIME32_3;
5862 7 : statePtr->acc[1] = XXH_PRIME64_1;
5863 7 : statePtr->acc[2] = XXH_PRIME64_2;
5864 7 : statePtr->acc[3] = XXH_PRIME64_3;
5865 7 : statePtr->acc[4] = XXH_PRIME64_4;
5866 7 : statePtr->acc[5] = XXH_PRIME32_2;
5867 7 : statePtr->acc[6] = XXH_PRIME64_5;
5868 7 : statePtr->acc[7] = XXH_PRIME32_1;
5869 7 : statePtr->seed = seed;
5870 7 : statePtr->useSeed = (seed != 0);
5871 7 : statePtr->extSecret = (const unsigned char*)secret;
5872 7 : XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
5873 7 : statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
5874 7 : statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
5875 7 : }
5876 :
5877 : /*! @ingroup XXH3_family */
5878 : XXH_PUBLIC_API XXH_errorcode
5879 7 : XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
5880 : {
5881 7 : if (statePtr == NULL) return XXH_ERROR;
5882 7 : XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
5883 7 : return XXH_OK;
5884 : }
5885 :
5886 : /*! @ingroup XXH3_family */
5887 : XXH_PUBLIC_API XXH_errorcode
5888 0 : XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
5889 : {
5890 0 : if (statePtr == NULL) return XXH_ERROR;
5891 0 : XXH3_reset_internal(statePtr, 0, secret, secretSize);
5892 0 : if (secret == NULL) return XXH_ERROR;
5893 0 : if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
5894 0 : return XXH_OK;
5895 : }
5896 :
5897 : /*! @ingroup XXH3_family */
5898 : XXH_PUBLIC_API XXH_errorcode
5899 0 : XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
5900 : {
5901 0 : if (statePtr == NULL) return XXH_ERROR;
5902 0 : if (seed==0) return XXH3_64bits_reset(statePtr);
5903 0 : if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
5904 0 : XXH3_initCustomSecret(statePtr->customSecret, seed);
5905 0 : XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
5906 0 : return XXH_OK;
5907 : }
5908 :
5909 : /*! @ingroup XXH3_family */
5910 : XXH_PUBLIC_API XXH_errorcode
5911 0 : XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
5912 : {
5913 0 : if (statePtr == NULL) return XXH_ERROR;
5914 0 : if (secret == NULL) return XXH_ERROR;
5915 0 : if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
5916 0 : XXH3_reset_internal(statePtr, seed64, secret, secretSize);
5917 0 : statePtr->useSeed = 1; /* always, even if seed64==0 */
5918 0 : return XXH_OK;
5919 : }
5920 :
5921 : /*!
5922 : * @internal
5923 : * @brief Processes a large input for XXH3_update() and XXH3_digest_long().
5924 : *
5925 : * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.
5926 : *
5927 : * @param acc Pointer to the 8 accumulator lanes
5928 :  * @param nbStripesSoFarPtr In/out pointer to the number of stripes processed so far in the current block
5929 : * @param nbStripesPerBlock Number of stripes in a block
5930 : * @param input Input pointer
5931 : * @param nbStripes Number of stripes to process
5932 : * @param secret Secret pointer
5933 : * @param secretLimit Offset of the last block in @p secret
5934 : * @param f_acc Pointer to an XXH3_accumulate implementation
5935 : * @param f_scramble Pointer to an XXH3_scrambleAcc implementation
5936 : * @return Pointer past the end of @p input after processing
5937 : */
5938 : XXH_FORCE_INLINE const xxh_u8 *
5939 10 : XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
5940 : size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
5941 : const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
5942 : const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
5943 : XXH3_f_accumulate f_acc,
5944 : XXH3_f_scrambleAcc f_scramble)
5945 : {
5946 10 : const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
5947 : /* Process full blocks */
5948 10 : if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
5949 : /* Process the initial partial block... */
5950 4 : size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
5951 :
5952 : do {
5953 : /* Accumulate and scramble */
5954 49 : f_acc(acc, input, initialSecret, nbStripesThisIter);
5955 49 : f_scramble(acc, secret + secretLimit);
5956 49 : input += nbStripesThisIter * XXH_STRIPE_LEN;
5957 49 : nbStripes -= nbStripesThisIter;
5958 : /* Then continue the loop with the full block size */
5959 49 : nbStripesThisIter = nbStripesPerBlock;
5960 49 : initialSecret = secret;
5961 49 : } while (nbStripes >= nbStripesPerBlock);
5962 4 : *nbStripesSoFarPtr = 0;
5963 : }
5964 : /* Process a partial block */
5965 10 : if (nbStripes > 0) {
5966 5 : f_acc(acc, input, initialSecret, nbStripes);
5967 5 : input += nbStripes * XXH_STRIPE_LEN;
5968 5 : *nbStripesSoFarPtr += nbStripes;
5969 : }
5970 : /* Return end pointer */
5971 10 : return input;
5972 : }
5973 :
5974 : #ifndef XXH3_STREAM_USE_STACK
5975 : # if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
5976 : # define XXH3_STREAM_USE_STACK 1
5977 : # endif
5978 : #endif
5979 : /*
5980 : * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
5981 : */
5982 : XXH_FORCE_INLINE XXH_errorcode
5983 5 : XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
5984 : const xxh_u8* XXH_RESTRICT input, size_t len,
5985 : XXH3_f_accumulate f_acc,
5986 : XXH3_f_scrambleAcc f_scramble)
5987 : {
5988 5 : if (input==NULL) {
5989 0 : XXH_ASSERT(len == 0);
5990 0 : return XXH_OK;
5991 : }
5992 :
5993 5 : XXH_ASSERT(state != NULL);
5994 5 : { const xxh_u8* const bEnd = input + len;
5995 5 : const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
5996 : #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
5997 : /* For some reason, gcc and MSVC seem to suffer greatly
5998 :          * when operating on accumulators directly in the state.
5999 :          * Operating on a stack copy seems to enable proper optimization.
6000 : * clang, on the other hand, doesn't seem to need this trick */
6001 : XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
6002 5 : XXH_memcpy(acc, state->acc, sizeof(acc));
6003 : #else
6004 : xxh_u64* XXH_RESTRICT const acc = state->acc;
6005 : #endif
6006 5 : state->totalLen += len;
6007 5 : XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
6008 :
6009 : /* small input : just fill in tmp buffer */
6010 5 : if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
6011 0 : XXH_memcpy(state->buffer + state->bufferedSize, input, len);
6012 0 : state->bufferedSize += (XXH32_hash_t)len;
6013 0 : return XXH_OK;
6014 : }
6015 :
6016 : /* total input is now > XXH3_INTERNALBUFFER_SIZE */
6017 : #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
6018 : XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */
6019 :
6020 : /*
6021 : * Internal buffer is partially filled (always, except at beginning)
6022 : * Complete it, then consume it.
6023 : */
6024 5 : if (state->bufferedSize) {
6025 0 : size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
6026 0 : XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
6027 0 : input += loadSize;
6028 0 : XXH3_consumeStripes(acc,
6029 : &state->nbStripesSoFar, state->nbStripesPerBlock,
6030 0 : state->buffer, XXH3_INTERNALBUFFER_STRIPES,
6031 : secret, state->secretLimit,
6032 : f_acc, f_scramble);
6033 0 : state->bufferedSize = 0;
6034 : }
6035 5 : XXH_ASSERT(input < bEnd);
6036 5 : if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
6037 5 : size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
6038 5 : input = XXH3_consumeStripes(acc,
6039 : &state->nbStripesSoFar, state->nbStripesPerBlock,
6040 : input, nbStripes,
6041 : secret, state->secretLimit,
6042 : f_acc, f_scramble);
6043 5 : XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
6044 :
6045 : }
6046 : /* Some remaining input (always) : buffer it */
6047 5 : XXH_ASSERT(input < bEnd);
6048 5 : XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
6049 5 : XXH_ASSERT(state->bufferedSize == 0);
6050 5 : XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
6051 5 : state->bufferedSize = (XXH32_hash_t)(bEnd-input);
6052 : #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
6053 : /* save stack accumulators into state */
6054 5 : XXH_memcpy(state->acc, acc, sizeof(acc));
6055 : #endif
6056 : }
6057 :
6058 5 : return XXH_OK;
6059 : }
6060 :
6061 : /*! @ingroup XXH3_family */
6062 : XXH_PUBLIC_API XXH_errorcode
6063 5 : XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
6064 : {
6065 5 : return XXH3_update(state, (const xxh_u8*)input, len,
6066 : XXH3_accumulate, XXH3_scrambleAcc);
6067 : }
6068 :
6069 :
6070 : XXH_FORCE_INLINE void
6071 5 : XXH3_digest_long (XXH64_hash_t* acc,
6072 : const XXH3_state_t* state,
6073 : const unsigned char* secret)
6074 : {
6075 : xxh_u8 lastStripe[XXH_STRIPE_LEN];
6076 : const xxh_u8* lastStripePtr;
6077 :
6078 : /*
6079 : * Digest on a local copy. This way, the state remains unaltered, and it can
6080 : * continue ingesting more input afterwards.
6081 : */
6082 5 : XXH_memcpy(acc, state->acc, sizeof(state->acc));
6083 5 : if (state->bufferedSize >= XXH_STRIPE_LEN) {
6084 : /* Consume remaining stripes then point to remaining data in buffer */
6085 5 : size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
6086 5 : size_t nbStripesSoFar = state->nbStripesSoFar;
6087 5 : XXH3_consumeStripes(acc,
6088 5 : &nbStripesSoFar, state->nbStripesPerBlock,
6089 5 : state->buffer, nbStripes,
6090 5 : secret, state->secretLimit,
6091 : XXH3_accumulate, XXH3_scrambleAcc);
6092 5 : lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
6093 : } else { /* bufferedSize < XXH_STRIPE_LEN */
6094 : /* Copy to temp buffer */
6095 0 : size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
6096 0 : XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
6097 0 : XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
6098 0 : XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
6099 0 : lastStripePtr = lastStripe;
6100 : }
6101 : /* Last stripe */
6102 5 : XXH3_accumulate_512(acc,
6103 : lastStripePtr,
6104 5 : secret + state->secretLimit - XXH_SECRET_LASTACC_START);
6105 5 : }
6106 :
6107 : /*! @ingroup XXH3_family */
6108 0 : XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
6109 : {
6110 0 : const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
6111 0 : if (state->totalLen > XXH3_MIDSIZE_MAX) {
6112 : XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
6113 0 : XXH3_digest_long(acc, state, secret);
6114 0 : return XXH3_mergeAccs(acc,
6115 : secret + XXH_SECRET_MERGEACCS_START,
6116 0 : (xxh_u64)state->totalLen * XXH_PRIME64_1);
6117 : }
6118 : /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
6119 0 : if (state->useSeed)
6120 0 : return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
6121 0 : return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
6122 0 : secret, state->secretLimit + XXH_STRIPE_LEN);
6123 : }
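/*
 * Illustrative streaming usage sketch (editorial, not part of the original source):
 * hashing an input delivered in two chunks with the 64-bit streaming API above.
 * hash_two_chunks() and its parameters are hypothetical names.
 *
 *     static XXH64_hash_t hash_two_chunks(const void* chunk1, size_t len1,
 *                                         const void* chunk2, size_t len2)
 *     {
 *         XXH64_hash_t h = 0;
 *         XXH3_state_t* const st = XXH3_createState();
 *         if (st == NULL) return 0;                      // allocation failure
 *         (void)XXH3_64bits_reset(st);
 *         (void)XXH3_64bits_update(st, chunk1, len1);
 *         (void)XXH3_64bits_update(st, chunk2, len2);
 *         h = XXH3_64bits_digest(st);                    // state stays valid; more updates may follow
 *         (void)XXH3_freeState(st);
 *         return h;
 *     }
 *
 * The result is expected to match XXH3_64bits() applied to the concatenation of
 * both chunks, regardless of how the input is split across update calls.
 */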
6124 : #endif /* !XXH_NO_STREAM */
6125 :
6126 :
6127 : /* ==========================================
6128 : * XXH3 128 bits (a.k.a XXH128)
6129 : * ==========================================
6130 : * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
6131 : * even without counting the significantly larger output size.
6132 : *
6133 : * For example, extra steps are taken to avoid the seed-dependent collisions
6134 : * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
6135 : *
6136 : * This strength naturally comes at the cost of some speed, especially on short
6137 :  * lengths. Note that long inputs hash about as fast as with the 64-bit version,
6138 :  * since it uses only a slight modification of the 64-bit loop.
6139 : *
6140 : * XXH128 is also more oriented towards 64-bit machines. It is still extremely
6141 : * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
6142 : */
6143 :
6144 : XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
6145 0 : XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
6146 : {
6147 : /* A doubled version of 1to3_64b with different constants. */
6148 0 : XXH_ASSERT(input != NULL);
6149 0 : XXH_ASSERT(1 <= len && len <= 3);
6150 0 : XXH_ASSERT(secret != NULL);
6151 : /*
6152 : * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
6153 : * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
6154 : * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
6155 : */
6156 0 : { xxh_u8 const c1 = input[0];
6157 0 : xxh_u8 const c2 = input[len >> 1];
6158 0 : xxh_u8 const c3 = input[len - 1];
6159 0 : xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24)
6160 0 : | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
6161 0 : xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
6162 0 : xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
6163 0 : xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
6164 0 : xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
6165 0 : xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
6166 : XXH128_hash_t h128;
6167 0 : h128.low64 = XXH64_avalanche(keyed_lo);
6168 0 : h128.high64 = XXH64_avalanche(keyed_hi);
6169 0 : return h128;
6170 : }
6171 : }
6172 :
6173 : XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
6174 0 : XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
6175 : {
6176 0 : XXH_ASSERT(input != NULL);
6177 0 : XXH_ASSERT(secret != NULL);
6178 0 : XXH_ASSERT(4 <= len && len <= 8);
6179 0 : seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
6180 0 : { xxh_u32 const input_lo = XXH_readLE32(input);
6181 0 : xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
6182 0 : xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);
6183 0 : xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
6184 0 : xxh_u64 const keyed = input_64 ^ bitflip;
6185 :
6186 :         /* Shift len to the left so the added term is even; this keeps the multiplier odd and avoids even multipliers. */
6187 0 : XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));
6188 :
6189 0 : m128.high64 += (m128.low64 << 1);
6190 0 : m128.low64 ^= (m128.high64 >> 3);
6191 :
6192 0 : m128.low64 = XXH_xorshift64(m128.low64, 35);
6193 0 : m128.low64 *= PRIME_MX2;
6194 0 : m128.low64 = XXH_xorshift64(m128.low64, 28);
6195 0 : m128.high64 = XXH3_avalanche(m128.high64);
6196 0 : return m128;
6197 : }
6198 : }
6199 :
6200 : XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
6201 0 : XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
6202 : {
6203 0 : XXH_ASSERT(input != NULL);
6204 0 : XXH_ASSERT(secret != NULL);
6205 0 : XXH_ASSERT(9 <= len && len <= 16);
6206 0 : { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
6207 0 : xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
6208 0 : xxh_u64 const input_lo = XXH_readLE64(input);
6209 0 : xxh_u64 input_hi = XXH_readLE64(input + len - 8);
6210 0 : XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
6211 : /*
6212 :          * Put len in the middle of m128 to ensure that the length gets mixed into
6213 : * both the low and high bits in the 128x64 multiply below.
6214 : */
6215 0 : m128.low64 += (xxh_u64)(len - 1) << 54;
6216 0 : input_hi ^= bitfliph;
6217 : /*
6218 : * Add the high 32 bits of input_hi to the high 32 bits of m128, then
6219 : * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
6220 : * the high 64 bits of m128.
6221 : *
6222 : * The best approach to this operation is different on 32-bit and 64-bit.
6223 : */
6224 : if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
6225 : /*
6226 : * 32-bit optimized version, which is more readable.
6227 : *
6228 : * On 32-bit, it removes an ADC and delays a dependency between the two
6229 : * halves of m128.high64, but it generates an extra mask on 64-bit.
6230 : */
6231 : m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
6232 : } else {
6233 : /*
6234 : * 64-bit optimized (albeit more confusing) version.
6235 : *
6236 : * Uses some properties of addition and multiplication to remove the mask:
6237 : *
6238 : * Let:
6239 : * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
6240 : * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
6241 : * c = XXH_PRIME32_2
6242 : *
6243 : * a + (b * c)
6244 : * Inverse Property: x + y - x == y
6245 : * a + (b * (1 + c - 1))
6246 : * Distributive Property: x * (y + z) == (x * y) + (x * z)
6247 : * a + (b * 1) + (b * (c - 1))
6248 : * Identity Property: x * 1 == x
6249 : * a + b + (b * (c - 1))
6250 : *
6251 : * Substitute a, b, and c:
6252 : * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
6253 : *
6254 : * Since input_hi.hi + input_hi.lo == input_hi, we get this:
6255 : * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
6256 : */
6257 0 : m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
6258 : }
6259 : /* m128 ^= XXH_swap64(m128 >> 64); */
6260 0 : m128.low64 ^= XXH_swap64(m128.high64);
6261 :
6262 : { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
6263 0 : XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
6264 0 : h128.high64 += m128.high64 * XXH_PRIME64_2;
6265 :
6266 0 : h128.low64 = XXH3_avalanche(h128.low64);
6267 0 : h128.high64 = XXH3_avalanche(h128.high64);
6268 0 : return h128;
6269 : } }
6270 : }
6271 :
6272 : /*
6273 : * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
6274 : */
6275 : XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
6276 2 : XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
6277 : {
6278 2 : XXH_ASSERT(len <= 16);
6279 2 : { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed);
6280 2 : if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);
6281 2 : if (len) return XXH3_len_1to3_128b(input, len, secret, seed);
6282 : { XXH128_hash_t h128;
6283 2 : xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
6284 2 : xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
6285 2 : h128.low64 = XXH64_avalanche(seed ^ bitflipl);
6286 2 : h128.high64 = XXH64_avalanche( seed ^ bitfliph);
6287 2 : return h128;
6288 : } }
6289 : }
6290 :
6291 : /*
6292 : * A bit slower than XXH3_mix16B, but handles multiply by zero better.
6293 : */
6294 : XXH_FORCE_INLINE XXH128_hash_t
6295 0 : XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
6296 : const xxh_u8* secret, XXH64_hash_t seed)
6297 : {
6298 0 : acc.low64 += XXH3_mix16B (input_1, secret+0, seed);
6299 0 : acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
6300 0 : acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
6301 0 : acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
6302 0 : return acc;
6303 : }
6304 :
6305 :
6306 : XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
6307 0 : XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
6308 : const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
6309 : XXH64_hash_t seed)
6310 : {
6311 0 : XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
6312 0 : XXH_ASSERT(16 < len && len <= 128);
6313 :
6314 : { XXH128_hash_t acc;
6315 0 : acc.low64 = len * XXH_PRIME64_1;
6316 0 : acc.high64 = 0;
6317 :
6318 : #if XXH_SIZE_OPT >= 1
6319 : {
6320 : /* Smaller, but slightly slower. */
6321 : unsigned int i = (unsigned int)(len - 1) / 32;
6322 : do {
6323 : acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
6324 : } while (i-- != 0);
6325 : }
6326 : #else
6327 0 : if (len > 32) {
6328 0 : if (len > 64) {
6329 0 : if (len > 96) {
6330 0 : acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
6331 : }
6332 0 : acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
6333 : }
6334 0 : acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
6335 : }
6336 0 : acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
6337 : #endif
6338 : { XXH128_hash_t h128;
6339 0 : h128.low64 = acc.low64 + acc.high64;
6340 0 : h128.high64 = (acc.low64 * XXH_PRIME64_1)
6341 0 : + (acc.high64 * XXH_PRIME64_4)
6342 0 : + ((len - seed) * XXH_PRIME64_2);
6343 0 : h128.low64 = XXH3_avalanche(h128.low64);
6344 0 : h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
6345 0 : return h128;
6346 : }
6347 : }
6348 : }
6349 :
6350 : XXH_NO_INLINE XXH_PUREF XXH128_hash_t
6351 0 : XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
6352 : const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
6353 : XXH64_hash_t seed)
6354 : {
6355 0 : XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
6356 0 : XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
6357 :
6358 : { XXH128_hash_t acc;
6359 : unsigned i;
6360 0 : acc.low64 = len * XXH_PRIME64_1;
6361 0 : acc.high64 = 0;
6362 : /*
6363 :          * We set `i` to offset + 32. We do this so that the unchanged
6364 :          * `len` can be used as the upper bound. This reaches a sweet spot
6365 :          * where both x86 and aarch64 get simple address generation and good codegen
6366 : * for the loop.
6367 : */
6368 0 : for (i = 32; i < 160; i += 32) {
6369 0 : acc = XXH128_mix32B(acc,
6370 0 : input + i - 32,
6371 0 : input + i - 16,
6372 0 : secret + i - 32,
6373 : seed);
6374 : }
6375 0 : acc.low64 = XXH3_avalanche(acc.low64);
6376 0 : acc.high64 = XXH3_avalanche(acc.high64);
6377 : /*
6378 :          * NB: `i <= len` will duplicate the last 32 bytes if
6379 :          * len % 32 is zero. This is an unfortunate necessity to keep
6380 : * the hash result stable.
6381 : */
6382 0 : for (i=160; i <= len; i += 32) {
6383 0 : acc = XXH128_mix32B(acc,
6384 0 : input + i - 32,
6385 0 : input + i - 16,
6386 0 : secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
6387 : seed);
6388 : }
6389 : /* last bytes */
6390 0 : acc = XXH128_mix32B(acc,
6391 0 : input + len - 16,
6392 0 : input + len - 32,
6393 : secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
6394 : (XXH64_hash_t)0 - seed);
6395 :
6396 : { XXH128_hash_t h128;
6397 0 : h128.low64 = acc.low64 + acc.high64;
6398 0 : h128.high64 = (acc.low64 * XXH_PRIME64_1)
6399 0 : + (acc.high64 * XXH_PRIME64_4)
6400 0 : + ((len - seed) * XXH_PRIME64_2);
6401 0 : h128.low64 = XXH3_avalanche(h128.low64);
6402 0 : h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
6403 0 : return h128;
6404 : }
6405 : }
6406 : }
6407 :
6408 : XXH_FORCE_INLINE XXH128_hash_t
6409 98 : XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
6410 : const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
6411 : XXH3_f_accumulate f_acc,
6412 : XXH3_f_scrambleAcc f_scramble)
6413 : {
6414 98 : XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
6415 :
6416 98 : XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
6417 :
6418 : /* converge into final hash */
6419 : XXH_STATIC_ASSERT(sizeof(acc) == 64);
6420 98 : XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
6421 : { XXH128_hash_t h128;
6422 196 : h128.low64 = XXH3_mergeAccs(acc,
6423 : secret + XXH_SECRET_MERGEACCS_START,
6424 98 : (xxh_u64)len * XXH_PRIME64_1);
6425 196 : h128.high64 = XXH3_mergeAccs(acc,
6426 : secret + secretSize
6427 98 : - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
6428 98 : ~((xxh_u64)len * XXH_PRIME64_2));
6429 98 : return h128;
6430 : }
6431 : }
6432 :
6433 : /*
6434 : * It's important for performance that XXH3_hashLong() is not inlined.
6435 : */
6436 : XXH_NO_INLINE XXH_PUREF XXH128_hash_t
6437 98 : XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
6438 : XXH64_hash_t seed64,
6439 : const void* XXH_RESTRICT secret, size_t secretLen)
6440 : {
6441 : (void)seed64; (void)secret; (void)secretLen;
6442 98 : return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
6443 : XXH3_accumulate, XXH3_scrambleAcc);
6444 : }
6445 :
6446 : /*
6447 : * It's important for performance to pass @p secretLen (when it's static)
6448 : * to the compiler, so that it can properly optimize the vectorized loop.
6449 : *
6450 : * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
6451 : * breaks -Og, this is XXH_NO_INLINE.
6452 : */
6453 : XXH3_WITH_SECRET_INLINE XXH128_hash_t
6454 0 : XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
6455 : XXH64_hash_t seed64,
6456 : const void* XXH_RESTRICT secret, size_t secretLen)
6457 : {
6458 : (void)seed64;
6459 0 : return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
6460 : XXH3_accumulate, XXH3_scrambleAcc);
6461 : }
6462 :
6463 : XXH_FORCE_INLINE XXH128_hash_t
6464 0 : XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
6465 : XXH64_hash_t seed64,
6466 : XXH3_f_accumulate f_acc,
6467 : XXH3_f_scrambleAcc f_scramble,
6468 : XXH3_f_initCustomSecret f_initSec)
6469 : {
6470 0 : if (seed64 == 0)
6471 0 : return XXH3_hashLong_128b_internal(input, len,
6472 : XXH3_kSecret, sizeof(XXH3_kSecret),
6473 : f_acc, f_scramble);
6474 : { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
6475 0 : f_initSec(secret, seed64);
6476 0 : return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
6477 : f_acc, f_scramble);
6478 : }
6479 : }
6480 :
6481 : /*
6482 : * It's important for performance that XXH3_hashLong is not inlined.
6483 : */
6484 : XXH_NO_INLINE XXH128_hash_t
6485 0 : XXH3_hashLong_128b_withSeed(const void* input, size_t len,
6486 : XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
6487 : {
6488 : (void)secret; (void)secretLen;
6489 0 : return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
6490 : XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
6491 : }
6492 :
6493 : typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
6494 : XXH64_hash_t, const void* XXH_RESTRICT, size_t);
6495 :
6496 : XXH_FORCE_INLINE XXH128_hash_t
6497 100 : XXH3_128bits_internal(const void* input, size_t len,
6498 : XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
6499 : XXH3_hashLong128_f f_hl128)
6500 : {
6501 100 : XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
6502 : /*
6503 : * If an action is to be taken if `secret` conditions are not respected,
6504 : * it should be done here.
6505 : * For now, it's a contract pre-condition.
6506 : * Adding a check and a branch here would cost performance at every hash.
6507 : */
6508 100 : if (len <= 16)
6509 2 : return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
6510 98 : if (len <= 128)
6511 0 : return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
6512 98 : if (len <= XXH3_MIDSIZE_MAX)
6513 0 : return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
6514 98 : return f_hl128(input, len, seed64, secret, secretLen);
6515 : }
6516 :
6517 :
6518 : /* === Public XXH128 API === */
6519 :
6520 : /*! @ingroup XXH3_family */
6521 98 : XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
6522 : {
6523 98 : return XXH3_128bits_internal(input, len, 0,
6524 : XXH3_kSecret, sizeof(XXH3_kSecret),
6525 : XXH3_hashLong_128b_default);
6526 : }
6527 :
6528 : /*! @ingroup XXH3_family */
6529 : XXH_PUBLIC_API XXH128_hash_t
6530 2 : XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
6531 : {
6532 2 : return XXH3_128bits_internal(input, len, 0,
6533 : (const xxh_u8*)secret, secretSize,
6534 : XXH3_hashLong_128b_withSecret);
6535 : }
6536 :
6537 : /*! @ingroup XXH3_family */
6538 : XXH_PUBLIC_API XXH128_hash_t
6539 0 : XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
6540 : {
6541 0 : return XXH3_128bits_internal(input, len, seed,
6542 : XXH3_kSecret, sizeof(XXH3_kSecret),
6543 : XXH3_hashLong_128b_withSeed);
6544 : }
6545 :
6546 : /*! @ingroup XXH3_family */
6547 : XXH_PUBLIC_API XXH128_hash_t
6548 0 : XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
6549 : {
6550 0 : if (len <= XXH3_MIDSIZE_MAX)
6551 0 : return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
6552 0 : return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
6553 : }
6554 :
6555 : /*! @ingroup XXH3_family */
6556 : XXH_PUBLIC_API XXH128_hash_t
6557 0 : XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
6558 : {
6559 0 : return XXH3_128bits_withSeed(input, len, seed);
6560 : }
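/*
 * Illustrative usage sketch for the one-shot 128-bit entry points (editorial, not
 * part of the original source; `data` and `size` are hypothetical caller inputs):
 *
 *     XXH128_hash_t const h  = XXH128(data, size, 0);     // convenience alias, see above
 *     XXH128_hash_t const h2 = XXH3_128bits(data, size);  // unseeded form
 *     // h.low64 and h.high64 hold the two 64-bit halves; with seed 0 the two calls
 *     // are expected to produce equal results (check with XXH128_isEqual(h, h2)).
 */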
6561 :
6562 :
6563 : /* === XXH3 128-bit streaming === */
6564 : #ifndef XXH_NO_STREAM
6565 : /*
6566 :  * All initialization and update functions are identical to the 64-bit streaming variant.
6567 : * The only difference is the finalization routine.
6568 : */
6569 :
6570 : /*! @ingroup XXH3_family */
6571 : XXH_PUBLIC_API XXH_errorcode
6572 7 : XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
6573 : {
6574 7 : return XXH3_64bits_reset(statePtr);
6575 : }
6576 :
6577 : /*! @ingroup XXH3_family */
6578 : XXH_PUBLIC_API XXH_errorcode
6579 0 : XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
6580 : {
6581 0 : return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
6582 : }
6583 :
6584 : /*! @ingroup XXH3_family */
6585 : XXH_PUBLIC_API XXH_errorcode
6586 0 : XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
6587 : {
6588 0 : return XXH3_64bits_reset_withSeed(statePtr, seed);
6589 : }
6590 :
6591 : /*! @ingroup XXH3_family */
6592 : XXH_PUBLIC_API XXH_errorcode
6593 0 : XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
6594 : {
6595 0 : return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
6596 : }
6597 :
6598 : /*! @ingroup XXH3_family */
6599 : XXH_PUBLIC_API XXH_errorcode
6600 5 : XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
6601 : {
6602 5 : return XXH3_64bits_update(state, input, len);
6603 : }
6604 :
6605 : /*! @ingroup XXH3_family */
6606 7 : XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
6607 : {
6608 7 : const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
6609 7 : if (state->totalLen > XXH3_MIDSIZE_MAX) {
6610 : XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
6611 5 : XXH3_digest_long(acc, state, secret);
6612 5 : XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
6613 : { XXH128_hash_t h128;
6614 10 : h128.low64 = XXH3_mergeAccs(acc,
6615 : secret + XXH_SECRET_MERGEACCS_START,
6616 5 : (xxh_u64)state->totalLen * XXH_PRIME64_1);
6617 10 : h128.high64 = XXH3_mergeAccs(acc,
6618 5 : secret + state->secretLimit + XXH_STRIPE_LEN
6619 5 : - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
6620 5 : ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
6621 5 : return h128;
6622 : }
6623 : }
6624 : /* len <= XXH3_MIDSIZE_MAX : short code */
6625 2 : if (state->seed)
6626 0 : return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
6627 2 : return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
6628 2 : secret, state->secretLimit + XXH_STRIPE_LEN);
6629 : }
6630 : #endif /* !XXH_NO_STREAM */
6631 : /* 128-bit utility functions */
6632 :
6633 : #include <string.h> /* memcmp, memcpy */
6634 :
6635 : /* return : 1 if equal, 0 if different */
6636 : /*! @ingroup XXH3_family */
6637 0 : XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
6638 : {
6639 :     /* note : XXH128_hash_t is compact; it has no padding bytes */
6640 0 : return !(memcmp(&h1, &h2, sizeof(h1)));
6641 : }
6642 :
6643 : /* This prototype is compatible with stdlib's qsort().
6644 : * @return : >0 if *h128_1 > *h128_2
6645 : * <0 if *h128_1 < *h128_2
6646 : * =0 if *h128_1 == *h128_2 */
6647 : /*! @ingroup XXH3_family */
6648 0 : XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
6649 : {
6650 0 : XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
6651 0 : XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
6652 0 : int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
6653 : /* note : bets that, in most cases, hash values are different */
6654 0 : if (hcmp) return hcmp;
6655 0 : return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
6656 : }
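/*
 * Illustrative sketch (editorial, not part of the original source): because
 * XXH128_cmp() follows the qsort() comparator convention, an array of hashes can
 * be sorted with it directly. `hashes` and `nbHashes` are hypothetical.
 *
 *     #include <stdlib.h>   // qsort
 *
 *     qsort(hashes, nbHashes, sizeof(XXH128_hash_t), XXH128_cmp);
 *
 * For plain equality tests, XXH128_isEqual() above is simpler.
 */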
6657 :
6658 :
6659 : /*====== Canonical representation ======*/
6660 : /*! @ingroup XXH3_family */
6661 : XXH_PUBLIC_API void
6662 0 : XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
6663 : {
6664 : XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
6665 : if (XXH_CPU_LITTLE_ENDIAN) {
6666 0 : hash.high64 = XXH_swap64(hash.high64);
6667 0 : hash.low64 = XXH_swap64(hash.low64);
6668 : }
6669 0 : XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
6670 0 : XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
6671 0 : }
6672 :
6673 : /*! @ingroup XXH3_family */
6674 : XXH_PUBLIC_API XXH128_hash_t
6675 0 : XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
6676 : {
6677 : XXH128_hash_t h;
6678 0 : h.high64 = XXH_readBE64(src);
6679 0 : h.low64 = XXH_readBE64(src->digest + 8);
6680 0 : return h;
6681 : }
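/*
 * Illustrative round-trip sketch (editorial, not part of the original source):
 * the canonical form stores the hash as 16 big-endian bytes, making it suitable
 * for storage or transmission independently of host endianness. `data`/`size`
 * are hypothetical.
 *
 *     XXH128_hash_t const h = XXH3_128bits(data, size);
 *     XXH128_canonical_t canonical;
 *     XXH128_canonicalFromHash(&canonical, h);        // canonical.digest: 16 portable bytes
 *     {   XXH128_hash_t const back = XXH128_hashFromCanonical(&canonical);
 *         // back.low64 == h.low64 and back.high64 == h.high64
 *     }
 */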
6682 :
6683 :
6684 :
6685 : /* ==========================================
6686 : * Secret generators
6687 : * ==========================================
6688 : */
6689 : #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
6690 :
6691 0 : XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
6692 : {
6693 0 : XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
6694 0 : XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
6695 0 : }
6696 :
6697 : /*! @ingroup XXH3_family */
6698 : XXH_PUBLIC_API XXH_errorcode
6699 0 : XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
6700 : {
6701 : #if (XXH_DEBUGLEVEL >= 1)
6702 : XXH_ASSERT(secretBuffer != NULL);
6703 : XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
6704 : #else
6705 :     /* production mode: assert() is disabled */
6706 0 : if (secretBuffer == NULL) return XXH_ERROR;
6707 0 : if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
6708 : #endif
6709 :
6710 0 : if (customSeedSize == 0) {
6711 0 : customSeed = XXH3_kSecret;
6712 0 : customSeedSize = XXH_SECRET_DEFAULT_SIZE;
6713 : }
6714 : #if (XXH_DEBUGLEVEL >= 1)
6715 : XXH_ASSERT(customSeed != NULL);
6716 : #else
6717 0 : if (customSeed == NULL) return XXH_ERROR;
6718 : #endif
6719 :
6720 :     /* Fill secretBuffer with a copy of customSeed, repeating as needed */
6721 0 : { size_t pos = 0;
6722 0 : while (pos < secretSize) {
6723 0 : size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
6724 0 : memcpy((char*)secretBuffer + pos, customSeed, toCopy);
6725 0 : pos += toCopy;
6726 : } }
6727 :
6728 0 : { size_t const nbSeg16 = secretSize / 16;
6729 : size_t n;
6730 : XXH128_canonical_t scrambler;
6731 0 : XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
6732 0 : for (n=0; n<nbSeg16; n++) {
6733 0 : XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
6734 0 : XXH3_combine16((char*)secretBuffer + n*16, h128);
6735 : }
6736 : /* last segment */
6737 0 : XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
6738 : }
6739 0 : return XXH_OK;
6740 : }
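/*
 * Illustrative sketch (editorial, not part of the original source): derive a
 * full-entropy secret from arbitrary seed material, then hash with the
 * _withSecret() variants. make_secret_and_hash() and its parameters are
 * hypothetical names.
 *
 *     static XXH64_hash_t make_secret_and_hash(const void* seedMaterial, size_t seedSize,
 *                                              const void* data, size_t size)
 *     {
 *         unsigned char secret[XXH3_SECRET_SIZE_MIN];  // any size >= XXH3_SECRET_SIZE_MIN is accepted
 *         if (XXH3_generateSecret(secret, sizeof(secret), seedMaterial, seedSize) != XXH_OK)
 *             return 0;                                // invalid arguments
 *         return XXH3_64bits_withSecret(data, size, secret, sizeof(secret));
 *     }
 */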
6741 :
6742 : /*! @ingroup XXH3_family */
6743 : XXH_PUBLIC_API void
6744 0 : XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
6745 : {
6746 : XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
6747 0 : XXH3_initCustomSecret(secret, seed);
6748 0 : XXH_ASSERT(secretBuffer != NULL);
6749 0 : memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
6750 0 : }
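/*
 * Illustrative sketch (editorial, not part of the original source):
 * XXH3_generateSecret_fromSeed() writes the same custom secret that the
 * _withSeed() variants derive internally, so pairing it with the
 * _withSecretandSeed() entry points (using the same seed) is expected to keep
 * results consistent across short and long inputs. The buffer must hold
 * XXH_SECRET_DEFAULT_SIZE bytes (192 at the time of writing; treat the exact
 * value as an assumption).
 *
 *     unsigned char secret[XXH_SECRET_DEFAULT_SIZE];
 *     XXH64_hash_t const seed = 42;                   // hypothetical seed value
 *     XXH3_generateSecret_fromSeed(secret, seed);
 *     {   XXH64_hash_t const h = XXH3_64bits_withSecretandSeed(data, size, secret, sizeof(secret), seed);
 *         // expected to equal XXH3_64bits_withSeed(data, size, seed); `data`/`size` hypothetical
 *     }
 */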
6751 :
6752 :
6753 :
6754 : /* Pop our optimization override from above */
6755 : #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
6756 : && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
6757 : && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
6758 : # pragma GCC pop_options
6759 : #endif
6760 :
6761 : #endif /* XXH_NO_LONG_LONG */
6762 :
6763 : #endif /* XXH_NO_XXH3 */
6764 :
6765 : /*!
6766 : * @}
6767 : */
6768 : #endif /* XXH_IMPLEMENTATION */
6769 :
6770 :
6771 : #if defined (__cplusplus)
6772 : } /* extern "C" */
6773 : #endif
|