VirtualBox

source: vbox/trunk/src/libs/zlib-1.2.13/crc32.c@ 104389

Last change on this file since 104389 was 97832, checked in by vboxsync, 23 months ago

zlib-1.2.13: adding svn:sync-process property to all zlib files (except LICESNSE and zlib.3.pdf). bugref:10335

  • Property svn:eol-style set to native
File size: 31.5 KB
Line 
1/* crc32.c -- compute the CRC-32 of a data stream
2 * Copyright (C) 1995-2022 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 *
5 * This interleaved implementation of a CRC makes use of pipelined multiple
6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8 */
9
10/* @(#) $Id$ */
11
12/*
13 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14 protection on the static variables used to control the first-use generation
15 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16 first call get_crc_table() to initialize the tables before allowing more than
17 one thread to use crc32().
18
19 MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20 produced, so that this one source file can be compiled to an executable.
21 */
22
23#ifdef MAKECRCH
24# include <stdio.h>
25# ifndef DYNAMIC_CRC_TABLE
26# define DYNAMIC_CRC_TABLE
27# endif /* !DYNAMIC_CRC_TABLE */
28#endif /* MAKECRCH */
29
30#include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31
32 /*
33 A CRC of a message is computed on N braids of words in the message, where
34 each word consists of W bytes (4 or 8). If N is 3, for example, then three
35 running sparse CRCs are calculated respectively on each braid, at these
36 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37 This is done starting at a word boundary, and continues until as many blocks
38 of N * W bytes as are available have been processed. The results are combined
39 into a single CRC at the end. For this code, N must be in the range 1..6 and
40 W must be 4 or 8. The upper limit on N can be increased if desired by adding
41 more #if blocks, extending the patterns apparent in the code. In addition,
42 crc32.h would need to be regenerated, if the maximum N value is increased.
43
44 N and W are chosen empirically by benchmarking the execution time on a given
45 processor. The choices for N and W below were based on testing on Intel Kaby
46 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47 Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49 They were all tested with either gcc or clang, all using the -O3 optimization
50 level. Your mileage may vary.
51 */
52
53/* Define N */
54#ifdef Z_TESTN
55# define N Z_TESTN
56#else
57# define N 5
58#endif
59#if N < 1 || N > 6
60# error N must be in 1..6
61#endif
62
63/*
64 z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66 that bytes are eight bits.
67 */
68
69/*
70 Define W and the associated z_word_t type. If W is not defined, then a
71 braided calculation is not used, and the associated tables and code are not
72 compiled.
73 */
74#ifdef Z_TESTW
75# if Z_TESTW-1 != -1
76# define W Z_TESTW
77# endif
78#else
79# ifdef MAKECRCH
80# define W 8 /* required for MAKECRCH */
81# else
82# if defined(__x86_64__) || defined(__aarch64__)
83# define W 8
84# else
85# define W 4
86# endif
87# endif
88#endif
89#ifdef W
90# if W == 8 && defined(Z_U8)
91 typedef Z_U8 z_word_t;
92# elif defined(Z_U4)
93# undef W
94# define W 4
95 typedef Z_U4 z_word_t;
96# else
97# undef W
98# endif
99#endif
100
101/* If available, use the ARM processor CRC32 instruction. */
102#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
103# define ARMCRC32
104#endif
105
106/* Local functions. */
107local z_crc_t multmodp OF((z_crc_t a, z_crc_t b));
108local z_crc_t x2nmodp OF((z_off64_t n, unsigned k));
109
110#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
111 local z_word_t byte_swap OF((z_word_t word));
112#endif
113
114#if defined(W) && !defined(ARMCRC32)
115 local z_crc_t crc_word OF((z_word_t data));
116 local z_word_t crc_word_big OF((z_word_t data));
117#endif
118
119#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
120/*
121 Swap the bytes in a z_word_t to convert between little and big endian. Any
122 self-respecting compiler will optimize this to a single machine byte-swap
123 instruction, if one is available. This assumes that word_t is either 32 bits
124 or 64 bits.
125 */
126local z_word_t byte_swap(word)
127 z_word_t word;
128{
129# if W == 8
130 return
131 (word & 0xff00000000000000) >> 56 |
132 (word & 0xff000000000000) >> 40 |
133 (word & 0xff0000000000) >> 24 |
134 (word & 0xff00000000) >> 8 |
135 (word & 0xff000000) << 8 |
136 (word & 0xff0000) << 24 |
137 (word & 0xff00) << 40 |
138 (word & 0xff) << 56;
139# else /* W == 4 */
140 return
141 (word & 0xff000000) >> 24 |
142 (word & 0xff0000) >> 8 |
143 (word & 0xff00) << 8 |
144 (word & 0xff) << 24;
145# endif
146}
147#endif
148
149/* CRC polynomial. */
150#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
151
152#ifdef DYNAMIC_CRC_TABLE
153
154local z_crc_t FAR crc_table[256];
155local z_crc_t FAR x2n_table[32];
156local void make_crc_table OF((void));
157#ifdef W
158 local z_word_t FAR crc_big_table[256];
159 local z_crc_t FAR crc_braid_table[W][256];
160 local z_word_t FAR crc_braid_big_table[W][256];
161 local void braid OF((z_crc_t [][256], z_word_t [][256], int, int));
162#endif
163#ifdef MAKECRCH
164 local void write_table OF((FILE *, const z_crc_t FAR *, int));
165 local void write_table32hi OF((FILE *, const z_word_t FAR *, int));
166 local void write_table64 OF((FILE *, const z_word_t FAR *, int));
167#endif /* MAKECRCH */
168
169/*
170 Define a once() function depending on the availability of atomics. If this is
171 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
172 multiple threads, and if atomics are not available, then get_crc_table() must
173 be called to initialize the tables and must return before any threads are
174 allowed to compute or combine CRCs.
175 */
176
177/* Definition of once functionality. */
178typedef struct once_s once_t;
179local void once OF((once_t *, void (*)(void)));
180
181/* Check for the availability of atomics. */
182#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
183 !defined(__STDC_NO_ATOMICS__)
184
185#include <stdatomic.h>
186
187/* Structure for once(), which must be initialized with ONCE_INIT. */
188struct once_s {
189 atomic_flag begun;
190 atomic_int done;
191};
192#define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
193
194/*
195 Run the provided init() function exactly once, even if multiple threads
196 invoke once() at the same time. The state must be a once_t initialized with
197 ONCE_INIT.
198 */
199local void once(state, init)
200 once_t *state;
201 void (*init)(void);
202{
203 if (!atomic_load(&state->done)) {
204 if (atomic_flag_test_and_set(&state->begun))
205 while (!atomic_load(&state->done))
206 ;
207 else {
208 init();
209 atomic_store(&state->done, 1);
210 }
211 }
212}
213
214#else /* no atomics */
215
216/* Structure for once(), which must be initialized with ONCE_INIT. */
217struct once_s {
218 volatile int begun;
219 volatile int done;
220};
221#define ONCE_INIT {0, 0}
222
223/* Test and set. Alas, not atomic, but tries to minimize the period of
224 vulnerability. */
225local int test_and_set OF((int volatile *));
226local int test_and_set(flag)
227 int volatile *flag;
228{
229 int was;
230
231 was = *flag;
232 *flag = 1;
233 return was;
234}
235
236/* Run the provided init() function once. This is not thread-safe. */
237local void once(state, init)
238 once_t *state;
239 void (*init)(void);
240{
241 if (!state->done) {
242 if (test_and_set(&state->begun))
243 while (!state->done)
244 ;
245 else {
246 init();
247 state->done = 1;
248 }
249 }
250}
251
252#endif
253
254/* State for once(). */
255local once_t made = ONCE_INIT;
256
257/*
258 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
259 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
260
261 Polynomials over GF(2) are represented in binary, one bit per coefficient,
262 with the lowest powers in the most significant bit. Then adding polynomials
263 is just exclusive-or, and multiplying a polynomial by x is a right shift by
264 one. If we call the above polynomial p, and represent a byte as the
265 polynomial q, also with the lowest power in the most significant bit (so the
266 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
267 where a mod b means the remainder after dividing a by b.
268
269 This calculation is done using the shift-register method of multiplying and
270 taking the remainder. The register is initialized to zero, and for each
271 incoming bit, x^32 is added mod p to the register if the bit is a one (where
272 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
273 (which is shifting right by one and adding x^32 mod p if the bit shifted out
274 is a one). We start with the highest power (least significant bit) of q and
275 repeat for all eight bits of q.
276
277 The table is simply the CRC of all possible eight bit values. This is all the
278 information needed to generate CRCs on data a byte at a time for all
279 combinations of CRC register values and incoming bytes.
280 */
281
282local void make_crc_table()
283{
284 unsigned i, j, n;
285 z_crc_t p;
286
287 /* initialize the CRC of bytes tables */
288 for (i = 0; i < 256; i++) {
289 p = i;
290 for (j = 0; j < 8; j++)
291 p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
292 crc_table[i] = p;
293#ifdef W
294 crc_big_table[i] = byte_swap(p);
295#endif
296 }
297
298 /* initialize the x^2^n mod p(x) table */
299 p = (z_crc_t)1 << 30; /* x^1 */
300 x2n_table[0] = p;
301 for (n = 1; n < 32; n++)
302 x2n_table[n] = p = multmodp(p, p);
303
304#ifdef W
305 /* initialize the braiding tables -- needs x2n_table[] */
306 braid(crc_braid_table, crc_braid_big_table, N, W);
307#endif
308
309#ifdef MAKECRCH
310 {
311 /*
312 The crc32.h header file contains tables for both 32-bit and 64-bit
313 z_word_t's, and so requires a 64-bit type be available. In that case,
314 z_word_t must be defined to be 64-bits. This code then also generates
315 and writes out the tables for the case that z_word_t is 32 bits.
316 */
317#if !defined(W) || W != 8
318# error Need a 64-bit integer type in order to generate crc32.h.
319#endif
320 FILE *out;
321 int k, n;
322 z_crc_t ltl[8][256];
323 z_word_t big[8][256];
324
325 out = fopen("crc32.h", "w");
326 if (out == NULL) return;
327
328 /* write out little-endian CRC table to crc32.h */
329 fprintf(out,
330 "/* crc32.h -- tables for rapid CRC calculation\n"
331 " * Generated automatically by crc32.c\n */\n"
332 "\n"
333 "local const z_crc_t FAR crc_table[] = {\n"
334 " ");
335 write_table(out, crc_table, 256);
336 fprintf(out,
337 "};\n");
338
339 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
340 fprintf(out,
341 "\n"
342 "#ifdef W\n"
343 "\n"
344 "#if W == 8\n"
345 "\n"
346 "local const z_word_t FAR crc_big_table[] = {\n"
347 " ");
348 write_table64(out, crc_big_table, 256);
349 fprintf(out,
350 "};\n");
351
352 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
353 fprintf(out,
354 "\n"
355 "#else /* W == 4 */\n"
356 "\n"
357 "local const z_word_t FAR crc_big_table[] = {\n"
358 " ");
359 write_table32hi(out, crc_big_table, 256);
360 fprintf(out,
361 "};\n"
362 "\n"
363 "#endif\n");
364
365 /* write out braid tables for each value of N */
366 for (n = 1; n <= 6; n++) {
367 fprintf(out,
368 "\n"
369 "#if N == %d\n", n);
370
371 /* compute braid tables for this N and 64-bit word_t */
372 braid(ltl, big, n, 8);
373
374 /* write out braid tables for 64-bit z_word_t to crc32.h */
375 fprintf(out,
376 "\n"
377 "#if W == 8\n"
378 "\n"
379 "local const z_crc_t FAR crc_braid_table[][256] = {\n");
380 for (k = 0; k < 8; k++) {
381 fprintf(out, " {");
382 write_table(out, ltl[k], 256);
383 fprintf(out, "}%s", k < 7 ? ",\n" : "");
384 }
385 fprintf(out,
386 "};\n"
387 "\n"
388 "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
389 for (k = 0; k < 8; k++) {
390 fprintf(out, " {");
391 write_table64(out, big[k], 256);
392 fprintf(out, "}%s", k < 7 ? ",\n" : "");
393 }
394 fprintf(out,
395 "};\n");
396
397 /* compute braid tables for this N and 32-bit word_t */
398 braid(ltl, big, n, 4);
399
400 /* write out braid tables for 32-bit z_word_t to crc32.h */
401 fprintf(out,
402 "\n"
403 "#else /* W == 4 */\n"
404 "\n"
405 "local const z_crc_t FAR crc_braid_table[][256] = {\n");
406 for (k = 0; k < 4; k++) {
407 fprintf(out, " {");
408 write_table(out, ltl[k], 256);
409 fprintf(out, "}%s", k < 3 ? ",\n" : "");
410 }
411 fprintf(out,
412 "};\n"
413 "\n"
414 "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
415 for (k = 0; k < 4; k++) {
416 fprintf(out, " {");
417 write_table32hi(out, big[k], 256);
418 fprintf(out, "}%s", k < 3 ? ",\n" : "");
419 }
420 fprintf(out,
421 "};\n"
422 "\n"
423 "#endif\n"
424 "\n"
425 "#endif\n");
426 }
427 fprintf(out,
428 "\n"
429 "#endif\n");
430
431 /* write out zeros operator table to crc32.h */
432 fprintf(out,
433 "\n"
434 "local const z_crc_t FAR x2n_table[] = {\n"
435 " ");
436 write_table(out, x2n_table, 32);
437 fprintf(out,
438 "};\n");
439 fclose(out);
440 }
441#endif /* MAKECRCH */
442}
443
444#ifdef MAKECRCH
445
446/*
447 Write the 32-bit values in table[0..k-1] to out, five per line in
448 hexadecimal separated by commas.
449 */
450local void write_table(out, table, k)
451 FILE *out;
452 const z_crc_t FAR *table;
453 int k;
454{
455 int n;
456
457 for (n = 0; n < k; n++)
458 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
459 (unsigned long)(table[n]),
460 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
461}
462
463/*
464 Write the high 32-bits of each value in table[0..k-1] to out, five per line
465 in hexadecimal separated by commas.
466 */
467local void write_table32hi(out, table, k)
468FILE *out;
469const z_word_t FAR *table;
470int k;
471{
472 int n;
473
474 for (n = 0; n < k; n++)
475 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
476 (unsigned long)(table[n] >> 32),
477 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
478}
479
480/*
481 Write the 64-bit values in table[0..k-1] to out, three per line in
482 hexadecimal separated by commas. This assumes that if there is a 64-bit
483 type, then there is also a long long integer type, and it is at least 64
484 bits. If not, then the type cast and format string can be adjusted
485 accordingly.
486 */
487local void write_table64(out, table, k)
488 FILE *out;
489 const z_word_t FAR *table;
490 int k;
491{
492 int n;
493
494 for (n = 0; n < k; n++)
495 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ",
496 (unsigned long long)(table[n]),
497 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
498}
499
500/* Actually do the deed. */
501int main()
502{
503 make_crc_table();
504 return 0;
505}
506
507#endif /* MAKECRCH */
508
509#ifdef W
510/*
511 Generate the little and big-endian braid tables for the given n and z_word_t
512 size w. Each array must have room for w blocks of 256 elements.
513 */
514local void braid(ltl, big, n, w)
515 z_crc_t ltl[][256];
516 z_word_t big[][256];
517 int n;
518 int w;
519{
520 int k;
521 z_crc_t i, p, q;
522 for (k = 0; k < w; k++) {
523 p = x2nmodp((n * w + 3 - k) << 3, 0);
524 ltl[k][0] = 0;
525 big[w - 1 - k][0] = 0;
526 for (i = 1; i < 256; i++) {
527 ltl[k][i] = q = multmodp(i << 24, p);
528 big[w - 1 - k][i] = byte_swap(q);
529 }
530 }
531}
532#endif
533
534#else /* !DYNAMIC_CRC_TABLE */
535/* ========================================================================
536 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
537 * of x for combining CRC-32s, all made by make_crc_table().
538 */
539#include "crc32.h"
540#endif /* DYNAMIC_CRC_TABLE */
541
542/* ========================================================================
543 * Routines used for CRC calculation. Some are also required for the table
544 * generation above.
545 */
546
547/*
548 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
549 reflected. For speed, this requires that a not be zero.
550 */
551local z_crc_t multmodp(a, b)
552 z_crc_t a;
553 z_crc_t b;
554{
555 z_crc_t m, p;
556
557 m = (z_crc_t)1 << 31;
558 p = 0;
559 for (;;) {
560 if (a & m) {
561 p ^= b;
562 if ((a & (m - 1)) == 0)
563 break;
564 }
565 m >>= 1;
566 b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
567 }
568 return p;
569}
570
571/*
572 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
573 initialized.
574 */
575local z_crc_t x2nmodp(n, k)
576 z_off64_t n;
577 unsigned k;
578{
579 z_crc_t p;
580
581 p = (z_crc_t)1 << 31; /* x^0 == 1 */
582 while (n) {
583 if (n & 1)
584 p = multmodp(x2n_table[k & 31], p);
585 n >>= 1;
586 k++;
587 }
588 return p;
589}
590
591/* =========================================================================
592 * This function can be used by asm versions of crc32(), and to force the
593 * generation of the CRC tables in a threaded application.
594 */
595const z_crc_t FAR * ZEXPORT get_crc_table()
596{
597#ifdef DYNAMIC_CRC_TABLE
598 once(&made, make_crc_table);
599#endif /* DYNAMIC_CRC_TABLE */
600 return (const z_crc_t FAR *)crc_table;
601}
602
603/* =========================================================================
604 * Use ARM machine instructions if available. This will compute the CRC about
605 * ten times faster than the braided calculation. This code does not check for
606 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
607 * only be defined if the compilation specifies an ARM processor architecture
608 * that has the instructions. For example, compiling with -march=armv8.1-a or
609 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
610 * instructions.
611 */
612#ifdef ARMCRC32
613
614/*
615 Constants empirically determined to maximize speed. These values are from
616 measurements on a Cortex-A57. Your mileage may vary.
617 */
618#define Z_BATCH 3990 /* number of words in a batch */
619#define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */
620#define Z_BATCH_MIN 800 /* fewest words in a final batch */
621
622unsigned long ZEXPORT crc32_z(crc, buf, len)
623 unsigned long crc;
624 const unsigned char FAR *buf;
625 z_size_t len;
626{
627 z_crc_t val;
628 z_word_t crc1, crc2;
629 const z_word_t *word;
630 z_word_t val0, val1, val2;
631 z_size_t last, last2, i;
632 z_size_t num;
633
634 /* Return initial CRC, if requested. */
635 if (buf == Z_NULL) return 0;
636
637#ifdef DYNAMIC_CRC_TABLE
638 once(&made, make_crc_table);
639#endif /* DYNAMIC_CRC_TABLE */
640
641 /* Pre-condition the CRC */
642 crc = (~crc) & 0xffffffff;
643
644 /* Compute the CRC up to a word boundary. */
645 while (len && ((z_size_t)buf & 7) != 0) {
646 len--;
647 val = *buf++;
648 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
649 }
650
651 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
652 word = (z_word_t const *)buf;
653 num = len >> 3;
654 len &= 7;
655
656 /* Do three interleaved CRCs to realize the throughput of one crc32x
657 instruction per cycle. Each CRC is calculated on Z_BATCH words. The
658 three CRCs are combined into a single CRC after each set of batches. */
659 while (num >= 3 * Z_BATCH) {
660 crc1 = 0;
661 crc2 = 0;
662 for (i = 0; i < Z_BATCH; i++) {
663 val0 = word[i];
664 val1 = word[i + Z_BATCH];
665 val2 = word[i + 2 * Z_BATCH];
666 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
667 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
668 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
669 }
670 word += 3 * Z_BATCH;
671 num -= 3 * Z_BATCH;
672 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
673 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
674 }
675
676 /* Do one last smaller batch with the remaining words, if there are enough
677 to pay for the combination of CRCs. */
678 last = num / 3;
679 if (last >= Z_BATCH_MIN) {
680 last2 = last << 1;
681 crc1 = 0;
682 crc2 = 0;
683 for (i = 0; i < last; i++) {
684 val0 = word[i];
685 val1 = word[i + last];
686 val2 = word[i + last2];
687 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
688 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
689 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
690 }
691 word += 3 * last;
692 num -= 3 * last;
693 val = x2nmodp(last, 6);
694 crc = multmodp(val, crc) ^ crc1;
695 crc = multmodp(val, crc) ^ crc2;
696 }
697
698 /* Compute the CRC on any remaining words. */
699 for (i = 0; i < num; i++) {
700 val0 = word[i];
701 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
702 }
703 word += num;
704
705 /* Complete the CRC on any remaining bytes. */
706 buf = (const unsigned char FAR *)word;
707 while (len) {
708 len--;
709 val = *buf++;
710 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
711 }
712
713 /* Return the CRC, post-conditioned. */
714 return crc ^ 0xffffffff;
715}
716
717#else
718
719#ifdef W
720
721/*
722 Return the CRC of the W bytes in the word_t data, taking the
723 least-significant byte of the word as the first byte of data, without any pre
724 or post conditioning. This is used to combine the CRCs of each braid.
725 */
726local z_crc_t crc_word(data)
727 z_word_t data;
728{
729 int k;
730 for (k = 0; k < W; k++)
731 data = (data >> 8) ^ crc_table[data & 0xff];
732 return (z_crc_t)data;
733}
734
735local z_word_t crc_word_big(data)
736 z_word_t data;
737{
738 int k;
739 for (k = 0; k < W; k++)
740 data = (data << 8) ^
741 crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
742 return data;
743}
744
745#endif
746
747/* ========================================================================= */
748unsigned long ZEXPORT crc32_z(crc, buf, len)
749 unsigned long crc;
750 const unsigned char FAR *buf;
751 z_size_t len;
752{
753 /* Return initial CRC, if requested. */
754 if (buf == Z_NULL) return 0;
755
756#ifdef DYNAMIC_CRC_TABLE
757 once(&made, make_crc_table);
758#endif /* DYNAMIC_CRC_TABLE */
759
760 /* Pre-condition the CRC */
761 crc = (~crc) & 0xffffffff;
762
763#ifdef W
764
765 /* If provided enough bytes, do a braided CRC calculation. */
766 if (len >= N * W + W - 1) {
767 z_size_t blks;
768 z_word_t const *words;
769 unsigned endian;
770 int k;
771
772 /* Compute the CRC up to a z_word_t boundary. */
773 while (len && ((z_size_t)buf & (W - 1)) != 0) {
774 len--;
775 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
776 }
777
778 /* Compute the CRC on as many N z_word_t blocks as are available. */
779 blks = len / (N * W);
780 len -= blks * N * W;
781 words = (z_word_t const *)buf;
782
783 /* Do endian check at execution time instead of compile time, since ARM
784 processors can change the endianess at execution time. If the
785 compiler knows what the endianess will be, it can optimize out the
786 check and the unused branch. */
787 endian = 1;
788 if (*(unsigned char *)&endian) {
789 /* Little endian. */
790
791 z_crc_t crc0;
792 z_word_t word0;
793#if N > 1
794 z_crc_t crc1;
795 z_word_t word1;
796#if N > 2
797 z_crc_t crc2;
798 z_word_t word2;
799#if N > 3
800 z_crc_t crc3;
801 z_word_t word3;
802#if N > 4
803 z_crc_t crc4;
804 z_word_t word4;
805#if N > 5
806 z_crc_t crc5;
807 z_word_t word5;
808#endif
809#endif
810#endif
811#endif
812#endif
813
814 /* Initialize the CRC for each braid. */
815 crc0 = crc;
816#if N > 1
817 crc1 = 0;
818#if N > 2
819 crc2 = 0;
820#if N > 3
821 crc3 = 0;
822#if N > 4
823 crc4 = 0;
824#if N > 5
825 crc5 = 0;
826#endif
827#endif
828#endif
829#endif
830#endif
831
832 /*
833 Process the first blks-1 blocks, computing the CRCs on each braid
834 independently.
835 */
836 while (--blks) {
837 /* Load the word for each braid into registers. */
838 word0 = crc0 ^ words[0];
839#if N > 1
840 word1 = crc1 ^ words[1];
841#if N > 2
842 word2 = crc2 ^ words[2];
843#if N > 3
844 word3 = crc3 ^ words[3];
845#if N > 4
846 word4 = crc4 ^ words[4];
847#if N > 5
848 word5 = crc5 ^ words[5];
849#endif
850#endif
851#endif
852#endif
853#endif
854 words += N;
855
856 /* Compute and update the CRC for each word. The loop should
857 get unrolled. */
858 crc0 = crc_braid_table[0][word0 & 0xff];
859#if N > 1
860 crc1 = crc_braid_table[0][word1 & 0xff];
861#if N > 2
862 crc2 = crc_braid_table[0][word2 & 0xff];
863#if N > 3
864 crc3 = crc_braid_table[0][word3 & 0xff];
865#if N > 4
866 crc4 = crc_braid_table[0][word4 & 0xff];
867#if N > 5
868 crc5 = crc_braid_table[0][word5 & 0xff];
869#endif
870#endif
871#endif
872#endif
873#endif
874 for (k = 1; k < W; k++) {
875 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
876#if N > 1
877 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
878#if N > 2
879 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
880#if N > 3
881 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
882#if N > 4
883 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
884#if N > 5
885 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
886#endif
887#endif
888#endif
889#endif
890#endif
891 }
892 }
893
894 /*
895 Process the last block, combining the CRCs of the N braids at the
896 same time.
897 */
898 crc = crc_word(crc0 ^ words[0]);
899#if N > 1
900 crc = crc_word(crc1 ^ words[1] ^ crc);
901#if N > 2
902 crc = crc_word(crc2 ^ words[2] ^ crc);
903#if N > 3
904 crc = crc_word(crc3 ^ words[3] ^ crc);
905#if N > 4
906 crc = crc_word(crc4 ^ words[4] ^ crc);
907#if N > 5
908 crc = crc_word(crc5 ^ words[5] ^ crc);
909#endif
910#endif
911#endif
912#endif
913#endif
914 words += N;
915 }
916 else {
917 /* Big endian. */
918
919 z_word_t crc0, word0, comb;
920#if N > 1
921 z_word_t crc1, word1;
922#if N > 2
923 z_word_t crc2, word2;
924#if N > 3
925 z_word_t crc3, word3;
926#if N > 4
927 z_word_t crc4, word4;
928#if N > 5
929 z_word_t crc5, word5;
930#endif
931#endif
932#endif
933#endif
934#endif
935
936 /* Initialize the CRC for each braid. */
937 crc0 = byte_swap(crc);
938#if N > 1
939 crc1 = 0;
940#if N > 2
941 crc2 = 0;
942#if N > 3
943 crc3 = 0;
944#if N > 4
945 crc4 = 0;
946#if N > 5
947 crc5 = 0;
948#endif
949#endif
950#endif
951#endif
952#endif
953
954 /*
955 Process the first blks-1 blocks, computing the CRCs on each braid
956 independently.
957 */
958 while (--blks) {
959 /* Load the word for each braid into registers. */
960 word0 = crc0 ^ words[0];
961#if N > 1
962 word1 = crc1 ^ words[1];
963#if N > 2
964 word2 = crc2 ^ words[2];
965#if N > 3
966 word3 = crc3 ^ words[3];
967#if N > 4
968 word4 = crc4 ^ words[4];
969#if N > 5
970 word5 = crc5 ^ words[5];
971#endif
972#endif
973#endif
974#endif
975#endif
976 words += N;
977
978 /* Compute and update the CRC for each word. The loop should
979 get unrolled. */
980 crc0 = crc_braid_big_table[0][word0 & 0xff];
981#if N > 1
982 crc1 = crc_braid_big_table[0][word1 & 0xff];
983#if N > 2
984 crc2 = crc_braid_big_table[0][word2 & 0xff];
985#if N > 3
986 crc3 = crc_braid_big_table[0][word3 & 0xff];
987#if N > 4
988 crc4 = crc_braid_big_table[0][word4 & 0xff];
989#if N > 5
990 crc5 = crc_braid_big_table[0][word5 & 0xff];
991#endif
992#endif
993#endif
994#endif
995#endif
996 for (k = 1; k < W; k++) {
997 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
998#if N > 1
999 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
1000#if N > 2
1001 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
1002#if N > 3
1003 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
1004#if N > 4
1005 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
1006#if N > 5
1007 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
1008#endif
1009#endif
1010#endif
1011#endif
1012#endif
1013 }
1014 }
1015
1016 /*
1017 Process the last block, combining the CRCs of the N braids at the
1018 same time.
1019 */
1020 comb = crc_word_big(crc0 ^ words[0]);
1021#if N > 1
1022 comb = crc_word_big(crc1 ^ words[1] ^ comb);
1023#if N > 2
1024 comb = crc_word_big(crc2 ^ words[2] ^ comb);
1025#if N > 3
1026 comb = crc_word_big(crc3 ^ words[3] ^ comb);
1027#if N > 4
1028 comb = crc_word_big(crc4 ^ words[4] ^ comb);
1029#if N > 5
1030 comb = crc_word_big(crc5 ^ words[5] ^ comb);
1031#endif
1032#endif
1033#endif
1034#endif
1035#endif
1036 words += N;
1037 crc = byte_swap(comb);
1038 }
1039
1040 /*
1041 Update the pointer to the remaining bytes to process.
1042 */
1043 buf = (unsigned char const *)words;
1044 }
1045
1046#endif /* W */
1047
1048 /* Complete the computation of the CRC on any remaining bytes. */
1049 while (len >= 8) {
1050 len -= 8;
1051 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1052 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1053 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1054 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1055 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1056 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1057 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1058 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1059 }
1060 while (len) {
1061 len--;
1062 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1063 }
1064
1065 /* Return the CRC, post-conditioned. */
1066 return crc ^ 0xffffffff;
1067}
1068
1069#endif
1070
1071/* ========================================================================= */
1072unsigned long ZEXPORT crc32(crc, buf, len)
1073 unsigned long crc;
1074 const unsigned char FAR *buf;
1075 uInt len;
1076{
1077 return crc32_z(crc, buf, len);
1078}
1079
1080/* ========================================================================= */
1081uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
1082 uLong crc1;
1083 uLong crc2;
1084 z_off64_t len2;
1085{
1086#ifdef DYNAMIC_CRC_TABLE
1087 once(&made, make_crc_table);
1088#endif /* DYNAMIC_CRC_TABLE */
1089 return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1090}
1091
1092/* ========================================================================= */
1093uLong ZEXPORT crc32_combine(crc1, crc2, len2)
1094 uLong crc1;
1095 uLong crc2;
1096 z_off_t len2;
1097{
1098 return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1099}
1100
1101/* ========================================================================= */
1102uLong ZEXPORT crc32_combine_gen64(len2)
1103 z_off64_t len2;
1104{
1105#ifdef DYNAMIC_CRC_TABLE
1106 once(&made, make_crc_table);
1107#endif /* DYNAMIC_CRC_TABLE */
1108 return x2nmodp(len2, 3);
1109}
1110
1111/* ========================================================================= */
1112uLong ZEXPORT crc32_combine_gen(len2)
1113 z_off_t len2;
1114{
1115 return crc32_combine_gen64((z_off64_t)len2);
1116}
1117
1118/* ========================================================================= */
1119uLong ZEXPORT crc32_combine_op(crc1, crc2, op)
1120 uLong crc1;
1121 uLong crc2;
1122 uLong op;
1123{
1124 return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1125}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette