1 | ///////////////////////////////////////////////////////////////////////////////
|
---|
2 | //
|
---|
3 | /// \file fastpos.h
|
---|
4 | /// \brief Kind of two-bit version of bit scan reverse
|
---|
5 | ///
|
---|
6 | // Authors: Igor Pavlov
|
---|
7 | // Lasse Collin
|
---|
8 | //
|
---|
9 | // This file has been put into the public domain.
|
---|
10 | // You can do whatever you want with this file.
|
---|
11 | //
|
---|
12 | ///////////////////////////////////////////////////////////////////////////////
|
---|
13 |
|
---|
14 | #ifndef LZMA_FASTPOS_H
|
---|
15 | #define LZMA_FASTPOS_H
|
---|
16 |
|
---|
17 | // LZMA encodes match distances by storing the highest two bits using
|
---|
18 | // a six-bit value [0, 63], and then the missing lower bits.
|
---|
19 | // Dictionary size is also stored using this encoding in the .xz
|
---|
20 | // file format header.
|
---|
21 | //
|
---|
22 | // fastpos.h provides a way to quickly find out the correct six-bit
|
---|
23 | // values. The following table gives some examples of this encoding:
|
---|
24 | //
|
---|
25 | // dist return
|
---|
26 | // 0 0
|
---|
27 | // 1 1
|
---|
28 | // 2 2
|
---|
29 | // 3 3
|
---|
30 | // 4 4
|
---|
31 | // 5 4
|
---|
32 | // 6 5
|
---|
33 | // 7 5
|
---|
34 | // 8 6
|
---|
35 | // 11 6
|
---|
36 | // 12 7
|
---|
37 | // ... ...
|
---|
38 | // 15 7
|
---|
39 | // 16 8
|
---|
40 | // 17 8
|
---|
41 | // ... ...
|
---|
42 | // 23 8
|
---|
43 | // 24 9
|
---|
44 | // 25 9
|
---|
45 | // ... ...
|
---|
46 | //
|
---|
47 | //
|
---|
48 | // Provided functions or macros
|
---|
49 | // ----------------------------
|
---|
50 | //
|
---|
51 | // get_dist_slot(dist) is the basic version. get_dist_slot_2(dist)
|
---|
52 | // assumes that dist >= FULL_DISTANCES, thus the result is at least
|
---|
53 | // FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of
|
---|
54 | // get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist)
|
---|
55 | // should be tiny bit faster due to the assumption being made.
|
---|
56 | //
|
---|
57 | //
|
---|
58 | // Size vs. speed
|
---|
59 | // --------------
|
---|
60 | //
|
---|
61 | // With some CPUs that have fast BSR (bit scan reverse) instruction, the
|
---|
62 | // size optimized version is slightly faster than the bigger table based
|
---|
63 | // approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III
|
---|
64 | // and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that
|
---|
65 | // would still have speed roughly comparable to the table version. Older
|
---|
66 | // x86 CPUs like the original Pentium have very slow BSR; on those systems
|
---|
67 | // the table version is a lot faster.
|
---|
68 | //
|
---|
69 | // On some CPUs, the table version is a lot faster when using position
|
---|
70 | // dependent code, but with position independent code the size optimized
|
---|
71 | // version is slightly faster. This occurs at least on 32-bit SPARC (no
|
---|
72 | // ASM optimizations).
|
---|
73 | //
|
---|
74 | // I'm making the table version the default, because that has good speed
|
---|
75 | // on all systems I have tried. The size optimized version is sometimes
|
---|
76 | // slightly faster, but sometimes it is a lot slower.
|
---|
77 |
|
---|
78 | #ifdef HAVE_SMALL
|
---|
79 | # define get_dist_slot(dist) \
|
---|
80 | ((dist) <= 4 ? (dist) : get_dist_slot_2(dist))
|
---|
81 |
|
---|
82 | static inline uint32_t
|
---|
83 | get_dist_slot_2(uint32_t dist)
|
---|
84 | {
|
---|
85 | const uint32_t i = bsr32(dist);
|
---|
86 | return (i + i) + ((dist >> (i - 1)) & 1);
|
---|
87 | }
|
---|
88 |
|
---|
89 |
|
---|
90 | #else
|
---|
91 |
|
---|
92 | #define FASTPOS_BITS 13
|
---|
93 |
|
---|
94 | extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];
|
---|
95 |
|
---|
96 |
|
---|
97 | #define fastpos_shift(extra, n) \
|
---|
98 | ((extra) + (n) * (FASTPOS_BITS - 1))
|
---|
99 |
|
---|
100 | #define fastpos_limit(extra, n) \
|
---|
101 | (UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))
|
---|
102 |
|
---|
103 | #define fastpos_result(dist, extra, n) \
|
---|
104 | (uint32_t)(lzma_fastpos[(dist) >> fastpos_shift(extra, n)]) \
|
---|
105 | + 2 * fastpos_shift(extra, n)
|
---|
106 |
|
---|
107 |
|
---|
108 | static inline uint32_t
|
---|
109 | get_dist_slot(uint32_t dist)
|
---|
110 | {
|
---|
111 | // If it is small enough, we can pick the result directly from
|
---|
112 | // the precalculated table.
|
---|
113 | if (dist < fastpos_limit(0, 0))
|
---|
114 | return lzma_fastpos[dist];
|
---|
115 |
|
---|
116 | if (dist < fastpos_limit(0, 1))
|
---|
117 | return fastpos_result(dist, 0, 1);
|
---|
118 |
|
---|
119 | return fastpos_result(dist, 0, 2);
|
---|
120 | }
|
---|
121 |
|
---|
122 |
|
---|
123 | #ifdef FULL_DISTANCES_BITS
|
---|
124 | static inline uint32_t
|
---|
125 | get_dist_slot_2(uint32_t dist)
|
---|
126 | {
|
---|
127 | assert(dist >= FULL_DISTANCES);
|
---|
128 |
|
---|
129 | if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
|
---|
130 | return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0);
|
---|
131 |
|
---|
132 | if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
|
---|
133 | return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1);
|
---|
134 |
|
---|
135 | return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2);
|
---|
136 | }
|
---|
137 | #endif
|
---|
138 |
|
---|
139 | #endif
|
---|
140 |
|
---|
141 | #endif
|
---|