1 | ///////////////////////////////////////////////////////////////////////////////
|
---|
2 | //
|
---|
3 | /// \file lzma_common.h
|
---|
4 | /// \brief Private definitions common to LZMA encoder and decoder
|
---|
5 | ///
|
---|
6 | // Authors: Igor Pavlov
|
---|
7 | // Lasse Collin
|
---|
8 | //
|
---|
9 | // This file has been put into the public domain.
|
---|
10 | // You can do whatever you want with this file.
|
---|
11 | //
|
---|
12 | ///////////////////////////////////////////////////////////////////////////////
|
---|
13 |
|
---|
14 | #ifndef LZMA_LZMA_COMMON_H
|
---|
15 | #define LZMA_LZMA_COMMON_H
|
---|
16 |
|
---|
17 | #include "common.h"
|
---|
18 | #include "range_common.h"
|
---|
19 |
|
---|
20 |
|
---|
21 | ///////////////////
|
---|
22 | // Miscellaneous //
|
---|
23 | ///////////////////
|
---|
24 |
|
---|
25 | /// Maximum number of position states. A position state is the lowest pos bits
|
---|
26 | /// number of bits of the current uncompressed offset. In some places there
|
---|
27 | /// are different sets of probabilities for different pos states.
|
---|
28 | #define POS_STATES_MAX (1 << LZMA_PB_MAX)
|
---|
29 |
|
---|
30 |
|
---|
31 | /// Validates lc, lp, and pb.
|
---|
32 | static inline bool
|
---|
33 | is_lclppb_valid(const lzma_options_lzma *options)
|
---|
34 | {
|
---|
35 | return options->lc <= LZMA_LCLP_MAX && options->lp <= LZMA_LCLP_MAX
|
---|
36 | && options->lc + options->lp <= LZMA_LCLP_MAX
|
---|
37 | && options->pb <= LZMA_PB_MAX;
|
---|
38 | }
|
---|
39 |
|
---|
40 |
|
---|
41 | ///////////
|
---|
42 | // State //
|
---|
43 | ///////////
|
---|
44 |
|
---|
45 | /// This enum is used to track which events have occurred most recently and
|
---|
46 | /// in which order. This information is used to predict the next event.
|
---|
47 | ///
|
---|
48 | /// Events:
|
---|
49 | /// - Literal: One 8-bit byte
|
---|
50 | /// - Match: Repeat a chunk of data at some distance
|
---|
51 | /// - Long repeat: Multi-byte match at a recently seen distance
|
---|
52 | /// - Short repeat: One-byte repeat at a recently seen distance
|
---|
53 | ///
|
---|
54 | /// The event names are in from STATE_oldest_older_previous. REP means
|
---|
55 | /// either short or long repeated match, and NONLIT means any non-literal.
|
---|
56 | typedef enum {
|
---|
57 | STATE_LIT_LIT,
|
---|
58 | STATE_MATCH_LIT_LIT,
|
---|
59 | STATE_REP_LIT_LIT,
|
---|
60 | STATE_SHORTREP_LIT_LIT,
|
---|
61 | STATE_MATCH_LIT,
|
---|
62 | STATE_REP_LIT,
|
---|
63 | STATE_SHORTREP_LIT,
|
---|
64 | STATE_LIT_MATCH,
|
---|
65 | STATE_LIT_LONGREP,
|
---|
66 | STATE_LIT_SHORTREP,
|
---|
67 | STATE_NONLIT_MATCH,
|
---|
68 | STATE_NONLIT_REP,
|
---|
69 | } lzma_lzma_state;
|
---|
70 |
|
---|
71 |
|
---|
72 | /// Total number of states
|
---|
73 | #define STATES 12
|
---|
74 |
|
---|
75 | /// The lowest 7 states indicate that the previous state was a literal.
|
---|
76 | #define LIT_STATES 7
|
---|
77 |
|
---|
78 |
|
---|
79 | /// Indicate that the latest state was a literal.
|
---|
80 | #define update_literal(state) \
|
---|
81 | state = ((state) <= STATE_SHORTREP_LIT_LIT \
|
---|
82 | ? STATE_LIT_LIT \
|
---|
83 | : ((state) <= STATE_LIT_SHORTREP \
|
---|
84 | ? (state) - 3 \
|
---|
85 | : (state) - 6))
|
---|
86 |
|
---|
87 | /// Indicate that the latest state was a match.
|
---|
88 | #define update_match(state) \
|
---|
89 | state = ((state) < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH)
|
---|
90 |
|
---|
91 | /// Indicate that the latest state was a long repeated match.
|
---|
92 | #define update_long_rep(state) \
|
---|
93 | state = ((state) < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP)
|
---|
94 |
|
---|
95 | /// Indicate that the latest state was a short match.
|
---|
96 | #define update_short_rep(state) \
|
---|
97 | state = ((state) < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP)
|
---|
98 |
|
---|
99 | /// Test if the previous state was a literal.
|
---|
100 | #define is_literal_state(state) \
|
---|
101 | ((state) < LIT_STATES)
|
---|
102 |
|
---|
103 |
|
---|
104 | /////////////
|
---|
105 | // Literal //
|
---|
106 | /////////////
|
---|
107 |
|
---|
108 | /// Each literal coder is divided in three sections:
|
---|
109 | /// - 0x001-0x0FF: Without match byte
|
---|
110 | /// - 0x101-0x1FF: With match byte; match bit is 0
|
---|
111 | /// - 0x201-0x2FF: With match byte; match bit is 1
|
---|
112 | ///
|
---|
113 | /// Match byte is used when the previous LZMA symbol was something else than
|
---|
114 | /// a literal (that is, it was some kind of match).
|
---|
115 | #define LITERAL_CODER_SIZE 0x300
|
---|
116 |
|
---|
117 | /// Maximum number of literal coders
|
---|
118 | #define LITERAL_CODERS_MAX (1 << LZMA_LCLP_MAX)
|
---|
119 |
|
---|
120 | /// Locate the literal coder for the next literal byte. The choice depends on
|
---|
121 | /// - the lowest literal_pos_bits bits of the position of the current
|
---|
122 | /// byte; and
|
---|
123 | /// - the highest literal_context_bits bits of the previous byte.
|
---|
124 | #define literal_subcoder(probs, lc, lp_mask, pos, prev_byte) \
|
---|
125 | ((probs)[(((pos) & (lp_mask)) << (lc)) \
|
---|
126 | + ((uint32_t)(prev_byte) >> (8U - (lc)))])
|
---|
127 |
|
---|
128 |
|
---|
129 | static inline void
|
---|
130 | literal_init(probability (*probs)[LITERAL_CODER_SIZE],
|
---|
131 | uint32_t lc, uint32_t lp)
|
---|
132 | {
|
---|
133 | assert(lc + lp <= LZMA_LCLP_MAX);
|
---|
134 |
|
---|
135 | const uint32_t coders = 1U << (lc + lp);
|
---|
136 |
|
---|
137 | for (uint32_t i = 0; i < coders; ++i)
|
---|
138 | for (uint32_t j = 0; j < LITERAL_CODER_SIZE; ++j)
|
---|
139 | bit_reset(probs[i][j]);
|
---|
140 |
|
---|
141 | return;
|
---|
142 | }
|
---|
143 |
|
---|
144 |
|
---|
145 | //////////////////
|
---|
146 | // Match length //
|
---|
147 | //////////////////
|
---|
148 |
|
---|
149 | // Minimum length of a match is two bytes.
|
---|
150 | #define MATCH_LEN_MIN 2
|
---|
151 |
|
---|
152 | // Match length is encoded with 4, 5, or 10 bits.
|
---|
153 | //
|
---|
154 | // Length Bits
|
---|
155 | // 2-9 4 = Choice=0 + 3 bits
|
---|
156 | // 10-17 5 = Choice=1 + Choice2=0 + 3 bits
|
---|
157 | // 18-273 10 = Choice=1 + Choice2=1 + 8 bits
|
---|
158 | #define LEN_LOW_BITS 3
|
---|
159 | #define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
|
---|
160 | #define LEN_MID_BITS 3
|
---|
161 | #define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
|
---|
162 | #define LEN_HIGH_BITS 8
|
---|
163 | #define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
|
---|
164 | #define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
|
---|
165 |
|
---|
166 | // Maximum length of a match is 273 which is a result of the encoding
|
---|
167 | // described above.
|
---|
168 | #define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
|
---|
169 |
|
---|
170 |
|
---|
171 | ////////////////////
|
---|
172 | // Match distance //
|
---|
173 | ////////////////////
|
---|
174 |
|
---|
175 | // Different sets of probabilities are used for match distances that have very
|
---|
176 | // short match length: Lengths of 2, 3, and 4 bytes have a separate set of
|
---|
177 | // probabilities for each length. The matches with longer length use a shared
|
---|
178 | // set of probabilities.
|
---|
179 | #define DIST_STATES 4
|
---|
180 |
|
---|
181 | // Macro to get the index of the appropriate probability array.
|
---|
182 | #define get_dist_state(len) \
|
---|
183 | ((len) < DIST_STATES + MATCH_LEN_MIN \
|
---|
184 | ? (len) - MATCH_LEN_MIN \
|
---|
185 | : DIST_STATES - 1)
|
---|
186 |
|
---|
187 | // The highest two bits of a match distance (distance slot) are encoded
|
---|
188 | // using six bits. See fastpos.h for more explanation.
|
---|
189 | #define DIST_SLOT_BITS 6
|
---|
190 | #define DIST_SLOTS (1 << DIST_SLOT_BITS)
|
---|
191 |
|
---|
192 | // Match distances up to 127 are fully encoded using probabilities. Since
|
---|
193 | // the highest two bits (distance slot) are always encoded using six bits,
|
---|
194 | // the distances 0-3 don't need any additional bits to encode, since the
|
---|
195 | // distance slot itself is the same as the actual distance. DIST_MODEL_START
|
---|
196 | // indicates the first distance slot where at least one additional bit is
|
---|
197 | // needed.
|
---|
198 | #define DIST_MODEL_START 4
|
---|
199 |
|
---|
200 | // Match distances greater than 127 are encoded in three pieces:
|
---|
201 | // - distance slot: the highest two bits
|
---|
202 | // - direct bits: 2-26 bits below the highest two bits
|
---|
203 | // - alignment bits: four lowest bits
|
---|
204 | //
|
---|
205 | // Direct bits don't use any probabilities.
|
---|
206 | //
|
---|
207 | // The distance slot value of 14 is for distances 128-191 (see the table in
|
---|
208 | // fastpos.h to understand why).
|
---|
209 | #define DIST_MODEL_END 14
|
---|
210 |
|
---|
211 | // Distance slots that indicate a distance <= 127.
|
---|
212 | #define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
|
---|
213 | #define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
|
---|
214 |
|
---|
215 | // For match distances greater than 127, only the highest two bits and the
|
---|
216 | // lowest four bits (alignment) is encoded using probabilities.
|
---|
217 | #define ALIGN_BITS 4
|
---|
218 | #define ALIGN_SIZE (1 << ALIGN_BITS)
|
---|
219 | #define ALIGN_MASK (ALIGN_SIZE - 1)
|
---|
220 |
|
---|
221 | // LZMA remembers the four most recent match distances. Reusing these distances
|
---|
222 | // tends to take less space than re-encoding the actual distance value.
|
---|
223 | #define REPS 4
|
---|
224 |
|
---|
225 | #endif
|
---|