1 | ///////////////////////////////////////////////////////////////////////////////
|
---|
2 | //
|
---|
3 | /// \file lz_encoder.c
|
---|
4 | /// \brief LZ in window
|
---|
5 | ///
|
---|
6 | // Authors: Igor Pavlov
|
---|
7 | // Lasse Collin
|
---|
8 | //
|
---|
9 | // This file has been put into the public domain.
|
---|
10 | // You can do whatever you want with this file.
|
---|
11 | //
|
---|
12 | ///////////////////////////////////////////////////////////////////////////////
|
---|
13 |
|
---|
14 | #include "lz_encoder.h"
|
---|
15 | #include "lz_encoder_hash.h"
|
---|
16 |
|
---|
17 | // See lz_encoder_hash.h. This is a bit hackish but avoids making
|
---|
18 | // endianness a conditional in makefiles.
|
---|
19 | #if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL)
|
---|
20 | # include "lz_encoder_hash_table.h"
|
---|
21 | #endif
|
---|
22 |
|
---|
23 | #include "memcmplen.h"
|
---|
24 |
|
---|
25 |
|
---|
26 | typedef struct {
|
---|
27 | /// LZ-based encoder e.g. LZMA
|
---|
28 | lzma_lz_encoder lz;
|
---|
29 |
|
---|
30 | /// History buffer and match finder
|
---|
31 | lzma_mf mf;
|
---|
32 |
|
---|
33 | /// Next coder in the chain
|
---|
34 | lzma_next_coder next;
|
---|
35 | } lzma_coder;
|
---|
36 |
|
---|
37 |
|
---|
38 | /// \brief Moves the data in the input window to free space for new data
|
---|
39 | ///
|
---|
40 | /// mf->buffer is a sliding input window, which keeps mf->keep_size_before
|
---|
41 | /// bytes of input history available all the time. Now and then we need to
|
---|
42 | /// "slide" the buffer to make space for the new data to the end of the
|
---|
43 | /// buffer. At the same time, data older than keep_size_before is dropped.
|
---|
44 | ///
|
---|
45 | static void
|
---|
46 | move_window(lzma_mf *mf)
|
---|
47 | {
|
---|
48 | // Align the move to a multiple of 16 bytes. Some LZ-based encoders
|
---|
49 | // like LZMA use the lowest bits of mf->read_pos to know the
|
---|
50 | // alignment of the uncompressed data. We also get better speed
|
---|
51 | // for memmove() with aligned buffers.
|
---|
52 | assert(mf->read_pos > mf->keep_size_before);
|
---|
53 | const uint32_t move_offset
|
---|
54 | = (mf->read_pos - mf->keep_size_before) & ~UINT32_C(15);
|
---|
55 |
|
---|
56 | assert(mf->write_pos > move_offset);
|
---|
57 | const size_t move_size = mf->write_pos - move_offset;
|
---|
58 |
|
---|
59 | assert(move_offset + move_size <= mf->size);
|
---|
60 |
|
---|
61 | memmove(mf->buffer, mf->buffer + move_offset, move_size);
|
---|
62 |
|
---|
63 | mf->offset += move_offset;
|
---|
64 | mf->read_pos -= move_offset;
|
---|
65 | mf->read_limit -= move_offset;
|
---|
66 | mf->write_pos -= move_offset;
|
---|
67 |
|
---|
68 | return;
|
---|
69 | }
|
---|
70 |
|
---|
71 |
|
---|
72 | /// \brief Tries to fill the input window (mf->buffer)
|
---|
73 | ///
|
---|
74 | /// If we are the last encoder in the chain, our input data is in in[].
|
---|
75 | /// Otherwise we call the next filter in the chain to process in[] and
|
---|
76 | /// write its output to mf->buffer.
|
---|
77 | ///
|
---|
78 | /// This function must not be called once it has returned LZMA_STREAM_END.
|
---|
79 | ///
|
---|
80 | static lzma_ret
|
---|
81 | fill_window(lzma_coder *coder, const lzma_allocator *allocator,
|
---|
82 | const uint8_t *in, size_t *in_pos, size_t in_size,
|
---|
83 | lzma_action action)
|
---|
84 | {
|
---|
85 | assert(coder->mf.read_pos <= coder->mf.write_pos);
|
---|
86 |
|
---|
87 | // Move the sliding window if needed.
|
---|
88 | if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after)
|
---|
89 | move_window(&coder->mf);
|
---|
90 |
|
---|
91 | // Maybe this is ugly, but lzma_mf uses uint32_t for most things
|
---|
92 | // (which I find cleanest), but we need size_t here when filling
|
---|
93 | // the history window.
|
---|
94 | size_t write_pos = coder->mf.write_pos;
|
---|
95 | lzma_ret ret;
|
---|
96 | if (coder->next.code == NULL) {
|
---|
97 | // Not using a filter, simply memcpy() as much as possible.
|
---|
98 | lzma_bufcpy(in, in_pos, in_size, coder->mf.buffer,
|
---|
99 | &write_pos, coder->mf.size);
|
---|
100 |
|
---|
101 | ret = action != LZMA_RUN && *in_pos == in_size
|
---|
102 | ? LZMA_STREAM_END : LZMA_OK;
|
---|
103 |
|
---|
104 | } else {
|
---|
105 | ret = coder->next.code(coder->next.coder, allocator,
|
---|
106 | in, in_pos, in_size,
|
---|
107 | coder->mf.buffer, &write_pos,
|
---|
108 | coder->mf.size, action);
|
---|
109 | }
|
---|
110 |
|
---|
111 | coder->mf.write_pos = write_pos;
|
---|
112 |
|
---|
113 | // Silence Valgrind. lzma_memcmplen() can read extra bytes
|
---|
114 | // and Valgrind will give warnings if those bytes are uninitialized
|
---|
115 | // because Valgrind cannot see that the values of the uninitialized
|
---|
116 | // bytes are eventually ignored.
|
---|
117 | memzero(coder->mf.buffer + write_pos, LZMA_MEMCMPLEN_EXTRA);
|
---|
118 |
|
---|
119 | // If end of stream has been reached or flushing completed, we allow
|
---|
120 | // the encoder to process all the input (that is, read_pos is allowed
|
---|
121 | // to reach write_pos). Otherwise we keep keep_size_after bytes
|
---|
122 | // available as prebuffer.
|
---|
123 | if (ret == LZMA_STREAM_END) {
|
---|
124 | assert(*in_pos == in_size);
|
---|
125 | ret = LZMA_OK;
|
---|
126 | coder->mf.action = action;
|
---|
127 | coder->mf.read_limit = coder->mf.write_pos;
|
---|
128 |
|
---|
129 | } else if (coder->mf.write_pos > coder->mf.keep_size_after) {
|
---|
130 | // This needs to be done conditionally, because if we got
|
---|
131 | // only little new input, there may be too little input
|
---|
132 | // to do any encoding yet.
|
---|
133 | coder->mf.read_limit = coder->mf.write_pos
|
---|
134 | - coder->mf.keep_size_after;
|
---|
135 | }
|
---|
136 |
|
---|
137 | // Restart the match finder after finished LZMA_SYNC_FLUSH.
|
---|
138 | if (coder->mf.pending > 0
|
---|
139 | && coder->mf.read_pos < coder->mf.read_limit) {
|
---|
140 | // Match finder may update coder->pending and expects it to
|
---|
141 | // start from zero, so use a temporary variable.
|
---|
142 | const uint32_t pending = coder->mf.pending;
|
---|
143 | coder->mf.pending = 0;
|
---|
144 |
|
---|
145 | // Rewind read_pos so that the match finder can hash
|
---|
146 | // the pending bytes.
|
---|
147 | assert(coder->mf.read_pos >= pending);
|
---|
148 | coder->mf.read_pos -= pending;
|
---|
149 |
|
---|
150 | // Call the skip function directly instead of using
|
---|
151 | // mf_skip(), since we don't want to touch mf->read_ahead.
|
---|
152 | coder->mf.skip(&coder->mf, pending);
|
---|
153 | }
|
---|
154 |
|
---|
155 | return ret;
|
---|
156 | }
|
---|
157 |
|
---|
158 |
|
---|
159 | static lzma_ret
|
---|
160 | lz_encode(void *coder_ptr, const lzma_allocator *allocator,
|
---|
161 | const uint8_t *restrict in, size_t *restrict in_pos,
|
---|
162 | size_t in_size,
|
---|
163 | uint8_t *restrict out, size_t *restrict out_pos,
|
---|
164 | size_t out_size, lzma_action action)
|
---|
165 | {
|
---|
166 | lzma_coder *coder = coder_ptr;
|
---|
167 |
|
---|
168 | while (*out_pos < out_size
|
---|
169 | && (*in_pos < in_size || action != LZMA_RUN)) {
|
---|
170 | // Read more data to coder->mf.buffer if needed.
|
---|
171 | if (coder->mf.action == LZMA_RUN && coder->mf.read_pos
|
---|
172 | >= coder->mf.read_limit)
|
---|
173 | return_if_error(fill_window(coder, allocator,
|
---|
174 | in, in_pos, in_size, action));
|
---|
175 |
|
---|
176 | // Encode
|
---|
177 | const lzma_ret ret = coder->lz.code(coder->lz.coder,
|
---|
178 | &coder->mf, out, out_pos, out_size);
|
---|
179 | if (ret != LZMA_OK) {
|
---|
180 | // Setting this to LZMA_RUN for cases when we are
|
---|
181 | // flushing. It doesn't matter when finishing or if
|
---|
182 | // an error occurred.
|
---|
183 | coder->mf.action = LZMA_RUN;
|
---|
184 | return ret;
|
---|
185 | }
|
---|
186 | }
|
---|
187 |
|
---|
188 | return LZMA_OK;
|
---|
189 | }
|
---|
190 |
|
---|
191 |
|
---|
192 | static bool
|
---|
193 | lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,
|
---|
194 | const lzma_lz_options *lz_options)
|
---|
195 | {
|
---|
196 | // For now, the dictionary size is limited to 1.5 GiB. This may grow
|
---|
197 | // in the future if needed, but it needs a little more work than just
|
---|
198 | // changing this check.
|
---|
199 | if (lz_options->dict_size < LZMA_DICT_SIZE_MIN
|
---|
200 | || lz_options->dict_size
|
---|
201 | > (UINT32_C(1) << 30) + (UINT32_C(1) << 29)
|
---|
202 | || lz_options->nice_len > lz_options->match_len_max)
|
---|
203 | return true;
|
---|
204 |
|
---|
205 | mf->keep_size_before = lz_options->before_size + lz_options->dict_size;
|
---|
206 |
|
---|
207 | mf->keep_size_after = lz_options->after_size
|
---|
208 | + lz_options->match_len_max;
|
---|
209 |
|
---|
210 | // To avoid constant memmove()s, allocate some extra space. Since
|
---|
211 | // memmove()s become more expensive when the size of the buffer
|
---|
212 | // increases, we reserve more space when a large dictionary is
|
---|
213 | // used to make the memmove() calls rarer.
|
---|
214 | //
|
---|
215 | // This works with dictionaries up to about 3 GiB. If bigger
|
---|
216 | // dictionary is wanted, some extra work is needed:
|
---|
217 | // - Several variables in lzma_mf have to be changed from uint32_t
|
---|
218 | // to size_t.
|
---|
219 | // - Memory usage calculation needs something too, e.g. use uint64_t
|
---|
220 | // for mf->size.
|
---|
221 | uint32_t reserve = lz_options->dict_size / 2;
|
---|
222 | if (reserve > (UINT32_C(1) << 30))
|
---|
223 | reserve /= 2;
|
---|
224 |
|
---|
225 | reserve += (lz_options->before_size + lz_options->match_len_max
|
---|
226 | + lz_options->after_size) / 2 + (UINT32_C(1) << 19);
|
---|
227 |
|
---|
228 | const uint32_t old_size = mf->size;
|
---|
229 | mf->size = mf->keep_size_before + reserve + mf->keep_size_after;
|
---|
230 |
|
---|
231 | // Deallocate the old history buffer if it exists but has different
|
---|
232 | // size than what is needed now.
|
---|
233 | if (mf->buffer != NULL && old_size != mf->size) {
|
---|
234 | lzma_free(mf->buffer, allocator);
|
---|
235 | mf->buffer = NULL;
|
---|
236 | }
|
---|
237 |
|
---|
238 | // Match finder options
|
---|
239 | mf->match_len_max = lz_options->match_len_max;
|
---|
240 | mf->nice_len = lz_options->nice_len;
|
---|
241 |
|
---|
242 | // cyclic_size has to stay smaller than 2 Gi. Note that this doesn't
|
---|
243 | // mean limiting dictionary size to less than 2 GiB. With a match
|
---|
244 | // finder that uses multibyte resolution (hashes start at e.g. every
|
---|
245 | // fourth byte), cyclic_size would stay below 2 Gi even when
|
---|
246 | // dictionary size is greater than 2 GiB.
|
---|
247 | //
|
---|
248 | // It would be possible to allow cyclic_size >= 2 Gi, but then we
|
---|
249 | // would need to be careful to use 64-bit types in various places
|
---|
250 | // (size_t could do since we would need bigger than 32-bit address
|
---|
251 | // space anyway). It would also require either zeroing a multigigabyte
|
---|
252 | // buffer at initialization (waste of time and RAM) or allow
|
---|
253 | // normalization in lz_encoder_mf.c to access uninitialized
|
---|
254 | // memory to keep the code simpler. The current way is simple and
|
---|
255 | // still allows pretty big dictionaries, so I don't expect these
|
---|
256 | // limits to change.
|
---|
257 | mf->cyclic_size = lz_options->dict_size + 1;
|
---|
258 |
|
---|
259 | // Validate the match finder ID and setup the function pointers.
|
---|
260 | switch (lz_options->match_finder) {
|
---|
261 | #ifdef HAVE_MF_HC3
|
---|
262 | case LZMA_MF_HC3:
|
---|
263 | mf->find = &lzma_mf_hc3_find;
|
---|
264 | mf->skip = &lzma_mf_hc3_skip;
|
---|
265 | break;
|
---|
266 | #endif
|
---|
267 | #ifdef HAVE_MF_HC4
|
---|
268 | case LZMA_MF_HC4:
|
---|
269 | mf->find = &lzma_mf_hc4_find;
|
---|
270 | mf->skip = &lzma_mf_hc4_skip;
|
---|
271 | break;
|
---|
272 | #endif
|
---|
273 | #ifdef HAVE_MF_BT2
|
---|
274 | case LZMA_MF_BT2:
|
---|
275 | mf->find = &lzma_mf_bt2_find;
|
---|
276 | mf->skip = &lzma_mf_bt2_skip;
|
---|
277 | break;
|
---|
278 | #endif
|
---|
279 | #ifdef HAVE_MF_BT3
|
---|
280 | case LZMA_MF_BT3:
|
---|
281 | mf->find = &lzma_mf_bt3_find;
|
---|
282 | mf->skip = &lzma_mf_bt3_skip;
|
---|
283 | break;
|
---|
284 | #endif
|
---|
285 | #ifdef HAVE_MF_BT4
|
---|
286 | case LZMA_MF_BT4:
|
---|
287 | mf->find = &lzma_mf_bt4_find;
|
---|
288 | mf->skip = &lzma_mf_bt4_skip;
|
---|
289 | break;
|
---|
290 | #endif
|
---|
291 |
|
---|
292 | default:
|
---|
293 | return true;
|
---|
294 | }
|
---|
295 |
|
---|
296 | // Calculate the sizes of mf->hash and mf->son.
|
---|
297 | //
|
---|
298 | // NOTE: Since 5.3.5beta the LZMA encoder ensures that nice_len
|
---|
299 | // is big enough for the selected match finder. This makes it
|
---|
300 | // easier for applications as nice_len = 2 will always be accepted
|
---|
301 | // even though the effective value can be slightly bigger.
|
---|
302 | const uint32_t hash_bytes
|
---|
303 | = mf_get_hash_bytes(lz_options->match_finder);
|
---|
304 | assert(hash_bytes <= mf->nice_len);
|
---|
305 |
|
---|
306 | const bool is_bt = (lz_options->match_finder & 0x10) != 0;
|
---|
307 | uint32_t hs;
|
---|
308 |
|
---|
309 | if (hash_bytes == 2) {
|
---|
310 | hs = 0xFFFF;
|
---|
311 | } else {
|
---|
312 | // Round dictionary size up to the next 2^n - 1 so it can
|
---|
313 | // be used as a hash mask.
|
---|
314 | hs = lz_options->dict_size - 1;
|
---|
315 | hs |= hs >> 1;
|
---|
316 | hs |= hs >> 2;
|
---|
317 | hs |= hs >> 4;
|
---|
318 | hs |= hs >> 8;
|
---|
319 | hs >>= 1;
|
---|
320 | hs |= 0xFFFF;
|
---|
321 |
|
---|
322 | if (hs > (UINT32_C(1) << 24)) {
|
---|
323 | if (hash_bytes == 3)
|
---|
324 | hs = (UINT32_C(1) << 24) - 1;
|
---|
325 | else
|
---|
326 | hs >>= 1;
|
---|
327 | }
|
---|
328 | }
|
---|
329 |
|
---|
330 | mf->hash_mask = hs;
|
---|
331 |
|
---|
332 | ++hs;
|
---|
333 | if (hash_bytes > 2)
|
---|
334 | hs += HASH_2_SIZE;
|
---|
335 | if (hash_bytes > 3)
|
---|
336 | hs += HASH_3_SIZE;
|
---|
337 | /*
|
---|
338 | No match finder uses this at the moment.
|
---|
339 | if (mf->hash_bytes > 4)
|
---|
340 | hs += HASH_4_SIZE;
|
---|
341 | */
|
---|
342 |
|
---|
343 | const uint32_t old_hash_count = mf->hash_count;
|
---|
344 | const uint32_t old_sons_count = mf->sons_count;
|
---|
345 | mf->hash_count = hs;
|
---|
346 | mf->sons_count = mf->cyclic_size;
|
---|
347 | if (is_bt)
|
---|
348 | mf->sons_count *= 2;
|
---|
349 |
|
---|
350 | // Deallocate the old hash array if it exists and has different size
|
---|
351 | // than what is needed now.
|
---|
352 | if (old_hash_count != mf->hash_count
|
---|
353 | || old_sons_count != mf->sons_count) {
|
---|
354 | lzma_free(mf->hash, allocator);
|
---|
355 | mf->hash = NULL;
|
---|
356 |
|
---|
357 | lzma_free(mf->son, allocator);
|
---|
358 | mf->son = NULL;
|
---|
359 | }
|
---|
360 |
|
---|
361 | // Maximum number of match finder cycles
|
---|
362 | mf->depth = lz_options->depth;
|
---|
363 | if (mf->depth == 0) {
|
---|
364 | if (is_bt)
|
---|
365 | mf->depth = 16 + mf->nice_len / 2;
|
---|
366 | else
|
---|
367 | mf->depth = 4 + mf->nice_len / 4;
|
---|
368 | }
|
---|
369 |
|
---|
370 | return false;
|
---|
371 | }
|
---|
372 |
|
---|
373 |
|
---|
374 | static bool
|
---|
375 | lz_encoder_init(lzma_mf *mf, const lzma_allocator *allocator,
|
---|
376 | const lzma_lz_options *lz_options)
|
---|
377 | {
|
---|
378 | // Allocate the history buffer.
|
---|
379 | if (mf->buffer == NULL) {
|
---|
380 | // lzma_memcmplen() is used for the dictionary buffer
|
---|
381 | // so we need to allocate a few extra bytes to prevent
|
---|
382 | // it from reading past the end of the buffer.
|
---|
383 | mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA,
|
---|
384 | allocator);
|
---|
385 | if (mf->buffer == NULL)
|
---|
386 | return true;
|
---|
387 |
|
---|
388 | // Keep Valgrind happy with lzma_memcmplen() and initialize
|
---|
389 | // the extra bytes whose value may get read but which will
|
---|
390 | // effectively get ignored.
|
---|
391 | memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA);
|
---|
392 | }
|
---|
393 |
|
---|
394 | // Use cyclic_size as initial mf->offset. This allows
|
---|
395 | // avoiding a few branches in the match finders. The downside is
|
---|
396 | // that match finder needs to be normalized more often, which may
|
---|
397 | // hurt performance with huge dictionaries.
|
---|
398 | mf->offset = mf->cyclic_size;
|
---|
399 | mf->read_pos = 0;
|
---|
400 | mf->read_ahead = 0;
|
---|
401 | mf->read_limit = 0;
|
---|
402 | mf->write_pos = 0;
|
---|
403 | mf->pending = 0;
|
---|
404 |
|
---|
405 | #if UINT32_MAX >= SIZE_MAX / 4
|
---|
406 | // Check for integer overflow. (Huge dictionaries are not
|
---|
407 | // possible on 32-bit CPU.)
|
---|
408 | if (mf->hash_count > SIZE_MAX / sizeof(uint32_t)
|
---|
409 | || mf->sons_count > SIZE_MAX / sizeof(uint32_t))
|
---|
410 | return true;
|
---|
411 | #endif
|
---|
412 |
|
---|
413 | // Allocate and initialize the hash table. Since EMPTY_HASH_VALUE
|
---|
414 | // is zero, we can use lzma_alloc_zero() or memzero() for mf->hash.
|
---|
415 | //
|
---|
416 | // We don't need to initialize mf->son, but not doing that may
|
---|
417 | // make Valgrind complain in normalization (see normalize() in
|
---|
418 | // lz_encoder_mf.c). Skipping the initialization is *very* good
|
---|
419 | // when big dictionary is used but only small amount of data gets
|
---|
420 | // actually compressed: most of the mf->son won't get actually
|
---|
421 | // allocated by the kernel, so we avoid wasting RAM and improve
|
---|
422 | // initialization speed a lot.
|
---|
423 | if (mf->hash == NULL) {
|
---|
424 | mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t),
|
---|
425 | allocator);
|
---|
426 | mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t),
|
---|
427 | allocator);
|
---|
428 |
|
---|
429 | if (mf->hash == NULL || mf->son == NULL) {
|
---|
430 | lzma_free(mf->hash, allocator);
|
---|
431 | mf->hash = NULL;
|
---|
432 |
|
---|
433 | lzma_free(mf->son, allocator);
|
---|
434 | mf->son = NULL;
|
---|
435 |
|
---|
436 | return true;
|
---|
437 | }
|
---|
438 | } else {
|
---|
439 | /*
|
---|
440 | for (uint32_t i = 0; i < mf->hash_count; ++i)
|
---|
441 | mf->hash[i] = EMPTY_HASH_VALUE;
|
---|
442 | */
|
---|
443 | memzero(mf->hash, mf->hash_count * sizeof(uint32_t));
|
---|
444 | }
|
---|
445 |
|
---|
446 | mf->cyclic_pos = 0;
|
---|
447 |
|
---|
448 | // Handle preset dictionary.
|
---|
449 | if (lz_options->preset_dict != NULL
|
---|
450 | && lz_options->preset_dict_size > 0) {
|
---|
451 | // If the preset dictionary is bigger than the actual
|
---|
452 | // dictionary, use only the tail.
|
---|
453 | mf->write_pos = my_min(lz_options->preset_dict_size, mf->size);
|
---|
454 | memcpy(mf->buffer, lz_options->preset_dict
|
---|
455 | + lz_options->preset_dict_size - mf->write_pos,
|
---|
456 | mf->write_pos);
|
---|
457 | mf->action = LZMA_SYNC_FLUSH;
|
---|
458 | mf->skip(mf, mf->write_pos);
|
---|
459 | }
|
---|
460 |
|
---|
461 | mf->action = LZMA_RUN;
|
---|
462 |
|
---|
463 | return false;
|
---|
464 | }
|
---|
465 |
|
---|
466 |
|
---|
467 | extern uint64_t
|
---|
468 | lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
|
---|
469 | {
|
---|
470 | // Old buffers must not exist when calling lz_encoder_prepare().
|
---|
471 | lzma_mf mf = {
|
---|
472 | .buffer = NULL,
|
---|
473 | .hash = NULL,
|
---|
474 | .son = NULL,
|
---|
475 | .hash_count = 0,
|
---|
476 | .sons_count = 0,
|
---|
477 | };
|
---|
478 |
|
---|
479 | // Setup the size information into mf.
|
---|
480 | if (lz_encoder_prepare(&mf, NULL, lz_options))
|
---|
481 | return UINT64_MAX;
|
---|
482 |
|
---|
483 | // Calculate the memory usage.
|
---|
484 | return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t)
|
---|
485 | + mf.size + sizeof(lzma_coder);
|
---|
486 | }
|
---|
487 |
|
---|
488 |
|
---|
489 | static void
|
---|
490 | lz_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
|
---|
491 | {
|
---|
492 | lzma_coder *coder = coder_ptr;
|
---|
493 |
|
---|
494 | lzma_next_end(&coder->next, allocator);
|
---|
495 |
|
---|
496 | lzma_free(coder->mf.son, allocator);
|
---|
497 | lzma_free(coder->mf.hash, allocator);
|
---|
498 | lzma_free(coder->mf.buffer, allocator);
|
---|
499 |
|
---|
500 | if (coder->lz.end != NULL)
|
---|
501 | coder->lz.end(coder->lz.coder, allocator);
|
---|
502 | else
|
---|
503 | lzma_free(coder->lz.coder, allocator);
|
---|
504 |
|
---|
505 | lzma_free(coder, allocator);
|
---|
506 | return;
|
---|
507 | }
|
---|
508 |
|
---|
509 |
|
---|
510 | static lzma_ret
|
---|
511 | lz_encoder_update(void *coder_ptr, const lzma_allocator *allocator,
|
---|
512 | const lzma_filter *filters_null lzma_attribute((__unused__)),
|
---|
513 | const lzma_filter *reversed_filters)
|
---|
514 | {
|
---|
515 | lzma_coder *coder = coder_ptr;
|
---|
516 |
|
---|
517 | if (coder->lz.options_update == NULL)
|
---|
518 | return LZMA_PROG_ERROR;
|
---|
519 |
|
---|
520 | return_if_error(coder->lz.options_update(
|
---|
521 | coder->lz.coder, reversed_filters));
|
---|
522 |
|
---|
523 | return lzma_next_filter_update(
|
---|
524 | &coder->next, allocator, reversed_filters + 1);
|
---|
525 | }
|
---|
526 |
|
---|
527 |
|
---|
528 | static lzma_ret
|
---|
529 | lz_encoder_set_out_limit(void *coder_ptr, uint64_t *uncomp_size,
|
---|
530 | uint64_t out_limit)
|
---|
531 | {
|
---|
532 | lzma_coder *coder = coder_ptr;
|
---|
533 |
|
---|
534 | // This is supported only if there are no other filters chained.
|
---|
535 | if (coder->next.code == NULL && coder->lz.set_out_limit != NULL)
|
---|
536 | return coder->lz.set_out_limit(
|
---|
537 | coder->lz.coder, uncomp_size, out_limit);
|
---|
538 |
|
---|
539 | return LZMA_OPTIONS_ERROR;
|
---|
540 | }
|
---|
541 |
|
---|
542 |
|
---|
543 | extern lzma_ret
|
---|
544 | lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
|
---|
545 | const lzma_filter_info *filters,
|
---|
546 | lzma_ret (*lz_init)(lzma_lz_encoder *lz,
|
---|
547 | const lzma_allocator *allocator,
|
---|
548 | lzma_vli id, const void *options,
|
---|
549 | lzma_lz_options *lz_options))
|
---|
550 | {
|
---|
551 | #if defined(HAVE_SMALL) && !defined(HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR)
|
---|
552 | // We need that the CRC32 table has been initialized.
|
---|
553 | lzma_crc32_init();
|
---|
554 | #endif
|
---|
555 |
|
---|
556 | // Allocate and initialize the base data structure.
|
---|
557 | lzma_coder *coder = next->coder;
|
---|
558 | if (coder == NULL) {
|
---|
559 | coder = lzma_alloc(sizeof(lzma_coder), allocator);
|
---|
560 | if (coder == NULL)
|
---|
561 | return LZMA_MEM_ERROR;
|
---|
562 |
|
---|
563 | next->coder = coder;
|
---|
564 | next->code = &lz_encode;
|
---|
565 | next->end = &lz_encoder_end;
|
---|
566 | next->update = &lz_encoder_update;
|
---|
567 | next->set_out_limit = &lz_encoder_set_out_limit;
|
---|
568 |
|
---|
569 | coder->lz.coder = NULL;
|
---|
570 | coder->lz.code = NULL;
|
---|
571 | coder->lz.end = NULL;
|
---|
572 |
|
---|
573 | // mf.size is initialized to silence Valgrind
|
---|
574 | // when used on optimized binaries (GCC may reorder
|
---|
575 | // code in a way that Valgrind gets unhappy).
|
---|
576 | coder->mf.buffer = NULL;
|
---|
577 | coder->mf.size = 0;
|
---|
578 | coder->mf.hash = NULL;
|
---|
579 | coder->mf.son = NULL;
|
---|
580 | coder->mf.hash_count = 0;
|
---|
581 | coder->mf.sons_count = 0;
|
---|
582 |
|
---|
583 | coder->next = LZMA_NEXT_CODER_INIT;
|
---|
584 | }
|
---|
585 |
|
---|
586 | // Initialize the LZ-based encoder.
|
---|
587 | lzma_lz_options lz_options;
|
---|
588 | return_if_error(lz_init(&coder->lz, allocator,
|
---|
589 | filters[0].id, filters[0].options, &lz_options));
|
---|
590 |
|
---|
591 | // Setup the size information into coder->mf and deallocate
|
---|
592 | // old buffers if they have wrong size.
|
---|
593 | if (lz_encoder_prepare(&coder->mf, allocator, &lz_options))
|
---|
594 | return LZMA_OPTIONS_ERROR;
|
---|
595 |
|
---|
596 | // Allocate new buffers if needed, and do the rest of
|
---|
597 | // the initialization.
|
---|
598 | if (lz_encoder_init(&coder->mf, allocator, &lz_options))
|
---|
599 | return LZMA_MEM_ERROR;
|
---|
600 |
|
---|
601 | // Initialize the next filter in the chain, if any.
|
---|
602 | return lzma_next_filter_init(&coder->next, allocator, filters + 1);
|
---|
603 | }
|
---|
604 |
|
---|
605 |
|
---|
606 | extern LZMA_API(lzma_bool)
|
---|
607 | lzma_mf_is_supported(lzma_match_finder mf)
|
---|
608 | {
|
---|
609 | switch (mf) {
|
---|
610 | #ifdef HAVE_MF_HC3
|
---|
611 | case LZMA_MF_HC3:
|
---|
612 | return true;
|
---|
613 | #endif
|
---|
614 | #ifdef HAVE_MF_HC4
|
---|
615 | case LZMA_MF_HC4:
|
---|
616 | return true;
|
---|
617 | #endif
|
---|
618 | #ifdef HAVE_MF_BT2
|
---|
619 | case LZMA_MF_BT2:
|
---|
620 | return true;
|
---|
621 | #endif
|
---|
622 | #ifdef HAVE_MF_BT3
|
---|
623 | case LZMA_MF_BT3:
|
---|
624 | return true;
|
---|
625 | #endif
|
---|
626 | #ifdef HAVE_MF_BT4
|
---|
627 | case LZMA_MF_BT4:
|
---|
628 | return true;
|
---|
629 | #endif
|
---|
630 | default:
|
---|
631 | return false;
|
---|
632 | }
|
---|
633 | }
|
---|