VirtualBox

source: vbox/trunk/src/libs/liblzma-5.4.1/common/file_info.c@ 98879

Last change on this file since 98879 was 98730, checked in by vboxsync, 21 months ago

libs/liblzma-5.4.1: Export to OSE, bugref:10254

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 28.0 KB
Line 
1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file file_info.c
4/// \brief Decode .xz file information into a lzma_index structure
5//
6// Author: Lasse Collin
7//
8// This file has been put into the public domain.
9// You can do whatever you want with this file.
10//
11///////////////////////////////////////////////////////////////////////////////
12
13#include "index_decoder.h"
14
15
16typedef struct {
17 enum {
18 SEQ_MAGIC_BYTES,
19 SEQ_PADDING_SEEK,
20 SEQ_PADDING_DECODE,
21 SEQ_FOOTER,
22 SEQ_INDEX_INIT,
23 SEQ_INDEX_DECODE,
24 SEQ_HEADER_DECODE,
25 SEQ_HEADER_COMPARE,
26 } sequence;
27
28 /// Absolute position of in[*in_pos] in the file. All code that
29 /// modifies *in_pos also updates this. seek_to_pos() needs this
30 /// to determine if we need to request the application to seek for
31 /// us or if we can do the seeking internally by adjusting *in_pos.
32 uint64_t file_cur_pos;
33
34 /// This refers to absolute positions of interesting parts of the
35 /// input file. Sometimes it points to the *beginning* of a specific
36 /// field and sometimes to the *end* of a field. The current target
37 /// position at each moment is explained in the comments.
38 uint64_t file_target_pos;
39
40 /// Size of the .xz file (from the application).
41 uint64_t file_size;
42
43 /// Index decoder
44 lzma_next_coder index_decoder;
45
46 /// Number of bytes remaining in the Index field that is currently
47 /// being decoded.
48 lzma_vli index_remaining;
49
50 /// The Index decoder will store the decoded Index in this pointer.
51 lzma_index *this_index;
52
53 /// Amount of Stream Padding in the current Stream.
54 lzma_vli stream_padding;
55
56 /// The final combined index is collected here.
57 lzma_index *combined_index;
58
59 /// Pointer from the application where to store the index information
60 /// after successful decoding.
61 lzma_index **dest_index;
62
63 /// Pointer to lzma_stream.seek_pos to be used when returning
64 /// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed.
65 uint64_t *external_seek_pos;
66
67 /// Memory usage limit
68 uint64_t memlimit;
69
70 /// Stream Flags from the very beginning of the file.
71 lzma_stream_flags first_header_flags;
72
73 /// Stream Flags from Stream Header of the current Stream.
74 lzma_stream_flags header_flags;
75
76 /// Stream Flags from Stream Footer of the current Stream.
77 lzma_stream_flags footer_flags;
78
79 size_t temp_pos;
80 size_t temp_size;
81 uint8_t temp[8192];
82
83} lzma_file_info_coder;
84
85
86/// Copies data from in[*in_pos] into coder->temp until
87/// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos
88/// in sync with *in_pos. Returns true if more input is needed.
89static bool
90fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in,
91 size_t *restrict in_pos, size_t in_size)
92{
93 coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size,
94 coder->temp, &coder->temp_pos, coder->temp_size);
95 return coder->temp_pos < coder->temp_size;
96}
97
98
99/// Seeks to the absolute file position specified by target_pos.
100/// This tries to do the seeking by only modifying *in_pos, if possible.
101/// The main benefit of this is that if one passes the whole file at once
102/// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED
103/// as all the seeking can be done by adjusting *in_pos in this function.
104///
105/// Returns true if an external seek is needed and the caller must return
106/// LZMA_SEEK_NEEDED.
107static bool
108seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos,
109 size_t in_start, size_t *in_pos, size_t in_size)
110{
111 // The input buffer doesn't extend beyond the end of the file.
112 // This has been checked by file_info_decode() already.
113 assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos);
114
115 const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start);
116 const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos);
117
118 bool external_seek_needed;
119
120 if (target_pos >= pos_min && target_pos <= pos_max) {
121 // The requested position is available in the current input
122 // buffer or right after it. That is, in a corner case we
123 // end up setting *in_pos == in_size and thus will immediately
124 // need new input bytes from the application.
125 *in_pos += (size_t)(target_pos - coder->file_cur_pos);
126 external_seek_needed = false;
127 } else {
128 // Ask the application to seek the input file.
129 *coder->external_seek_pos = target_pos;
130 external_seek_needed = true;
131
132 // Mark the whole input buffer as used. This way
133 // lzma_stream.total_in will have a better estimate
134 // of the amount of data read. It still won't be perfect
135 // as the value will depend on the input buffer size that
136 // the application uses, but it should be good enough for
137 // those few who want an estimate.
138 *in_pos = in_size;
139 }
140
141 // After seeking (internal or external) the current position
142 // will match the requested target position.
143 coder->file_cur_pos = target_pos;
144
145 return external_seek_needed;
146}
147
148
149/// The caller sets coder->file_target_pos so that it points to the *end*
150/// of the desired file position. This function then determines how far
151/// backwards from that position we can seek. After seeking fill_temp()
152/// can be used to read data into coder->temp. When fill_temp() has finished,
153/// coder->temp[coder->temp_size] will match coder->file_target_pos.
154///
155/// This also validates that coder->target_file_pos is sane in sense that
156/// we aren't trying to seek too far backwards (too close or beyond the
157/// beginning of the file).
158static lzma_ret
159reverse_seek(lzma_file_info_coder *coder,
160 size_t in_start, size_t *in_pos, size_t in_size)
161{
162 // Check that there is enough data before the target position
163 // to contain at least Stream Header and Stream Footer. If there
164 // isn't, the file cannot be valid.
165 if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE)
166 return LZMA_DATA_ERROR;
167
168 coder->temp_pos = 0;
169
170 // The Stream Header at the very beginning of the file gets handled
171 // specially in SEQ_MAGIC_BYTES and thus we will never need to seek
172 // there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes
173 // we avoid a useless external seek after SEQ_MAGIC_BYTES if the
174 // application uses an extremely small input buffer and the input
175 // file is very small.
176 if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE
177 < sizeof(coder->temp))
178 coder->temp_size = (size_t)(coder->file_target_pos
179 - LZMA_STREAM_HEADER_SIZE);
180 else
181 coder->temp_size = sizeof(coder->temp);
182
183 // The above if-statements guarantee this. This is important because
184 // the Stream Header/Footer decoders assume that there's at least
185 // LZMA_STREAM_HEADER_SIZE bytes in coder->temp.
186 assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE);
187
188 if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size,
189 in_start, in_pos, in_size))
190 return LZMA_SEEK_NEEDED;
191
192 return LZMA_OK;
193}
194
195
/// Counts the consecutive zero-bytes at the end of buf[0..buf_size).
/// Returns 0 for an empty buffer and buf_size if every byte is zero.
static size_t
get_padding_size(const uint8_t *buf, size_t buf_size)
{
	// Walk backwards until a non-zero byte or the start of the buffer.
	size_t data_end = buf_size;
	while (data_end > 0 && buf[data_end - 1] == 0x00)
		--data_end;

	return buf_size - data_end;
}
206
207
208/// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR
209/// is used to tell the application that Magic Bytes didn't match. In other
210/// Stream Header/Footer fields (in the middle/end of the file) it could be
211/// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there
212/// is a valid Stream Header at the beginning of the file. For those cases
213/// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR.
214static lzma_ret
215hide_format_error(lzma_ret ret)
216{
217 if (ret == LZMA_FORMAT_ERROR)
218 ret = LZMA_DATA_ERROR;
219
220 return ret;
221}
222
223
224/// Calls the Index decoder and updates coder->index_remaining.
225/// This is a separate function because the input can be either directly
226/// from the application or from coder->temp.
227static lzma_ret
228decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
229 const uint8_t *restrict in, size_t *restrict in_pos,
230 size_t in_size, bool update_file_cur_pos)
231{
232 const size_t in_start = *in_pos;
233
234 const lzma_ret ret = coder->index_decoder.code(
235 coder->index_decoder.coder,
236 allocator, in, in_pos, in_size,
237 NULL, NULL, 0, LZMA_RUN);
238
239 coder->index_remaining -= *in_pos - in_start;
240
241 if (update_file_cur_pos)
242 coder->file_cur_pos += *in_pos - in_start;
243
244 return ret;
245}
246
247
/// Main decoding function: a state machine driven by coder->sequence that
/// walks the .xz file backwards, one Stream at a time.
///
/// After the Stream Header at the start of the file has been decoded
/// (SEQ_MAGIC_BYTES), each Stream is processed starting from the end of
/// the file: Stream Padding, Stream Footer, Index, and Stream Header.
/// The Index of each Stream is joined into coder->combined_index.
///
/// The out, out_pos, out_size, and action arguments are unused.
///
/// Returns LZMA_OK when more input is needed, LZMA_SEEK_NEEDED when the
/// application must seek to *coder->external_seek_pos before providing
/// more input, LZMA_STREAM_END when the whole file has been processed and
/// the combined Index has been stored to *coder->dest_index, or an error
/// code (for example LZMA_FORMAT_ERROR or LZMA_DATA_ERROR).
248static lzma_ret
249file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
250 const uint8_t *restrict in, size_t *restrict in_pos,
251 size_t in_size,
252 uint8_t *restrict out lzma_attribute((__unused__)),
253 size_t *restrict out_pos lzma_attribute((__unused__)),
254 size_t out_size lzma_attribute((__unused__)),
255 lzma_action action lzma_attribute((__unused__)))
256{
257 lzma_file_info_coder *coder = coder_ptr;
258 const size_t in_start = *in_pos;
259
260 // If the caller provides input past the end of the file, trim
261 // the extra bytes from the buffer so that we won't read too far.
262 assert(coder->file_size >= coder->file_cur_pos);
263 if (coder->file_size - coder->file_cur_pos < in_size - in_start)
264 in_size = in_start
265 + (size_t)(coder->file_size - coder->file_cur_pos);
266
267 while (true)
268 switch (coder->sequence) {
269 case SEQ_MAGIC_BYTES:
270 // Decode the Stream Header at the beginning of the file
271 // first to check if the Magic Bytes match. The flags
272 // are stored in coder->first_header_flags so that we
273 // don't need to seek to it again.
274 //
275 // Check that the file is big enough to contain at least
276 // Stream Header.
277 if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
278 return LZMA_FORMAT_ERROR;
279
280 // Read the Stream Header field into coder->temp.
281 if (fill_temp(coder, in, in_pos, in_size))
282 return LZMA_OK;
283
284 // This is the only Stream Header/Footer decoding where we
285 // want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
286 // match. Elsewhere it will be converted to LZMA_DATA_ERROR.
287 return_if_error(lzma_stream_header_decode(
288 &coder->first_header_flags, coder->temp));
289
290 // Now that we know that the Magic Bytes match, check the
291 // file size. It's better to do this here after checking the
292 // Magic Bytes since this way we can give LZMA_FORMAT_ERROR
293 // instead of LZMA_DATA_ERROR when the Magic Bytes don't
294 // match in a file that is too big or isn't a multiple of
295 // four bytes.
296 if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
297 return LZMA_DATA_ERROR;
298
299 // Start looking for Stream Padding and Stream Footer
300 // at the end of the file.
301 coder->file_target_pos = coder->file_size;
302
303 // Fall through
304
305 case SEQ_PADDING_SEEK:
306 coder->sequence = SEQ_PADDING_DECODE;
307 return_if_error(reverse_seek(
308 coder, in_start, in_pos, in_size));
309
310 // Fall through
311
312 case SEQ_PADDING_DECODE: {
313 // Copy to coder->temp first. This keeps the code simpler if
314 // the application only provides input a few bytes at a time.
315 if (fill_temp(coder, in, in_pos, in_size))
316 return LZMA_OK;
317
318 // Scan the buffer backwards to get the size of the
319 // Stream Padding field (if any).
320 const size_t new_padding = get_padding_size(
321 coder->temp, coder->temp_size);
322 coder->stream_padding += new_padding;
323
324 // Set the target position to the beginning of Stream Padding
325 // that has been observed so far. If all Stream Padding has
326 // been seen, then the target position will be at the end
327 // of the Stream Footer field.
328 coder->file_target_pos -= new_padding;
329
330 if (new_padding == coder->temp_size) {
331 // The whole buffer was padding. Seek backwards in
332 // the file to get more input.
333 coder->sequence = SEQ_PADDING_SEEK;
334 break;
335 }
336
337 // Size of Stream Padding must be a multiple of 4 bytes.
338 if (coder->stream_padding & 3)
339 return LZMA_DATA_ERROR;
340
341 coder->sequence = SEQ_FOOTER;
342
343 // Calculate the amount of non-padding data in coder->temp.
344 coder->temp_size -= new_padding;
345 coder->temp_pos = coder->temp_size;
346
347 // We can avoid an external seek if the whole Stream Footer
348 // is already in coder->temp. In that case SEQ_FOOTER won't
349 // read more input and will find the Stream Footer from
350 // coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
351 //
352 // Otherwise we will need to seek. The seeking is done so
353 // that Stream Footer will be at the end of coder->temp.
354 // This way it's likely that we also get a complete Index
355 // field into coder->temp without needing a separate seek
356 // for that (unless the Index field is big).
357 if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
358 return_if_error(reverse_seek(
359 coder, in_start, in_pos, in_size));
360 }
361
362 // Fall through
363
364 case SEQ_FOOTER:
365 // Copy the Stream Footer field into coder->temp.
366 // If Stream Footer was already available in coder->temp
367 // in SEQ_PADDING_DECODE, then this does nothing.
368 if (fill_temp(coder, in, in_pos, in_size))
369 return LZMA_OK;
370
371 // Make coder->file_target_pos and coder->temp_size point
372 // to the beginning of Stream Footer and thus to the end
373 // of the Index field. coder->temp_pos will be updated
374 // a bit later.
375 coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
376 coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
377
378 // Decode Stream Footer.
379 return_if_error(hide_format_error(lzma_stream_footer_decode(
380 &coder->footer_flags,
381 coder->temp + coder->temp_size)));
382
383 // Check that we won't seek past the beginning of the file.
384 //
385 // LZMA_STREAM_HEADER_SIZE is added because there must be
386 // space for Stream Header too even though we won't seek
387 // there before decoding the Index field.
388 //
389 // There's no risk of integer overflow here because
390 // Backward Size cannot be greater than 2^34.
391 if (coder->file_target_pos < coder->footer_flags.backward_size
392 + LZMA_STREAM_HEADER_SIZE)
393 return LZMA_DATA_ERROR;
394
395 // Set the target position to the beginning of the Index field.
396 coder->file_target_pos -= coder->footer_flags.backward_size;
397 coder->sequence = SEQ_INDEX_INIT;
398
399 // We can avoid an external seek if the whole Index field is
400 // already available in coder->temp.
401 if (coder->temp_size >= coder->footer_flags.backward_size) {
402 // Set coder->temp_pos to point to the beginning
403 // of the Index.
404 coder->temp_pos = coder->temp_size
405 - coder->footer_flags.backward_size;
406 } else {
407 // These are set to zero to indicate that there's no
408 // useful data (Index or anything else) in coder->temp.
409 coder->temp_pos = 0;
410 coder->temp_size = 0;
411
412 // Seek to the beginning of the Index field.
413 if (seek_to_pos(coder, coder->file_target_pos,
414 in_start, in_pos, in_size))
415 return LZMA_SEEK_NEEDED;
416 }
417
418 // Fall through
419
420 case SEQ_INDEX_INIT: {
421 // Calculate the amount of memory already used by the earlier
422 // Indexes so that we know how big memory limit to pass to
423 // the Index decoder.
424 //
425 // NOTE: When there are multiple Streams, the separate
426 // lzma_index structures can use more RAM (as measured by
427 // lzma_index_memused()) than the final combined lzma_index.
428 // Thus memlimit may need to be slightly higher than the final
429 // calculated memory usage will be. This is perhaps a bit
430 // confusing to the application, but I think it shouldn't
431 // cause problems in practice.
432 uint64_t memused = 0;
433 if (coder->combined_index != NULL) {
434 memused = lzma_index_memused(coder->combined_index);
435 assert(memused <= coder->memlimit);
436 if (memused > coder->memlimit) // Extra sanity check
437 return LZMA_PROG_ERROR;
438 }
439
440 // Initialize the Index decoder.
441 return_if_error(lzma_index_decoder_init(
442 &coder->index_decoder, allocator,
443 &coder->this_index,
444 coder->memlimit - memused));
445
446 coder->index_remaining = coder->footer_flags.backward_size;
447 coder->sequence = SEQ_INDEX_DECODE;
448 }
449
450 // Fall through
451
452 case SEQ_INDEX_DECODE: {
453 // Decode (a part of) the Index. If the whole Index is already
454 // in coder->temp, read it from there. Otherwise read from
455 // in[*in_pos] onwards. Note that decode_index() updates
456 // coder->index_remaining and optionally coder->file_cur_pos.
457 lzma_ret ret;
458 if (coder->temp_size != 0) {
459 assert(coder->temp_size - coder->temp_pos
460 == coder->index_remaining);
461 ret = decode_index(coder, allocator, coder->temp,
462 &coder->temp_pos, coder->temp_size,
463 false);
464 } else {
465 // Don't give the decoder more input than the known
466 // remaining size of the Index field.
467 size_t in_stop = in_size;
468 if (in_size - *in_pos > coder->index_remaining)
469 in_stop = *in_pos
470 + (size_t)(coder->index_remaining);
471
472 ret = decode_index(coder, allocator,
473 in, in_pos, in_stop, true);
474 }
475
476 switch (ret) {
477 case LZMA_OK:
478 // If the Index decoder asks for more input when we
479 // have already given it as much input as Backward Size
480 // indicated, the file is invalid.
481 if (coder->index_remaining == 0)
482 return LZMA_DATA_ERROR;
483
484 // We cannot get here if we were reading Index from
485 // coder->temp because when reading from coder->temp
486 // we give the Index decoder exactly
487 // coder->index_remaining bytes of input.
488 assert(coder->temp_size == 0);
489
490 return LZMA_OK;
491
492 case LZMA_STREAM_END:
493 // If the decoding seems to be successful, check also
494 // that the Index decoder consumed as much input as
495 // indicated by the Backward Size field.
496 if (coder->index_remaining != 0)
497 return LZMA_DATA_ERROR;
498
499 break;
500
501 default:
502 return ret;
503 }
504
505 // Calculate how much the Index tells us to seek backwards
506 // (relative to the beginning of the Index): Total size of
507 // all Blocks plus the size of the Stream Header field.
508 // No integer overflow here because lzma_index_total_size()
509 // cannot return a value greater than LZMA_VLI_MAX.
510 const uint64_t seek_amount
511 = lzma_index_total_size(coder->this_index)
512 + LZMA_STREAM_HEADER_SIZE;
513
514 // Check that Index is sane in sense that seek_amount won't
515 // make us seek past the beginning of the file when locating
516 // the Stream Header.
517 //
518 // coder->file_target_pos still points to the beginning of
519 // the Index field.
520 if (coder->file_target_pos < seek_amount)
521 return LZMA_DATA_ERROR;
522
523 // Set the target to the beginning of Stream Header.
524 coder->file_target_pos -= seek_amount;
525
526 if (coder->file_target_pos == 0) {
527 // We would seek to the beginning of the file, but
528 // since we already decoded that Stream Header in
529 // SEQ_MAGIC_BYTES, we can use the cached value from
530 // coder->first_header_flags to avoid the seek.
531 coder->header_flags = coder->first_header_flags;
532 coder->sequence = SEQ_HEADER_COMPARE;
533 break;
534 }
535
536 coder->sequence = SEQ_HEADER_DECODE;
537
538 // Make coder->file_target_pos point to the end of
539 // the Stream Header field.
540 coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;
541
542 // If coder->temp_size is non-zero, it points to the end
543 // of the Index field. Then the beginning of the Index
544 // field is at coder->temp[coder->temp_size
545 // - coder->footer_flags.backward_size].
546 assert(coder->temp_size == 0 || coder->temp_size
547 >= coder->footer_flags.backward_size);
548
549 // If coder->temp contained the whole Index, see if it has
550 // enough data to contain also the Stream Header. If so,
551 // we avoid an external seek.
552 //
553 // NOTE: This can happen only with small .xz files and only
554 // for the non-first Stream as the Stream Flags of the first
555 // Stream are cached and already handled a few lines above.
556 // So this isn't as useful as the other seek-avoidance cases.
557 if (coder->temp_size != 0 && coder->temp_size
558 - coder->footer_flags.backward_size
559 >= seek_amount) {
560 // Make temp_pos and temp_size point to the *end* of
561 // Stream Header so that SEQ_HEADER_DECODE will find
562 // the start of Stream Header from coder->temp[
563 // coder->temp_size - LZMA_STREAM_HEADER_SIZE].
564 coder->temp_pos = coder->temp_size
565 - coder->footer_flags.backward_size
566 - seek_amount
567 + LZMA_STREAM_HEADER_SIZE;
568 coder->temp_size = coder->temp_pos;
569 } else {
570 // Seek so that Stream Header will be at the end of
571 // coder->temp. With typical multi-Stream files we
572 // will usually also get the Stream Footer and Index
573 // of the *previous* Stream in coder->temp and thus
574 // won't need a separate seek for them.
575 return_if_error(reverse_seek(coder,
576 in_start, in_pos, in_size));
577 }
578 }
579
580 // Fall through
581
582 case SEQ_HEADER_DECODE:
583 // Copy the Stream Header field into coder->temp.
584 // If Stream Header was already available in coder->temp
585 // in SEQ_INDEX_DECODE, then this does nothing.
586 if (fill_temp(coder, in, in_pos, in_size))
587 return LZMA_OK;
588
589 // Make all these point to the beginning of Stream Header.
590 coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
591 coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
592 coder->temp_pos = coder->temp_size;
593
594 // Decode the Stream Header.
595 return_if_error(hide_format_error(lzma_stream_header_decode(
596 &coder->header_flags,
597 coder->temp + coder->temp_size)));
598
599 coder->sequence = SEQ_HEADER_COMPARE;
600
601 // Fall through
602
603 case SEQ_HEADER_COMPARE:
604 // Compare Stream Header against Stream Footer. They must
605 // match.
606 return_if_error(lzma_stream_flags_compare(
607 &coder->header_flags, &coder->footer_flags));
608
609 // Store the decoded Stream Flags into the Index. Use the
610 // Footer Flags because it contains Backward Size, although
611 // it shouldn't matter in practice.
612 if (lzma_index_stream_flags(coder->this_index,
613 &coder->footer_flags) != LZMA_OK)
614 return LZMA_PROG_ERROR;
615
616 // Store also the size of the Stream Padding field. It is
617 // needed to calculate the offsets of the Streams correctly.
618 if (lzma_index_stream_padding(coder->this_index,
619 coder->stream_padding) != LZMA_OK)
620 return LZMA_PROG_ERROR;
621
622 // Reset it so that it's ready for the next Stream.
623 coder->stream_padding = 0;
624
625 // Append the earlier decoded Indexes after this_index.
626 if (coder->combined_index != NULL)
627 return_if_error(lzma_index_cat(coder->this_index,
628 coder->combined_index, allocator));
629
630 coder->combined_index = coder->this_index;
631 coder->this_index = NULL;
632
633 // If the whole file was decoded, tell the caller that we
634 // are finished.
635 if (coder->file_target_pos == 0) {
636 // The combined index must indicate the same file
637 // size as was told to us at initialization.
638 assert(lzma_index_file_size(coder->combined_index)
639 == coder->file_size);
640
641 // Make the combined index available to
642 // the application.
643 *coder->dest_index = coder->combined_index;
644 coder->combined_index = NULL;
645
646 // Mark the input buffer as used since we may have
647 // done internal seeking and thus don't know how
648 // many input bytes were actually used. This way
649 // lzma_stream.total_in gets a slightly better
650 // estimate of the amount of input used.
651 *in_pos = in_size;
652 return LZMA_STREAM_END;
653 }
654
655 // We didn't hit the beginning of the file yet, so continue
656 // reading backwards in the file. If we have unprocessed
657 // data in coder->temp, use it before requesting more data
658 // from the application.
659 //
660 // coder->file_target_pos, coder->temp_size, and
661 // coder->temp_pos all point to the beginning of Stream Header
662 // and thus the end of the previous Stream in the file.
663 coder->sequence = coder->temp_size > 0
664 ? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
665 break;
666
667 default:
668 assert(0);
669 return LZMA_PROG_ERROR;
670 }
671}
672
673
674static lzma_ret
675file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
676 uint64_t *old_memlimit, uint64_t new_memlimit)
677{
678 lzma_file_info_coder *coder = coder_ptr;
679
680 // The memory usage calculation comes from three things:
681 //
682 // (1) The Indexes that have already been decoded and processed into
683 // coder->combined_index.
684 //
685 // (2) The latest Index in coder->this_index that has been decoded but
686 // not yet put into coder->combined_index.
687 //
688 // (3) The latest Index that we have started decoding but haven't
689 // finished and thus isn't available in coder->this_index yet.
690 // Memory usage and limit information needs to be communicated
691 // from/to coder->index_decoder.
692 //
693 // Care has to be taken to not do both (2) and (3) when calculating
694 // the memory usage.
695 uint64_t combined_index_memusage = 0;
696 uint64_t this_index_memusage = 0;
697
698 // (1) If we have already successfully decoded one or more Indexes,
699 // get their memory usage.
700 if (coder->combined_index != NULL)
701 combined_index_memusage = lzma_index_memused(
702 coder->combined_index);
703
704 // Choose between (2), (3), or neither.
705 if (coder->this_index != NULL) {
706 // (2) The latest Index is available. Use its memory usage.
707 this_index_memusage = lzma_index_memused(coder->this_index);
708
709 } else if (coder->sequence == SEQ_INDEX_DECODE) {
710 // (3) The Index decoder is activate and hasn't yet stored
711 // the new index in coder->this_index. Get the memory usage
712 // information from the Index decoder.
713 //
714 // NOTE: If the Index decoder doesn't yet know how much memory
715 // it will eventually need, it will return a tiny value here.
716 uint64_t dummy;
717 if (coder->index_decoder.memconfig(coder->index_decoder.coder,
718 &this_index_memusage, &dummy, 0)
719 != LZMA_OK) {
720 assert(0);
721 return LZMA_PROG_ERROR;
722 }
723 }
724
725 // Now we know the total memory usage/requirement. If we had neither
726 // old Indexes nor a new Index, this will be zero which isn't
727 // acceptable as lzma_memusage() has to return non-zero on success
728 // and even with an empty .xz file we will end up with a lzma_index
729 // that takes some memory.
730 *memusage = combined_index_memusage + this_index_memusage;
731 if (*memusage == 0)
732 *memusage = lzma_index_memusage(1, 0);
733
734 *old_memlimit = coder->memlimit;
735
736 // If requested, set a new memory usage limit.
737 if (new_memlimit != 0) {
738 if (new_memlimit < *memusage)
739 return LZMA_MEMLIMIT_ERROR;
740
741 // In the condition (3) we need to tell the Index decoder
742 // its new memory usage limit.
743 if (coder->this_index == NULL
744 && coder->sequence == SEQ_INDEX_DECODE) {
745 const uint64_t idec_new_memlimit = new_memlimit
746 - combined_index_memusage;
747
748 assert(this_index_memusage > 0);
749 assert(idec_new_memlimit > 0);
750
751 uint64_t dummy1;
752 uint64_t dummy2;
753
754 if (coder->index_decoder.memconfig(
755 coder->index_decoder.coder,
756 &dummy1, &dummy2, idec_new_memlimit)
757 != LZMA_OK) {
758 assert(0);
759 return LZMA_PROG_ERROR;
760 }
761 }
762
763 coder->memlimit = new_memlimit;
764 }
765
766 return LZMA_OK;
767}
768
769
770static void
771file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
772{
773 lzma_file_info_coder *coder = coder_ptr;
774
775 lzma_next_end(&coder->index_decoder, allocator);
776 lzma_index_end(coder->this_index, allocator);
777 lzma_index_end(coder->combined_index, allocator);
778
779 lzma_free(coder, allocator);
780 return;
781}
782
783
784static lzma_ret
785lzma_file_info_decoder_init(lzma_next_coder *next,
786 const lzma_allocator *allocator, uint64_t *seek_pos,
787 lzma_index **dest_index,
788 uint64_t memlimit, uint64_t file_size)
789{
790 lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);
791
792 if (dest_index == NULL)
793 return LZMA_PROG_ERROR;
794
795 lzma_file_info_coder *coder = next->coder;
796 if (coder == NULL) {
797 coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
798 if (coder == NULL)
799 return LZMA_MEM_ERROR;
800
801 next->coder = coder;
802 next->code = &file_info_decode;
803 next->end = &file_info_decoder_end;
804 next->memconfig = &file_info_decoder_memconfig;
805
806 coder->index_decoder = LZMA_NEXT_CODER_INIT;
807 coder->this_index = NULL;
808 coder->combined_index = NULL;
809 }
810
811 coder->sequence = SEQ_MAGIC_BYTES;
812 coder->file_cur_pos = 0;
813 coder->file_target_pos = 0;
814 coder->file_size = file_size;
815
816 lzma_index_end(coder->this_index, allocator);
817 coder->this_index = NULL;
818
819 lzma_index_end(coder->combined_index, allocator);
820 coder->combined_index = NULL;
821
822 coder->stream_padding = 0;
823
824 coder->dest_index = dest_index;
825 coder->external_seek_pos = seek_pos;
826
827 // If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
828 // won't return 0 (which would indicate an error).
829 coder->memlimit = my_max(1, memlimit);
830
831 // Prepare these for reading the first Stream Header into coder->temp.
832 coder->temp_pos = 0;
833 coder->temp_size = LZMA_STREAM_HEADER_SIZE;
834
835 return LZMA_OK;
836}
837
838
839extern LZMA_API(lzma_ret)
840lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
841 uint64_t memlimit, uint64_t file_size)
842{
843 lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
844 dest_index, memlimit, file_size);
845
846 // We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
847 // lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
848 // combination in a sane way. Applications still need to be careful
849 // if they use LZMA_FINISH so that they remember to reset it back
850 // to LZMA_RUN after seeking if needed.
851 strm->internal->supported_actions[LZMA_RUN] = true;
852 strm->internal->supported_actions[LZMA_FINISH] = true;
853
854 return LZMA_OK;
855}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette