index.c@ 98879

Last change on this file since 98879 was 98730, checked in by vboxsync, 21 months ago
libs/liblzma-5.4.1: Export to OSE, bugref:10254
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 35.0 KB

Line
1	///////////////////////////////////////////////////////////////////////////////
2	//
3	/// \file index.c
4	/// \brief Handling of .xz Indexes and some other Stream information
5	//
6	// Author: Lasse Collin
7	//
8	// This file has been put into the public domain.
9	// You can do whatever you want with this file.
10	//
11	///////////////////////////////////////////////////////////////////////////////
12
13	#include "common.h"
14	#include "index.h"
15	#include "stream_flags_common.h"
16
17
18	/// \brief How many Records to allocate at once
19	///
20	/// This should be big enough to avoid making lots of tiny allocations
21	/// but small enough to avoid too much unused memory at once.
22	#define INDEX_GROUP_SIZE 512
23
24
25	/// \brief How many Records can be allocated at once at maximum
26	#define PREALLOC_MAX ((SIZE_MAX - sizeof(index_group)) / sizeof(index_record))
27
28
29	/// \brief Base structure for index_stream and index_group structures
30	typedef struct index_tree_node_s index_tree_node;
31	struct index_tree_node_s {
32	/// Uncompressed start offset of this Stream (relative to the
33	/// beginning of the file) or Block (relative to the beginning
34	/// of the Stream)
35	lzma_vli uncompressed_base;
36
37	/// Compressed start offset of this Stream or Block
38	lzma_vli compressed_base;
39
40	index_tree_node *parent;
41	index_tree_node *left;
42	index_tree_node *right;
43	};
44
45
46	/// \brief AVL tree to hold index_stream or index_group structures
47	typedef struct {
48	/// Root node
49	index_tree_node *root;
50
51	/// Leftmost node. Since the tree will be filled sequentially,
52	/// this won't change after the first node has been added to
53	/// the tree.
54	index_tree_node *leftmost;
55
56	/// The rightmost node in the tree. Since the tree is filled
57	/// sequentially, this is always the node where to add the new data.
58	index_tree_node *rightmost;
59
60	/// Number of nodes in the tree
61	uint32_t count;
62
63	} index_tree;
64
65
66	typedef struct {
67	lzma_vli uncompressed_sum;
68	lzma_vli unpadded_sum;
69	} index_record;
70
71
72	typedef struct {
73	/// Every Record group is part of index_stream.groups tree.
74	index_tree_node node;
75
76	/// Number of Blocks in this Stream before this group.
77	lzma_vli number_base;
78
79	/// Number of Records that can be put in records[].
80	size_t allocated;
81
82	/// Index of the last Record in use.
83	size_t last;
84
85	/// The sizes in this array are stored as cumulative sums relative
86	/// to the beginning of the Stream. This makes it possible to
87	/// use binary search in lzma_index_locate().
88	///
89	/// Note that the cumulative summing is done specially for
90	/// unpadded_sum: The previous value is rounded up to the next
91	/// multiple of four before adding the Unpadded Size of the new
92	/// Block. The total encoded size of the Blocks in the Stream
93	/// is records[last].unpadded_sum in the last Record group of
94	/// the Stream.
95	///
96	/// For example, if the Unpadded Sizes are 39, 57, and 81, the
97	/// stored values are 39, 97 (40 + 57), and 181 (100 + 181).
98	/// The total encoded size of these Blocks is 184.
99	///
100	/// This is a flexible array, because it makes easy to optimize
101	/// memory usage in case someone concatenates many Streams that
102	/// have only one or few Blocks.
103	index_record records[];
104
105	} index_group;
106
107
108	typedef struct {
109	/// Every index_stream is a node in the tree of Streams.
110	index_tree_node node;
111
112	/// Number of this Stream (first one is 1)
113	uint32_t number;
114
115	/// Total number of Blocks before this Stream
116	lzma_vli block_number_base;
117
118	/// Record groups of this Stream are stored in a tree.
119	/// It's a T-tree with AVL-tree balancing. There are
120	/// INDEX_GROUP_SIZE Records per node by default.
121	/// This keeps the number of memory allocations reasonable
122	/// and finding a Record is fast.
123	index_tree groups;
124
125	/// Number of Records in this Stream
126	lzma_vli record_count;
127
128	/// Size of the List of Records field in this Stream. This is used
129	/// together with record_count to calculate the size of the Index
130	/// field and thus the total size of the Stream.
131	lzma_vli index_list_size;
132
133	/// Stream Flags of this Stream. This is meaningful only if
134	/// the Stream Flags have been told us with lzma_index_stream_flags().
135	/// Initially stream_flags.version is set to UINT32_MAX to indicate
136	/// that the Stream Flags are unknown.
137	lzma_stream_flags stream_flags;
138
139	/// Amount of Stream Padding after this Stream. This defaults to
140	/// zero and can be set with lzma_index_stream_padding().
141	lzma_vli stream_padding;
142
143	} index_stream;
144
145
146	struct lzma_index_s {
147	/// AVL-tree containing the Stream(s). Often there is just one
148	/// Stream, but using a tree keeps lookups fast even when there
149	/// are many concatenated Streams.
150	index_tree streams;
151
152	/// Uncompressed size of all the Blocks in the Stream(s)
153	lzma_vli uncompressed_size;
154
155	/// Total size of all the Blocks in the Stream(s)
156	lzma_vli total_size;
157
158	/// Total number of Records in all Streams in this lzma_index
159	lzma_vli record_count;
160
161	/// Size of the List of Records field if all the Streams in this
162	/// lzma_index were packed into a single Stream (makes it simpler to
163	/// take many .xz files and combine them into a single Stream).
164	///
165	/// This value together with record_count is needed to calculate
166	/// Backward Size that is stored into Stream Footer.
167	lzma_vli index_list_size;
168
169	/// How many Records to allocate at once in lzma_index_append().
170	/// This defaults to INDEX_GROUP_SIZE but can be overridden with
171	/// lzma_index_prealloc().
172	size_t prealloc;
173
174	/// Bitmask indicating what integrity check types have been used
175	/// as set by lzma_index_stream_flags(). The bit of the last Stream
176	/// is not included here, since it is possible to change it by
177	/// calling lzma_index_stream_flags() again.
178	uint32_t checks;
179	};
180
181
182	static void
183	index_tree_init(index_tree *tree)
184	{
185	tree->root = NULL;
186	tree->leftmost = NULL;
187	tree->rightmost = NULL;
188	tree->count = 0;
189	return;
190	}
191
192
193	/// Helper for index_tree_end()
194	static void
195	index_tree_node_end(index_tree_node node, const lzma_allocator allocator,
196	void (free_func)(void node, const lzma_allocator *allocator))
197	{
198	// The tree won't ever be very huge, so recursion should be fine.
199	// 20 levels in the tree is likely quite a lot already in practice.
200	if (node->left != NULL)
201	index_tree_node_end(node->left, allocator, free_func);
202
203	if (node->right != NULL)
204	index_tree_node_end(node->right, allocator, free_func);
205
206	free_func(node, allocator);
207	return;
208	}
209
210
211	/// Free the memory allocated for a tree. Each node is freed using the
212	/// given free_func which is either &lzma_free or &index_stream_end.
213	/// The latter is used to free the Record groups from each index_stream
214	/// before freeing the index_stream itself.
215	static void
216	index_tree_end(index_tree tree, const lzma_allocator allocator,
217	void (free_func)(void node, const lzma_allocator *allocator))
218	{
219	assert(free_func != NULL);
220
221	if (tree->root != NULL)
222	index_tree_node_end(tree->root, allocator, free_func);
223
224	return;
225	}
226
227
228	/// Add a new node to the tree. node->uncompressed_base and
229	/// node->compressed_base must have been set by the caller already.
230	static void
231	index_tree_append(index_tree tree, index_tree_node node)
232	{
233	node->parent = tree->rightmost;
234	node->left = NULL;
235	node->right = NULL;
236
237	++tree->count;
238
239	// Handle the special case of adding the first node.
240	if (tree->root == NULL) {
241	tree->root = node;
242	tree->leftmost = node;
243	tree->rightmost = node;
244	return;
245	}
246
247	// The tree is always filled sequentially.
248	assert(tree->rightmost->uncompressed_base <= node->uncompressed_base);
249	assert(tree->rightmost->compressed_base < node->compressed_base);
250
251	// Add the new node after the rightmost node. It's the correct
252	// place due to the reason above.
253	tree->rightmost->right = node;
254	tree->rightmost = node;
255
256	// Balance the AVL-tree if needed. We don't need to keep the balance
257	// factors in nodes, because we always fill the tree sequentially,
258	// and thus know the state of the tree just by looking at the node
259	// count. From the node count we can calculate how many steps to go
260	// up in the tree to find the rotation root.
261	uint32_t up = tree->count ^ (UINT32_C(1) << bsr32(tree->count));
262	if (up != 0) {
263	// Locate the root node for the rotation.
264	up = ctz32(tree->count) + 2;
265	do {
266	node = node->parent;
267	} while (--up > 0);
268
269	// Rotate left using node as the rotation root.
270	index_tree_node *pivot = node->right;
271
272	if (node->parent == NULL) {
273	tree->root = pivot;
274	} else {
275	assert(node->parent->right == node);
276	node->parent->right = pivot;
277	}
278
279	pivot->parent = node->parent;
280
281	node->right = pivot->left;
282	if (node->right != NULL)
283	node->right->parent = node;
284
285	pivot->left = node;
286	node->parent = pivot;
287	}
288
289	return;
290	}
291
292
293	/// Get the next node in the tree. Return NULL if there are no more nodes.
294	static void *
295	index_tree_next(const index_tree_node *node)
296	{
297	if (node->right != NULL) {
298	node = node->right;
299	while (node->left != NULL)
300	node = node->left;
301
302	return (void *)(node);
303	}
304
305	while (node->parent != NULL && node->parent->right == node)
306	node = node->parent;
307
308	return (void *)(node->parent);
309	}
310
311
312	/// Locate a node that contains the given uncompressed offset. It is
313	/// caller's job to check that target is not bigger than the uncompressed
314	/// size of the tree (the last node would be returned in that case still).
315	static void *
316	index_tree_locate(const index_tree *tree, lzma_vli target)
317	{
318	const index_tree_node *result = NULL;
319	const index_tree_node *node = tree->root;
320
321	assert(tree->leftmost == NULL
322	\|\| tree->leftmost->uncompressed_base == 0);
323
324	// Consecutive nodes may have the same uncompressed_base.
325	// We must pick the rightmost one.
326	while (node != NULL) {
327	if (node->uncompressed_base > target) {
328	node = node->left;
329	} else {
330	result = node;
331	node = node->right;
332	}
333	}
334
335	return (void *)(result);
336	}
337
338
339	/// Allocate and initialize a new Stream using the given base offsets.
340	static index_stream *
341	index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base,
342	uint32_t stream_number, lzma_vli block_number_base,
343	const lzma_allocator *allocator)
344	{
345	index_stream *s = lzma_alloc(sizeof(index_stream), allocator);
346	if (s == NULL)
347	return NULL;
348
349	s->node.uncompressed_base = uncompressed_base;
350	s->node.compressed_base = compressed_base;
351	s->node.parent = NULL;
352	s->node.left = NULL;
353	s->node.right = NULL;
354
355	s->number = stream_number;
356	s->block_number_base = block_number_base;
357
358	index_tree_init(&s->groups);
359
360	s->record_count = 0;
361	s->index_list_size = 0;
362	s->stream_flags.version = UINT32_MAX;
363	s->stream_padding = 0;
364
365	return s;
366	}
367
368
369	/// Free the memory allocated for a Stream and its Record groups.
370	static void
371	index_stream_end(void node, const lzma_allocator allocator)
372	{
373	index_stream *s = node;
374	index_tree_end(&s->groups, allocator, &lzma_free);
375	lzma_free(s, allocator);
376	return;
377	}
378
379
380	static lzma_index *
381	index_init_plain(const lzma_allocator *allocator)
382	{
383	lzma_index *i = lzma_alloc(sizeof(lzma_index), allocator);
384	if (i != NULL) {
385	index_tree_init(&i->streams);
386	i->uncompressed_size = 0;
387	i->total_size = 0;
388	i->record_count = 0;
389	i->index_list_size = 0;
390	i->prealloc = INDEX_GROUP_SIZE;
391	i->checks = 0;
392	}
393
394	return i;
395	}
396
397
398	extern LZMA_API(lzma_index *)
399	lzma_index_init(const lzma_allocator *allocator)
400	{
401	lzma_index *i = index_init_plain(allocator);
402	if (i == NULL)
403	return NULL;
404
405	index_stream *s = index_stream_init(0, 0, 1, 0, allocator);
406	if (s == NULL) {
407	lzma_free(i, allocator);
408	return NULL;
409	}
410
411	index_tree_append(&i->streams, &s->node);
412
413	return i;
414	}
415
416
417	extern LZMA_API(void)
418	lzma_index_end(lzma_index i, const lzma_allocator allocator)
419	{
420	// NOTE: If you modify this function, check also the bottom
421	// of lzma_index_cat().
422	if (i != NULL) {
423	index_tree_end(&i->streams, allocator, &index_stream_end);
424	lzma_free(i, allocator);
425	}
426
427	return;
428	}
429
430
431	extern void
432	lzma_index_prealloc(lzma_index *i, lzma_vli records)
433	{
434	if (records > PREALLOC_MAX)
435	records = PREALLOC_MAX;
436
437	i->prealloc = (size_t)(records);
438	return;
439	}
440
441
442	extern LZMA_API(uint64_t)
443	lzma_index_memusage(lzma_vli streams, lzma_vli blocks)
444	{
445	// This calculates an upper bound that is only a little bit
446	// bigger than the exact maximum memory usage with the given
447	// parameters.
448
449	// Typical malloc() overhead is 2 * sizeof(void *) but we take
450	// a little bit extra just in case. Using LZMA_MEMUSAGE_BASE
451	// instead would give too inaccurate estimate.
452	const size_t alloc_overhead = 4 * sizeof(void *);
453
454	// Amount of memory needed for each Stream base structures.
455	// We assume that every Stream has at least one Block and
456	// thus at least one group.
457	const size_t stream_base = sizeof(index_stream)
458	+ sizeof(index_group) + 2 * alloc_overhead;
459
460	// Amount of memory needed per group.
461	const size_t group_base = sizeof(index_group)
462	+ INDEX_GROUP_SIZE * sizeof(index_record)
463	+ alloc_overhead;
464
465	// Number of groups. There may actually be more, but that overhead
466	// has been taken into account in stream_base already.
467	const lzma_vli groups
468	= (blocks + INDEX_GROUP_SIZE - 1) / INDEX_GROUP_SIZE;
469
470	// Memory used by index_stream and index_group structures.
471	const uint64_t streams_mem = streams * stream_base;
472	const uint64_t groups_mem = groups * group_base;
473
474	// Memory used by the base structure.
475	const uint64_t index_base = sizeof(lzma_index) + alloc_overhead;
476
477	// Validate the arguments and catch integer overflows.
478	// Maximum number of Streams is "only" UINT32_MAX, because
479	// that limit is used by the tree containing the Streams.
480	const uint64_t limit = UINT64_MAX - index_base;
481	if (streams == 0 \|\| streams > UINT32_MAX \|\| blocks > LZMA_VLI_MAX
482	\|\| streams > limit / stream_base
483	\|\| groups > limit / group_base
484	\|\| limit - streams_mem < groups_mem)
485	return UINT64_MAX;
486
487	return index_base + streams_mem + groups_mem;
488	}
489
490
491	extern LZMA_API(uint64_t)
492	lzma_index_memused(const lzma_index *i)
493	{
494	return lzma_index_memusage(i->streams.count, i->record_count);
495	}
496
497
498	extern LZMA_API(lzma_vli)
499	lzma_index_block_count(const lzma_index *i)
500	{
501	return i->record_count;
502	}
503
504
505	extern LZMA_API(lzma_vli)
506	lzma_index_stream_count(const lzma_index *i)
507	{
508	return i->streams.count;
509	}
510
511
512	extern LZMA_API(lzma_vli)
513	lzma_index_size(const lzma_index *i)
514	{
515	return index_size(i->record_count, i->index_list_size);
516	}
517
518
519	extern LZMA_API(lzma_vli)
520	lzma_index_total_size(const lzma_index *i)
521	{
522	return i->total_size;
523	}
524
525
526	extern LZMA_API(lzma_vli)
527	lzma_index_stream_size(const lzma_index *i)
528	{
529	// Stream Header + Blocks + Index + Stream Footer
530	return LZMA_STREAM_HEADER_SIZE + i->total_size
531	+ index_size(i->record_count, i->index_list_size)
532	+ LZMA_STREAM_HEADER_SIZE;
533	}
534
535
536	static lzma_vli
537	index_file_size(lzma_vli compressed_base, lzma_vli unpadded_sum,
538	lzma_vli record_count, lzma_vli index_list_size,
539	lzma_vli stream_padding)
540	{
541	// Earlier Streams and Stream Paddings + Stream Header
542	// + Blocks + Index + Stream Footer + Stream Padding
543	//
544	// This might go over LZMA_VLI_MAX due to too big unpadded_sum
545	// when this function is used in lzma_index_append().
546	lzma_vli file_size = compressed_base + 2 * LZMA_STREAM_HEADER_SIZE
547	+ stream_padding + vli_ceil4(unpadded_sum);
548	if (file_size > LZMA_VLI_MAX)
549	return LZMA_VLI_UNKNOWN;
550
551	// The same applies here.
552	file_size += index_size(record_count, index_list_size);
553	if (file_size > LZMA_VLI_MAX)
554	return LZMA_VLI_UNKNOWN;
555
556	return file_size;
557	}
558
559
560	extern LZMA_API(lzma_vli)
561	lzma_index_file_size(const lzma_index *i)
562	{
563	const index_stream s = (const index_stream )(i->streams.rightmost);
564	const index_group g = (const index_group )(s->groups.rightmost);
565	return index_file_size(s->node.compressed_base,
566	g == NULL ? 0 : g->records[g->last].unpadded_sum,
567	s->record_count, s->index_list_size,
568	s->stream_padding);
569	}
570
571
572	extern LZMA_API(lzma_vli)
573	lzma_index_uncompressed_size(const lzma_index *i)
574	{
575	return i->uncompressed_size;
576	}
577
578
579	extern LZMA_API(uint32_t)
580	lzma_index_checks(const lzma_index *i)
581	{
582	uint32_t checks = i->checks;
583
584	// Get the type of the Check of the last Stream too.
585	const index_stream s = (const index_stream )(i->streams.rightmost);
586	if (s->stream_flags.version != UINT32_MAX)
587	checks \|= UINT32_C(1) << s->stream_flags.check;
588
589	return checks;
590	}
591
592
593	extern uint32_t
594	lzma_index_padding_size(const lzma_index *i)
595	{
596	return (LZMA_VLI_C(4) - index_size_unpadded(
597	i->record_count, i->index_list_size)) & 3;
598	}
599
600
601	extern LZMA_API(lzma_ret)
602	lzma_index_stream_flags(lzma_index i, const lzma_stream_flags stream_flags)
603	{
604	if (i == NULL \|\| stream_flags == NULL)
605	return LZMA_PROG_ERROR;
606
607	// Validate the Stream Flags.
608	return_if_error(lzma_stream_flags_compare(
609	stream_flags, stream_flags));
610
611	index_stream s = (index_stream )(i->streams.rightmost);
612	s->stream_flags = *stream_flags;
613
614	return LZMA_OK;
615	}
616
617
618	extern LZMA_API(lzma_ret)
619	lzma_index_stream_padding(lzma_index *i, lzma_vli stream_padding)
620	{
621	if (i == NULL \|\| stream_padding > LZMA_VLI_MAX
622	\|\| (stream_padding & 3) != 0)
623	return LZMA_PROG_ERROR;
624
625	index_stream s = (index_stream )(i->streams.rightmost);
626
627	// Check that the new value won't make the file grow too big.
628	const lzma_vli old_stream_padding = s->stream_padding;
629	s->stream_padding = 0;
630	if (lzma_index_file_size(i) + stream_padding > LZMA_VLI_MAX) {
631	s->stream_padding = old_stream_padding;
632	return LZMA_DATA_ERROR;
633	}
634
635	s->stream_padding = stream_padding;
636	return LZMA_OK;
637	}
638
639
640	extern LZMA_API(lzma_ret)
641	lzma_index_append(lzma_index i, const lzma_allocator allocator,
642	lzma_vli unpadded_size, lzma_vli uncompressed_size)
643	{
644	// Validate.
645	if (i == NULL \|\| unpadded_size < UNPADDED_SIZE_MIN
646	\|\| unpadded_size > UNPADDED_SIZE_MAX
647	\|\| uncompressed_size > LZMA_VLI_MAX)
648	return LZMA_PROG_ERROR;
649
650	index_stream s = (index_stream )(i->streams.rightmost);
651	index_group g = (index_group )(s->groups.rightmost);
652
653	const lzma_vli compressed_base = g == NULL ? 0
654	: vli_ceil4(g->records[g->last].unpadded_sum);
655	const lzma_vli uncompressed_base = g == NULL ? 0
656	: g->records[g->last].uncompressed_sum;
657	const uint32_t index_list_size_add = lzma_vli_size(unpadded_size)
658	+ lzma_vli_size(uncompressed_size);
659
660	// Check that uncompressed size will not overflow.
661	if (uncompressed_base + uncompressed_size > LZMA_VLI_MAX)
662	return LZMA_DATA_ERROR;
663
664	// Check that the file size will stay within limits.
665	if (index_file_size(s->node.compressed_base,
666	compressed_base + unpadded_size, s->record_count + 1,
667	s->index_list_size + index_list_size_add,
668	s->stream_padding) == LZMA_VLI_UNKNOWN)
669	return LZMA_DATA_ERROR;
670
671	// The size of the Index field must not exceed the maximum value
672	// that can be stored in the Backward Size field.
673	if (index_size(i->record_count + 1,
674	i->index_list_size + index_list_size_add)
675	> LZMA_BACKWARD_SIZE_MAX)
676	return LZMA_DATA_ERROR;
677
678	if (g != NULL && g->last + 1 < g->allocated) {
679	// There is space in the last group at least for one Record.
680	++g->last;
681	} else {
682	// We need to allocate a new group.
683	g = lzma_alloc(sizeof(index_group)
684	+ i->prealloc * sizeof(index_record),
685	allocator);
686	if (g == NULL)
687	return LZMA_MEM_ERROR;
688
689	g->last = 0;
690	g->allocated = i->prealloc;
691
692	// Reset prealloc so that if the application happens to
693	// add new Records, the allocation size will be sane.
694	i->prealloc = INDEX_GROUP_SIZE;
695
696	// Set the start offsets of this group.
697	g->node.uncompressed_base = uncompressed_base;
698	g->node.compressed_base = compressed_base;
699	g->number_base = s->record_count + 1;
700
701	// Add the new group to the Stream.
702	index_tree_append(&s->groups, &g->node);
703	}
704
705	// Add the new Record to the group.
706	g->records[g->last].uncompressed_sum
707	= uncompressed_base + uncompressed_size;
708	g->records[g->last].unpadded_sum
709	= compressed_base + unpadded_size;
710
711	// Update the totals.
712	++s->record_count;
713	s->index_list_size += index_list_size_add;
714
715	i->total_size += vli_ceil4(unpadded_size);
716	i->uncompressed_size += uncompressed_size;
717	++i->record_count;
718	i->index_list_size += index_list_size_add;
719
720	return LZMA_OK;
721	}
722
723
724	/// Structure to pass info to index_cat_helper()
725	typedef struct {
726	/// Uncompressed size of the destination
727	lzma_vli uncompressed_size;
728
729	/// Compressed file size of the destination
730	lzma_vli file_size;
731
732	/// Same as above but for Block numbers
733	lzma_vli block_number_add;
734
735	/// Number of Streams that were in the destination index before we
736	/// started appending new Streams from the source index. This is
737	/// used to fix the Stream numbering.
738	uint32_t stream_number_add;
739
740	/// Destination index' Stream tree
741	index_tree *streams;
742
743	} index_cat_info;
744
745
746	/// Add the Stream nodes from the source index to dest using recursion.
747	/// Simplest iterative traversal of the source tree wouldn't work, because
748	/// we update the pointers in nodes when moving them to the destination tree.
749	static void
750	index_cat_helper(const index_cat_info info, index_stream this)
751	{
752	index_stream left = (index_stream )(this->node.left);
753	index_stream right = (index_stream )(this->node.right);
754
755	if (left != NULL)
756	index_cat_helper(info, left);
757
758	this->node.uncompressed_base += info->uncompressed_size;
759	this->node.compressed_base += info->file_size;
760	this->number += info->stream_number_add;
761	this->block_number_base += info->block_number_add;
762	index_tree_append(info->streams, &this->node);
763
764	if (right != NULL)
765	index_cat_helper(info, right);
766
767	return;
768	}
769
770
771	extern LZMA_API(lzma_ret)
772	lzma_index_cat(lzma_index restrict dest, lzma_index restrict src,
773	const lzma_allocator *allocator)
774	{
775	if (dest == NULL \|\| src == NULL)
776	return LZMA_PROG_ERROR;
777
778	const lzma_vli dest_file_size = lzma_index_file_size(dest);
779
780	// Check that we don't exceed the file size limits.
781	if (dest_file_size + lzma_index_file_size(src) > LZMA_VLI_MAX
782	\|\| dest->uncompressed_size + src->uncompressed_size
783	> LZMA_VLI_MAX)
784	return LZMA_DATA_ERROR;
785
786	// Check that the encoded size of the combined lzma_indexes stays
787	// within limits. In theory, this should be done only if we know
788	// that the user plans to actually combine the Streams and thus
789	// construct a single Index (probably rare). However, exceeding
790	// this limit is quite theoretical, so we do this check always
791	// to simplify things elsewhere.
792	{
793	const lzma_vli dest_size = index_size_unpadded(
794	dest->record_count, dest->index_list_size);
795	const lzma_vli src_size = index_size_unpadded(
796	src->record_count, src->index_list_size);
797	if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX)
798	return LZMA_DATA_ERROR;
799	}
800
801	// Optimize the last group to minimize memory usage. Allocation has
802	// to be done before modifying dest or src.
803	{
804	index_stream s = (index_stream )(dest->streams.rightmost);
805	index_group g = (index_group )(s->groups.rightmost);
806	if (g != NULL && g->last + 1 < g->allocated) {
807	assert(g->node.left == NULL);
808	assert(g->node.right == NULL);
809
810	index_group *newg = lzma_alloc(sizeof(index_group)
811	+ (g->last + 1)
812	* sizeof(index_record),
813	allocator);
814	if (newg == NULL)
815	return LZMA_MEM_ERROR;
816
817	newg->node = g->node;
818	newg->allocated = g->last + 1;
819	newg->last = g->last;
820	newg->number_base = g->number_base;
821
822	memcpy(newg->records, g->records, newg->allocated
823	* sizeof(index_record));
824
825	if (g->node.parent != NULL) {
826	assert(g->node.parent->right == &g->node);
827	g->node.parent->right = &newg->node;
828	}
829
830	if (s->groups.leftmost == &g->node) {
831	assert(s->groups.root == &g->node);
832	s->groups.leftmost = &newg->node;
833	s->groups.root = &newg->node;
834	}
835
836	assert(s->groups.rightmost == &g->node);
837	s->groups.rightmost = &newg->node;
838
839	lzma_free(g, allocator);
840
841	// NOTE: newg isn't leaked here because
842	// newg == (void *)&newg->node.
843	}
844	}
845
846	// dest->checks includes the check types of all except the last Stream
847	// in dest. Set the bit for the check type of the last Stream now so
848	// that it won't get lost when Stream(s) from src are appended to dest.
849	dest->checks = lzma_index_checks(dest);
850
851	// Add all the Streams from src to dest. Update the base offsets
852	// of each Stream from src.
853	const index_cat_info info = {
854	.uncompressed_size = dest->uncompressed_size,
855	.file_size = dest_file_size,
856	.stream_number_add = dest->streams.count,
857	.block_number_add = dest->record_count,
858	.streams = &dest->streams,
859	};
860	index_cat_helper(&info, (index_stream *)(src->streams.root));
861
862	// Update info about all the combined Streams.
863	dest->uncompressed_size += src->uncompressed_size;
864	dest->total_size += src->total_size;
865	dest->record_count += src->record_count;
866	dest->index_list_size += src->index_list_size;
867	dest->checks \|= src->checks;
868
869	// There's nothing else left in src than the base structure.
870	lzma_free(src, allocator);
871
872	return LZMA_OK;
873	}
874
875
876	/// Duplicate an index_stream.
877	static index_stream *
878	index_dup_stream(const index_stream src, const lzma_allocator allocator)
879	{
880	// Catch a somewhat theoretical integer overflow.
881	if (src->record_count > PREALLOC_MAX)
882	return NULL;
883
884	// Allocate and initialize a new Stream.
885	index_stream *dest = index_stream_init(src->node.compressed_base,
886	src->node.uncompressed_base, src->number,
887	src->block_number_base, allocator);
888	if (dest == NULL)
889	return NULL;
890
891	// Copy the overall information.
892	dest->record_count = src->record_count;
893	dest->index_list_size = src->index_list_size;
894	dest->stream_flags = src->stream_flags;
895	dest->stream_padding = src->stream_padding;
896
897	// Return if there are no groups to duplicate.
898	if (src->groups.leftmost == NULL)
899	return dest;
900
901	// Allocate memory for the Records. We put all the Records into
902	// a single group. It's simplest and also tends to make
903	// lzma_index_locate() a little bit faster with very big Indexes.
904	index_group *destg = lzma_alloc(sizeof(index_group)
905	+ src->record_count * sizeof(index_record),
906	allocator);
907	if (destg == NULL) {
908	index_stream_end(dest, allocator);
909	return NULL;
910	}
911
912	// Initialize destg.
913	destg->node.uncompressed_base = 0;
914	destg->node.compressed_base = 0;
915	destg->number_base = 1;
916	destg->allocated = src->record_count;
917	destg->last = src->record_count - 1;
918
919	// Go through all the groups in src and copy the Records into destg.
920	const index_group srcg = (const index_group )(src->groups.leftmost);
921	size_t i = 0;
922	do {
923	memcpy(destg->records + i, srcg->records,
924	(srcg->last + 1) * sizeof(index_record));
925	i += srcg->last + 1;
926	srcg = index_tree_next(&srcg->node);
927	} while (srcg != NULL);
928
929	assert(i == destg->allocated);
930
931	// Add the group to the new Stream.
932	index_tree_append(&dest->groups, &destg->node);
933
934	return dest;
935	}
936
937
938	extern LZMA_API(lzma_index *)
939	lzma_index_dup(const lzma_index src, const lzma_allocator allocator)
940	{
941	// Allocate the base structure (no initial Stream).
942	lzma_index *dest = index_init_plain(allocator);
943	if (dest == NULL)
944	return NULL;
945
946	// Copy the totals.
947	dest->uncompressed_size = src->uncompressed_size;
948	dest->total_size = src->total_size;
949	dest->record_count = src->record_count;
950	dest->index_list_size = src->index_list_size;
951
952	// Copy the Streams and the groups in them.
953	const index_stream *srcstream
954	= (const index_stream *)(src->streams.leftmost);
955	do {
956	index_stream *deststream = index_dup_stream(
957	srcstream, allocator);
958	if (deststream == NULL) {
959	lzma_index_end(dest, allocator);
960	return NULL;
961	}
962
963	index_tree_append(&dest->streams, &deststream->node);
964
965	srcstream = index_tree_next(&srcstream->node);
966	} while (srcstream != NULL);
967
968	return dest;
969	}
970
971
972	/// Indexing for lzma_index_iter.internal[]
973	enum {
974	ITER_INDEX,
975	ITER_STREAM,
976	ITER_GROUP,
977	ITER_RECORD,
978	ITER_METHOD,
979	};
980
981
982	/// Values for lzma_index_iter.internal[ITER_METHOD].s
983	enum {
984	ITER_METHOD_NORMAL,
985	ITER_METHOD_NEXT,
986	ITER_METHOD_LEFTMOST,
987	};
988
989
990	static void
991	iter_set_info(lzma_index_iter *iter)
992	{
993	const lzma_index *i = iter->internal[ITER_INDEX].p;
994	const index_stream *stream = iter->internal[ITER_STREAM].p;
995	const index_group *group = iter->internal[ITER_GROUP].p;
996	const size_t record = iter->internal[ITER_RECORD].s;
997
998	// lzma_index_iter.internal must not contain a pointer to the last
999	// group in the index, because that may be reallocated by
1000	// lzma_index_cat().
1001	if (group == NULL) {
1002	// There are no groups.
1003	assert(stream->groups.root == NULL);
1004	iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST;
1005
1006	} else if (i->streams.rightmost != &stream->node
1007	\|\| stream->groups.rightmost != &group->node) {
1008	// The group is not not the last group in the index.
1009	iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL;
1010
1011	} else if (stream->groups.leftmost != &group->node) {
1012	// The group isn't the only group in the Stream, thus we
1013	// know that it must have a parent group i.e. it's not
1014	// the root node.
1015	assert(stream->groups.root != &group->node);
1016	assert(group->node.parent->right == &group->node);
1017	iter->internal[ITER_METHOD].s = ITER_METHOD_NEXT;
1018	iter->internal[ITER_GROUP].p = group->node.parent;
1019
1020	} else {
1021	// The Stream has only one group.
1022	assert(stream->groups.root == &group->node);
1023	assert(group->node.parent == NULL);
1024	iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST;
1025	iter->internal[ITER_GROUP].p = NULL;
1026	}
1027
1028	// NOTE: lzma_index_iter.stream.number is lzma_vli but we use uint32_t
1029	// internally.
1030	iter->stream.number = stream->number;
1031	iter->stream.block_count = stream->record_count;
1032	iter->stream.compressed_offset = stream->node.compressed_base;
1033	iter->stream.uncompressed_offset = stream->node.uncompressed_base;
1034
1035	// iter->stream.flags will be NULL if the Stream Flags haven't been
1036	// set with lzma_index_stream_flags().
1037	iter->stream.flags = stream->stream_flags.version == UINT32_MAX
1038	? NULL : &stream->stream_flags;
1039	iter->stream.padding = stream->stream_padding;
1040
1041	if (stream->groups.rightmost == NULL) {
1042	// Stream has no Blocks.
1043	iter->stream.compressed_size = index_size(0, 0)
1044	+ 2 * LZMA_STREAM_HEADER_SIZE;
1045	iter->stream.uncompressed_size = 0;
1046	} else {
1047	const index_group g = (const index_group )(
1048	stream->groups.rightmost);
1049
1050	// Stream Header + Stream Footer + Index + Blocks
1051	iter->stream.compressed_size = 2 * LZMA_STREAM_HEADER_SIZE
1052	+ index_size(stream->record_count,
1053	stream->index_list_size)
1054	+ vli_ceil4(g->records[g->last].unpadded_sum);
1055	iter->stream.uncompressed_size
1056	= g->records[g->last].uncompressed_sum;
1057	}
1058
1059	if (group != NULL) {
1060	iter->block.number_in_stream = group->number_base + record;
1061	iter->block.number_in_file = iter->block.number_in_stream
1062	+ stream->block_number_base;
1063
1064	iter->block.compressed_stream_offset
1065	= record == 0 ? group->node.compressed_base
1066	: vli_ceil4(group->records[
1067	record - 1].unpadded_sum);
1068	iter->block.uncompressed_stream_offset
1069	= record == 0 ? group->node.uncompressed_base
1070	: group->records[record - 1].uncompressed_sum;
1071
1072	iter->block.uncompressed_size
1073	= group->records[record].uncompressed_sum
1074	- iter->block.uncompressed_stream_offset;
1075	iter->block.unpadded_size
1076	= group->records[record].unpadded_sum
1077	- iter->block.compressed_stream_offset;
1078	iter->block.total_size = vli_ceil4(iter->block.unpadded_size);
1079
1080	iter->block.compressed_stream_offset
1081	+= LZMA_STREAM_HEADER_SIZE;
1082
1083	iter->block.compressed_file_offset
1084	= iter->block.compressed_stream_offset
1085	+ iter->stream.compressed_offset;
1086	iter->block.uncompressed_file_offset
1087	= iter->block.uncompressed_stream_offset
1088	+ iter->stream.uncompressed_offset;
1089	}
1090
1091	return;
1092	}
1093
1094
1095	extern LZMA_API(void)
1096	lzma_index_iter_init(lzma_index_iter iter, const lzma_index i)
1097	{
1098	iter->internal[ITER_INDEX].p = i;
1099	lzma_index_iter_rewind(iter);
1100	return;
1101	}
1102
1103
1104	extern LZMA_API(void)
1105	lzma_index_iter_rewind(lzma_index_iter *iter)
1106	{
1107	iter->internal[ITER_STREAM].p = NULL;
1108	iter->internal[ITER_GROUP].p = NULL;
1109	iter->internal[ITER_RECORD].s = 0;
1110	iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL;
1111	return;
1112	}
1113
1114
1115	extern LZMA_API(lzma_bool)
1116	lzma_index_iter_next(lzma_index_iter *iter, lzma_index_iter_mode mode)
1117	{
1118	// Catch unsupported mode values.
1119	if ((unsigned int)(mode) > LZMA_INDEX_ITER_NONEMPTY_BLOCK)
1120	return true;
1121
1122	const lzma_index *i = iter->internal[ITER_INDEX].p;
1123	const index_stream *stream = iter->internal[ITER_STREAM].p;
1124	const index_group *group = NULL;
1125	size_t record = iter->internal[ITER_RECORD].s;
1126
1127	// If we are being asked for the next Stream, leave group to NULL
1128	// so that the rest of the this function thinks that this Stream
1129	// has no groups and will thus go to the next Stream.
1130	if (mode != LZMA_INDEX_ITER_STREAM) {
1131	// Get the pointer to the current group. See iter_set_inf()
1132	// for explanation.
1133	switch (iter->internal[ITER_METHOD].s) {
1134	case ITER_METHOD_NORMAL:
1135	group = iter->internal[ITER_GROUP].p;
1136	break;
1137
1138	case ITER_METHOD_NEXT:
1139	group = index_tree_next(iter->internal[ITER_GROUP].p);
1140	break;
1141
1142	case ITER_METHOD_LEFTMOST:
1143	group = (const index_group *)(
1144	stream->groups.leftmost);
1145	break;
1146	}
1147	}
1148
1149	again:
1150	if (stream == NULL) {
1151	// We at the beginning of the lzma_index.
1152	// Locate the first Stream.
1153	stream = (const index_stream *)(i->streams.leftmost);
1154	if (mode >= LZMA_INDEX_ITER_BLOCK) {
1155	// Since we are being asked to return information
1156	// about the first a Block, skip Streams that have
1157	// no Blocks.
1158	while (stream->groups.leftmost == NULL) {
1159	stream = index_tree_next(&stream->node);
1160	if (stream == NULL)
1161	return true;
1162	}
1163	}
1164
1165	// Start from the first Record in the Stream.
1166	group = (const index_group *)(stream->groups.leftmost);
1167	record = 0;
1168
1169	} else if (group != NULL && record < group->last) {
1170	// The next Record is in the same group.
1171	++record;
1172
1173	} else {
1174	// This group has no more Records or this Stream has
1175	// no Blocks at all.
1176	record = 0;
1177
1178	// If group is not NULL, this Stream has at least one Block
1179	// and thus at least one group. Find the next group.
1180	if (group != NULL)
1181	group = index_tree_next(&group->node);
1182
1183	if (group == NULL) {
1184	// This Stream has no more Records. Find the next
1185	// Stream. If we are being asked to return information
1186	// about a Block, we skip empty Streams.
1187	do {
1188	stream = index_tree_next(&stream->node);
1189	if (stream == NULL)
1190	return true;
1191	} while (mode >= LZMA_INDEX_ITER_BLOCK
1192	&& stream->groups.leftmost == NULL);
1193
1194	group = (const index_group *)(
1195	stream->groups.leftmost);
1196	}
1197	}
1198
1199	if (mode == LZMA_INDEX_ITER_NONEMPTY_BLOCK) {
1200	// We need to look for the next Block again if this Block
1201	// is empty.
1202	if (record == 0) {
1203	if (group->node.uncompressed_base
1204	== group->records[0].uncompressed_sum)
1205	goto again;
1206	} else if (group->records[record - 1].uncompressed_sum
1207	== group->records[record].uncompressed_sum) {
1208	goto again;
1209	}
1210	}
1211
1212	iter->internal[ITER_STREAM].p = stream;
1213	iter->internal[ITER_GROUP].p = group;
1214	iter->internal[ITER_RECORD].s = record;
1215
1216	iter_set_info(iter);
1217
1218	return false;
1219	}
1220
1221
1222	extern LZMA_API(lzma_bool)
1223	lzma_index_iter_locate(lzma_index_iter *iter, lzma_vli target)
1224	{
1225	const lzma_index *i = iter->internal[ITER_INDEX].p;
1226
1227	// If the target is past the end of the file, return immediately.
1228	if (i->uncompressed_size <= target)
1229	return true;
1230
1231	// Locate the Stream containing the target offset.
1232	const index_stream *stream = index_tree_locate(&i->streams, target);
1233	assert(stream != NULL);
1234	target -= stream->node.uncompressed_base;
1235
1236	// Locate the group containing the target offset.
1237	const index_group *group = index_tree_locate(&stream->groups, target);
1238	assert(group != NULL);
1239
1240	// Use binary search to locate the exact Record. It is the first
1241	// Record whose uncompressed_sum is greater than target.
1242	// This is because we want the rightmost Record that fulfills the
1243	// search criterion. It is possible that there are empty Blocks;
1244	// we don't want to return them.
1245	size_t left = 0;
1246	size_t right = group->last;
1247
1248	while (left < right) {
1249	const size_t pos = left + (right - left) / 2;
1250	if (group->records[pos].uncompressed_sum <= target)
1251	left = pos + 1;
1252	else
1253	right = pos;
1254	}
1255
1256	iter->internal[ITER_STREAM].p = stream;
1257	iter->internal[ITER_GROUP].p = group;
1258	iter->internal[ITER_RECORD].s = left;
1259
1260	iter_set_info(iter);
1261
1262	return false;
1263	}

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/liblzma-5.4.1/common/index.c@ 98879

Download in other formats: