arm64.c@ 100111

Last change on this file since 100111 was 98730, checked in by vboxsync, 22 months ago
libs/liblzma-5.4.1: Export to OSE, bugref:10254
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 4.1 KB

Line
1	///////////////////////////////////////////////////////////////////////////////
2	//
3	/// \file arm64.c
4	/// \brief Filter for ARM64 binaries
5	///
6	/// This converts ARM64 relative addresses in the BL and ADRP immediates
7	/// to absolute values to increase redundancy of ARM64 code.
8	///
9	/// Converting B or ADR instructions was also tested but it's not useful.
10	/// A majority of the jumps for the B instruction are very small (+/- 0xFF).
11	/// These are typical for loops and if-statements. Encoding them to their
12	/// absolute address reduces redundancy since many of the small relative
13	/// jump values are repeated, but very few of the absolute addresses are.
14	//
15	// Authors: Lasse Collin
16	// Jia Tan
17	// Igor Pavlov
18	//
19	// This file has been put into the public domain.
20	// You can do whatever you want with this file.
21	//
22	///////////////////////////////////////////////////////////////////////////////
23
24	#include "simple_private.h"
25
26
27	static size_t
28	arm64_code(void *simple lzma_attribute((__unused__)),
29	uint32_t now_pos, bool is_encoder,
30	uint8_t *buffer, size_t size)
31	{
32	size_t i;
33
34	// Clang 14.0.6 on x86-64 makes this four times bigger and 40 % slower
35	// with auto-vectorization that is enabled by default with -O2.
36	// Such vectorization bloat happens with -O2 when targeting ARM64 too
37	// but performance hasn't been tested.
38	#ifdef __clang__
39	# pragma clang loop vectorize(disable)
40	#endif
41	for (i = 0; i + 4 <= size; i += 4) {
42	uint32_t pc = (uint32_t)(now_pos + i);
43	uint32_t instr = read32le(buffer + i);
44
45	if ((instr >> 26) == 0x25) {
46	// BL instruction:
47	// The full 26-bit immediate is converted.
48	// The range is +/-128 MiB.
49	//
50	// Using the full range is helps quite a lot with
51	// big executables. Smaller range would reduce false
52	// positives in non-code sections of the input though
53	// so this is a compromise that slightly favors big
54	// files. With the full range only six bits of the 32
55	// need to match to trigger a conversion.
56	const uint32_t src = instr;
57	instr = 0x94000000;
58
59	pc >>= 2;
60	if (!is_encoder)
61	pc = 0U - pc;
62
63	instr \|= (src + pc) & 0x03FFFFFF;
64	write32le(buffer + i, instr);
65
66	} else if ((instr & 0x9F000000) == 0x90000000) {
67	// ADRP instruction:
68	// Only values in the range +/-512 MiB are converted.
69	//
70	// Using less than the full +/-4 GiB range reduces
71	// false positives on non-code sections of the input
72	// while being excellent for executables up to 512 MiB.
73	// The positive effect of ADRP conversion is smaller
74	// than that of BL but it also doesn't hurt so much in
75	// non-code sections of input because, with +/-512 MiB
76	// range, nine bits of 32 need to match to trigger a
77	// conversion (two 10-bit match choices = 9 bits).
78	const uint32_t src = ((instr >> 29) & 3)
79	\| ((instr >> 3) & 0x001FFFFC);
80
81	// With the addition only one branch is needed to
82	// check the +/- range. This is usually false when
83	// processing ARM64 code so branch prediction will
84	// handle it well in terms of performance.
85	//
86	//if ((src & 0x001E0000) != 0
87	// && (src & 0x001E0000) != 0x001E0000)
88	if ((src + 0x00020000) & 0x001C0000)
89	continue;
90
91	instr &= 0x9000001F;
92
93	pc >>= 12;
94	if (!is_encoder)
95	pc = 0U - pc;
96
97	const uint32_t dest = src + pc;
98	instr \|= (dest & 3) << 29;
99	instr \|= (dest & 0x0003FFFC) << 3;
100	instr \|= (0U - (dest & 0x00020000)) & 0x00E00000;
101	write32le(buffer + i, instr);
102	}
103	}
104
105	return i;
106	}
107
108
109	static lzma_ret
110	arm64_coder_init(lzma_next_coder next, const lzma_allocator allocator,
111	const lzma_filter_info *filters, bool is_encoder)
112	{
113	return lzma_simple_coder_init(next, allocator, filters,
114	&arm64_code, 0, 4, 4, is_encoder);
115	}
116
117
#ifdef HAVE_ENCODER_ARM64
/// \brief      Initialize a coder as the ARM64 filter encoder
///
/// Only compiled when HAVE_ENCODER_ARM64 is defined. Delegates to
/// arm64_coder_init() with the encoder flag set.
///
/// \return     Whatever arm64_coder_init() returns.
extern lzma_ret
lzma_simple_arm64_encoder_init(lzma_next_coder *next,
		const lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	const bool is_encoder = true;

	return arm64_coder_init(next, allocator, filters, is_encoder);
}
#endif
127
128
#ifdef HAVE_DECODER_ARM64
/// \brief      Initialize a coder as the ARM64 filter decoder
///
/// Only compiled when HAVE_DECODER_ARM64 is defined. Delegates to
/// arm64_coder_init() with the encoder flag cleared.
///
/// \return     Whatever arm64_coder_init() returns.
extern lzma_ret
lzma_simple_arm64_decoder_init(lzma_next_coder *next,
		const lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	const bool is_encoder = false;

	return arm64_coder_init(next, allocator, filters, is_encoder);
}
#endif

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/liblzma-5.4.1/simple/arm64.c@ 100111

Download in other formats: