VirtualBox

source: vbox/trunk/src/libs/liblzma-5.4.1/simple/arm64.c@ 100111

Last change on this file since 100111 was 98730, checked in by vboxsync, 22 months ago

libs/liblzma-5.4.1: Export to OSE, bugref:10254

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.1 KB
Line 
1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file arm64.c
4/// \brief Filter for ARM64 binaries
5///
6/// This converts ARM64 relative addresses in the BL and ADRP immediates
7/// to absolute values to increase redundancy of ARM64 code.
8///
9/// Converting B or ADR instructions was also tested but it's not useful.
10/// A majority of the jumps for the B instruction are very small (+/- 0xFF).
11/// These are typical for loops and if-statements. Encoding them to their
12/// absolute address reduces redundancy since many of the small relative
13/// jump values are repeated, but very few of the absolute addresses are.
14//
15// Authors: Lasse Collin
16// Jia Tan
17// Igor Pavlov
18//
19// This file has been put into the public domain.
20// You can do whatever you want with this file.
21//
22///////////////////////////////////////////////////////////////////////////////
23
24#include "simple_private.h"
25
26
27static size_t
28arm64_code(void *simple lzma_attribute((__unused__)),
29 uint32_t now_pos, bool is_encoder,
30 uint8_t *buffer, size_t size)
31{
32 size_t i;
33
34 // Clang 14.0.6 on x86-64 makes this four times bigger and 40 % slower
35 // with auto-vectorization that is enabled by default with -O2.
36 // Such vectorization bloat happens with -O2 when targeting ARM64 too
37 // but performance hasn't been tested.
38#ifdef __clang__
39# pragma clang loop vectorize(disable)
40#endif
41 for (i = 0; i + 4 <= size; i += 4) {
42 uint32_t pc = (uint32_t)(now_pos + i);
43 uint32_t instr = read32le(buffer + i);
44
45 if ((instr >> 26) == 0x25) {
46 // BL instruction:
47 // The full 26-bit immediate is converted.
48 // The range is +/-128 MiB.
49 //
50 // Using the full range is helps quite a lot with
51 // big executables. Smaller range would reduce false
52 // positives in non-code sections of the input though
53 // so this is a compromise that slightly favors big
54 // files. With the full range only six bits of the 32
55 // need to match to trigger a conversion.
56 const uint32_t src = instr;
57 instr = 0x94000000;
58
59 pc >>= 2;
60 if (!is_encoder)
61 pc = 0U - pc;
62
63 instr |= (src + pc) & 0x03FFFFFF;
64 write32le(buffer + i, instr);
65
66 } else if ((instr & 0x9F000000) == 0x90000000) {
67 // ADRP instruction:
68 // Only values in the range +/-512 MiB are converted.
69 //
70 // Using less than the full +/-4 GiB range reduces
71 // false positives on non-code sections of the input
72 // while being excellent for executables up to 512 MiB.
73 // The positive effect of ADRP conversion is smaller
74 // than that of BL but it also doesn't hurt so much in
75 // non-code sections of input because, with +/-512 MiB
76 // range, nine bits of 32 need to match to trigger a
77 // conversion (two 10-bit match choices = 9 bits).
78 const uint32_t src = ((instr >> 29) & 3)
79 | ((instr >> 3) & 0x001FFFFC);
80
81 // With the addition only one branch is needed to
82 // check the +/- range. This is usually false when
83 // processing ARM64 code so branch prediction will
84 // handle it well in terms of performance.
85 //
86 //if ((src & 0x001E0000) != 0
87 // && (src & 0x001E0000) != 0x001E0000)
88 if ((src + 0x00020000) & 0x001C0000)
89 continue;
90
91 instr &= 0x9000001F;
92
93 pc >>= 12;
94 if (!is_encoder)
95 pc = 0U - pc;
96
97 const uint32_t dest = src + pc;
98 instr |= (dest & 3) << 29;
99 instr |= (dest & 0x0003FFFC) << 3;
100 instr |= (0U - (dest & 0x00020000)) & 0x00E00000;
101 write32le(buffer + i, instr);
102 }
103 }
104
105 return i;
106}
107
108
109static lzma_ret
110arm64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
111 const lzma_filter_info *filters, bool is_encoder)
112{
113 return lzma_simple_coder_init(next, allocator, filters,
114 &arm64_code, 0, 4, 4, is_encoder);
115}
116
117
#ifdef HAVE_ENCODER_ARM64
/// \brief      Initialize the ARM64 filter in the encoding direction
///
/// Only compiled when HAVE_ENCODER_ARM64 is defined.
///
/// \return     Whatever arm64_coder_init() returns.
extern lzma_ret
lzma_simple_arm64_encoder_init(lzma_next_coder *next,
		const lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	const bool is_encoder = true;

	return arm64_coder_init(next, allocator, filters, is_encoder);
}
#endif
127
128
#ifdef HAVE_DECODER_ARM64
/// \brief      Initialize the ARM64 filter in the decoding direction
///
/// Only compiled when HAVE_DECODER_ARM64 is defined.
///
/// \return     Whatever arm64_coder_init() returns.
extern lzma_ret
lzma_simple_arm64_decoder_init(lzma_next_coder *next,
		const lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	const bool is_encoder = false;

	return arm64_coder_init(next, allocator, filters, is_encoder);
}
#endif
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette