1 | /* $Id: IEMAllN8veExecMem.cpp 106740 2024-10-28 10:26:13Z vboxsync $ */
|
---|
2 | /** @file
|
---|
3 | * IEM - Native Recompiler, Executable Memory Allocator.
|
---|
4 | */
|
---|
5 |
|
---|
6 | /*
|
---|
7 | * Copyright (C) 2023-2024 Oracle and/or its affiliates.
|
---|
8 | *
|
---|
9 | * This file is part of VirtualBox base platform packages, as
|
---|
10 | * available from https://www.virtualbox.org.
|
---|
11 | *
|
---|
12 | * This program is free software; you can redistribute it and/or
|
---|
13 | * modify it under the terms of the GNU General Public License
|
---|
14 | * as published by the Free Software Foundation, in version 3 of the
|
---|
15 | * License.
|
---|
16 | *
|
---|
17 | * This program is distributed in the hope that it will be useful, but
|
---|
18 | * WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
20 | * General Public License for more details.
|
---|
21 | *
|
---|
22 | * You should have received a copy of the GNU General Public License
|
---|
23 | * along with this program; if not, see <https://www.gnu.org/licenses>.
|
---|
24 | *
|
---|
25 | * SPDX-License-Identifier: GPL-3.0-only
|
---|
26 | */
|
---|
27 |
|
---|
28 |
|
---|
29 | /*********************************************************************************************************************************
|
---|
30 | * Header Files *
|
---|
31 | *********************************************************************************************************************************/
|
---|
32 | #define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
|
---|
33 | #define IEM_WITH_OPAQUE_DECODER_STATE
|
---|
34 | #define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
|
---|
35 | #include <VBox/vmm/iem.h>
|
---|
36 | #include <VBox/vmm/cpum.h>
|
---|
37 | #include "IEMInternal.h"
|
---|
38 | #include <VBox/vmm/vmcc.h>
|
---|
39 | #include <VBox/log.h>
|
---|
40 | #include <VBox/err.h>
|
---|
41 | #include <VBox/param.h>
|
---|
42 | #include <iprt/assert.h>
|
---|
43 | #include <iprt/mem.h>
|
---|
44 | #include <iprt/string.h>
|
---|
45 | #if defined(RT_ARCH_AMD64)
|
---|
46 | # include <iprt/x86.h>
|
---|
47 | #elif defined(RT_ARCH_ARM64)
|
---|
48 | # include <iprt/armv8.h>
|
---|
49 | #endif
|
---|
50 |
|
---|
51 | #ifdef RT_OS_WINDOWS
|
---|
52 | # include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
|
---|
53 | extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
|
---|
54 | extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
|
---|
55 | #else
|
---|
56 | # include <iprt/formats/dwarf.h>
|
---|
57 | # if defined(RT_OS_DARWIN)
|
---|
58 | # include <libkern/OSCacheControl.h>
|
---|
59 | # include <mach/mach.h>
|
---|
60 | # include <mach/mach_vm.h>
|
---|
61 | # define IEMNATIVE_USE_LIBUNWIND
|
---|
62 | extern "C" void __register_frame(const void *pvFde);
|
---|
63 | extern "C" void __deregister_frame(const void *pvFde);
|
---|
64 | # else
|
---|
65 | # ifdef DEBUG_bird /** @todo not thread safe yet */
|
---|
66 | # define IEMNATIVE_USE_GDB_JIT
|
---|
67 | # endif
|
---|
68 | # ifdef IEMNATIVE_USE_GDB_JIT
|
---|
69 | # include <iprt/critsect.h>
|
---|
70 | # include <iprt/once.h>
|
---|
71 | # include <iprt/formats/elf64.h>
|
---|
72 | # endif
|
---|
73 | extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
|
---|
74 | extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
|
---|
75 | # endif
|
---|
76 | #endif
|
---|
77 |
|
---|
78 | #include "IEMN8veRecompiler.h"
|
---|
79 |
|
---|
80 |
|
---|
81 | /*********************************************************************************************************************************
|
---|
82 | * Executable Memory Allocator *
|
---|
83 | *********************************************************************************************************************************/
|
---|
84 | /** The chunk sub-allocation unit size in bytes. */
|
---|
85 | #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 256
|
---|
86 | /** The chunk sub-allocation unit size as a shift factor. */
|
---|
87 | #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 8
|
---|
88 | /** Enables adding a header to the sub-allocator allocations.
|
---|
89 | * This is useful for freeing up executable memory among other things. */
|
---|
90 | #define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
|
---|
91 | /** Use alternative pruning. */
|
---|
92 | #define IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
|
---|
93 |
|
---|
94 |
|
---|
95 | #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
|
---|
96 | # ifdef IEMNATIVE_USE_GDB_JIT
|
---|
97 | # define IEMNATIVE_USE_GDB_JIT_ET_DYN
|
---|
98 |
|
---|
/**
 * GDB JIT: Code entry.
 *
 * A list node describing one in-memory symbol file.  Entries are chained
 * together via pNext / pPrev and referenced from GDBJITDESCRIPTOR.
 */
typedef struct GDBJITCODEENTRY
{
    /** The next entry in the list. */
    struct GDBJITCODEENTRY *pNext;
    /** The previous entry in the list. */
    struct GDBJITCODEENTRY *pPrev;
    /** Pointer to the symbol file data. */
    uint8_t                *pbSymFile;
    /** The size of the symbol file data (presumably in bytes - confirm
     *  against the code filling in the entry). */
    uint64_t                cbSymFile;
} GDBJITCODEENTRY;
107 |
|
---|
/**
 * GDB JIT: Actions.
 *
 * The values stored in GDBJITDESCRIPTOR::enmAction.  The underlying type is
 * fixed to 32 bits.
 */
typedef enum GDBJITACTIONS : uint32_t
{
    /** Nothing to do. */
    kGdbJitaction_NoAction = 0,
    /** An entry is being registered. */
    kGdbJitaction_Register,
    /** An entry is being unregistered. */
    kGdbJitaction_Unregister
} GDBJITACTIONS;
113 |
|
---|
114 | /** GDB JIT: Descriptor. */
|
---|
115 | typedef struct GDBJITDESCRIPTOR
|
---|
116 | {
|
---|
117 | uint32_t uVersion;
|
---|
118 | GDBJITACTIONS enmAction;
|
---|
119 | GDBJITCODEENTRY *pRelevant;
|
---|
120 | GDBJITCODEENTRY *pHead;
|
---|
121 | /** Our addition: */
|
---|
122 | GDBJITCODEENTRY *pTail;
|
---|
123 | } GDBJITDESCRIPTOR;
|
---|
124 |
|
---|
125 | /** GDB JIT: Our simple symbol file data. */
|
---|
126 | typedef struct GDBJITSYMFILE
|
---|
127 | {
|
---|
128 | Elf64_Ehdr EHdr;
|
---|
129 | # ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
|
---|
130 | Elf64_Shdr aShdrs[5];
|
---|
131 | # else
|
---|
132 | Elf64_Shdr aShdrs[7];
|
---|
133 | Elf64_Phdr aPhdrs[2];
|
---|
134 | # endif
|
---|
135 | /** The dwarf ehframe data for the chunk. */
|
---|
136 | uint8_t abEhFrame[512];
|
---|
137 | char szzStrTab[128];
|
---|
138 | Elf64_Sym aSymbols[3];
|
---|
139 | # ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
|
---|
140 | Elf64_Sym aDynSyms[2];
|
---|
141 | Elf64_Dyn aDyn[6];
|
---|
142 | # endif
|
---|
143 | } GDBJITSYMFILE;
|
---|
144 |
|
---|
145 | extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
|
---|
146 | extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
|
---|
147 |
|
---|
148 | /** Init once for g_IemNativeGdbJitLock. */
|
---|
149 | static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
|
---|
150 | /** Init once for the critical section. */
|
---|
151 | static RTCRITSECT g_IemNativeGdbJitLock;
|
---|
152 |
|
---|
153 | /** GDB reads the info here. */
|
---|
154 | GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
|
---|
155 |
|
---|
156 | /** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
|
---|
157 | DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
|
---|
158 | {
|
---|
159 | ASMNopPause();
|
---|
160 | }
|
---|
161 |
|
---|
162 | /** @callback_method_impl{FNRTONCE} */
|
---|
163 | static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
|
---|
164 | {
|
---|
165 | RT_NOREF(pvUser);
|
---|
166 | return RTCritSectInit(&g_IemNativeGdbJitLock);
|
---|
167 | }
|
---|
168 |
|
---|
169 |
|
---|
170 | # endif /* IEMNATIVE_USE_GDB_JIT */
|
---|
171 |
|
---|
/**
 * Per-chunk unwind info for non-windows hosts.
 *
 * The layout depends on which unwinder interface is in use
 * (IEMNATIVE_USE_LIBUNWIND) and on whether the GDB JIT interface is enabled
 * (IEMNATIVE_USE_GDB_JIT).
 */
typedef struct IEMEXECMEMCHUNKEHFRAME
{
# ifdef IEMNATIVE_USE_LIBUNWIND
    /** The offset of the FDA into abEhFrame. */
    uintptr_t               offFda;
# else
    /** 'struct object' storage area. */
    uint8_t                 abObject[1024];
# endif
# ifdef IEMNATIVE_USE_GDB_JIT
#  if 0
    /** The GDB JIT 'symbol file' data (currently disabled). */
    GDBJITSYMFILE           GdbJitSymFile;
#  endif
    /** The GDB JIT list entry. */
    GDBJITCODEENTRY         GdbJitEntry;
# endif
    /** The dwarf ehframe data for the chunk. */
    uint8_t                 abEhFrame[512];
} IEMEXECMEMCHUNKEHFRAME;
/** Pointer to per-chunk unwind info for non-windows hosts. */
typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
197 | #endif
|
---|
198 |
|
---|
199 |
|
---|
200 | /**
|
---|
201 | * An chunk of executable memory.
|
---|
202 | */
|
---|
203 | typedef struct IEMEXECMEMCHUNK
|
---|
204 | {
|
---|
205 | /** Number of free items in this chunk. */
|
---|
206 | uint32_t cFreeUnits;
|
---|
207 | /** Hint were to start searching for free space in the allocation bitmap. */
|
---|
208 | uint32_t idxFreeHint;
|
---|
209 | /** Pointer to the readable/writeable view of the memory chunk. */
|
---|
210 | void *pvChunkRw;
|
---|
211 | /** Pointer to the readable/executable view of the memory chunk. */
|
---|
212 | void *pvChunkRx;
|
---|
213 | /** Pointer to the context structure detailing the per chunk common code. */
|
---|
214 | PCIEMNATIVEPERCHUNKCTX pCtx;
|
---|
215 | #ifdef IN_RING3
|
---|
216 | /**
|
---|
217 | * Pointer to the unwind information.
|
---|
218 | *
|
---|
219 | * This is used during C++ throw and longjmp (windows and probably most other
|
---|
220 | * platforms). Some debuggers (windbg) makes use of it as well.
|
---|
221 | *
|
---|
222 | * Windows: This is allocated from hHeap on windows because (at least for
|
---|
223 | * AMD64) the UNWIND_INFO structure address in the
|
---|
224 | * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
|
---|
225 | *
|
---|
226 | * Others: Allocated from the regular heap to avoid unnecessary executable data
|
---|
227 | * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
|
---|
228 | void *pvUnwindInfo;
|
---|
229 | #elif defined(IN_RING0)
|
---|
230 | /** Allocation handle. */
|
---|
231 | RTR0MEMOBJ hMemObj;
|
---|
232 | #endif
|
---|
233 | } IEMEXECMEMCHUNK;
|
---|
234 | /** Pointer to a memory chunk. */
|
---|
235 | typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
|
---|
236 |
|
---|
237 |
|
---|
238 | /**
|
---|
239 | * Executable memory allocator for the native recompiler.
|
---|
240 | */
|
---|
241 | typedef struct IEMEXECMEMALLOCATOR
|
---|
242 | {
|
---|
243 | /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
|
---|
244 | uint32_t uMagic;
|
---|
245 |
|
---|
246 | /** The chunk size. */
|
---|
247 | uint32_t cbChunk;
|
---|
248 | /** The maximum number of chunks. */
|
---|
249 | uint32_t cMaxChunks;
|
---|
250 | /** The current number of chunks. */
|
---|
251 | uint32_t cChunks;
|
---|
252 | /** Hint where to start looking for available memory. */
|
---|
253 | uint32_t idxChunkHint;
|
---|
254 | /** Statistics: Current number of allocations. */
|
---|
255 | uint32_t cAllocations;
|
---|
256 |
|
---|
257 | /** The total amount of memory available. */
|
---|
258 | uint64_t cbTotal;
|
---|
259 | /** Total amount of free memory. */
|
---|
260 | uint64_t cbFree;
|
---|
261 | /** Total amount of memory allocated. */
|
---|
262 | uint64_t cbAllocated;
|
---|
263 |
|
---|
264 | /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
|
---|
265 | *
|
---|
266 | * Since the chunk size is a power of two and the minimum chunk size is a lot
|
---|
267 | * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
|
---|
268 | * require a whole number of uint64_t elements in the allocation bitmap. So,
|
---|
269 | * for sake of simplicity, they are allocated as one continous chunk for
|
---|
270 | * simplicity/laziness. */
|
---|
271 | uint64_t *pbmAlloc;
|
---|
272 | /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
|
---|
273 | uint32_t cUnitsPerChunk;
|
---|
274 | /** Number of bitmap elements per chunk (for quickly locating the bitmap
|
---|
275 | * portion corresponding to an chunk). */
|
---|
276 | uint32_t cBitmapElementsPerChunk;
|
---|
277 |
|
---|
278 | /** Number of times we fruitlessly scanned a chunk for free space. */
|
---|
279 | uint64_t cFruitlessChunkScans;
|
---|
280 |
|
---|
281 | #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
|
---|
282 | /** The next chunk to prune in. */
|
---|
283 | uint32_t idxChunkPrune;
|
---|
284 | /** Where in chunk offset to start pruning at. */
|
---|
285 | uint32_t offChunkPrune;
|
---|
286 | /** Profiling the pruning code. */
|
---|
287 | STAMPROFILE StatPruneProf;
|
---|
288 | /** Number of bytes recovered by the pruning. */
|
---|
289 | STAMPROFILE StatPruneRecovered;
|
---|
290 | #endif
|
---|
291 |
|
---|
292 | #ifdef VBOX_WITH_STATISTICS
|
---|
293 | STAMPROFILE StatAlloc;
|
---|
294 | /** Total amount of memory not being usable currently due to IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE. */
|
---|
295 | uint64_t cbUnusable;
|
---|
296 | /** Allocation size distribution (in alloc units; 0 is the slop bucket). */
|
---|
297 | STAMCOUNTER aStatSizes[16];
|
---|
298 | #endif
|
---|
299 |
|
---|
300 | #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
|
---|
301 | /** Pointer to the array of unwind info running parallel to aChunks (same
|
---|
302 | * allocation as this structure, located after the bitmaps).
|
---|
303 | * (For Windows, the structures must reside in 32-bit RVA distance to the
|
---|
304 | * actual chunk, so they are allocated off the chunk.) */
|
---|
305 | PIEMEXECMEMCHUNKEHFRAME paEhFrames;
|
---|
306 | #endif
|
---|
307 |
|
---|
308 | /** The allocation chunks. */
|
---|
309 | RT_FLEXIBLE_ARRAY_EXTENSION
|
---|
310 | IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
|
---|
311 | } IEMEXECMEMALLOCATOR;
|
---|
312 | /** Pointer to an executable memory allocator. */
|
---|
313 | typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
|
---|
314 |
|
---|
315 | /** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
|
---|
316 | #define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
|
---|
317 |
|
---|
318 |
|
---|
319 | #ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
|
---|
320 | /**
|
---|
321 | * Allocation header.
|
---|
322 | */
|
---|
323 | typedef struct IEMEXECMEMALLOCHDR
|
---|
324 | {
|
---|
325 | RT_GCC_EXTENSION
|
---|
326 | union
|
---|
327 | {
|
---|
328 | struct
|
---|
329 | {
|
---|
330 | /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
|
---|
331 | uint32_t uMagic;
|
---|
332 | /** The allocation chunk (for speeding up freeing). */
|
---|
333 | uint32_t idxChunk;
|
---|
334 | };
|
---|
335 | /** Combined magic and chunk index, for the pruning scanner code. */
|
---|
336 | uint64_t u64MagicAndChunkIdx;
|
---|
337 | };
|
---|
338 | /** Pointer to the translation block the allocation belongs to.
|
---|
339 | * This is the whole point of the header. */
|
---|
340 | PIEMTB pTb;
|
---|
341 | } IEMEXECMEMALLOCHDR;
|
---|
342 | /** Pointer to an allocation header. */
|
---|
343 | typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;
|
---|
344 | /** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */
|
---|
345 | # define IEMEXECMEMALLOCHDR_MAGIC UINT32_C(0x4d657845)
|
---|
346 | #endif
|
---|
347 |
|
---|
348 |
|
---|
349 | static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
|
---|
350 |
|
---|
351 |
|
---|
352 | #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
|
---|
353 | /**
|
---|
354 | * Frees up executable memory when we're out space.
|
---|
355 | *
|
---|
356 | * This is an alternative to iemTbAllocatorFreeupNativeSpace() that frees up
|
---|
357 | * space in a more linear fashion from the allocator's point of view. It may
|
---|
358 | * also defragment if implemented & enabled
|
---|
359 | */
|
---|
360 | static void iemExecMemAllocatorPrune(PVMCPU pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
|
---|
361 | {
|
---|
362 | # ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
|
---|
363 | # error "IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING requires IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER"
|
---|
364 | # endif
|
---|
365 | STAM_REL_PROFILE_START(&pExecMemAllocator->StatPruneProf, a);
|
---|
366 |
|
---|
367 | /*
|
---|
368 | * Before we can start, we must process delayed frees.
|
---|
369 | */
|
---|
370 | #if 1
|
---|
371 | PIEMTBALLOCATOR const pTbAllocator = iemTbAllocatorFreeBulkStart(pVCpu);
|
---|
372 | #else
|
---|
373 | iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
|
---|
374 | #endif
|
---|
375 |
|
---|
376 | AssertCompile(RT_IS_POWER_OF_TWO(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE));
|
---|
377 |
|
---|
378 | uint32_t const cbChunk = pExecMemAllocator->cbChunk;
|
---|
379 | AssertReturnVoid(RT_IS_POWER_OF_TWO(cbChunk));
|
---|
380 | AssertReturnVoid(cbChunk >= _1M && cbChunk <= _256M); /* see iemExecMemAllocatorInit */
|
---|
381 |
|
---|
382 | uint32_t const cChunks = pExecMemAllocator->cChunks;
|
---|
383 | AssertReturnVoid(cChunks == pExecMemAllocator->cMaxChunks);
|
---|
384 | AssertReturnVoid(cChunks >= 1);
|
---|
385 |
|
---|
386 | Assert(!pVCpu->iem.s.pCurTbR3);
|
---|
387 |
|
---|
388 | /*
|
---|
389 | * Decide how much to prune. The chunk is is a multiple of two, so we'll be
|
---|
390 | * scanning a multiple of two here as well.
|
---|
391 | */
|
---|
392 | uint32_t cbToPrune = cbChunk;
|
---|
393 |
|
---|
394 | /* Never more than 25%. */
|
---|
395 | if (cChunks < 4)
|
---|
396 | cbToPrune /= cChunks == 1 ? 4 : 2;
|
---|
397 |
|
---|
398 | /* Upper limit. In a debug build a 4MB limit averages out at ~0.6ms per call. */
|
---|
399 | if (cbToPrune > _4M)
|
---|
400 | cbToPrune = _4M;
|
---|
401 |
|
---|
402 | /*
|
---|
403 | * Adjust the pruning chunk and offset accordingly.
|
---|
404 | */
|
---|
405 | uint32_t idxChunk = pExecMemAllocator->idxChunkPrune;
|
---|
406 | uint32_t offChunk = pExecMemAllocator->offChunkPrune;
|
---|
407 | offChunk &= ~(uint32_t)(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1U);
|
---|
408 | if (offChunk >= cbChunk)
|
---|
409 | {
|
---|
410 | offChunk = 0;
|
---|
411 | idxChunk += 1;
|
---|
412 | }
|
---|
413 | if (idxChunk >= cChunks)
|
---|
414 | {
|
---|
415 | offChunk = 0;
|
---|
416 | idxChunk = 0;
|
---|
417 | }
|
---|
418 |
|
---|
419 | uint32_t const offPruneStart = offChunk;
|
---|
420 | uint32_t const offPruneEnd = RT_MIN(offChunk + cbToPrune, cbChunk);
|
---|
421 |
|
---|
422 | /*
|
---|
423 | * Do the pruning. The current approach is the sever kind.
|
---|
424 | *
|
---|
425 | * This is memory bound, as we must load both the allocation header and the
|
---|
426 | * associated TB and then modify them. So, the CPU isn't all that unitilized
|
---|
427 | * here. Try apply some prefetching to speed it up a tiny bit.
|
---|
428 | */
|
---|
429 | uint64_t cbPruned = 0;
|
---|
430 | uint64_t const u64MagicAndChunkIdx = RT_MAKE_U64(IEMEXECMEMALLOCHDR_MAGIC, idxChunk);
|
---|
431 | uint8_t * const pbChunk = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
|
---|
432 | while (offChunk < offPruneEnd)
|
---|
433 | {
|
---|
434 | PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];
|
---|
435 |
|
---|
436 | /* Is this the start of an allocation block for a TB? (We typically
|
---|
437 | have one allocation at the start of each chunk for the unwind info
|
---|
438 | where pTb is NULL.) */
|
---|
439 | PIEMTB pTb;
|
---|
440 | if ( pHdr->u64MagicAndChunkIdx == u64MagicAndChunkIdx
|
---|
441 | && RT_LIKELY((pTb = pHdr->pTb) != NULL))
|
---|
442 | {
|
---|
443 | AssertPtr(pTb);
|
---|
444 |
|
---|
445 | uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),
|
---|
446 | IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
|
---|
447 |
|
---|
448 | /* Prefetch the next header before freeing the current one and its TB. */
|
---|
449 | /** @todo Iff the block size was part of the header in some way, this could be
|
---|
450 | * a tiny bit faster. */
|
---|
451 | offChunk += cbBlock;
|
---|
452 | #if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
|
---|
453 | _mm_prefetch((char *)&pbChunk[offChunk], _MM_HINT_T0);
|
---|
454 | #elif defined(_MSC_VER) && defined(RT_ARCH_ARM64)
|
---|
455 | __prefetch(&pbChunk[offChunk]);
|
---|
456 | #else
|
---|
457 | __builtin_prefetch(&pbChunk[offChunk], 1 /*rw*/);
|
---|
458 | #endif
|
---|
459 | /* Some paranoia first, though. */
|
---|
460 | AssertBreakStmt(offChunk <= cbChunk, offChunk -= cbBlock - IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
|
---|
461 | cbPruned += cbBlock;
|
---|
462 |
|
---|
463 | #if 1
|
---|
464 | iemTbAllocatorFreeBulk(pVCpu, pTbAllocator, pTb);
|
---|
465 | #else
|
---|
466 | iemTbAllocatorFree(pVCpu, pTb);
|
---|
467 | #endif
|
---|
468 | }
|
---|
469 | else
|
---|
470 | offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
|
---|
471 | }
|
---|
472 | STAM_REL_PROFILE_ADD_PERIOD(&pExecMemAllocator->StatPruneRecovered, cbPruned);
|
---|
473 |
|
---|
474 | pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
|
---|
475 |
|
---|
476 | /*
|
---|
477 | * Save the current pruning point.
|
---|
478 | */
|
---|
479 | pExecMemAllocator->offChunkPrune = offChunk;
|
---|
480 | pExecMemAllocator->idxChunkPrune = idxChunk;
|
---|
481 |
|
---|
482 | /* Set the hint to the start of the pruned region. */
|
---|
483 | pExecMemAllocator->idxChunkHint = idxChunk;
|
---|
484 | pExecMemAllocator->aChunks[idxChunk].idxFreeHint = offPruneStart / IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
|
---|
485 |
|
---|
486 | STAM_REL_PROFILE_STOP(&pExecMemAllocator->StatPruneProf, a);
|
---|
487 | }
|
---|
488 | #endif /* IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING */
|
---|
489 |
|
---|
490 |
|
---|
491 | #if defined(VBOX_STRICT) || 0
|
---|
492 | /**
|
---|
493 | * The old bitmap scanner code, for comparison and assertions.
|
---|
494 | */
|
---|
495 | static uint32_t iemExecMemAllocatorFindReqFreeUnitsOld(uint64_t *pbmAlloc, uint32_t cToScan, uint32_t cReqUnits)
|
---|
496 | {
|
---|
497 | /** @todo This can probably be done more efficiently for non-x86 systems. */
|
---|
498 | int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
|
---|
499 | while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
|
---|
500 | {
|
---|
501 | uint32_t idxAddBit = 1;
|
---|
502 | while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
|
---|
503 | idxAddBit++;
|
---|
504 | if (idxAddBit >= cReqUnits)
|
---|
505 | return (uint32_t)iBit;
|
---|
506 | iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
|
---|
507 | }
|
---|
508 | return UINT32_MAX;
|
---|
509 | }
|
---|
510 | #endif
|
---|
511 |
|
---|
512 |
|
---|
513 | /**
|
---|
514 | * Bitmap scanner code that looks for a bunch of @a cReqUnits zero bits.
|
---|
515 | *
|
---|
516 | * Booting win11 with a r165098 release build the average native TB size is
|
---|
517 | * around 9 units (of 256 bytes). So, it is unlikely we need to scan any
|
---|
518 | * subsequent words once we hit a patch of zeros, thus @a a_fBig.
|
---|
519 | *
|
---|
520 | * @todo This needs more tweaking. While it *is* faster the the old code,
|
---|
521 | * it doens't seem like it's all that much. :/
|
---|
522 | */
|
---|
523 | template<const bool a_fBig>
|
---|
524 | static uint32_t iemExecMemAllocatorFindReqFreeUnits(uint64_t *pbmAlloc, uint32_t c64WordsToScan, uint32_t cReqUnits)
|
---|
525 | {
|
---|
526 | /*
|
---|
527 | * Scan the (section of the) allocation bitmap in 64-bit words.
|
---|
528 | */
|
---|
529 | unsigned cPrevLeadingZeros = 0;
|
---|
530 | for (uint32_t off = 0; off < c64WordsToScan; off++)
|
---|
531 | {
|
---|
532 | uint64_t uWord = pbmAlloc[off];
|
---|
533 | if (uWord == UINT64_MAX)
|
---|
534 | {
|
---|
535 | /*
|
---|
536 | * Getting thru patches of UINT64_MAX is a frequent problem when the allocator
|
---|
537 | * fills up, so it's definitely worth optimizing.
|
---|
538 | *
|
---|
539 | * The complicated code below is a bit faster on arm. Reducing the per TB cost
|
---|
540 | * from 4255ns to 4106ns (best run out of 10). On win/amd64 there isn't an
|
---|
541 | * obvious gain here, at least not with the data currently being profiled.
|
---|
542 | */
|
---|
543 | #if 1
|
---|
544 | off++;
|
---|
545 | uint32_t cQuads = (c64WordsToScan - off) / 4;
|
---|
546 |
|
---|
547 | /* Align. */
|
---|
548 | if (cQuads > 1)
|
---|
549 | switch (((uintptr_t)&pbmAlloc[off] / sizeof(uint64_t)) & 3)
|
---|
550 | {
|
---|
551 | case 0:
|
---|
552 | break;
|
---|
553 | case 1:
|
---|
554 | {
|
---|
555 | uWord = pbmAlloc[off];
|
---|
556 | uint64_t uWord1 = pbmAlloc[off + 1];
|
---|
557 | uint64_t uWord2 = pbmAlloc[off + 2];
|
---|
558 | if ((uWord & uWord1 & uWord2) == UINT64_MAX)
|
---|
559 | {
|
---|
560 | off += 3;
|
---|
561 | cQuads = (c64WordsToScan - off) / 4;
|
---|
562 | }
|
---|
563 | else if (uWord == UINT64_MAX)
|
---|
564 | {
|
---|
565 | if (uWord1 != UINT64_MAX)
|
---|
566 | {
|
---|
567 | uWord = uWord1;
|
---|
568 | off += 1;
|
---|
569 | }
|
---|
570 | else
|
---|
571 | {
|
---|
572 | uWord = uWord2;
|
---|
573 | off += 2;
|
---|
574 | }
|
---|
575 | }
|
---|
576 | break;
|
---|
577 | }
|
---|
578 | case 2:
|
---|
579 | {
|
---|
580 | uWord = pbmAlloc[off];
|
---|
581 | uint64_t uWord1 = pbmAlloc[off + 1];
|
---|
582 | if ((uWord & uWord1) == UINT64_MAX)
|
---|
583 | {
|
---|
584 | off += 2;
|
---|
585 | cQuads = (c64WordsToScan - off) / 4;
|
---|
586 | }
|
---|
587 | else if (uWord == UINT64_MAX)
|
---|
588 | {
|
---|
589 | uWord = uWord1;
|
---|
590 | off += 1;
|
---|
591 | }
|
---|
592 | break;
|
---|
593 | }
|
---|
594 | case 3:
|
---|
595 | uWord = pbmAlloc[off];
|
---|
596 | if (uWord == UINT64_MAX)
|
---|
597 | {
|
---|
598 | off++;
|
---|
599 | cQuads = (c64WordsToScan - off) / 4;
|
---|
600 | }
|
---|
601 | break;
|
---|
602 | }
|
---|
603 | if (uWord == UINT64_MAX)
|
---|
604 | {
|
---|
605 | /* Looping over 32 bytes at a time. */
|
---|
606 | for (;;)
|
---|
607 | {
|
---|
608 | if (cQuads-- > 0)
|
---|
609 | {
|
---|
610 | uWord = pbmAlloc[off + 0];
|
---|
611 | uint64_t uWord1 = pbmAlloc[off + 1];
|
---|
612 | uint64_t uWord2 = pbmAlloc[off + 2];
|
---|
613 | uint64_t uWord3 = pbmAlloc[off + 3];
|
---|
614 | if ((uWord & uWord1 & uWord2 & uWord3) == UINT64_MAX)
|
---|
615 | off += 4;
|
---|
616 | else
|
---|
617 | {
|
---|
618 | if (uWord != UINT64_MAX)
|
---|
619 | { }
|
---|
620 | else if (uWord1 != UINT64_MAX)
|
---|
621 | {
|
---|
622 | uWord = uWord1;
|
---|
623 | off += 1;
|
---|
624 | }
|
---|
625 | else if (uWord2 != UINT64_MAX)
|
---|
626 | {
|
---|
627 | uWord = uWord2;
|
---|
628 | off += 2;
|
---|
629 | }
|
---|
630 | else
|
---|
631 | {
|
---|
632 | uWord = uWord3;
|
---|
633 | off += 3;
|
---|
634 | }
|
---|
635 | break;
|
---|
636 | }
|
---|
637 | }
|
---|
638 | else
|
---|
639 | {
|
---|
640 | if (off < c64WordsToScan)
|
---|
641 | {
|
---|
642 | uWord = pbmAlloc[off];
|
---|
643 | if (uWord != UINT64_MAX)
|
---|
644 | break;
|
---|
645 | off++;
|
---|
646 | if (off < c64WordsToScan)
|
---|
647 | {
|
---|
648 | uWord = pbmAlloc[off];
|
---|
649 | if (uWord != UINT64_MAX)
|
---|
650 | break;
|
---|
651 | off++;
|
---|
652 | if (off < c64WordsToScan)
|
---|
653 | {
|
---|
654 | uWord = pbmAlloc[off];
|
---|
655 | if (uWord != UINT64_MAX)
|
---|
656 | break;
|
---|
657 | Assert(off + 1 == c64WordsToScan);
|
---|
658 | }
|
---|
659 | }
|
---|
660 | }
|
---|
661 | return UINT32_MAX;
|
---|
662 | }
|
---|
663 | }
|
---|
664 | }
|
---|
665 | #else
|
---|
666 | do
|
---|
667 | {
|
---|
668 | off++;
|
---|
669 | if (off < c64WordsToScan)
|
---|
670 | uWord = pbmAlloc[off];
|
---|
671 | else
|
---|
672 | return UINT32_MAX;
|
---|
673 | } while (uWord == UINT64_MAX);
|
---|
674 | #endif
|
---|
675 | cPrevLeadingZeros = 0;
|
---|
676 | }
|
---|
677 |
|
---|
678 | /*
|
---|
679 | * If we get down here, we have a word that isn't UINT64_MAX.
|
---|
680 | */
|
---|
681 | if (uWord != 0)
|
---|
682 | {
|
---|
683 | /*
|
---|
684 | * Fend of large request we cannot satisfy before the first set bit.
|
---|
685 | */
|
---|
686 | if (!a_fBig || cReqUnits < 64 + cPrevLeadingZeros)
|
---|
687 | {
|
---|
688 | #ifdef __GNUC__
|
---|
689 | unsigned cZerosInWord = __builtin_popcountl(~uWord);
|
---|
690 | #elif defined(_MSC_VER) && defined(RT_ARCH_AMD64)
|
---|
691 | unsigned cZerosInWord = __popcnt64(~uWord);
|
---|
692 | #elif defined(_MSC_VER) && defined(RT_ARCH_ARM64)
|
---|
693 | unsigned cZerosInWord = _CountOneBits64(~uWord);
|
---|
694 | #else
|
---|
695 | # pragma message("need popcount intrinsic or something...")
|
---|
696 | unsigned cZerosInWord = 0;
|
---|
697 | for (uint64_t uTmp = ~uWords; uTmp; cZerosInWord++)
|
---|
698 | uTmp &= uTmp - 1; /* Clears the least significant bit set. */
|
---|
699 | #endif
|
---|
700 | if (cZerosInWord + cPrevLeadingZeros >= cReqUnits)
|
---|
701 | {
|
---|
702 | /* Check if we've got a patch of zeros at the trailing end
|
---|
703 | when joined with the previous word: */
|
---|
704 | #ifdef __GNUC__
|
---|
705 | unsigned cTrailingZeros = __builtin_ctzl(uWord);
|
---|
706 | #else
|
---|
707 | unsigned cTrailingZeros = ASMBitFirstSetU64(uWord) - 1;
|
---|
708 | #endif
|
---|
709 | if (cPrevLeadingZeros + cTrailingZeros >= cReqUnits)
|
---|
710 | return off * 64 - cPrevLeadingZeros;
|
---|
711 |
|
---|
712 | /*
|
---|
713 | * Try leading zeros before we get on with the tedious stuff.
|
---|
714 | */
|
---|
715 | #ifdef __GNUC__
|
---|
716 | cPrevLeadingZeros = __builtin_clzl(uWord);
|
---|
717 | #else
|
---|
718 | cPrevLeadingZeros = 64 - ASMBitLastSetU64(uWord);
|
---|
719 | #endif
|
---|
720 | if (cPrevLeadingZeros >= cReqUnits)
|
---|
721 | return (off + 1) * 64 - cPrevLeadingZeros;
|
---|
722 |
|
---|
723 | /*
|
---|
724 | * Check the popcount again sans leading & trailing before looking
|
---|
725 | * inside the word.
|
---|
726 | */
|
---|
727 | cZerosInWord -= cPrevLeadingZeros + cTrailingZeros;
|
---|
728 | if (cZerosInWord >= cReqUnits)
|
---|
729 | {
|
---|
730 | /* 1; 64 - 0 - 1 = 63; */
|
---|
731 | unsigned const iBitLast = 64 - cPrevLeadingZeros - cReqUnits; /** @todo boundrary */
|
---|
732 | unsigned iBit = cTrailingZeros;
|
---|
733 | uWord >>= cTrailingZeros;
|
---|
734 | do
|
---|
735 | {
|
---|
736 | Assert(uWord & 1);
|
---|
737 | #ifdef __GNUC__
|
---|
738 | unsigned iZeroBit = __builtin_ctzl(~uWord);
|
---|
739 | #else
|
---|
740 | unsigned iZeroBit = ASMBitFirstSetU64(~uWord) - 1;
|
---|
741 | #endif
|
---|
742 | iBit += iZeroBit;
|
---|
743 | uWord >>= iZeroBit;
|
---|
744 | Assert(iBit <= iBitLast);
|
---|
745 | Assert((uWord & 1) == 0);
|
---|
746 | #ifdef __GNUC__
|
---|
747 | unsigned cZeros = __builtin_ctzl(uWord);
|
---|
748 | #else
|
---|
749 | unsigned cZeros = ASMBitFirstSetU64(uWord) - 1;
|
---|
750 | #endif
|
---|
751 | if (cZeros >= cReqUnits)
|
---|
752 | return off * 64 + iBit;
|
---|
753 |
|
---|
754 | cZerosInWord -= cZeros; /* (may underflow as we will count shifted in zeros) */
|
---|
755 | iBit += cZeros;
|
---|
756 | uWord >>= cZeros;
|
---|
757 | } while ((int)cZerosInWord >= (int)cReqUnits && iBit < iBitLast);
|
---|
758 | }
|
---|
759 | continue; /* we've already calculated cPrevLeadingZeros */
|
---|
760 | }
|
---|
761 | }
|
---|
762 |
|
---|
763 | /* Update the leading (MSB) zero count. */
|
---|
764 | #ifdef __GNUC__
|
---|
765 | cPrevLeadingZeros = __builtin_clzl(uWord);
|
---|
766 | #else
|
---|
767 | cPrevLeadingZeros = 64 - ASMBitLastSetU64(uWord);
|
---|
768 | #endif
|
---|
769 | }
|
---|
770 | /*
|
---|
771 | * uWord == 0
|
---|
772 | */
|
---|
773 | else
|
---|
774 | {
|
---|
775 | if RT_CONSTEXPR_IF(!a_fBig)
|
---|
776 | return off * 64 - cPrevLeadingZeros;
|
---|
777 | else /* keep else */
|
---|
778 | {
|
---|
779 | if (cPrevLeadingZeros + 64 >= cReqUnits)
|
---|
780 | return off * 64 - cPrevLeadingZeros;
|
---|
781 | for (uint32_t off2 = off + 1;; off2++)
|
---|
782 | {
|
---|
783 | if (off2 < c64WordsToScan)
|
---|
784 | {
|
---|
785 | uWord = pbmAlloc[off2];
|
---|
786 | if (uWord == UINT64_MAX)
|
---|
787 | {
|
---|
788 | cPrevLeadingZeros = 0;
|
---|
789 | break;
|
---|
790 | }
|
---|
791 | if (uWord == 0)
|
---|
792 | {
|
---|
793 | if (cPrevLeadingZeros + (off2 - off + 1) * 64 >= cReqUnits)
|
---|
794 | return off * 64 - cPrevLeadingZeros;
|
---|
795 | }
|
---|
796 | else
|
---|
797 | {
|
---|
798 | #ifdef __GNUC__
|
---|
799 | unsigned cTrailingZeros = __builtin_ctzl(uWord);
|
---|
800 | #else
|
---|
801 | unsigned cTrailingZeros = ASMBitFirstSetU64(uWord) - 1;
|
---|
802 | #endif
|
---|
803 | if (cPrevLeadingZeros + (off2 - off) * 64 + cTrailingZeros >= cReqUnits)
|
---|
804 | return off * 64 - cPrevLeadingZeros;
|
---|
805 | #ifdef __GNUC__
|
---|
806 | cPrevLeadingZeros = __builtin_clzl(uWord);
|
---|
807 | #else
|
---|
808 | cPrevLeadingZeros = 64 - ASMBitLastSetU64(uWord);
|
---|
809 | #endif
|
---|
810 | break;
|
---|
811 | }
|
---|
812 | }
|
---|
813 | else
|
---|
814 | return UINT32_MAX;
|
---|
815 | }
|
---|
816 | }
|
---|
817 | }
|
---|
818 | }
|
---|
819 | return UINT32_MAX;
|
---|
820 | }
|
---|
821 |
|
---|
822 |
|
---|
823 | /**
|
---|
824 | * Try allocate a block of @a cReqUnits in the chunk @a idxChunk.
|
---|
825 | */
|
---|
826 | static void *
|
---|
827 | iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
|
---|
828 | uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb,
|
---|
829 | void **ppvExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx)
|
---|
830 | {
|
---|
831 | /*
|
---|
832 | * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
|
---|
833 | */
|
---|
834 | Assert(!(cToScan & 63));
|
---|
835 | Assert(!(idxFirst & 63));
|
---|
836 | Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
|
---|
837 | pbmAlloc += idxFirst / 64;
|
---|
838 | cToScan += idxFirst & 63;
|
---|
839 | Assert(!(cToScan & 63));
|
---|
840 |
|
---|
841 | #if 1
|
---|
842 | uint32_t const iBit = cReqUnits < 64
|
---|
843 | ? iemExecMemAllocatorFindReqFreeUnits<false>(pbmAlloc, cToScan / 64, cReqUnits)
|
---|
844 | : iemExecMemAllocatorFindReqFreeUnits<true>( pbmAlloc, cToScan / 64, cReqUnits);
|
---|
845 | # ifdef VBOX_STRICT
|
---|
846 | uint32_t const iBitOld = iemExecMemAllocatorFindReqFreeUnitsOld(pbmAlloc, cToScan, cReqUnits);
|
---|
847 | AssertMsg( iBit == iBitOld
|
---|
848 | || (iBit / 64) == (iBitOld / 64), /* New algorithm will return trailing hit before middle. */
|
---|
849 | ("iBit=%#x (%#018RX64); iBitOld=%#x (%#018RX64); cReqUnits=%#x\n",
|
---|
850 | iBit, iBit != UINT32_MAX ? pbmAlloc[iBit / 64] : 0,
|
---|
851 | iBitOld, iBitOld != UINT32_MAX ? pbmAlloc[iBitOld / 64] : 0, cReqUnits));
|
---|
852 | # endif
|
---|
853 | #else
|
---|
854 | uint32_t const iBit = iemExecMemAllocatorFindReqFreeUnitsOld(pbmAlloc, cToScan, cReqUnits);
|
---|
855 | #endif
|
---|
856 | if (iBit != UINT32_MAX)
|
---|
857 | {
|
---|
858 | ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
|
---|
859 |
|
---|
860 | PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
|
---|
861 | pChunk->cFreeUnits -= cReqUnits;
|
---|
862 | pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
|
---|
863 |
|
---|
864 | pExecMemAllocator->cAllocations += 1;
|
---|
865 | uint32_t const cbReq = cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
|
---|
866 | pExecMemAllocator->cbAllocated += cbReq;
|
---|
867 | pExecMemAllocator->cbFree -= cbReq;
|
---|
868 | pExecMemAllocator->idxChunkHint = idxChunk;
|
---|
869 |
|
---|
870 | void * const pvMemRw = (uint8_t *)pChunk->pvChunkRw
|
---|
871 | + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
|
---|
872 |
|
---|
873 | if (ppChunkCtx)
|
---|
874 | *ppChunkCtx = pChunk->pCtx;
|
---|
875 |
|
---|
876 | /*
|
---|
877 | * Initialize the header and return.
|
---|
878 | */
|
---|
879 | # ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
|
---|
880 | PIEMEXECMEMALLOCHDR const pHdr = (PIEMEXECMEMALLOCHDR)pvMemRw;
|
---|
881 | pHdr->uMagic = IEMEXECMEMALLOCHDR_MAGIC;
|
---|
882 | pHdr->idxChunk = idxChunk;
|
---|
883 | pHdr->pTb = pTb;
|
---|
884 |
|
---|
885 | if (ppvExec)
|
---|
886 | *ppvExec = (uint8_t *)pChunk->pvChunkRx
|
---|
887 | + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT)
|
---|
888 | + sizeof(*pHdr);
|
---|
889 |
|
---|
890 | return pHdr + 1;
|
---|
891 | #else
|
---|
892 | if (ppvExec)
|
---|
893 | *ppvExec = (uint8_t *)pChunk->pvChunkRx
|
---|
894 | + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
|
---|
895 |
|
---|
896 | RT_NOREF(pTb);
|
---|
897 | return pvMem;
|
---|
898 | #endif
|
---|
899 | }
|
---|
900 |
|
---|
901 | return NULL;
|
---|
902 | }
|
---|
903 |
|
---|
904 |
|
---|
905 | /**
|
---|
906 | * Converts requested number of bytes into a unit count.
|
---|
907 | */
|
---|
908 | DECL_FORCE_INLINE(uint32_t) iemExecMemAllocBytesToUnits(uint32_t cbReq)
|
---|
909 | {
|
---|
910 | #ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
|
---|
911 | return (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
|
---|
912 | #else
|
---|
913 | return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
|
---|
914 | #endif
|
---|
915 | >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
|
---|
916 | }
|
---|
917 |
|
---|
918 |
|
---|
919 | DECL_FORCE_INLINE(PIEMNATIVEINSTR)
|
---|
920 | iemExecMemAllocatorAllocUnitsInChunkInner(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cReqUnits,
|
---|
921 | PIEMTB pTb, PIEMNATIVEINSTR *ppaExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx)
|
---|
922 | {
|
---|
923 | uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
|
---|
924 | uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
|
---|
925 | if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
|
---|
926 | {
|
---|
927 | void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
|
---|
928 | pExecMemAllocator->cUnitsPerChunk - idxHint,
|
---|
929 | cReqUnits, idxChunk, pTb, (void **)ppaExec, ppChunkCtx);
|
---|
930 | if (pvRet)
|
---|
931 | return (PIEMNATIVEINSTR)pvRet;
|
---|
932 | }
|
---|
933 | void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
|
---|
934 | RT_MIN(pExecMemAllocator->cUnitsPerChunk,
|
---|
935 | RT_ALIGN_32(idxHint + cReqUnits, 64*4)),
|
---|
936 | cReqUnits, idxChunk, pTb, (void **)ppaExec, ppChunkCtx);
|
---|
937 | if (pvRet)
|
---|
938 | return (PIEMNATIVEINSTR)pvRet;
|
---|
939 |
|
---|
940 | pExecMemAllocator->cFruitlessChunkScans += 1;
|
---|
941 | return NULL;
|
---|
942 | }
|
---|
943 |
|
---|
944 |
|
---|
945 | DECLINLINE(PIEMNATIVEINSTR)
|
---|
946 | iemExecMemAllocatorAllocBytesInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq,
|
---|
947 | PIEMNATIVEINSTR *ppaExec)
|
---|
948 | {
|
---|
949 | uint32_t const cReqUnits = iemExecMemAllocBytesToUnits(cbReq);
|
---|
950 | if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
|
---|
951 | return iemExecMemAllocatorAllocUnitsInChunkInner(pExecMemAllocator, idxChunk, cReqUnits, NULL /*pTb*/,
|
---|
952 | ppaExec, NULL /*ppChunkCtx*/);
|
---|
953 | return NULL;
|
---|
954 | }
|
---|
955 |
|
---|
956 |
|
---|
957 | /**
|
---|
958 | * Allocates @a cbReq bytes of executable memory.
|
---|
959 | *
|
---|
960 | * @returns Pointer to the readable/writeable memory, NULL if out of memory or other problem
|
---|
961 | * encountered.
|
---|
962 | * @param pVCpu The cross context virtual CPU structure of the
|
---|
963 | * calling thread.
|
---|
964 | * @param cbReq How many bytes are required.
|
---|
965 | * @param pTb The translation block that will be using the allocation.
|
---|
966 | * @param ppaExec Where to return the pointer to executable view of
|
---|
967 | * the allocated memory, optional.
|
---|
968 | * @param ppChunkCtx Where to return the per chunk attached context
|
---|
969 | * if available, optional.
|
---|
970 | */
|
---|
971 | DECLHIDDEN(PIEMNATIVEINSTR) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb,
|
---|
972 | PIEMNATIVEINSTR *ppaExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx) RT_NOEXCEPT
|
---|
973 | {
|
---|
974 | PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
|
---|
975 | AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
|
---|
976 | AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
|
---|
977 | STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);
|
---|
978 |
|
---|
979 | uint32_t const cReqUnits = iemExecMemAllocBytesToUnits(cbReq);
|
---|
980 | STAM_COUNTER_INC(&pExecMemAllocator->aStatSizes[cReqUnits < RT_ELEMENTS(pExecMemAllocator->aStatSizes) ? cReqUnits : 0]);
|
---|
981 | for (unsigned iIteration = 0;; iIteration++)
|
---|
982 | {
|
---|
983 | if ( cbReq * 2 <= pExecMemAllocator->cbFree
|
---|
984 | || (cReqUnits == 1 || pExecMemAllocator->cbFree >= IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) )
|
---|
985 | {
|
---|
986 | uint32_t const cChunks = pExecMemAllocator->cChunks;
|
---|
987 | uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
|
---|
988 |
|
---|
989 | /*
|
---|
990 | * We do two passes here, the first pass we skip chunks with fewer than cReqUnits * 16,
|
---|
991 | * the 2nd pass we skip chunks. The second pass checks the one skipped in the first pass.
|
---|
992 | */
|
---|
993 | for (uint32_t cMinFreePass = cReqUnits == 1 ? cReqUnits : cReqUnits * 16, cMaxFreePass = UINT32_MAX;;)
|
---|
994 | {
|
---|
995 | for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
|
---|
996 | if ( pExecMemAllocator->aChunks[idxChunk].cFreeUnits >= cMinFreePass
|
---|
997 | && pExecMemAllocator->aChunks[idxChunk].cFreeUnits <= cMaxFreePass)
|
---|
998 | {
|
---|
999 | PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocUnitsInChunkInner(pExecMemAllocator, idxChunk,
|
---|
1000 | cReqUnits, pTb, ppaExec, ppChunkCtx);
|
---|
1001 | if (pRet)
|
---|
1002 | {
|
---|
1003 | STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
|
---|
1004 | #ifdef VBOX_WITH_STATISTICS
|
---|
1005 | pExecMemAllocator->cbUnusable += (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbReq;
|
---|
1006 | #endif
|
---|
1007 | return pRet;
|
---|
1008 | }
|
---|
1009 | }
|
---|
1010 | for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
|
---|
1011 | if ( pExecMemAllocator->aChunks[idxChunk].cFreeUnits >= cMinFreePass
|
---|
1012 | && pExecMemAllocator->aChunks[idxChunk].cFreeUnits <= cMaxFreePass)
|
---|
1013 | {
|
---|
1014 | PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocUnitsInChunkInner(pExecMemAllocator, idxChunk,
|
---|
1015 | cReqUnits, pTb, ppaExec, ppChunkCtx);
|
---|
1016 | if (pRet)
|
---|
1017 | {
|
---|
1018 | STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
|
---|
1019 | #ifdef VBOX_WITH_STATISTICS
|
---|
1020 | pExecMemAllocator->cbUnusable += (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbReq;
|
---|
1021 | #endif
|
---|
1022 | return pRet;
|
---|
1023 | }
|
---|
1024 | }
|
---|
1025 | if (cMinFreePass <= cReqUnits * 2)
|
---|
1026 | break;
|
---|
1027 | cMaxFreePass = cMinFreePass - 1;
|
---|
1028 | cMinFreePass = cReqUnits * 2;
|
---|
1029 | }
|
---|
1030 | }
|
---|
1031 |
|
---|
1032 | /*
|
---|
1033 | * Can we grow it with another chunk?
|
---|
1034 | */
|
---|
1035 | if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
|
---|
1036 | {
|
---|
1037 | int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
|
---|
1038 | AssertLogRelRCReturn(rc, NULL);
|
---|
1039 |
|
---|
1040 | uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
|
---|
1041 | PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocUnitsInChunkInner(pExecMemAllocator, idxChunk, cReqUnits, pTb,
|
---|
1042 | ppaExec, ppChunkCtx);
|
---|
1043 | if (pRet)
|
---|
1044 | {
|
---|
1045 | STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
|
---|
1046 | #ifdef VBOX_WITH_STATISTICS
|
---|
1047 | pExecMemAllocator->cbUnusable += (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbReq;
|
---|
1048 | #endif
|
---|
1049 | return pRet;
|
---|
1050 | }
|
---|
1051 | AssertFailed();
|
---|
1052 | }
|
---|
1053 |
|
---|
1054 | /*
|
---|
1055 | * Try prune native TBs once.
|
---|
1056 | */
|
---|
1057 | if (iIteration == 0)
|
---|
1058 | {
|
---|
1059 | #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
|
---|
1060 | iemExecMemAllocatorPrune(pVCpu, pExecMemAllocator);
|
---|
1061 | #else
|
---|
1062 | /* No header included in the instruction count here. */
|
---|
1063 | uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);
|
---|
1064 | iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);
|
---|
1065 | #endif
|
---|
1066 | }
|
---|
1067 | else
|
---|
1068 | {
|
---|
1069 | STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
|
---|
1070 | STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
|
---|
1071 | return NULL;
|
---|
1072 | }
|
---|
1073 | }
|
---|
1074 | }
|
---|
1075 |
|
---|
1076 |
|
---|
1077 | /** This is a hook to ensure the instruction cache is properly flushed before the code in the memory
|
---|
1078 | * given by @a pv and @a cb is executed */
|
---|
1079 | DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT
|
---|
1080 | {
|
---|
1081 | #ifdef RT_OS_DARWIN
|
---|
1082 | /*
|
---|
1083 | * We need to synchronize the stuff we wrote to the data cache with the
|
---|
1084 | * instruction cache, since these aren't coherent on arm (or at least not
|
---|
1085 | * on Apple Mn CPUs).
|
---|
1086 | *
|
---|
1087 | * Note! Since we don't any share JIT'ed code with the other CPUs, we don't
|
---|
1088 | * really care whether the dcache is fully flushed back to memory. It
|
---|
1089 | * only needs to hit the level 2 cache, which the level 1 instruction
|
---|
1090 | * and data caches seems to be sharing. In ARM terms, we need to reach
|
---|
1091 | * a point of unification (PoU), rather than a point of coherhency (PoC).
|
---|
1092 | *
|
---|
1093 | * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
|
---|
1094 | *
|
---|
1095 | * https://developer.arm.com/documentation/den0013/d/Caches/Point-of-coherency-and-unification
|
---|
1096 | *
|
---|
1097 | * Experimenting with the approach used by sys_icache_invalidate() and
|
---|
1098 | * tweaking it a little, could let us shave off a bit of effort. The thing
|
---|
1099 | * that slows the apple code down on an M2 (runing Sonoma 13.4), seems to
|
---|
1100 | * the 'DSB ISH' instructions performed every 20 icache line flushes.
|
---|
1101 | * Skipping these saves ~100ns or more per TB when profiling the native
|
---|
1102 | * recompiler on the TBs from a win11 full boot-desktop-shutdow sequence.
|
---|
1103 | * Thus we will leave DCACHE_ICACHE_SYNC_WITH_WITH_IVAU_DSB undefined if we
|
---|
1104 | * can.
|
---|
1105 | *
|
---|
1106 | * There appears not to be much difference between DSB options 'ISH',
|
---|
1107 | * 'ISHST', 'NSH' and 'NSHST'. The latter is theoretically all we need, so
|
---|
1108 | * we'll use that one.
|
---|
1109 | *
|
---|
1110 | * See https://developer.arm.com/documentation/100941/0101/Barriers for
|
---|
1111 | * details on the barrier options.
|
---|
1112 | *
|
---|
1113 | * Note! The CFG value "/IEM/HostICacheInvalidationViaHostAPI" can be used
|
---|
1114 | * to disabling the experimental code should it misbehave.
|
---|
1115 | */
|
---|
1116 | uint8_t const fHostICacheInvalidation = pVCpu->iem.s.fHostICacheInvalidation;
|
---|
1117 | if (!(fHostICacheInvalidation & IEMNATIVE_ICACHE_F_USE_HOST_API))
|
---|
1118 | {
|
---|
1119 | # define DCACHE_ICACHE_SYNC_DSB_OPTION "nshst"
|
---|
1120 | /*# define DCACHE_ICACHE_SYNC_WITH_WITH_IVAU_DSB*/
|
---|
1121 |
|
---|
1122 | /* Skipping this is fine, but doesn't impact perf much. */
|
---|
1123 | __asm__ __volatile__("dsb " DCACHE_ICACHE_SYNC_DSB_OPTION);
|
---|
1124 |
|
---|
1125 | /* Invalidate the icache for the range [pv,pv+cb). */
|
---|
1126 | # ifdef DCACHE_ICACHE_SYNC_WITH_WITH_IVAU_DSB
|
---|
1127 | size_t const cIvauDsbEvery= 20;
|
---|
1128 | unsigned cDsb = cIvauDsbEvery;
|
---|
1129 | # endif
|
---|
1130 | size_t const cbCacheLine = 64;
|
---|
1131 | size_t cbInvalidate = cb + ((uintptr_t)pv & (cbCacheLine - 1)) ;
|
---|
1132 | size_t cCacheLines = RT_ALIGN_Z(cbInvalidate, cbCacheLine) / cbCacheLine;
|
---|
1133 | uintptr_t uPtr = (uintptr_t)pv & ~(uintptr_t)(cbCacheLine - 1);
|
---|
1134 | for (;; uPtr += cbCacheLine)
|
---|
1135 | {
|
---|
1136 | __asm__ /*__volatile__*/("ic ivau, %0" : : "r" (uPtr));
|
---|
1137 | cCacheLines -= 1;
|
---|
1138 | if (!cCacheLines)
|
---|
1139 | break;
|
---|
1140 | # ifdef DCACHE_ICACHE_SYNC_WITH_WITH_IVAU_DSB
|
---|
1141 | cDsb -= 1;
|
---|
1142 | if (cDsb != 0)
|
---|
1143 | { /* likely */ }
|
---|
1144 | else
|
---|
1145 | {
|
---|
1146 | __asm__ __volatile__("dsb " DCACHE_ICACHE_SYNC_DSB_OPTION);
|
---|
1147 | cDsb = cIvauDsbEvery;
|
---|
1148 | }
|
---|
1149 | # endif
|
---|
1150 | }
|
---|
1151 |
|
---|
1152 | /*
|
---|
1153 | * The DSB here is non-optional it seems.
|
---|
1154 | *
|
---|
1155 | * The following ISB can be omitted on M2 without any obvious sideeffects,
|
---|
1156 | * it produces better number in the above mention profiling scenario.
|
---|
1157 | * This could be related to the kHasICDSB flag in cpu_capabilities.h,
|
---|
1158 | * but it doesn't look like that flag is set here (M2, Sonoma 13.4).
|
---|
1159 | *
|
---|
1160 | * I've made the inclusion of the ISH barrier as configurable and with
|
---|
1161 | * a default of skipping it.
|
---|
1162 | */
|
---|
1163 | if (!(fHostICacheInvalidation & IEMNATIVE_ICACHE_F_END_WITH_ISH))
|
---|
1164 | __asm__ __volatile__("dsb " DCACHE_ICACHE_SYNC_DSB_OPTION
|
---|
1165 | ::: "memory");
|
---|
1166 | else
|
---|
1167 | __asm__ __volatile__("dsb " DCACHE_ICACHE_SYNC_DSB_OPTION "\n\t"
|
---|
1168 | "isb"
|
---|
1169 | ::: "memory");
|
---|
1170 | }
|
---|
1171 | else
|
---|
1172 | sys_icache_invalidate(pv, cb);
|
---|
1173 |
|
---|
1174 | #elif defined(RT_OS_LINUX) && defined(RT_ARCH_ARM64)
|
---|
1175 | RT_NOREF(pVCpu);
|
---|
1176 |
|
---|
1177 | /* There is __builtin___clear_cache() but it flushes both the instruction and data cache, so do it manually. */
|
---|
1178 | static uint32_t s_u32CtrEl0 = 0;
|
---|
1179 | if (!s_u32CtrEl0)
|
---|
1180 | asm volatile ("mrs %0, ctr_el0":"=r" (s_u32CtrEl0));
|
---|
1181 | uintptr_t cbICacheLine = (uintptr_t)4 << (s_u32CtrEl0 & 0xf);
|
---|
1182 |
|
---|
1183 | uintptr_t pb = (uintptr_t)pv & ~(cbICacheLine - 1);
|
---|
1184 | for (; pb < (uintptr_t)pv + cb; pb += cbICacheLine)
|
---|
1185 | asm volatile ("ic ivau, %0" : : "r" (pb) : "memory");
|
---|
1186 |
|
---|
1187 | asm volatile ("dsb ish\n\t isb\n\t" : : : "memory");
|
---|
1188 |
|
---|
1189 | #else
|
---|
1190 | RT_NOREF(pVCpu, pv, cb);
|
---|
1191 | #endif
|
---|
1192 | }
|
---|
1193 |
|
---|
1194 |
|
---|
1195 | /**
|
---|
1196 | * Frees executable memory.
|
---|
1197 | */
|
---|
1198 | DECLHIDDEN(void) iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT
|
---|
1199 | {
|
---|
1200 | PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
|
---|
1201 | Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
|
---|
1202 | AssertPtr(pv);
|
---|
1203 | #ifdef VBOX_WITH_STATISTICS
|
---|
1204 | size_t const cbOrig = cb;
|
---|
1205 | #endif
|
---|
1206 | #ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
|
---|
1207 | Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
|
---|
1208 |
|
---|
1209 | /* Align the size as we did when allocating the block. */
|
---|
1210 | cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
|
---|
1211 |
|
---|
1212 | #else
|
---|
1213 | PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;
|
---|
1214 | Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
|
---|
1215 | AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);
|
---|
1216 | uint32_t const idxChunk = pHdr->idxChunk;
|
---|
1217 | AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);
|
---|
1218 | pv = pHdr;
|
---|
1219 |
|
---|
1220 | /* Adjust and align the size to cover the whole allocation area. */
|
---|
1221 | cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
|
---|
1222 | #endif
|
---|
1223 |
|
---|
1224 | /* Free it / assert sanity. */
|
---|
1225 | bool fFound = false;
|
---|
1226 | uint32_t const cbChunk = pExecMemAllocator->cbChunk;
|
---|
1227 | #ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
|
---|
1228 | uint32_t const cChunks = pExecMemAllocator->cChunks;
|
---|
1229 | for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
|
---|
1230 | #endif
|
---|
1231 | {
|
---|
1232 | uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
|
---|
1233 | fFound = offChunk < cbChunk;
|
---|
1234 | if (fFound)
|
---|
1235 | {
|
---|
1236 | uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
|
---|
1237 | uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
|
---|
1238 |
|
---|
1239 | /* Check that it's valid and free it. */
|
---|
1240 | uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
|
---|
1241 | AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
|
---|
1242 | for (uint32_t i = 1; i < cReqUnits; i++)
|
---|
1243 | AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
|
---|
1244 | ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
|
---|
1245 |
|
---|
1246 | /* Invalidate the header using the writeable memory view. */
|
---|
1247 | pHdr = (PIEMEXECMEMALLOCHDR)((uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRw + offChunk);
|
---|
1248 | #ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
|
---|
1249 | pHdr->uMagic = 0;
|
---|
1250 | pHdr->idxChunk = 0;
|
---|
1251 | pHdr->pTb = NULL;
|
---|
1252 | #endif
|
---|
1253 | pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
|
---|
1254 | pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
|
---|
1255 |
|
---|
1256 | /* Update the stats. */
|
---|
1257 | pExecMemAllocator->cbAllocated -= cb;
|
---|
1258 | pExecMemAllocator->cbFree += cb;
|
---|
1259 | pExecMemAllocator->cAllocations -= 1;
|
---|
1260 | #ifdef VBOX_WITH_STATISTICS
|
---|
1261 | pExecMemAllocator->cbUnusable -= (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbOrig;
|
---|
1262 | #endif
|
---|
1263 | return;
|
---|
1264 | }
|
---|
1265 | }
|
---|
1266 | AssertFailed();
|
---|
1267 | }
|
---|
1268 |
|
---|
1269 |
|
---|
1270 | /**
|
---|
1271 | * Interface used by iemNativeRecompileAttachExecMemChunkCtx and unwind info
|
---|
1272 | * generators.
|
---|
1273 | */
|
---|
1274 | DECLHIDDEN(PIEMNATIVEINSTR)
|
---|
1275 | iemExecMemAllocatorAllocFromChunk(PVMCPU pVCpu, uint32_t idxChunk, uint32_t cbReq, PIEMNATIVEINSTR *ppaExec)
|
---|
1276 | {
|
---|
1277 | PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
|
---|
1278 | AssertReturn(idxChunk < pExecMemAllocator->cChunks, NULL);
|
---|
1279 | Assert(cbReq < _1M);
|
---|
1280 | return iemExecMemAllocatorAllocBytesInChunk(pExecMemAllocator, idxChunk, cbReq, ppaExec);
|
---|
1281 | }
|
---|
1282 |
|
---|
1283 |
|
---|
1284 | /**
|
---|
1285 | * For getting the per-chunk context detailing common code for a TB.
|
---|
1286 | *
|
---|
1287 | * This is for use by the disassembler.
|
---|
1288 | */
|
---|
1289 | DECLHIDDEN(PCIEMNATIVEPERCHUNKCTX) iemExecMemGetTbChunkCtx(PVMCPU pVCpu, PCIEMTB pTb)
|
---|
1290 | {
|
---|
1291 | PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
|
---|
1292 | if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
|
---|
1293 | {
|
---|
1294 | uintptr_t const uAddress = (uintptr_t)pTb->Native.paInstructions;
|
---|
1295 | uint32_t const cbChunk = pExecMemAllocator->cbChunk;
|
---|
1296 | uint32_t idxChunk = pExecMemAllocator->cChunks;
|
---|
1297 | while (idxChunk-- > 0)
|
---|
1298 | if (uAddress - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx < cbChunk)
|
---|
1299 | return pExecMemAllocator->aChunks[idxChunk].pCtx;
|
---|
1300 | }
|
---|
1301 | return NULL;
|
---|
1302 | }
|
---|
1303 |
|
---|
1304 |
|
---|
1305 | #ifdef IN_RING3
|
---|
1306 | # ifdef RT_OS_WINDOWS
|
---|
1307 |
|
---|
1308 | /**
|
---|
1309 | * Initializes the unwind info structures for windows hosts.
|
---|
1310 | */
|
---|
1311 | static int
|
---|
1312 | iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
|
---|
1313 | void *pvChunk, uint32_t idxChunk)
|
---|
1314 | {
|
---|
1315 | RT_NOREF(pVCpu);
|
---|
1316 |
|
---|
1317 | # ifdef RT_AMD64
|
---|
1318 | /*
|
---|
1319 | * The AMD64 unwind opcodes.
|
---|
1320 | *
|
---|
1321 | * This is a program that starts with RSP after a RET instruction that
|
---|
1322 | * ends up in recompiled code, and the operations we describe here will
|
---|
1323 | * restore all non-volatile registers and bring RSP back to where our
|
---|
1324 | * RET address is. This means it's reverse order from what happens in
|
---|
1325 | * the prologue.
|
---|
1326 | *
|
---|
1327 | * Note! Using a frame register approach here both because we have one
|
---|
1328 | * and but mainly because the UWOP_ALLOC_LARGE argument values
|
---|
1329 | * would be a pain to write initializers for. On the positive
|
---|
1330 | * side, we're impervious to changes in the the stack variable
|
---|
1331 | * area can can deal with dynamic stack allocations if necessary.
|
---|
1332 | */
|
---|
1333 | static const IMAGE_UNWIND_CODE s_aOpcodes[] =
|
---|
1334 | {
|
---|
1335 | { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
|
---|
1336 | { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
|
---|
1337 | { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
|
---|
1338 | { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
|
---|
1339 | { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
|
---|
1340 | { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
|
---|
1341 | { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
|
---|
1342 | { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
|
---|
1343 | { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
|
---|
1344 | { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
|
---|
1345 | };
|
---|
1346 | union
|
---|
1347 | {
|
---|
1348 | IMAGE_UNWIND_INFO Info;
|
---|
1349 | uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
|
---|
1350 | } s_UnwindInfo =
|
---|
1351 | {
|
---|
1352 | {
|
---|
1353 | /* .Version = */ 1,
|
---|
1354 | /* .Flags = */ 0,
|
---|
1355 | /* .SizeOfProlog = */ 16, /* whatever */
|
---|
1356 | /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
|
---|
1357 | /* .FrameRegister = */ X86_GREG_xBP,
|
---|
1358 | /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
|
---|
1359 | }
|
---|
1360 | };
|
---|
1361 | AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
|
---|
1362 | AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
|
---|
1363 |
|
---|
1364 | # elif defined(RT_ARCH_ARM64)
|
---|
1365 | /*
|
---|
1366 | * The ARM64 unwind codes.
|
---|
1367 | *
|
---|
1368 | * See https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170
|
---|
1369 | */
|
---|
1370 | static const uint8_t s_abOpcodes[] =
|
---|
1371 | {
|
---|
1372 | /* Prolog: None. */
|
---|
1373 | 0xe5, /* end_c */
|
---|
1374 | /* Epilog / unwind info: */
|
---|
1375 | IEMNATIVE_FRAME_VAR_SIZE / 16, /* alloc_s */
|
---|
1376 | 0xc8, 0x00, /* save_regp x19, x20, [sp + #0] */
|
---|
1377 | 0xc8, 0x82, /* save_regp x21, x22, [sp + #2*8] */
|
---|
1378 | 0xc9, 0x04, /* save_regp x23, x24, [sp + #4*8] */
|
---|
1379 | 0xc9, 0x86, /* save_regp x25, x26, [sp + #6*8] */
|
---|
1380 | 0xca, 0x08, /* save_regp x27, x28, [sp + #8*8] */
|
---|
1381 | 0x4a, /* save_fplr x29, x30, [sp + #10*8] */
|
---|
1382 | 12*8 / 16, /* alloc_s */
|
---|
1383 | 0xc4, /* end */
|
---|
1384 | 0xc5 /* nop */
|
---|
1385 | };
|
---|
1386 | AssertCompile(!(sizeof(s_abOpcodes) & 3));
|
---|
1387 | AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 15) && IEMNATIVE_FRAME_VAR_SIZE < 512);
|
---|
1388 |
|
---|
1389 | # else
|
---|
1390 | # error "Port me!"
|
---|
1391 | # endif
|
---|
1392 |
|
---|
1393 | /*
|
---|
1394 | * Calc how much space we need and allocate it off the exec heap.
|
---|
1395 | */
|
---|
1396 | # ifdef RT_ARCH_ARM64
|
---|
1397 | unsigned const cbPerEntry = _1M - 4;
|
---|
1398 | unsigned const cFunctionEntries = (pExecMemAllocator->cbChunk + cbPerEntry - 1) / cbPerEntry;
|
---|
1399 | unsigned const cbUnwindInfo = (sizeof(uint32_t) * 2 + sizeof(s_abOpcodes)) * cFunctionEntries;
|
---|
1400 | # else
|
---|
1401 | unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
|
---|
1402 | unsigned const cFunctionEntries = 1;
|
---|
1403 | # endif
|
---|
1404 | unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
|
---|
1405 | PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
|
---|
1406 | = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocBytesInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL);
|
---|
1407 | AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
|
---|
1408 | pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
|
---|
1409 |
|
---|
1410 | /*
|
---|
1411 | * Initialize the structures.
|
---|
1412 | */
|
---|
1413 | # ifdef RT_AMD64
|
---|
1414 | PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
|
---|
1415 |
|
---|
1416 | paFunctions[0].BeginAddress = 0;
|
---|
1417 | paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
|
---|
1418 | paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
|
---|
1419 |
|
---|
1420 | memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
|
---|
1421 | memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
|
---|
1422 |
|
---|
1423 | # elif defined(RT_ARCH_ARM64)
|
---|
1424 |
|
---|
1425 | PIMAGE_ARM64_RUNTIME_FUNCTION_ENTRY_XDATA pInfo = (PIMAGE_ARM64_RUNTIME_FUNCTION_ENTRY_XDATA)&paFunctions[cFunctionEntries];
|
---|
1426 | for (uint32_t i = 0, off = 0; i < cFunctionEntries; i++)
|
---|
1427 | {
|
---|
1428 | paFunctions[i].BeginAddress = off;
|
---|
1429 | paFunctions[i].UnwindData = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk) | PdataRefToFullXdata;
|
---|
1430 |
|
---|
1431 | uint32_t const cFunctionLengthInWords = RT_MAX(cbPerEntry, pExecMemAllocator->cbChunk - off) / 4;
|
---|
1432 | pInfo[0].FunctionLength = cFunctionLengthInWords;
|
---|
1433 | pInfo[0].Version = 0;
|
---|
1434 | pInfo[0].ExceptionDataPresent = 0;
|
---|
1435 | pInfo[0].EpilogInHeader = 0;
|
---|
1436 | pInfo[0].EpilogCount = 1;
|
---|
1437 | pInfo[0].CodeWords = sizeof(s_abOpcodes) / sizeof(uint32_t);
|
---|
1438 |
|
---|
1439 | pInfo[1].EpilogInfo.EpilogStartOffset = cFunctionLengthInWords;
|
---|
1440 | pInfo[1].EpilogInfo.Reserved = 0;
|
---|
1441 | pInfo[1].EpilogInfo.EpilogStartIndex = 1;
|
---|
1442 | pInfo += 2;
|
---|
1443 |
|
---|
1444 | memcpy(pInfo, s_abOpcodes, sizeof(s_abOpcodes));
|
---|
1445 | pInfo += sizeof(s_abOpcodes) / sizeof(*pInfo);
|
---|
1446 | }
|
---|
1447 |
|
---|
1448 | # else
|
---|
1449 | # error "Port me!"
|
---|
1450 | # endif
|
---|
1451 |
|
---|
1452 | /*
|
---|
1453 | * Register them.
|
---|
1454 | */
|
---|
1455 | uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
|
---|
1456 | AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
|
---|
1457 |
|
---|
1458 | return VINF_SUCCESS;
|
---|
1459 | }
|
---|
1460 |
|
---|
1461 |
|
---|
1462 | # else /* !RT_OS_WINDOWS */
|
---|
1463 |
|
---|
1464 | /**
|
---|
1465 | * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
|
---|
1466 | */
|
---|
1467 | DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
|
---|
1468 | {
|
---|
1469 | if (iValue >= 64)
|
---|
1470 | {
|
---|
1471 | Assert(iValue < 0x2000);
|
---|
1472 | *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
|
---|
1473 | *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
|
---|
1474 | }
|
---|
1475 | else if (iValue >= 0)
|
---|
1476 | *Ptr.pb++ = (uint8_t)iValue;
|
---|
1477 | else if (iValue > -64)
|
---|
1478 | *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
|
---|
1479 | else
|
---|
1480 | {
|
---|
1481 | Assert(iValue > -0x2000);
|
---|
1482 | *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
|
---|
1483 | *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
|
---|
1484 | }
|
---|
1485 | return Ptr;
|
---|
1486 | }
|
---|
1487 |
|
---|
1488 |
|
---|
1489 | /**
|
---|
1490 | * Emits an ULEB128 encoded value (up to 64-bit wide).
|
---|
1491 | */
|
---|
1492 | DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
|
---|
1493 | {
|
---|
1494 | while (uValue >= 0x80)
|
---|
1495 | {
|
---|
1496 | *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
|
---|
1497 | uValue >>= 7;
|
---|
1498 | }
|
---|
1499 | *Ptr.pb++ = (uint8_t)uValue;
|
---|
1500 | return Ptr;
|
---|
1501 | }
|
---|
1502 |
|
---|
1503 |
|
---|
1504 | /**
|
---|
1505 | * Emits a CFA rule as register @a uReg + offset @a off.
|
---|
1506 | */
|
---|
1507 | DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
|
---|
1508 | {
|
---|
1509 | *Ptr.pb++ = DW_CFA_def_cfa;
|
---|
1510 | Ptr = iemDwarfPutUleb128(Ptr, uReg);
|
---|
1511 | Ptr = iemDwarfPutUleb128(Ptr, off);
|
---|
1512 | return Ptr;
|
---|
1513 | }
|
---|
1514 |
|
---|
1515 |
|
---|
1516 | /**
|
---|
1517 | * Emits a register (@a uReg) save location:
|
---|
1518 | * CFA + @a off * data_alignment_factor
|
---|
1519 | */
|
---|
1520 | DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
|
---|
1521 | {
|
---|
1522 | if (uReg < 0x40)
|
---|
1523 | *Ptr.pb++ = DW_CFA_offset | uReg;
|
---|
1524 | else
|
---|
1525 | {
|
---|
1526 | *Ptr.pb++ = DW_CFA_offset_extended;
|
---|
1527 | Ptr = iemDwarfPutUleb128(Ptr, uReg);
|
---|
1528 | }
|
---|
1529 | Ptr = iemDwarfPutUleb128(Ptr, off);
|
---|
1530 | return Ptr;
|
---|
1531 | }
|
---|
1532 |
|
---|
1533 |
|
---|
# if 0 /* unused */
/**
 * Emits the save location of register @a uReg using a signed factored offset:
 *      CFA + @a offSigned * data_alignment_factor
 *
 * Uses DW_CFA_offset_extended_sf: opcode, ULEB128 register number, signed
 * LEB128 offset.
 *
 * @returns The output cursor advanced past the instruction.
 * @param   Ptr         The output cursor.
 * @param   uReg        The DWARF register number.
 * @param   offSigned   The signed factored offset.
 */
DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
{
    *Ptr.pb++ = DW_CFA_offset_extended_sf;
    return iemDwarfPutLeb128(iemDwarfPutUleb128(Ptr, uReg), offSigned);
}
# endif
|
---|
1547 |
|
---|
1548 |
|
---|
/**
 * Initializes the unwind info section for non-windows hosts.
 *
 * Builds a small .eh_frame image in the chunk's IEMEXECMEMCHUNKEHFRAME entry,
 * consisting of one CIE, one FDE covering the whole chunk and a terminator,
 * and registers it with the unwinder (__register_frame or
 * __register_frame_info, depending on IEMNATIVE_USE_LIBUNWIND).
 *
 * When IEMNATIVE_USE_GDB_JIT is defined, an in-memory ELF image describing
 * the chunk is additionally allocated from the chunk itself, filled in and
 * announced via the GDB JIT descriptor.
 *
 * @returns VINF_SUCCESS.  With IEMNATIVE_USE_GDB_JIT, VERR_INTERNAL_ERROR_5 is
 *          returned if the symbol file cannot be allocated from the chunk.
 * @param   pVCpu               The cross context virtual CPU structure of the
 *                              calling thread.  Only used for naming things in
 *                              the GDB JIT image.
 * @param   pExecMemAllocator   The executable memory allocator instance.
 * @param   pvChunk             The chunk address; used as the start PC of the
 *                              FDE.
 * @param   idxChunk            The index of the chunk.
 */
static int
iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
                                                     void *pvChunk, uint32_t idxChunk)
{
    PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
    pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */

    RTPTRUNION Ptr = { pEhFrame->abEhFrame };

    /*
     * Generate the CIE first.
     */
# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
    uint8_t const iDwarfVer = 3;
# else
    uint8_t const iDwarfVer = 4;
# endif
    RTPTRUNION const PtrCie = Ptr;
    *Ptr.pu32++ = 123;                                      /* The CIE length will be determined later. */
    *Ptr.pu32++ = 0 /*UINT32_MAX*/;                         /* I'm a CIE in .eh_frame speak. */
    *Ptr.pb++ = iDwarfVer;                                  /* DWARF version */
    *Ptr.pb++ = 0;                                          /* Augmentation. */
    if (iDwarfVer >= 4)
    {
        *Ptr.pb++ = sizeof(uintptr_t);                      /* Address size. */
        *Ptr.pb++ = 0;                                      /* Segment selector size. */
    }
# ifdef RT_ARCH_AMD64
    Ptr = iemDwarfPutLeb128(Ptr, 1);                        /* Code alignment factor (LEB128 = 1). */
# else
    Ptr = iemDwarfPutLeb128(Ptr, 4);                        /* Code alignment factor (LEB128 = 4). */
# endif
    Ptr = iemDwarfPutLeb128(Ptr, -8);                       /* Data alignment factor (LEB128 = -8). */
# ifdef RT_ARCH_AMD64
    Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA);          /* Return address column (ULEB128) */
# elif defined(RT_ARCH_ARM64)
    Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR);          /* Return address column (ULEB128) */
# else
# error "port me"
# endif
    /* Initial instructions: */
# ifdef RT_ARCH_AMD64
    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   /* CFA = RBP + 0x10 - first stack parameter */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1);     /* Ret RIP = [CFA + 1*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2);    /* RBP = [CFA + 2*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3);    /* RBX = [CFA + 3*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4);    /* R12 = [CFA + 4*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5);    /* R13 = [CFA + 5*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6);    /* R14 = [CFA + 6*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7);    /* R15 = [CFA + 7*-8] */
# elif defined(RT_ARCH_ARM64)
# if 1
    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16);    /* CFA = BP + 0x10 - first stack parameter */
# else
    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
# endif
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1);     /* Ret PC = [CFA + 1*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2);     /* Ret BP = [CFA + 2*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3);    /* X28 = [CFA + 3*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4);    /* X27 = [CFA + 4*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5);    /* X26 = [CFA + 5*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6);    /* X25 = [CFA + 6*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7);    /* X24 = [CFA + 7*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8);    /* X23 = [CFA + 8*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9);    /* X22 = [CFA + 9*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10);   /* X21 = [CFA +10*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11);   /* X20 = [CFA +11*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12);   /* X19 = [CFA +12*-8] */
    AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
# else
# error "port me"
# endif
    /* Pad the CIE with NOPs up to a multiple of four bytes. */
    while ((Ptr.u - PtrCie.u) & 3)
        *Ptr.pb++ = DW_CFA_nop;
    /* Finalize the CIE size (the length field itself is not included). */
    *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);

    /*
     * Generate an FDE for the whole chunk area.
     */
# ifdef IEMNATIVE_USE_LIBUNWIND
    pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
# endif
    RTPTRUNION const PtrFde = Ptr;
    *Ptr.pu32++ = 123;                                      /* The FDE length will be determined later. */
    *Ptr.pu32 = Ptr.u - PtrCie.u;                           /* Negated self relative CIE address. */
    Ptr.pu32++;
    *Ptr.pu64++ = (uintptr_t)pvChunk;                       /* Absolute start PC of this FDE. */
    *Ptr.pu64++ = pExecMemAllocator->cbChunk;               /* PC range length for this FDE. */
# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
    *Ptr.pb++ = DW_CFA_nop;
# endif
    /* Pad the FDE with NOPs up to a multiple of four bytes. */
    while ((Ptr.u - PtrFde.u) & 3)
        *Ptr.pb++ = DW_CFA_nop;
    /* Finalize the FDE size (the length field itself is not included). */
    *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);

    /* Terminator entry. */
    *Ptr.pu32++ = 0;
    *Ptr.pu32++ = 0;            /* just to be sure... */
    Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));

    /*
     * Register it.
     */
# ifdef IEMNATIVE_USE_LIBUNWIND
    __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
# else
    memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
    __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
# endif

# ifdef IEMNATIVE_USE_GDB_JIT
    /*
     * Now for telling GDB about this (experimental).
     *
     * This seems to work best with ET_DYN.
     */
    GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocBytesInChunk(pExecMemAllocator, idxChunk,
                                                                                           sizeof(GDBJITSYMFILE), NULL);
    AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
    unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;

    RT_ZERO(*pSymFile);

    /*
     * The ELF header:
     */
    pSymFile->EHdr.e_ident[0] = ELFMAG0;
    pSymFile->EHdr.e_ident[1] = ELFMAG1;
    pSymFile->EHdr.e_ident[2] = ELFMAG2;
    pSymFile->EHdr.e_ident[3] = ELFMAG3;
    pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
    pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
    pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
    pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    pSymFile->EHdr.e_type = ET_DYN;
# else
    pSymFile->EHdr.e_type = ET_REL;
# endif
# ifdef RT_ARCH_AMD64
    pSymFile->EHdr.e_machine = EM_AMD64;
# elif defined(RT_ARCH_ARM64)
    pSymFile->EHdr.e_machine = EM_AARCH64;
# else
# error "port me"
# endif
    pSymFile->EHdr.e_version = 1; /*?*/
    pSymFile->EHdr.e_entry = 0;
# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
# else
    pSymFile->EHdr.e_phoff = 0;
# endif
    pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
    pSymFile->EHdr.e_flags = 0;
    pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
    pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
# else
    pSymFile->EHdr.e_phentsize = 0;
    pSymFile->EHdr.e_phnum = 0;
# endif
    pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
    pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
    pSymFile->EHdr.e_shstrndx = 0; /* set later */

    /* Helpers appending a zero terminated string to the string table at
       offStrTab; the first takes a string literal, the second a format string. */
    uint32_t offStrTab = 0;
#define APPEND_STR(a_szStr) do { \
        memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
        offStrTab += sizeof(a_szStr); \
        Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    } while (0)
#define APPEND_STR_FMT(a_szStr, ...) do { \
        offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
        offStrTab++; \
        Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    } while (0)

    /*
     * Section headers.
     */
    /* Section header #0: NULL */
    unsigned i = 0;
    APPEND_STR("");
    RT_ZERO(pSymFile->aShdrs[i]);
    i++;

    /* Section header: .eh_frame */
    pSymFile->aShdrs[i].sh_name = offStrTab;
    APPEND_STR(".eh_frame");
    pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
    pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    pSymFile->aShdrs[i].sh_offset
        = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
# else
    pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
    pSymFile->aShdrs[i].sh_offset = 0;
# endif

    pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
    pSymFile->aShdrs[i].sh_link = 0;
    pSymFile->aShdrs[i].sh_info = 0;
    pSymFile->aShdrs[i].sh_addralign = 1;
    pSymFile->aShdrs[i].sh_entsize = 0;
    memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
    i++;

    /* Section header: .shstrtab */
    unsigned const iShStrTab = i;
    pSymFile->EHdr.e_shstrndx = iShStrTab;
    pSymFile->aShdrs[i].sh_name = offStrTab;
    APPEND_STR(".shstrtab");
    pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
    pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    pSymFile->aShdrs[i].sh_offset
        = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
# else
    pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
    pSymFile->aShdrs[i].sh_offset = 0;
# endif
    pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
    pSymFile->aShdrs[i].sh_link = 0;
    pSymFile->aShdrs[i].sh_info = 0;
    pSymFile->aShdrs[i].sh_addralign = 1;
    pSymFile->aShdrs[i].sh_entsize = 0;
    i++;

    /* Section header: .symtab */
    pSymFile->aShdrs[i].sh_name = offStrTab;
    APPEND_STR(".symtab");
    pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
    pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
    pSymFile->aShdrs[i].sh_offset
        = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
    pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
    pSymFile->aShdrs[i].sh_link = iShStrTab;
    pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
    pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
    pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
    i++;

# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    /* Section header: .dynsym */
    pSymFile->aShdrs[i].sh_name = offStrTab;
    APPEND_STR(".dynsym");
    pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
    pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
    pSymFile->aShdrs[i].sh_offset
        = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
    pSymFile->aShdrs[i].sh_link = iShStrTab;
    pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
    pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
    pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
    i++;
# endif

# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    /* Section header: .dynamic */
    pSymFile->aShdrs[i].sh_name = offStrTab;
    APPEND_STR(".dynamic");
    pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
    pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
    pSymFile->aShdrs[i].sh_offset
        = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
    pSymFile->aShdrs[i].sh_link = iShStrTab;
    pSymFile->aShdrs[i].sh_info = 0;
    pSymFile->aShdrs[i].sh_addralign = 1;
    pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
    i++;
# endif

    /* Section header: .text - covers the rest of the chunk following the symbol file. */
    unsigned const iShText = i;
    pSymFile->aShdrs[i].sh_name = offStrTab;
    APPEND_STR(".text");
    pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
    pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    pSymFile->aShdrs[i].sh_offset
        = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
# else
    pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
    pSymFile->aShdrs[i].sh_offset = 0;
# endif
    pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
    pSymFile->aShdrs[i].sh_link = 0;
    pSymFile->aShdrs[i].sh_info = 0;
    pSymFile->aShdrs[i].sh_addralign = 1;
    pSymFile->aShdrs[i].sh_entsize = 0;
    i++;

    Assert(i == RT_ELEMENTS(pSymFile->aShdrs));

# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    /*
     * The program headers:
     */
    /* Everything in a single LOAD segment: */
    i = 0;
    pSymFile->aPhdrs[i].p_type = PT_LOAD;
    pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
    pSymFile->aPhdrs[i].p_offset
        = pSymFile->aPhdrs[i].p_vaddr
        = pSymFile->aPhdrs[i].p_paddr = 0;
    pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
        = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
    pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
    i++;
    /* The .dynamic segment. */
    pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
    pSymFile->aPhdrs[i].p_flags = PF_R;
    pSymFile->aPhdrs[i].p_offset
        = pSymFile->aPhdrs[i].p_vaddr
        = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
        = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
    pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
    i++;

    Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));

    /*
     * The dynamic section:
     */
    i = 0;
    pSymFile->aDyn[i].d_tag = DT_SONAME;
    pSymFile->aDyn[i].d_un.d_val = offStrTab;
    APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
    i++;
    pSymFile->aDyn[i].d_tag = DT_STRTAB;
    pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
    i++;
    pSymFile->aDyn[i].d_tag = DT_STRSZ;
    pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
    i++;
    pSymFile->aDyn[i].d_tag = DT_SYMTAB;
    pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    i++;
    pSymFile->aDyn[i].d_tag = DT_SYMENT;
    pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
    i++;
    pSymFile->aDyn[i].d_tag = DT_NULL;
    i++;
    Assert(i == RT_ELEMENTS(pSymFile->aDyn));
# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */

    /*
     * Symbol tables:
     */
    /** @todo gdb doesn't seem to really like this ... */
    /* Symbol #0: the undefined symbol. */
    i = 0;
    pSymFile->aSymbols[i].st_name = 0;
    pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
    pSymFile->aSymbols[i].st_value = 0;
    pSymFile->aSymbols[i].st_size = 0;
    pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
    pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
# endif
    i++;

    /* Symbol #1: nameless file symbol. */
    pSymFile->aSymbols[i].st_name = 0;
    pSymFile->aSymbols[i].st_shndx = SHN_ABS;
    pSymFile->aSymbols[i].st_value = 0;
    pSymFile->aSymbols[i].st_size = 0;
    pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
    pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
    i++;

    /* Symbol #2: one function symbol spanning the whole .text section. */
    pSymFile->aSymbols[i].st_name = offStrTab;
    APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
# if 0
    pSymFile->aSymbols[i].st_shndx = iShText;
    pSymFile->aSymbols[i].st_value = 0;
# else
    pSymFile->aSymbols[i].st_shndx = SHN_ABS;
    pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
# endif
    pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
    pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
    pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
    pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
# endif
    i++;

    Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
    Assert(offStrTab < sizeof(pSymFile->szzStrTab));

    /*
     * The GDB JIT entry and informing GDB.
     */
    pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
# if 1
    pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
# else
    pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
# endif

    /* Append the entry to the tail of the descriptor's doubly linked list
       while holding the GDB JIT lock. */
    RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
    RTCritSectEnter(&g_IemNativeGdbJitLock);
    pEhFrame->GdbJitEntry.pNext = NULL;
    pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
    if (__jit_debug_descriptor.pTail)
        __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
    else
        __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
    __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
    __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;

    /* Notify GDB: */
    __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
    __jit_debug_register_code();
    __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
    RTCritSectLeave(&g_IemNativeGdbJitLock);

# else /* !IEMNATIVE_USE_GDB_JIT */
    RT_NOREF(pVCpu);
# endif /* !IEMNATIVE_USE_GDB_JIT */

    return VINF_SUCCESS;
}
|
---|
1984 |
|
---|
1985 | # endif /* !RT_OS_WINDOWS */
|
---|
1986 | #endif /* IN_RING3 */
|
---|
1987 |
|
---|
1988 |
|
---|
1989 | /**
|
---|
1990 | * Adds another chunk to the executable memory allocator.
|
---|
1991 | *
|
---|
1992 | * This is used by the init code for the initial allocation and later by the
|
---|
1993 | * regular allocator function when it's out of memory.
|
---|
1994 | */
|
---|
1995 | static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
|
---|
1996 | {
|
---|
1997 | /* Check that we've room for growth. */
|
---|
1998 | uint32_t const idxChunk = pExecMemAllocator->cChunks;
|
---|
1999 | AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
|
---|
2000 |
|
---|
2001 | /* Allocate a chunk. */
|
---|
2002 | #ifdef RT_OS_DARWIN
|
---|
2003 | void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
|
---|
2004 | #else
|
---|
2005 | void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
|
---|
2006 | #endif
|
---|
2007 | AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
|
---|
2008 |
|
---|
2009 | #ifdef RT_OS_DARWIN
|
---|
2010 | /*
|
---|
2011 | * Because it is impossible to have a RWX memory allocation on macOS try to remap the memory
|
---|
2012 | * chunk readable/executable somewhere else so we can save us the hassle of switching between
|
---|
2013 | * protections when exeuctable memory is allocated.
|
---|
2014 | */
|
---|
2015 | int rc = VERR_NO_EXEC_MEMORY;
|
---|
2016 | mach_port_t hPortTask = mach_task_self();
|
---|
2017 | mach_vm_address_t AddrChunk = (mach_vm_address_t)pvChunk;
|
---|
2018 | mach_vm_address_t AddrRemapped = 0;
|
---|
2019 | vm_prot_t ProtCur = 0;
|
---|
2020 | vm_prot_t ProtMax = 0;
|
---|
2021 | kern_return_t krc = mach_vm_remap(hPortTask, &AddrRemapped, pExecMemAllocator->cbChunk, 0,
|
---|
2022 | VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
|
---|
2023 | hPortTask, AddrChunk, FALSE, &ProtCur, &ProtMax,
|
---|
2024 | VM_INHERIT_NONE);
|
---|
2025 | if (krc == KERN_SUCCESS)
|
---|
2026 | {
|
---|
2027 | krc = mach_vm_protect(mach_task_self(), AddrRemapped, pExecMemAllocator->cbChunk, FALSE, VM_PROT_READ | VM_PROT_EXECUTE);
|
---|
2028 | if (krc == KERN_SUCCESS)
|
---|
2029 | rc = VINF_SUCCESS;
|
---|
2030 | else
|
---|
2031 | {
|
---|
2032 | AssertLogRelMsgFailed(("mach_vm_protect -> %d (%#x)\n", krc, krc));
|
---|
2033 | krc = mach_vm_deallocate(hPortTask, AddrRemapped, pExecMemAllocator->cbChunk);
|
---|
2034 | Assert(krc == KERN_SUCCESS);
|
---|
2035 | }
|
---|
2036 | }
|
---|
2037 | else
|
---|
2038 | AssertLogRelMsgFailed(("mach_vm_remap -> %d (%#x)\n", krc, krc));
|
---|
2039 | if (RT_FAILURE(rc))
|
---|
2040 | {
|
---|
2041 | RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
|
---|
2042 | return rc;
|
---|
2043 | }
|
---|
2044 |
|
---|
2045 | void *pvChunkRx = (void *)AddrRemapped;
|
---|
2046 | #else
|
---|
2047 | int rc = VINF_SUCCESS;
|
---|
2048 | void *pvChunkRx = pvChunk;
|
---|
2049 | #endif
|
---|
2050 |
|
---|
2051 | /*
|
---|
2052 | * Add the chunk.
|
---|
2053 | *
|
---|
2054 | * This must be done before the unwind init so windows can allocate
|
---|
2055 | * memory from the chunk when using the alternative sub-allocator.
|
---|
2056 | */
|
---|
2057 | pExecMemAllocator->aChunks[idxChunk].pvChunkRw = pvChunk;
|
---|
2058 | pExecMemAllocator->aChunks[idxChunk].pvChunkRx = pvChunkRx;
|
---|
2059 | #ifdef IN_RING3
|
---|
2060 | pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
|
---|
2061 | #endif
|
---|
2062 | pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
|
---|
2063 | pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
|
---|
2064 | memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
|
---|
2065 | 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
|
---|
2066 |
|
---|
2067 | pExecMemAllocator->cChunks = idxChunk + 1;
|
---|
2068 | pExecMemAllocator->idxChunkHint = idxChunk;
|
---|
2069 |
|
---|
2070 | pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
|
---|
2071 | pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
|
---|
2072 |
|
---|
2073 | /* If there is a chunk context init callback call it. */
|
---|
2074 | rc = iemNativeRecompileAttachExecMemChunkCtx(pVCpu, idxChunk, &pExecMemAllocator->aChunks[idxChunk].pCtx);
|
---|
2075 | #ifdef IN_RING3
|
---|
2076 | /*
|
---|
2077 | * Initialize the unwind information (this cannot really fail atm).
|
---|
2078 | * (This sets pvUnwindInfo.)
|
---|
2079 | */
|
---|
2080 | if (RT_SUCCESS(rc))
|
---|
2081 | rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunkRx, idxChunk);
|
---|
2082 | #endif
|
---|
2083 | if (RT_SUCCESS(rc))
|
---|
2084 | { /* likely */ }
|
---|
2085 | else
|
---|
2086 | {
|
---|
2087 | /* Just in case the impossible happens, undo the above up: */
|
---|
2088 | pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
|
---|
2089 | pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
|
---|
2090 | pExecMemAllocator->cChunks = idxChunk;
|
---|
2091 | memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
|
---|
2092 | 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
|
---|
2093 | pExecMemAllocator->aChunks[idxChunk].pvChunkRw = NULL;
|
---|
2094 | pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
|
---|
2095 |
|
---|
2096 | # ifdef RT_OS_DARWIN
|
---|
2097 | krc = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx,
|
---|
2098 | pExecMemAllocator->cbChunk);
|
---|
2099 | Assert(krc == KERN_SUCCESS);
|
---|
2100 | # endif
|
---|
2101 |
|
---|
2102 | RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
|
---|
2103 | return rc;
|
---|
2104 | }
|
---|
2105 |
|
---|
2106 | return VINF_SUCCESS;
|
---|
2107 | }
|
---|
2108 |
|
---|
2109 |
|
---|
2110 | /**
|
---|
2111 | * Initializes the executable memory allocator for native recompilation on the
|
---|
2112 | * calling EMT.
|
---|
2113 | *
|
---|
2114 | * @returns VBox status code.
|
---|
2115 | * @param pVCpu The cross context virtual CPU structure of the calling
|
---|
2116 | * thread.
|
---|
2117 | * @param cbMax The max size of the allocator.
|
---|
2118 | * @param cbInitial The initial allocator size.
|
---|
2119 | * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
|
---|
2120 | * dependent).
|
---|
2121 | */
|
---|
2122 | int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT
|
---|
2123 | {
|
---|
2124 | /*
|
---|
2125 | * Validate input.
|
---|
2126 | */
|
---|
2127 | AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
|
---|
2128 | AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
|
---|
2129 | AssertLogRelMsgReturn( cbChunk != UINT32_MAX
|
---|
2130 | || cbChunk == 0
|
---|
2131 | || ( RT_IS_POWER_OF_TWO(cbChunk)
|
---|
2132 | && cbChunk >= _1M
|
---|
2133 | && cbChunk <= _256M
|
---|
2134 | && cbChunk <= cbMax),
|
---|
2135 | ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
|
---|
2136 | VERR_OUT_OF_RANGE);
|
---|
2137 |
|
---|
2138 | /*
|
---|
2139 | * Adjust/figure out the chunk size.
|
---|
2140 | */
|
---|
2141 | if (cbChunk == 0 || cbChunk == UINT32_MAX)
|
---|
2142 | {
|
---|
2143 | if (cbMax >= _256M)
|
---|
2144 | cbChunk = _64M;
|
---|
2145 | else
|
---|
2146 | {
|
---|
2147 | if (cbMax < _16M)
|
---|
2148 | cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
|
---|
2149 | else
|
---|
2150 | cbChunk = (uint32_t)cbMax / 4;
|
---|
2151 | if (!RT_IS_POWER_OF_TWO(cbChunk))
|
---|
2152 | cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
|
---|
2153 | }
|
---|
2154 | }
|
---|
2155 | #if defined(RT_OS_AMD64)
|
---|
2156 | Assert(cbChunk <= _2G);
|
---|
2157 | #elif defined(RT_OS_ARM64)
|
---|
2158 | if (cbChunk > _128M)
|
---|
2159 | cbChunk = _128M; /* Max relative branch distance is +/-2^(25+2) = +/-0x8000000 (134 217 728). */
|
---|
2160 | #endif
|
---|
2161 |
|
---|
2162 | if (cbChunk > cbMax)
|
---|
2163 | cbMax = cbChunk;
|
---|
2164 | else
|
---|
2165 | cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
|
---|
2166 | uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
|
---|
2167 | AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
|
---|
2168 |
|
---|
2169 | /*
|
---|
2170 | * Allocate and initialize the allocatore instance.
|
---|
2171 | */
|
---|
2172 | size_t const offBitmaps = RT_ALIGN_Z(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]), RT_CACHELINE_SIZE);
|
---|
2173 | size_t const cbBitmaps = (size_t)(cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)) * cMaxChunks;
|
---|
2174 | size_t cbNeeded = offBitmaps + cbBitmaps;
|
---|
2175 | AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
|
---|
2176 | Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
|
---|
2177 | #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
|
---|
2178 | size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
|
---|
2179 | cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
|
---|
2180 | #endif
|
---|
2181 | PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
|
---|
2182 | AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
|
---|
2183 | VERR_NO_MEMORY);
|
---|
2184 | pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
|
---|
2185 | pExecMemAllocator->cbChunk = cbChunk;
|
---|
2186 | pExecMemAllocator->cMaxChunks = cMaxChunks;
|
---|
2187 | pExecMemAllocator->cChunks = 0;
|
---|
2188 | pExecMemAllocator->idxChunkHint = 0;
|
---|
2189 | pExecMemAllocator->cAllocations = 0;
|
---|
2190 | pExecMemAllocator->cbTotal = 0;
|
---|
2191 | pExecMemAllocator->cbFree = 0;
|
---|
2192 | pExecMemAllocator->cbAllocated = 0;
|
---|
2193 | #ifdef VBOX_WITH_STATISTICS
|
---|
2194 | pExecMemAllocator->cbUnusable = 0;
|
---|
2195 | #endif
|
---|
2196 | pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
|
---|
2197 | pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
|
---|
2198 | pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
|
---|
2199 | memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmaps); /* Mark everything as allocated. Clear when chunks are added. */
|
---|
2200 | #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
|
---|
2201 | pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
|
---|
2202 | #endif
|
---|
2203 | for (uint32_t i = 0; i < cMaxChunks; i++)
|
---|
2204 | {
|
---|
2205 | pExecMemAllocator->aChunks[i].cFreeUnits = 0;
|
---|
2206 | pExecMemAllocator->aChunks[i].idxFreeHint = 0;
|
---|
2207 | pExecMemAllocator->aChunks[i].pvChunkRw = NULL;
|
---|
2208 | #ifdef IN_RING0
|
---|
2209 | pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
|
---|
2210 | #else
|
---|
2211 | pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
|
---|
2212 | #endif
|
---|
2213 | }
|
---|
2214 | pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
|
---|
2215 |
|
---|
2216 | /*
|
---|
2217 | * Do the initial allocations.
|
---|
2218 | */
|
---|
2219 | while (cbInitial < (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
|
---|
2220 | {
|
---|
2221 | int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
|
---|
2222 | AssertLogRelRCReturn(rc, rc);
|
---|
2223 | }
|
---|
2224 |
|
---|
2225 | pExecMemAllocator->idxChunkHint = 0;
|
---|
2226 |
|
---|
2227 | /*
|
---|
2228 | * Register statistics.
|
---|
2229 | */
|
---|
2230 | PUVM const pUVM = pVCpu->pUVCpu->pUVM;
|
---|
2231 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
|
---|
2232 | "Current number of allocations", "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);
|
---|
2233 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
|
---|
2234 | "Currently allocated chunks", "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);
|
---|
2235 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
|
---|
2236 | "Maximum number of chunks", "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);
|
---|
2237 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
|
---|
2238 | "Allocation chunk size", "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);
|
---|
2239 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
|
---|
2240 | "Number of bytes current allocated", "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);
|
---|
2241 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
|
---|
2242 | "Number of bytes current free", "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);
|
---|
2243 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
|
---|
2244 | "Total number of byte", "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);
|
---|
2245 | #ifdef VBOX_WITH_STATISTICS
|
---|
2246 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbUnusable, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
|
---|
2247 | "Total number of bytes being unusable", "/IEM/CPU%u/re/ExecMem/cbUnusable", pVCpu->idCpu);
|
---|
2248 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
|
---|
2249 | "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);
|
---|
2250 | for (unsigned i = 1; i < RT_ELEMENTS(pExecMemAllocator->aStatSizes); i++)
|
---|
2251 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->aStatSizes[i], STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
|
---|
2252 | "Number of allocations of this number of allocation units",
|
---|
2253 | "/IEM/CPU%u/re/ExecMem/aSize%02u", pVCpu->idCpu, i);
|
---|
2254 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->aStatSizes[0], STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
|
---|
2255 | "Number of allocations 16 units or larger", "/IEM/CPU%u/re/ExecMem/aSize16OrLarger", pVCpu->idCpu);
|
---|
2256 | #endif
|
---|
2257 | #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
|
---|
2258 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneProf, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
|
---|
2259 | "Pruning executable memory (alt)", "/IEM/CPU%u/re/ExecMem/Pruning", pVCpu->idCpu);
|
---|
2260 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneRecovered, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES_PER_CALL,
|
---|
2261 | "Bytes recovered while pruning", "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu);
|
---|
2262 | #endif
|
---|
2263 | STAMR3RegisterFU(pUVM, &pExecMemAllocator->cFruitlessChunkScans, STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
|
---|
2264 | "Chunks fruitlessly scanned for free space", "/IEM/CPU%u/re/ExecMem/FruitlessChunkScans", pVCpu->idCpu);
|
---|
2265 |
|
---|
2266 | return VINF_SUCCESS;
|
---|
2267 | }
|
---|
2268 |
|
---|