VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp@106061

Last change on this file since 106061 was 106061, checked in by vboxsync, 5 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 68.0 KB
1/* $Id: IEMAllN8veExecMem.cpp 106061 2024-09-16 14:03:52Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
50#include <VBox/vmm/iem.h>
51#include <VBox/vmm/cpum.h>
52#include "IEMInternal.h"
53#include <VBox/vmm/vmcc.h>
54#include <VBox/log.h>
55#include <VBox/err.h>
56#include <VBox/param.h>
57#include <iprt/assert.h>
58#include <iprt/mem.h>
59#include <iprt/string.h>
60#if defined(RT_ARCH_AMD64)
61# include <iprt/x86.h>
62#elif defined(RT_ARCH_ARM64)
63# include <iprt/armv8.h>
64#endif
65
66#ifdef RT_OS_WINDOWS
67# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
68extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
69extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
70#else
71# include <iprt/formats/dwarf.h>
72# if defined(RT_OS_DARWIN)
73# include <libkern/OSCacheControl.h>
74# include <mach/mach.h>
75# include <mach/mach_vm.h>
76# define IEMNATIVE_USE_LIBUNWIND
77extern "C" void __register_frame(const void *pvFde);
78extern "C" void __deregister_frame(const void *pvFde);
79# else
80# ifdef DEBUG_bird /** @todo not thread safe yet */
81# define IEMNATIVE_USE_GDB_JIT
82# endif
83# ifdef IEMNATIVE_USE_GDB_JIT
84# include <iprt/critsect.h>
85# include <iprt/once.h>
86# include <iprt/formats/elf64.h>
87# endif
88extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
89extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
90# endif
91#endif
92
93#include "IEMN8veRecompiler.h"
94
95
96/*********************************************************************************************************************************
97* Executable Memory Allocator *
98*********************************************************************************************************************************/
99/** The chunk sub-allocation unit size in bytes. */
100#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 256
101/** The chunk sub-allocation unit size as a shift factor. */
102#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 8
103/** Enables adding a header to the sub-allocator allocations.
104 * This is useful for freeing up executable memory among other things. */
105#define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
106/** Use alternative pruning. */
107#define IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
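/* Illustrative sizing example (not from the original source): with the 256 byte unit
   size above and the roughly 16 byte IEMEXECMEMALLOCHDR prepended when
   IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER is defined, a 1000 byte code request rounds up to
   (1000 + 16 + 255) >> 8 = 4 units, i.e. a 1024 byte sub-allocation, with the 24 byte
   difference accounted as cbUnusable in the statistics. */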
108
109
110#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
111# ifdef IEMNATIVE_USE_GDB_JIT
112# define IEMNATIVE_USE_GDB_JIT_ET_DYN
113
114/** GDB JIT: Code entry. */
115typedef struct GDBJITCODEENTRY
116{
117 struct GDBJITCODEENTRY *pNext;
118 struct GDBJITCODEENTRY *pPrev;
119 uint8_t *pbSymFile;
120 uint64_t cbSymFile;
121} GDBJITCODEENTRY;
122
123/** GDB JIT: Actions. */
124typedef enum GDBJITACTIONS : uint32_t
125{
126 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
127} GDBJITACTIONS;
128
129/** GDB JIT: Descriptor. */
130typedef struct GDBJITDESCRIPTOR
131{
132 uint32_t uVersion;
133 GDBJITACTIONS enmAction;
134 GDBJITCODEENTRY *pRelevant;
135 GDBJITCODEENTRY *pHead;
136 /** Our addition: */
137 GDBJITCODEENTRY *pTail;
138} GDBJITDESCRIPTOR;
139
140/** GDB JIT: Our simple symbol file data. */
141typedef struct GDBJITSYMFILE
142{
143 Elf64_Ehdr EHdr;
144# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
145 Elf64_Shdr aShdrs[5];
146# else
147 Elf64_Shdr aShdrs[7];
148 Elf64_Phdr aPhdrs[2];
149# endif
150 /** The dwarf ehframe data for the chunk. */
151 uint8_t abEhFrame[512];
152 char szzStrTab[128];
153 Elf64_Sym aSymbols[3];
154# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
155 Elf64_Sym aDynSyms[2];
156 Elf64_Dyn aDyn[6];
157# endif
158} GDBJITSYMFILE;
159
160extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
161extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
162
163/** Init once for g_IemNativeGdbJitLock. */
164static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
165/** Init once for the critical section. */
166static RTCRITSECT g_IemNativeGdbJitLock;
167
168/** GDB reads the info here. */
169GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
170
171/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
172DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
173{
174 ASMNopPause();
175}
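/* Note: GDB's JIT interface works by the debugger planting a breakpoint on the empty
   function above; iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk below links a new
   GDBJITCODEENTRY into __jit_debug_descriptor and then calls it so GDB picks up the
   in-memory ELF symbol file describing the chunk. */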
176
177/** @callback_method_impl{FNRTONCE} */
178static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
179{
180 RT_NOREF(pvUser);
181 return RTCritSectInit(&g_IemNativeGdbJitLock);
182}
183
184
185# endif /* IEMNATIVE_USE_GDB_JIT */
186
187/**
188 * Per-chunk unwind info for non-windows hosts.
189 */
190typedef struct IEMEXECMEMCHUNKEHFRAME
191{
192# ifdef IEMNATIVE_USE_LIBUNWIND
193 /** The offset of the FDE into abEhFrame. */
194 uintptr_t offFda;
195# else
196 /** 'struct object' storage area. */
197 uint8_t abObject[1024];
198# endif
199# ifdef IEMNATIVE_USE_GDB_JIT
200# if 0
201 /** The GDB JIT 'symbol file' data. */
202 GDBJITSYMFILE GdbJitSymFile;
203# endif
204 /** The GDB JIT list entry. */
205 GDBJITCODEENTRY GdbJitEntry;
206# endif
207 /** The dwarf ehframe data for the chunk. */
208 uint8_t abEhFrame[512];
209} IEMEXECMEMCHUNKEHFRAME;
210/** Pointer to per-chunk unwind info for non-windows hosts. */
211typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
212#endif
213
214
215/**
216 * A chunk of executable memory.
217 */
218typedef struct IEMEXECMEMCHUNK
219{
220 /** Number of free items in this chunk. */
221 uint32_t cFreeUnits;
223 /** Hint where to start searching for free space in the allocation bitmap. */
223 uint32_t idxFreeHint;
224 /** Pointer to the readable/writeable view of the memory chunk. */
225 void *pvChunkRw;
226 /** Pointer to the readable/executable view of the memory chunk. */
227 void *pvChunkRx;
228#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
229 /** Pointer to the context structure detailing the per chunk common code. */
230 PCIEMNATIVEPERCHUNKCTX pCtx;
231#endif
232#ifdef IN_RING3
233 /**
234 * Pointer to the unwind information.
235 *
236 * This is used during C++ throw and longjmp (windows and probably most other
237 * platforms). Some debuggers (windbg) make use of it as well.
238 *
239 * Windows: This is allocated from hHeap on windows because (at least for
240 * AMD64) the UNWIND_INFO structure address in the
241 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
242 *
243 * Others: Allocated from the regular heap to avoid unnecessary executable data
244 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
245 void *pvUnwindInfo;
246#elif defined(IN_RING0)
247 /** Allocation handle. */
248 RTR0MEMOBJ hMemObj;
249#endif
250} IEMEXECMEMCHUNK;
251/** Pointer to a memory chunk. */
252typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
253
254
255/**
256 * Executable memory allocator for the native recompiler.
257 */
258typedef struct IEMEXECMEMALLOCATOR
259{
260 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
261 uint32_t uMagic;
262
263 /** The chunk size. */
264 uint32_t cbChunk;
265 /** The maximum number of chunks. */
266 uint32_t cMaxChunks;
267 /** The current number of chunks. */
268 uint32_t cChunks;
269 /** Hint where to start looking for available memory. */
270 uint32_t idxChunkHint;
271 /** Statistics: Current number of allocations. */
272 uint32_t cAllocations;
273
274 /** The total amount of memory available. */
275 uint64_t cbTotal;
276 /** Total amount of free memory. */
277 uint64_t cbFree;
278 /** Total amount of memory allocated. */
279 uint64_t cbAllocated;
280
281 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
282 *
283 * Since the chunk size is a power of two and the minimum chunk size is a lot
284 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
285 * require a whole number of uint64_t elements in the allocation bitmap. So,
286 * for the sake of simplicity/laziness, they are allocated as one
287 * contiguous block. */
288 uint64_t *pbmAlloc;
289 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
290 uint32_t cUnitsPerChunk;
291 /** Number of bitmap elements per chunk (for quickly locating the bitmap
292 * portion corresponding to a chunk). */
293 uint32_t cBitmapElementsPerChunk;
294
295#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
296 /** The next chunk to prune in. */
297 uint32_t idxChunkPrune;
298 /** The offset within the chunk to start pruning at. */
299 uint32_t offChunkPrune;
300 /** Profiling the pruning code. */
301 STAMPROFILE StatPruneProf;
302 /** Number of bytes recovered by the pruning. */
303 STAMPROFILE StatPruneRecovered;
304#endif
305
306#ifdef VBOX_WITH_STATISTICS
307 STAMPROFILE StatAlloc;
308 /** Total amount of memory currently not usable due to IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE rounding. */
309 uint64_t cbUnusable;
310#endif
311
312
313#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
314 /** Pointer to the array of unwind info running parallel to aChunks (same
315 * allocation as this structure, located after the bitmaps).
316 * (For Windows, the structures must reside within 32-bit RVA distance of the
317 * actual chunk, so they are allocated off the chunk.)
318 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
319#endif
320
321 /** The allocation chunks. */
322 RT_FLEXIBLE_ARRAY_EXTENSION
323 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
324} IEMEXECMEMALLOCATOR;
325/** Pointer to an executable memory allocator. */
326typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
327
328/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
329#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
330
331
332#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
333/**
334 * Allocation header.
335 */
336typedef struct IEMEXECMEMALLOCHDR
337{
338 /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
339 uint32_t uMagic;
340 /** The allocation chunk (for speeding up freeing). */
341 uint32_t idxChunk;
342 /** Pointer to the translation block the allocation belongs to.
343 * This is the whole point of the header. */
344 PIEMTB pTb;
345} IEMEXECMEMALLOCHDR;
346/** Pointer to an allocation header. */
347typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;
348/** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */
349# define IEMEXECMEMALLOCHDR_MAGIC UINT32_C(0x4d657845)
350#endif
351
352
353static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
354
355
356#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
357/**
358 * Frees up executable memory when we're out of space.
359 *
360 * This is an alternative to iemTbAllocatorFreeupNativeSpace() that frees up
361 * space in a more linear fashion from the allocator's point of view. It may
362 * also defragment if implemented & enabled.
363 */
364static void iemExecMemAllocatorPrune(PVMCPU pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
365{
366# ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
367# error "IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING requires IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER"
368# endif
369 STAM_REL_PROFILE_START(&pExecMemAllocator->StatPruneProf, a);
370
371 /*
372 * Before we can start, we must process delayed frees.
373 */
374 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
375
376 AssertCompile(RT_IS_POWER_OF_TWO(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE));
377
378 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
379 AssertReturnVoid(RT_IS_POWER_OF_TWO(cbChunk));
380 AssertReturnVoid(cbChunk >= _1M && cbChunk <= _256M); /* see iemExecMemAllocatorInit */
381
382 uint32_t const cChunks = pExecMemAllocator->cChunks;
383 AssertReturnVoid(cChunks == pExecMemAllocator->cMaxChunks);
384 AssertReturnVoid(cChunks >= 1);
385
386 Assert(!pVCpu->iem.s.pCurTbR3);
387
388 /*
389 * Decide how much to prune. The chunk size is a power of two, so we'll be
390 * scanning a power-of-two sized area here as well.
391 */
392 uint32_t cbToPrune = cbChunk;
393
394 /* Never more than 25%. */
395 if (cChunks < 4)
396 cbToPrune /= cChunks == 1 ? 4 : 2;
397
398 /* Upper limit. In a debug build a 4MB limit averages out at ~0.6ms per call. */
399 if (cbToPrune > _4M)
400 cbToPrune = _4M;
401
402 /*
403 * Adjust the pruning chunk and offset accordingly.
404 */
405 uint32_t idxChunk = pExecMemAllocator->idxChunkPrune;
406 uint32_t offChunk = pExecMemAllocator->offChunkPrune;
407 offChunk &= ~(uint32_t)(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1U);
408 if (offChunk >= cbChunk)
409 {
410 offChunk = 0;
411 idxChunk += 1;
412 }
413 if (idxChunk >= cChunks)
414 {
415 offChunk = 0;
416 idxChunk = 0;
417 }
418
419 uint32_t const offPruneEnd = RT_MIN(offChunk + cbToPrune, cbChunk);
420
421 /*
422 * Do the pruning. The current approach is the severe kind.
423 */
424 uint64_t cbPruned = 0;
425 uint8_t * const pbChunk = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
426 while (offChunk < offPruneEnd)
427 {
428 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];
429
430 /* Is this the start of an allocation block for a TB? (We typically have
431 one allocation at the start of each chunk for the unwind info where
432 pTb is NULL.) */
433 if ( pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC
434 && pHdr->pTb != NULL
435 && pHdr->idxChunk == idxChunk)
436 {
437 PIEMTB const pTb = pHdr->pTb;
438 AssertPtr(pTb);
439
440 uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),
441 IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
442 AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */
443
444 iemTbAllocatorFree(pVCpu, pTb);
445
446 cbPruned += cbBlock;
447 offChunk += cbBlock;
448 }
449 else
450 offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
451 }
452 STAM_REL_PROFILE_ADD_PERIOD(&pExecMemAllocator->StatPruneRecovered, cbPruned);
453
454 /*
455 * Save the current pruning point.
456 */
457 pExecMemAllocator->offChunkPrune = offChunk;
458 pExecMemAllocator->idxChunkPrune = idxChunk;
459
460 /* Set the hint to the start of the pruned region. */
461 pExecMemAllocator->idxChunkHint = idxChunk;
462 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = offChunk / IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
463
464 STAM_REL_PROFILE_STOP(&pExecMemAllocator->StatPruneProf, a);
465}
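/* Note: each invocation scans at most cbToPrune bytes and saves idxChunkPrune/offChunkPrune
   afterwards, so repeated out-of-memory situations sweep the chunks in a round-robin
   fashion instead of repeatedly freeing the same region. */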
466#endif /* IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING */
467
468
469/**
470 * Try to allocate a block of @a cReqUnits in the chunk @a idxChunk.
471 */
472static void *
473iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
474 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb,
475 void **ppvExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx)
476{
477 /*
478 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
479 */
480 Assert(!(cToScan & 63));
481 Assert(!(idxFirst & 63));
482 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
483 pbmAlloc += idxFirst / 64;
484
485 /*
486 * Scan the bitmap for cReqUnits consecutive clear bits.
487 */
488 /** @todo This can probably be done more efficiently for non-x86 systems. */
489 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
490 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
491 {
492 uint32_t idxAddBit = 1;
493 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
494 idxAddBit++;
495 if (idxAddBit >= cReqUnits)
496 {
497 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
498
499 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
500 pChunk->cFreeUnits -= cReqUnits;
501 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
502
503 pExecMemAllocator->cAllocations += 1;
504 uint32_t const cbReq = cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
505 pExecMemAllocator->cbAllocated += cbReq;
506 pExecMemAllocator->cbFree -= cbReq;
507 pExecMemAllocator->idxChunkHint = idxChunk;
508
509 void * const pvMemRw = (uint8_t *)pChunk->pvChunkRw
510 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
511
512 if (ppChunkCtx)
513 *ppChunkCtx = pChunk->pCtx;
514
515 /*
516 * Initialize the header and return.
517 */
518# ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
519 PIEMEXECMEMALLOCHDR const pHdr = (PIEMEXECMEMALLOCHDR)pvMemRw;
520 pHdr->uMagic = IEMEXECMEMALLOCHDR_MAGIC;
521 pHdr->idxChunk = idxChunk;
522 pHdr->pTb = pTb;
523
524 if (ppvExec)
525 *ppvExec = (uint8_t *)pChunk->pvChunkRx
526 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT)
527 + sizeof(*pHdr);
528
529 return pHdr + 1;
530#else
531 if (ppvExec)
532 *ppvExec = (uint8_t *)pChunk->pvChunkRx
533 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
534
535 RT_NOREF(pTb);
536 return pvMemRw;
537#endif
538 }
539
540 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
541 }
542 return NULL;
543}
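/* Note: the block returned above is always in the read/write mapping (pvChunkRw); callers
   that need to execute the code use the read/execute alias returned via *ppvExec. */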
544
545
546static PIEMNATIVEINSTR
547iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq, PIEMTB pTb,
548 PIEMNATIVEINSTR *ppaExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx)
549{
550 /*
551 * Figure out how much to allocate.
552 */
553#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
554 uint32_t const cReqUnits = (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
555#else
556 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
557#endif
558 >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
559 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
560 {
561 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
562 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
563 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
564 {
565 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
566 pExecMemAllocator->cUnitsPerChunk - idxHint,
567 cReqUnits, idxChunk, pTb, (void **)ppaExec, ppChunkCtx);
568 if (pvRet)
569 {
570#ifdef VBOX_WITH_STATISTICS
571 pExecMemAllocator->cbUnusable += (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbReq;
572#endif
573 return (PIEMNATIVEINSTR)pvRet;
574 }
575 }
576 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
577 RT_MIN(pExecMemAllocator->cUnitsPerChunk,
578 RT_ALIGN_32(idxHint + cReqUnits, 64)),
579 cReqUnits, idxChunk, pTb, (void **)ppaExec, ppChunkCtx);
580#ifdef VBOX_WITH_STATISTICS
581 if (pvRet)
582 pExecMemAllocator->cbUnusable += (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbReq;
583#endif
584 return (PIEMNATIVEINSTR)pvRet;
585 }
586 return NULL;
587}
588
589
590/**
591 * Allocates @a cbReq bytes of executable memory.
592 *
593 * @returns Pointer to the readable/writeable memory, NULL if out of memory or another
594 * problem was encountered.
595 * @param pVCpu The cross context virtual CPU structure of the
596 * calling thread.
597 * @param cbReq How many bytes are required.
598 * @param pTb The translation block that will be using the allocation.
599 * @param ppaExec Where to return the pointer to executable view of
600 * the allocated memory, optional.
601 * @param ppChunkCtx Where to return the per chunk attached context
602 * if available, optional.
603 */
604DECLHIDDEN(PIEMNATIVEINSTR) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb,
605 PIEMNATIVEINSTR *ppaExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx) RT_NOEXCEPT
606{
607 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
608 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
609 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
610 STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);
611
612 for (unsigned iIteration = 0;; iIteration++)
613 {
614 if (cbReq <= pExecMemAllocator->cbFree)
615 {
616 uint32_t const cChunks = pExecMemAllocator->cChunks;
617 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
618 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
619 {
620 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb,
621 ppaExec, ppChunkCtx);
622 if (pRet)
623 {
624 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
625 return pRet;
626 }
627 }
628 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
629 {
630 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb,
631 ppaExec, ppChunkCtx);
632 if (pRet)
633 {
634 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
635 return pRet;
636 }
637 }
638 }
639
640 /*
641 * Can we grow it with another chunk?
642 */
643 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
644 {
645 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
646 AssertLogRelRCReturn(rc, NULL);
647
648 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
649 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb,
650 ppaExec, ppChunkCtx);
651 if (pRet)
652 {
653 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
654 return pRet;
655 }
656 AssertFailed();
657 }
658
659 /*
660 * Try prune native TBs once.
661 */
662 if (iIteration == 0)
663 {
664#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
665 iemExecMemAllocatorPrune(pVCpu, pExecMemAllocator);
666#else
667 /* No header included in the instruction count here. */
668 uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);
669 iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);
670#endif
671 }
672 else
673 {
674 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
675 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
676 return NULL;
677 }
678 }
679}
680
681
682/** This is a hook to ensure the instruction cache is properly flushed before the code in the memory
683 * given by @a pv and @a cb is executed. */
684DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT
685{
686#ifdef RT_OS_DARWIN
687 /*
688 * Flush the instruction cache:
689 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
690 */
691 /* sys_dcache_flush(pv, cb); - not necessary */
692 sys_icache_invalidate(pv, cb);
693 RT_NOREF(pVCpu);
694
695#elif defined(RT_OS_LINUX) && defined(RT_ARCH_ARM64)
696 RT_NOREF(pVCpu);
697
698 /* There is __builtin___clear_cache() but it flushes both the instruction and data cache, so do it manually. */
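    /* CTR_EL0.IminLine (bits 3:0) holds log2 of the smallest instruction cache line size
       in 4-byte words, hence the "4 << (ctr & 0xf)" line size calculation below. */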
699 static uint32_t s_u32CtrEl0 = 0;
700 if (!s_u32CtrEl0)
701 asm volatile ("mrs %0, ctr_el0":"=r" (s_u32CtrEl0));
702 uintptr_t cbICacheLine = (uintptr_t)4 << (s_u32CtrEl0 & 0xf);
703
704 uintptr_t pb = (uintptr_t)pv & ~(cbICacheLine - 1);
705 for (; pb < (uintptr_t)pv + cb; pb += cbICacheLine)
706 asm volatile ("ic ivau, %0" : : "r" (pb) : "memory");
707
708 asm volatile ("dsb ish\n\t isb\n\t" : : : "memory");
709
710#else
711 RT_NOREF(pVCpu, pv, cb);
712#endif
713}
714
715
716/**
717 * Frees executable memory.
718 */
719DECLHIDDEN(void) iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT
720{
721 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
722 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
723 AssertPtr(pv);
724#ifdef VBOX_WITH_STATISTICS
725 size_t const cbOrig = cb;
726#endif
727#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
728 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
729
730 /* Align the size as we did when allocating the block. */
731 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
732
733#else
734 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;
735 Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
736 AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);
737 uint32_t const idxChunk = pHdr->idxChunk;
738 AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);
739 pv = pHdr;
740
741 /* Adjust and align the size to cover the whole allocation area. */
742 cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
743#endif
744
745 /* Free it / assert sanity. */
746 bool fFound = false;
747 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
748#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
749 uint32_t const cChunks = pExecMemAllocator->cChunks;
750 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
751#endif
752 {
753 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
754 fFound = offChunk < cbChunk;
755 if (fFound)
756 {
757 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
758 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
759
760 /* Check that it's valid and free it. */
761 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
762 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
763 for (uint32_t i = 1; i < cReqUnits; i++)
764 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
765 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
766
767 /* Invalidate the header using the writeable memory view. */
768 pHdr = (PIEMEXECMEMALLOCHDR)((uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRw + offChunk);
769#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
770 pHdr->uMagic = 0;
771 pHdr->idxChunk = 0;
772 pHdr->pTb = NULL;
773#endif
774 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
775 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
776
777 /* Update the stats. */
778 pExecMemAllocator->cbAllocated -= cb;
779 pExecMemAllocator->cbFree += cb;
780 pExecMemAllocator->cAllocations -= 1;
781#ifdef VBOX_WITH_STATISTICS
782 pExecMemAllocator->cbUnusable -= (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbOrig;
783#endif
784 return;
785 }
786 }
787 AssertFailed();
788}
789
790
791/**
792 * Interface used by iemNativeRecompileAttachExecMemChunkCtx and unwind info
793 * generators.
794 */
795DECLHIDDEN(PIEMNATIVEINSTR)
796iemExecMemAllocatorAllocFromChunk(PVMCPU pVCpu, uint32_t idxChunk, uint32_t cbReq, PIEMNATIVEINSTR *ppaExec)
797{
798 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
799 AssertReturn(idxChunk < pExecMemAllocator->cChunks, NULL);
800 Assert(cbReq < _1M);
801 return iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, NULL /*pTb*/, ppaExec, NULL /*ppChunkCtx*/);
802}
803
804
805#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
806/**
807 * For getting the per-chunk context detailing common code for a TB.
808 *
809 * This is for use by the disassembler.
810 */
811DECLHIDDEN(PCIEMNATIVEPERCHUNKCTX) iemExecMemGetTbChunkCtx(PVMCPU pVCpu, PCIEMTB pTb)
812{
813 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
814 if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
815 {
816 uintptr_t const uAddress = (uintptr_t)pTb->Native.paInstructions;
817 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
818 uint32_t idxChunk = pExecMemAllocator->cChunks;
819 while (idxChunk-- > 0)
820 if (uAddress - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx < cbChunk)
821 return pExecMemAllocator->aChunks[idxChunk].pCtx;
822 }
823 return NULL;
824}
825#endif
826
827
828#ifdef IN_RING3
829# ifdef RT_OS_WINDOWS
830
831/**
832 * Initializes the unwind info structures for windows hosts.
833 */
834static int
835iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
836 void *pvChunk, uint32_t idxChunk)
837{
838 RT_NOREF(pVCpu);
839
840 /*
841 * The AMD64 unwind opcodes.
842 *
843 * This is a program that starts with RSP after a RET instruction that
844 * ends up in recompiled code, and the operations we describe here will
845 * restore all non-volatile registers and bring RSP back to where our
846 * RET address is. This means it's reverse order from what happens in
847 * the prologue.
848 *
849 * Note! Using a frame register approach here, both because we have one
850 * and mainly because the UWOP_ALLOC_LARGE argument values
851 * would be a pain to write initializers for. On the positive
852 * side, we're impervious to changes in the stack variable
853 * area and can deal with dynamic stack allocations if necessary.
854 */
855 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
856 {
857 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 16 (0x60) */
858 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
859 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
860 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
861 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
862 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
863 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
864 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
865 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
866 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
867 };
868 union
869 {
870 IMAGE_UNWIND_INFO Info;
871 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
872 } s_UnwindInfo =
873 {
874 {
875 /* .Version = */ 1,
876 /* .Flags = */ 0,
877 /* .SizeOfProlog = */ 16, /* whatever */
878 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
879 /* .FrameRegister = */ X86_GREG_xBP,
880 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
881 }
882 };
883 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
884 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
885
886 /*
887 * Calc how much space we need and allocate it off the exec heap.
888 */
889 unsigned const cFunctionEntries = 1;
890 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
891 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
892 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
893 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL, NULL, NULL);
894 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
895 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
896
897 /*
898 * Initialize the structures.
899 */
900 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
901
902 paFunctions[0].BeginAddress = 0;
903 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
904 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
905
906 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
907 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
908
909 /*
910 * Register it.
911 */
912 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
913 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
914
915 return VINF_SUCCESS;
916}
917
918
919# else /* !RT_OS_WINDOWS */
920
921/**
922 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
923 */
924DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
925{
926 if (iValue >= 64)
927 {
928 Assert(iValue < 0x2000);
929 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
930 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
931 }
932 else if (iValue >= 0)
933 *Ptr.pb++ = (uint8_t)iValue;
934 else if (iValue > -64)
935 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
936 else
937 {
938 Assert(iValue > -0x2000);
939 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
940 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
941 }
942 return Ptr;
943}
944
945
946/**
947 * Emits an ULEB128 encoded value (up to 64-bit wide).
948 */
949DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
950{
951 while (uValue >= 0x80)
952 {
953 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
954 uValue >>= 7;
955 }
956 *Ptr.pb++ = (uint8_t)uValue;
957 return Ptr;
958}
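/* Illustrative encodings (not part of the original source): iemDwarfPutUleb128 emits
   624485 as the bytes 0xe5 0x8e 0x26, while the limited signed variant above emits
   300 as 0xac 0x02 and -300 as 0xd4 0x7d. */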
959
960
961/**
962 * Emits a CFA rule as register @a uReg + offset @a off.
963 */
964DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
965{
966 *Ptr.pb++ = DW_CFA_def_cfa;
967 Ptr = iemDwarfPutUleb128(Ptr, uReg);
968 Ptr = iemDwarfPutUleb128(Ptr, off);
969 return Ptr;
970}
971
972
973/**
974 * Emits a register (@a uReg) save location:
975 * CFA + @a off * data_alignment_factor
976 */
977DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
978{
979 if (uReg < 0x40)
980 *Ptr.pb++ = DW_CFA_offset | uReg;
981 else
982 {
983 *Ptr.pb++ = DW_CFA_offset_extended;
984 Ptr = iemDwarfPutUleb128(Ptr, uReg);
985 }
986 Ptr = iemDwarfPutUleb128(Ptr, off);
987 return Ptr;
988}
989
990
991# if 0 /* unused */
992/**
993 * Emits a register (@a uReg) save location, using signed offset:
994 * CFA + @a offSigned * data_alignment_factor
995 */
996DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
997{
998 *Ptr.pb++ = DW_CFA_offset_extended_sf;
999 Ptr = iemDwarfPutUleb128(Ptr, uReg);
1000 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
1001 return Ptr;
1002}
1003# endif
1004
1005
1006/**
1007 * Initializes the unwind info section for non-windows hosts.
1008 */
1009static int
1010iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
1011 void *pvChunk, uint32_t idxChunk)
1012{
1013 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
1014 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
1015
1016 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
1017
1018 /*
1019 * Generate the CIE first.
1020 */
1021# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
1022 uint8_t const iDwarfVer = 3;
1023# else
1024 uint8_t const iDwarfVer = 4;
1025# endif
1026 RTPTRUNION const PtrCie = Ptr;
1027 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
1028 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
1029 *Ptr.pb++ = iDwarfVer; /* DWARF version */
1030 *Ptr.pb++ = 0; /* Augmentation. */
1031 if (iDwarfVer >= 4)
1032 {
1033 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
1034 *Ptr.pb++ = 0; /* Segment selector size. */
1035 }
1036# ifdef RT_ARCH_AMD64
1037 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
1038# else
1039 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
1040# endif
1041 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
1042# ifdef RT_ARCH_AMD64
1043 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
1044# elif defined(RT_ARCH_ARM64)
1045 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
1046# else
1047# error "port me"
1048# endif
1049 /* Initial instructions: */
1050# ifdef RT_ARCH_AMD64
1051 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
1052 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
1053 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
1054 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
1055 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
1056 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
1057 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
1058 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
1059# elif defined(RT_ARCH_ARM64)
1060# if 1
1061 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
1062# else
1063 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
1064# endif
1065 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
1066 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
1067 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
1068 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
1069 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
1070 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
1071 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
1072 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
1073 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
1074 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
1075 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
1076 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
1077 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
1078 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
1079# else
1080# error "port me"
1081# endif
1082 while ((Ptr.u - PtrCie.u) & 3)
1083 *Ptr.pb++ = DW_CFA_nop;
1084 /* Finalize the CIE size. */
1085 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
1086
1087 /*
1088 * Generate an FDE for the whole chunk area.
1089 */
1090# ifdef IEMNATIVE_USE_LIBUNWIND
1091 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
1092# endif
1093 RTPTRUNION const PtrFde = Ptr;
1094 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
1095 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
1096 Ptr.pu32++;
1097 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
1098 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
1099# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
1100 *Ptr.pb++ = DW_CFA_nop;
1101# endif
1102 while ((Ptr.u - PtrFde.u) & 3)
1103 *Ptr.pb++ = DW_CFA_nop;
1104 /* Finalize the FDE size. */
1105 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
1106
1107 /* Terminator entry. */
1108 *Ptr.pu32++ = 0;
1109 *Ptr.pu32++ = 0; /* just to be sure... */
1110 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
1111
1112 /*
1113 * Register it.
1114 */
1115# ifdef IEMNATIVE_USE_LIBUNWIND
1116 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
1117# else
1118 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
1119 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
1120# endif
1121
1122# ifdef IEMNATIVE_USE_GDB_JIT
1123 /*
1124 * Now for telling GDB about this (experimental).
1125 *
1126 * This seems to work best with ET_DYN.
1127 */
1128 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk,
1129 sizeof(GDBJITSYMFILE), NULL, NULL, NULL);
1130 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1131 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1132
1133 RT_ZERO(*pSymFile);
1134
1135 /*
1136 * The ELF header:
1137 */
1138 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1139 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1140 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1141 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1142 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1143 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1144 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1145 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1146# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1147 pSymFile->EHdr.e_type = ET_DYN;
1148# else
1149 pSymFile->EHdr.e_type = ET_REL;
1150# endif
1151# ifdef RT_ARCH_AMD64
1152 pSymFile->EHdr.e_machine = EM_AMD64;
1153# elif defined(RT_ARCH_ARM64)
1154 pSymFile->EHdr.e_machine = EM_AARCH64;
1155# else
1156# error "port me"
1157# endif
1158 pSymFile->EHdr.e_version = 1; /*?*/
1159 pSymFile->EHdr.e_entry = 0;
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1161 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1162# else
1163 pSymFile->EHdr.e_phoff = 0;
1164# endif
1165 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1166 pSymFile->EHdr.e_flags = 0;
1167 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1168# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1169 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1170 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1171# else
1172 pSymFile->EHdr.e_phentsize = 0;
1173 pSymFile->EHdr.e_phnum = 0;
1174# endif
1175 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1176 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1177 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1178
1179 uint32_t offStrTab = 0;
1180#define APPEND_STR(a_szStr) do { \
1181 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1182 offStrTab += sizeof(a_szStr); \
1183 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1184 } while (0)
1185#define APPEND_STR_FMT(a_szStr, ...) do { \
1186 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1187 offStrTab++; \
1188 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1189 } while (0)
1190
1191 /*
1192 * Section headers.
1193 */
1194 /* Section header #0: NULL */
1195 unsigned i = 0;
1196 APPEND_STR("");
1197 RT_ZERO(pSymFile->aShdrs[i]);
1198 i++;
1199
1200 /* Section header: .eh_frame */
1201 pSymFile->aShdrs[i].sh_name = offStrTab;
1202 APPEND_STR(".eh_frame");
1203 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1204 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1205# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1206 pSymFile->aShdrs[i].sh_offset
1207 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1208# else
1209 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1210 pSymFile->aShdrs[i].sh_offset = 0;
1211# endif
1212
1213 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1214 pSymFile->aShdrs[i].sh_link = 0;
1215 pSymFile->aShdrs[i].sh_info = 0;
1216 pSymFile->aShdrs[i].sh_addralign = 1;
1217 pSymFile->aShdrs[i].sh_entsize = 0;
1218 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1219 i++;
1220
1221 /* Section header: .shstrtab */
1222 unsigned const iShStrTab = i;
1223 pSymFile->EHdr.e_shstrndx = iShStrTab;
1224 pSymFile->aShdrs[i].sh_name = offStrTab;
1225 APPEND_STR(".shstrtab");
1226 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1227 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1228# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1229 pSymFile->aShdrs[i].sh_offset
1230 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1231# else
1232 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1233 pSymFile->aShdrs[i].sh_offset = 0;
1234# endif
1235 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1236 pSymFile->aShdrs[i].sh_link = 0;
1237 pSymFile->aShdrs[i].sh_info = 0;
1238 pSymFile->aShdrs[i].sh_addralign = 1;
1239 pSymFile->aShdrs[i].sh_entsize = 0;
1240 i++;
1241
1242 /* Section header: .symtab */
1243 pSymFile->aShdrs[i].sh_name = offStrTab;
1244 APPEND_STR(".symtab");
1245 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1246 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1247 pSymFile->aShdrs[i].sh_offset
1248 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1249 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1250 pSymFile->aShdrs[i].sh_link = iShStrTab;
1251 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1252 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1253 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1254 i++;
1255
1256# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1257 /* Section header: .dynsym */
1258 pSymFile->aShdrs[i].sh_name = offStrTab;
1259 APPEND_STR(".dynsym");
1260 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1261 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1262 pSymFile->aShdrs[i].sh_offset
1263 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1264 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1265 pSymFile->aShdrs[i].sh_link = iShStrTab;
1266 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1267 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1268 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1269 i++;
1270# endif
1271
1272# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1273 /* Section header: .dynamic */
1274 pSymFile->aShdrs[i].sh_name = offStrTab;
1275 APPEND_STR(".dynamic");
1276 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1277 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1278 pSymFile->aShdrs[i].sh_offset
1279 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1280 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1281 pSymFile->aShdrs[i].sh_link = iShStrTab;
1282 pSymFile->aShdrs[i].sh_info = 0;
1283 pSymFile->aShdrs[i].sh_addralign = 1;
1284 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1285 i++;
1286# endif
1287
1288 /* Section header: .text */
1289 unsigned const iShText = i;
1290 pSymFile->aShdrs[i].sh_name = offStrTab;
1291 APPEND_STR(".text");
1292 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1293 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1294# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1295 pSymFile->aShdrs[i].sh_offset
1296 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1297# else
1298 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1299 pSymFile->aShdrs[i].sh_offset = 0;
1300# endif
1301 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1302 pSymFile->aShdrs[i].sh_link = 0;
1303 pSymFile->aShdrs[i].sh_info = 0;
1304 pSymFile->aShdrs[i].sh_addralign = 1;
1305 pSymFile->aShdrs[i].sh_entsize = 0;
1306 i++;
1307
1308 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1309
1310# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1311 /*
1312 * The program headers:
1313 */
1314 /* Everything in a single LOAD segment: */
1315 i = 0;
1316 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1317 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1318 pSymFile->aPhdrs[i].p_offset
1319 = pSymFile->aPhdrs[i].p_vaddr
1320 = pSymFile->aPhdrs[i].p_paddr = 0;
1321 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1322 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1323 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1324 i++;
1325 /* The .dynamic segment. */
1326 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1327 pSymFile->aPhdrs[i].p_flags = PF_R;
1328 pSymFile->aPhdrs[i].p_offset
1329 = pSymFile->aPhdrs[i].p_vaddr
1330 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1331 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1332 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1333 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1334 i++;
1335
1336 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1337
1338 /*
1339 * The dynamic section:
1340 */
1341 i = 0;
1342 pSymFile->aDyn[i].d_tag = DT_SONAME;
1343 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1344 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1345 i++;
1346 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1347 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1348 i++;
1349 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1350 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1351 i++;
1352 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1353 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1354 i++;
1355 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1356 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1357 i++;
1358 pSymFile->aDyn[i].d_tag = DT_NULL;
1359 i++;
1360 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1361# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1362
1363 /*
1364 * Symbol tables:
1365 */
1366 /** @todo gdb doesn't seem to really like this ... */
1367 i = 0;
1368 pSymFile->aSymbols[i].st_name = 0;
1369 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1370 pSymFile->aSymbols[i].st_value = 0;
1371 pSymFile->aSymbols[i].st_size = 0;
1372 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1373 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1374# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1375 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1376# endif
1377 i++;
1378
1379 pSymFile->aSymbols[i].st_name = 0;
1380 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1381 pSymFile->aSymbols[i].st_value = 0;
1382 pSymFile->aSymbols[i].st_size = 0;
1383 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1384 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1385 i++;
1386
1387 pSymFile->aSymbols[i].st_name = offStrTab;
1388 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1389# if 0
1390 pSymFile->aSymbols[i].st_shndx = iShText;
1391 pSymFile->aSymbols[i].st_value = 0;
1392# else
1393 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1394 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1395# endif
1396 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1397 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1398 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1399# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1400 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1401 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1402# endif
1403 i++;
1404
1405 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1406 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1407
1408 /*
1409 * The GDB JIT entry and informing GDB.
1410 */
1411 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1412# if 1
1413 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1414# else
1415 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1416# endif
1417
1418 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1419 RTCritSectEnter(&g_IemNativeGdbJitLock);
1420 pEhFrame->GdbJitEntry.pNext = NULL;
1421 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1422 if (__jit_debug_descriptor.pTail)
1423 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1424 else
1425 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1426 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1427 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1428
1429 /* Notify GDB: */
1430 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1431 __jit_debug_register_code();
1432 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1433 RTCritSectLeave(&g_IemNativeGdbJitLock);
1434
1435# else /* !IEMNATIVE_USE_GDB_JIT */
1436 RT_NOREF(pVCpu);
1437# endif /* !IEMNATIVE_USE_GDB_JIT */
1438
1439 return VINF_SUCCESS;
1440}
1441
1442# endif /* !RT_OS_WINDOWS */
1443#endif /* IN_RING3 */
1444
1445
1446/**
1447 * Adds another chunk to the executable memory allocator.
1448 *
1449 * This is used by the init code for the initial allocation and later by the
1450 * regular allocator function when it's out of memory.
1451 */
1452static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1453{
1454 /* Check that we've room for growth. */
1455 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1456 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1457
1458 /* Allocate a chunk. */
1459#ifdef RT_OS_DARWIN
1460 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1461#else
1462 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1463#endif
1464 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1465
1466#ifdef RT_OS_DARWIN
1467 /*
1468 * Because it is impossible to have an RWX memory allocation on macOS, try to remap the memory
1469 * chunk readable/executable somewhere else, so we can save ourselves the hassle of switching
1470 * between protections when executable memory is allocated.
1471 */
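    /* The writable mapping from RTMemPageAllocEx stays around as pvChunkRw; the remapped
       read/execute alias created here becomes pvChunkRx further down. */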
1472 int rc = VERR_NO_EXEC_MEMORY;
1473 mach_port_t hPortTask = mach_task_self();
1474 mach_vm_address_t AddrChunk = (mach_vm_address_t)pvChunk;
1475 mach_vm_address_t AddrRemapped = 0;
1476 vm_prot_t ProtCur = 0;
1477 vm_prot_t ProtMax = 0;
1478 kern_return_t krc = mach_vm_remap(hPortTask, &AddrRemapped, pExecMemAllocator->cbChunk, 0,
1479 VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
1480 hPortTask, AddrChunk, FALSE, &ProtCur, &ProtMax,
1481 VM_INHERIT_NONE);
1482 if (krc == KERN_SUCCESS)
1483 {
1484 krc = mach_vm_protect(mach_task_self(), AddrRemapped, pExecMemAllocator->cbChunk, FALSE, VM_PROT_READ | VM_PROT_EXECUTE);
1485 if (krc == KERN_SUCCESS)
1486 rc = VINF_SUCCESS;
1487 else
1488 {
1489 AssertLogRelMsgFailed(("mach_vm_protect -> %d (%#x)\n", krc, krc));
1490 krc = mach_vm_deallocate(hPortTask, AddrRemapped, pExecMemAllocator->cbChunk);
1491 Assert(krc == KERN_SUCCESS);
1492 }
1493 }
1494 else
1495 AssertLogRelMsgFailed(("mach_vm_remap -> %d (%#x)\n", krc, krc));
1496 if (RT_FAILURE(rc))
1497 {
1498 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1499 return rc;
1500 }
1501
1502 void *pvChunkRx = (void *)AddrRemapped;
1503#else
1504# if defined(IN_RING3) || defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE)
1505 int rc = VINF_SUCCESS;
1506# endif
1507 void *pvChunkRx = pvChunk;
1508#endif
1509
1510 /*
1511 * Add the chunk.
1512 *
1513 * This must be done before the unwind init so windows can allocate
1514 * memory from the chunk when using the alternative sub-allocator.
1515 */
1516 pExecMemAllocator->aChunks[idxChunk].pvChunkRw = pvChunk;
1517 pExecMemAllocator->aChunks[idxChunk].pvChunkRx = pvChunkRx;
1518#ifdef IN_RING3
1519 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1520#endif
1521 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1522 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1523 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1524 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1525
1526 pExecMemAllocator->cChunks = idxChunk + 1;
1527 pExecMemAllocator->idxChunkHint = idxChunk;
1528
1529 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1530 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1531
1532 /* If there is a chunk context init callback, call it. */
1533#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
1534 pExecMemAllocator->aChunks[idxChunk].pCtx = iemNativeRecompileAttachExecMemChunkCtx(pVCpu, idxChunk);
1535 if (pExecMemAllocator->aChunks[idxChunk].pCtx)
1536#endif
1537 {
1538#ifdef IN_RING3
1539 /*
1540 * Initialize the unwind information (this cannot really fail atm).
1541 * (This sets pvUnwindInfo.)
1542 */
1543 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunkRx, idxChunk);
1544#endif
1545 }
1546#if defined(IN_RING3) || defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE)
1547 if (RT_SUCCESS(rc))
1548 { /* likely */ }
1549 else
1550 {
1551 /* Just in case the impossible happens, undo the above: */
1552 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1553 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1554 pExecMemAllocator->cChunks = idxChunk;
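        /* Set the bitmap slice back to all ones (allocated) so nothing can be handed out from the dropped chunk. */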
1555 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1556 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1557 pExecMemAllocator->aChunks[idxChunk].pvChunkRw = NULL;
1558 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1559
1560# ifdef RT_OS_DARWIN
1561 krc = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx,
1562 pExecMemAllocator->cbChunk);
1563 Assert(krc == KERN_SUCCESS);
1564# endif
1565
1566 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1567 return rc;
1568 }
1569#endif
1570
1571 return VINF_SUCCESS;
1572}
1573
1574
1575/**
1576 * Initializes the executable memory allocator for native recompilation on the
1577 * calling EMT.
1578 *
1579 * @returns VBox status code.
1580 * @param pVCpu The cross context virtual CPU structure of the calling
1581 * thread.
1582 * @param cbMax The max size of the allocator.
1583 * @param cbInitial The initial allocator size.
1584 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1585 * dependent).
1586 */
1587int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT
1588{
1589 /*
1590 * Validate input.
1591 */
1592 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1593 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1594 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1595 || cbChunk == 0
1596 || ( RT_IS_POWER_OF_TWO(cbChunk)
1597 && cbChunk >= _1M
1598 && cbChunk <= _256M
1599 && cbChunk <= cbMax),
1600 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1601 VERR_OUT_OF_RANGE);
1602
1603 /*
1604 * Adjust/figure out the chunk size.
1605 */
1606 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1607 {
1608 if (cbMax >= _256M)
1609 cbChunk = _64M;
1610 else
1611 {
1612 if (cbMax < _16M)
1613 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1614 else
1615 cbChunk = (uint32_t)cbMax / 4;
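            /* If that is not a power of two, round up to the next power of two. */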
1616 if (!RT_IS_POWER_OF_TWO(cbChunk))
1617 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1618 }
1619 }
1620#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
1621# if defined(RT_ARCH_AMD64)
1622 Assert(cbChunk <= _2G);
1623# elif defined(RT_ARCH_ARM64)
1624 if (cbChunk > _128M)
1625 cbChunk = _128M; /* Max relative branch distance is +/-2^(25+2) = +/-0x8000000 (134 217 728). */
1626# endif
1627#endif
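    /* Round cbMax up to a whole number of chunks (at least one chunk). */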
1628
1629 if (cbChunk > cbMax)
1630 cbMax = cbChunk;
1631 else
1632 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1633 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1634 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1635
1636 /*
1637 * Allocate and initialize the allocator instance.
1638 */
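    /* Layout: the allocator structure with its chunk array, followed by the per-chunk allocation
       bitmaps (one bit per allocation unit) and, for ring-3 non-Windows builds, the eh_frame data
       used when registering unwind info. */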
1639 size_t const offBitmaps = RT_ALIGN_Z(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]), RT_CACHELINE_SIZE);
1640 size_t const cbBitmaps = (size_t)(cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)) * cMaxChunks;
1641 size_t cbNeeded = offBitmaps + cbBitmaps;
1642 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1643 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1644#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1645 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1646 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1647#endif
1648 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1649 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1650 VERR_NO_MEMORY);
1651 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1652 pExecMemAllocator->cbChunk = cbChunk;
1653 pExecMemAllocator->cMaxChunks = cMaxChunks;
1654 pExecMemAllocator->cChunks = 0;
1655 pExecMemAllocator->idxChunkHint = 0;
1656 pExecMemAllocator->cAllocations = 0;
1657 pExecMemAllocator->cbTotal = 0;
1658 pExecMemAllocator->cbFree = 0;
1659 pExecMemAllocator->cbAllocated = 0;
1660#ifdef VBOX_WITH_STATISTICS
1661 pExecMemAllocator->cbUnusable = 0;
1662#endif
1663 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1664 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
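    /* One bit per allocation unit, 64 units per uint64_t bitmap element (hence the extra shift by 6). */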
1665 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1666 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmaps); /* Mark everything as allocated. Clear when chunks are added. */
1667#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1668 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1669#endif
1670 for (uint32_t i = 0; i < cMaxChunks; i++)
1671 {
1672 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1673 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1674 pExecMemAllocator->aChunks[i].pvChunkRw = NULL;
1675#ifdef IN_RING0
1676 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1677#else
1678 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1679#endif
1680 }
1681 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1682
1683 /*
1684 * Do the initial allocations.
1685 */
1686 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1687 {
1688 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1689 AssertLogRelRCReturn(rc, rc);
1690 }
1691
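    /* Growing leaves the hint at the last chunk added; reset it so allocation starts searching from the first chunk. */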
1692 pExecMemAllocator->idxChunkHint = 0;
1693
1694 /*
1695 * Register statistics.
1696 */
1697 PUVM const pUVM = pVCpu->pUVCpu->pUVM;
1698 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1699 "Current number of allocations", "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);
1700 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1701 "Currently allocated chunks", "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);
1702 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1703 "Maximum number of chunks", "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);
1704 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1705 "Allocation chunk size", "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);
1706 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1707 "Number of bytes current allocated", "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);
1708 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1709 "Number of bytes current free", "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);
1710 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1711 "Total number of byte", "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);
1712#ifdef VBOX_WITH_STATISTICS
1713 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbUnusable, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1714 "Total number of bytes being unusable", "/IEM/CPU%u/re/ExecMem/cbUnusable", pVCpu->idCpu);
1715 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1716 "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);
1717#endif
1718#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
1719 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneProf, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1720 "Pruning executable memory (alt)", "/IEM/CPU%u/re/ExecMem/Pruning", pVCpu->idCpu);
1721 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneRecovered, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES_PER_CALL,
1722 "Bytes recovered while pruning", "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu);
1723#endif
1724
1725 return VINF_SUCCESS;
1726}
1727