VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp@ 104116

Last change on this file since 104116 was 104116, checked in by vboxsync, 9 months ago

VMM/IEM: Darwin fixes for IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER. The header requires more switching between writable and executable memory flags. bugref:10370

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 61.3 KB
/* $Id: IEMAllN8veExecMem.cpp 104116 2024-03-29 03:06:39Z vboxsync $ */
/** @file
 * IEM - Native Recompiler
 *
 * Logging group IEM_RE_NATIVE assignments:
 *      - Level 1  (Log)  : ...
 *      - Flow  (LogFlow) : ...
 *      - Level 2  (Log2) : Details calls as they're recompiled.
 *      - Level 3  (Log3) : Disassemble native code after recompiling.
 *      - Level 4  (Log4) : ...
 *      - Level 5  (Log5) : ...
 *      - Level 6  (Log6) : ...
 *      - Level 7  (Log7) : ...
 *      - Level 8  (Log8) : ...
 *      - Level 9  (Log9) : ...
 *      - Level 10 (Log10): ...
 *      - Level 11 (Log11): Variable allocator.
 *      - Level 12 (Log12): Register allocator.
 */

/*
 * Copyright (C) 2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
#define IEM_WITH_OPAQUE_DECODER_STATE
#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
#include <VBox/vmm/iem.h>
#include <VBox/vmm/cpum.h>
#include "IEMInternal.h"
#include <VBox/vmm/vmcc.h>
#include <VBox/log.h>
#include <VBox/err.h>
#include <VBox/param.h>
#include <iprt/assert.h>
#include <iprt/mem.h>
#include <iprt/string.h>
#if   defined(RT_ARCH_AMD64)
# include <iprt/x86.h>
#elif defined(RT_ARCH_ARM64)
# include <iprt/armv8.h>
#endif

#ifdef RT_OS_WINDOWS
# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
#else
# include <iprt/formats/dwarf.h>
# if defined(RT_OS_DARWIN)
#  include <libkern/OSCacheControl.h>
#  define IEMNATIVE_USE_LIBUNWIND
extern "C" void  __register_frame(const void *pvFde);
extern "C" void  __deregister_frame(const void *pvFde);
# else
#  ifdef DEBUG_bird /** @todo not thread safe yet */
#   define IEMNATIVE_USE_GDB_JIT
#  endif
#  ifdef IEMNATIVE_USE_GDB_JIT
#   include <iprt/critsect.h>
#   include <iprt/once.h>
#   include <iprt/formats/elf64.h>
#  endif
extern "C" void  __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
# endif
#endif

#include "IEMN8veRecompiler.h"


/*********************************************************************************************************************************
*   Executable Memory Allocator                                                                                                  *
*********************************************************************************************************************************/
/** The chunk sub-allocation unit size in bytes. */
#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE      128
/** The chunk sub-allocation unit size as a shift factor. */
#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT     7
/** Enables adding a header to the sub-allocator allocations.
 * This is useful for freeing up executable memory among other things. */
#define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
/** Use alternative pruning. */
#define IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING

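/* Illustration (not from the original source): a request is rounded up to
   whole 128-byte units using the shift factor above.  A minimal sketch with a
   hypothetical helper name: */
#if 0
static uint32_t iemExecMemExampleUnitsForSize(uint32_t cbReq)
{
    /* E.g. cbReq = 300 -> (300 + 127) >> 7 = 3 units, i.e. 384 bytes. */
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif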

#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
# ifdef IEMNATIVE_USE_GDB_JIT
#  define IEMNATIVE_USE_GDB_JIT_ET_DYN

/** GDB JIT: Code entry. */
typedef struct GDBJITCODEENTRY
{
    struct GDBJITCODEENTRY *pNext;
    struct GDBJITCODEENTRY *pPrev;
    uint8_t                *pbSymFile;
    uint64_t                cbSymFile;
} GDBJITCODEENTRY;

/** GDB JIT: Actions. */
typedef enum GDBJITACTIONS : uint32_t
{
    kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
} GDBJITACTIONS;

/** GDB JIT: Descriptor. */
typedef struct GDBJITDESCRIPTOR
{
    uint32_t            uVersion;
    GDBJITACTIONS       enmAction;
    GDBJITCODEENTRY    *pRelevant;
    GDBJITCODEENTRY    *pHead;
    /** Our addition: */
    GDBJITCODEENTRY    *pTail;
} GDBJITDESCRIPTOR;

/** GDB JIT: Our simple symbol file data. */
typedef struct GDBJITSYMFILE
{
    Elf64_Ehdr          EHdr;
#  ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
    Elf64_Shdr          aShdrs[5];
#  else
    Elf64_Shdr          aShdrs[7];
    Elf64_Phdr          aPhdrs[2];
#  endif
    /** The dwarf ehframe data for the chunk. */
    uint8_t             abEhFrame[512];
    char                szzStrTab[128];
    Elf64_Sym           aSymbols[3];
#  ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    Elf64_Sym           aDynSyms[2];
    Elf64_Dyn           aDyn[6];
#  endif
} GDBJITSYMFILE;

extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
extern "C" DECLEXPORT(void) __jit_debug_register_code(void);

/** Init once for g_IemNativeGdbJitLock. */
static RTONCE     g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
/** Critical section protecting the GDB JIT descriptor. */
static RTCRITSECT g_IemNativeGdbJitLock;

/** GDB reads the info here. */
GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };

/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
{
    ASMNopPause();
}

/** @callback_method_impl{FNRTONCE} */
static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
{
    RT_NOREF(pvUser);
    return RTCritSectInit(&g_IemNativeGdbJitLock);
}


# endif /* IEMNATIVE_USE_GDB_JIT */

/**
 * Per-chunk unwind info for non-windows hosts.
 */
typedef struct IEMEXECMEMCHUNKEHFRAME
{
# ifdef IEMNATIVE_USE_LIBUNWIND
    /** The offset of the FDA into abEhFrame. */
    uintptr_t               offFda;
# else
    /** 'struct object' storage area. */
    uint8_t                 abObject[1024];
# endif
# ifdef IEMNATIVE_USE_GDB_JIT
#  if 0
    /** The GDB JIT 'symbol file' data. */
    GDBJITSYMFILE           GdbJitSymFile;
#  endif
    /** The GDB JIT list entry. */
    GDBJITCODEENTRY         GdbJitEntry;
# endif
    /** The dwarf ehframe data for the chunk. */
    uint8_t                 abEhFrame[512];
} IEMEXECMEMCHUNKEHFRAME;
/** Pointer to per-chunk unwind info for non-windows hosts. */
typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
#endif


/**
 * A chunk of executable memory.
 */
typedef struct IEMEXECMEMCHUNK
{
    /** Number of free items in this chunk. */
    uint32_t                cFreeUnits;
    /** Hint where to start searching for free space in the allocation bitmap. */
    uint32_t                idxFreeHint;
    /** Pointer to the chunk. */
    void                   *pvChunk;
#ifdef IN_RING3
    /**
     * Pointer to the unwind information.
     *
     * This is used during C++ throw and longjmp (windows and probably most other
     * platforms).  Some debuggers (windbg) make use of it as well.
     *
     * Windows: This is allocated from hHeap on windows because (at least for
     *          AMD64) the UNWIND_INFO structure address in the
     *          RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
     *
     * Others:  Allocated from the regular heap to avoid unnecessary executable data
     *          structures.  This points to an IEMEXECMEMCHUNKEHFRAME structure. */
    void                   *pvUnwindInfo;
#elif defined(IN_RING0)
    /** Allocation handle. */
    RTR0MEMOBJ              hMemObj;
#endif
} IEMEXECMEMCHUNK;
/** Pointer to a memory chunk. */
typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;


/**
 * Executable memory allocator for the native recompiler.
 */
typedef struct IEMEXECMEMALLOCATOR
{
    /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
    uint32_t                uMagic;

    /** The chunk size. */
    uint32_t                cbChunk;
    /** The maximum number of chunks. */
    uint32_t                cMaxChunks;
    /** The current number of chunks. */
    uint32_t                cChunks;
    /** Hint where to start looking for available memory. */
    uint32_t                idxChunkHint;
    /** Statistics: Current number of allocations. */
    uint32_t                cAllocations;

    /** The total amount of memory available. */
    uint64_t                cbTotal;
    /** Total amount of free memory. */
    uint64_t                cbFree;
    /** Total amount of memory allocated. */
    uint64_t                cbAllocated;

    /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
     *
     * Since the chunk size is a power of two and the minimum chunk size is a lot
     * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
     * require a whole number of uint64_t elements in the allocation bitmap.  So,
     * for the sake of simplicity, they are allocated as one continuous block. */
    uint64_t               *pbmAlloc;
    /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
    uint32_t                cUnitsPerChunk;
    /** Number of bitmap elements per chunk (for quickly locating the bitmap
     * portion corresponding to a chunk). */
    uint32_t                cBitmapElementsPerChunk;

#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
    /** The next chunk to prune in. */
    uint32_t                idxChunkPrune;
    /** Where in chunk offset to start pruning at. */
    uint32_t                offChunkPrune;
    /** Profiling the pruning code. */
    STAMPROFILE             StatPruneProf;
    /** Number of bytes recovered by the pruning. */
    STAMPROFILE             StatPruneRecovered;
#endif

#ifdef VBOX_WITH_STATISTICS
    STAMPROFILE             StatAlloc;
#endif


#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    /** Pointer to the array of unwind info running parallel to aChunks (same
     * allocation as this structure, located after the bitmaps).
     * (For Windows, the structures must reside in 32-bit RVA distance to the
     * actual chunk, so they are allocated off the chunk.) */
    PIEMEXECMEMCHUNKEHFRAME paEhFrames;
#endif

    /** The allocation chunks. */
    RT_FLEXIBLE_ARRAY_EXTENSION
    IEMEXECMEMCHUNK         aChunks[RT_FLEXIBLE_ARRAY];
} IEMEXECMEMALLOCATOR;
/** Pointer to an executable memory allocator. */
typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;

/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)


#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
/**
 * Allocation header.
 */
typedef struct IEMEXECMEMALLOCHDR
{
    /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
    uint32_t        uMagic;
    /** The allocation chunk (for speeding up freeing). */
    uint32_t        idxChunk;
    /** Pointer to the translation block the allocation belongs to.
     * This is the whole point of the header. */
    PIEMTB          pTb;
} IEMEXECMEMALLOCHDR;
/** Pointer to an allocation header. */
typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;
/** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */
# define IEMEXECMEMALLOCHDR_MAGIC UINT32_C(0x4d657845)
#endif

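/* Illustration (not from the original source): the allocation header sits
   immediately before the pointer handed out, so the free and prune paths can
   recover the chunk index and owning TB in O(1).  A minimal sketch with a
   hypothetical helper name, assuming the IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
   layout above: */
#if 0
static PIEMEXECMEMALLOCHDR iemExecMemExampleHdrFromPtr(void *pv)
{
    PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1; /* step back over the header */
    Assert(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);       /* 'ExeM' eyecatcher */
    return pHdr;                                            /* pHdr->idxChunk, pHdr->pTb */
}
#endif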

static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);

#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
/**
 * Frees up executable memory when we're out of space.
 *
 * This is an alternative to iemTbAllocatorFreeupNativeSpace() that frees up
 * space in a more linear fashion from the allocator's point of view.  It may
 * also defragment if implemented & enabled.
 */
static void iemExecMemAllocatorPrune(PVMCPU pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
{
# ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
#  error "IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING requires IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER"
# endif
    STAM_REL_PROFILE_START(&pExecMemAllocator->StatPruneProf, a);

    /*
     * Before we can start, we must process delayed frees.
     */
    iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);

    AssertCompile(RT_IS_POWER_OF_TWO(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE));

    uint32_t const cbChunk = pExecMemAllocator->cbChunk;
    AssertReturnVoid(RT_IS_POWER_OF_TWO(cbChunk));
    AssertReturnVoid(cbChunk >= _1M && cbChunk <= _256M); /* see iemExecMemAllocatorInit */

    uint32_t const cChunks = pExecMemAllocator->cChunks;
    AssertReturnVoid(cChunks == pExecMemAllocator->cMaxChunks);
    AssertReturnVoid(cChunks >= 1);

    /*
     * Decide how much to prune.  The chunk size is a power of two, so we'll be
     * scanning a power-of-two sized area here as well.
     */
    uint32_t cbToPrune = cbChunk;

    /* Never more than 25%. */
    if (cChunks < 4)
        cbToPrune /= cChunks == 1 ? 4 : 2;

    /* Upper limit.  In a debug build a 4MB limit averages out at ~0.6ms per call. */
    if (cbToPrune > _4M)
        cbToPrune = _4M;

    /*
     * Adjust the pruning chunk and offset accordingly.
     */
    uint32_t idxChunk = pExecMemAllocator->idxChunkPrune;
    uint32_t offChunk = pExecMemAllocator->offChunkPrune;
    offChunk &= ~(uint32_t)(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1U);
    if (offChunk >= cbChunk)
    {
        offChunk = 0;
        idxChunk += 1;
    }
    if (idxChunk >= cChunks)
    {
        offChunk = 0;
        idxChunk = 0;
    }

    uint32_t const offPruneEnd = RT_MIN(offChunk + cbToPrune, cbChunk);

    /*
     * Do the pruning.  The current approach is the severing kind: everything
     * in range is freed outright.
     */
    uint64_t        cbPruned = 0;
    uint8_t * const pbChunk  = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunk;
    while (offChunk < offPruneEnd)
    {
        PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];

        /* Is this the start of an allocation block for a TB?  (We typically
           have one allocation at the start of each chunk for the unwind info
           where pTb is NULL.) */
        if (   pHdr->uMagic   == IEMEXECMEMALLOCHDR_MAGIC
            && pHdr->pTb      != NULL
            && pHdr->idxChunk == idxChunk)
        {
            PIEMTB const pTb = pHdr->pTb;
            AssertPtr(pTb);
            Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);

            uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),
                                                 IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
            AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */

            iemTbAllocatorFree(pVCpu, pTb);

            cbPruned += cbBlock;
            offChunk += cbBlock;
        }
        else
            offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
    }
    STAM_REL_PROFILE_ADD_PERIOD(&pExecMemAllocator->StatPruneRecovered, cbPruned);

    /*
     * Save the current pruning point.
     */
    pExecMemAllocator->offChunkPrune = offChunk;
    pExecMemAllocator->idxChunkPrune = idxChunk;

    STAM_REL_PROFILE_STOP(&pExecMemAllocator->StatPruneProf, a);
}
#endif /* IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING */

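/* Illustration (not from the original source): the prune-size heuristic above,
   pulled out as a hypothetical standalone helper.  With one 16M chunk it
   yields 16M/4 = 4M (the 25% rule, which is also the cap); with eight 64M
   chunks it starts at 64M and is clamped to the 4M upper limit: */
#if 0
static uint32_t iemExecMemExamplePruneSize(uint32_t cbChunk, uint32_t cChunks)
{
    uint32_t cbToPrune = cbChunk;
    if (cChunks < 4)
        cbToPrune /= cChunks == 1 ? 4 : 2;  /* never more than 25% of the total */
    return RT_MIN(cbToPrune, _4M);          /* debug-build calls average ~0.6ms at 4M */
}
#endif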

/**
 * Try allocate a block of @a cReqUnits in the chunk @a idxChunk.
 */
static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
                                                uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb)
{
    /*
     * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
     */
    Assert(!(cToScan & 63));
    Assert(!(idxFirst & 63));
    Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
    pbmAlloc += idxFirst / 64;

    /*
     * Scan the bitmap for cReqUnits of consecutive clear bits.
     */
    /** @todo This can probably be done more efficiently for non-x86 systems. */
    int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
    while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
    {
        uint32_t idxAddBit = 1;
        while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
            idxAddBit++;
        if (idxAddBit >= cReqUnits)
        {
            ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);

            PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
            pChunk->cFreeUnits -= cReqUnits;
            pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;

            pExecMemAllocator->cAllocations += 1;
            uint32_t const cbReq = cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
            pExecMemAllocator->cbAllocated += cbReq;
            pExecMemAllocator->cbFree      -= cbReq;
            pExecMemAllocator->idxChunkHint = idxChunk;

            void * const pvMem = (uint8_t *)pChunk->pvChunk
                               + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
#ifdef RT_OS_DARWIN
            /*
             * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
             * on darwin.  So, we mark the pages returned as read+write after alloc and
             * expect the caller to call iemExecMemAllocatorReadyForUse when done
             * writing to the allocation.
             *
             * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
             * for details.
             */
            /** @todo detect if this is necessary... it wasn't required on 10.15 or
             *        whatever older version it was. */
            int rc = RTMemProtect(pvMem, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
            AssertRC(rc);
#endif

            /*
             * Initialize the header and return.
             */
#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
            PIEMEXECMEMALLOCHDR const pHdr = (PIEMEXECMEMALLOCHDR)pvMem;
            pHdr->uMagic   = IEMEXECMEMALLOCHDR_MAGIC;
            pHdr->idxChunk = idxChunk;
            pHdr->pTb      = pTb;
            return pHdr + 1;
#else
            RT_NOREF(pTb);
            return pvMem;
#endif
        }

        iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
    }
    return NULL;
}

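/* Illustration (not from the original source): the first-fit scan above in
   miniature.  A hypothetical sketch over a single 64-bit bitmap word, assuming
   a set bit means the unit is allocated: */
#if 0
static int iemExecMemExampleFirstFit(uint64_t bmAlloc, unsigned cReqUnits)
{
    for (unsigned iBit = 0; iBit + cReqUnits <= 64; iBit++)
    {
        unsigned cFree = 0;
        while (cFree < cReqUnits && !(bmAlloc & RT_BIT_64(iBit + cFree)))
            cFree++;
        if (cFree >= cReqUnits)
            return (int)iBit;   /* e.g. bmAlloc=0b0111, cReqUnits=2 -> bit 3 */
        iBit += cFree;          /* skip past the set bit we just hit */
    }
    return -1;
}
#endif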

static void *
iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq, PIEMTB pTb)
{
    /*
     * Figure out how much to allocate.
     */
#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
    uint32_t const cReqUnits = (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
#else
    uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
#endif
                            >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
    {
        uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
        uint32_t const   idxHint  = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
        if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
        {
            void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
                                                             pExecMemAllocator->cUnitsPerChunk - idxHint,
                                                             cReqUnits, idxChunk, pTb);
            if (pvRet)
                return pvRet;
        }
        return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
                                                  RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
                                                  cReqUnits, idxChunk, pTb);
    }
    return NULL;
}


/**
 * Allocates @a cbReq bytes of executable memory.
 *
 * @returns Pointer to the memory, NULL if out of memory or other problem
 *          encountered.
 * @param   pVCpu   The cross context virtual CPU structure of the calling
 *                  thread.
 * @param   cbReq   How many bytes are required.
 * @param   pTb     The translation block that will be using the allocation.
 */
DECLHIDDEN(void *) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb) RT_NOEXCEPT
{
    PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
    AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
    AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
    STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);

    for (unsigned iIteration = 0;; iIteration++)
    {
        if (cbReq <= pExecMemAllocator->cbFree)
        {
            uint32_t const cChunks      = pExecMemAllocator->cChunks;
            uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
            for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
            {
                void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
                if (pvRet)
                {
                    STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
                    return pvRet;
                }
            }
            for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
            {
                void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
                if (pvRet)
                {
                    STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
                    return pvRet;
                }
            }
        }

        /*
         * Can we grow it with another chunk?
         */
        if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
        {
            int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
            AssertLogRelRCReturn(rc, NULL);

            uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
            void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
            if (pvRet)
            {
                STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
                return pvRet;
            }
            AssertFailed();
        }

        /*
         * Try prune native TBs once.
         */
        if (iIteration == 0)
        {
#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
            iemExecMemAllocatorPrune(pVCpu, pExecMemAllocator);
#else
            /* No header included in the instruction count here. */
            uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);
            iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);
#endif
        }
        else
        {
            STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
            STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
            return NULL;
        }
    }
}

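/* Illustration (not from the original source): the intended call sequence for
   the allocator, in particular the darwin W^X handshake.  A minimal sketch
   with a hypothetical helper and code size; pVCpu and pTb come from the
   recompiler: */
#if 0
static void iemExecMemExampleUsage(PVMCPU pVCpu, PIEMTB pTb)
{
    uint32_t const cbCode = 256; /* hypothetical size of the emitted code */
    void *pv = iemExecMemAllocatorAlloc(pVCpu, cbCode, pTb);
    if (pv)
    {
        /* ... emit native instructions into pv (mapped read+write on darwin) ... */
        iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode); /* flip to read+exec, flush icache */
        /* ... execute the TB ...; eventually: */
        iemExecMemAllocatorFree(pVCpu, pv, cbCode);
    }
}
#endif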

/** This is a hook that we may need later for changing memory protection back
 *  to readonly+exec. */
DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT
{
#ifdef RT_OS_DARWIN
    /* See iemExecMemAllocatorAllocInChunkInt for the explanation. */
    int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
    AssertRC(rc); RT_NOREF(pVCpu);

    /*
     * Flush the instruction cache:
     *      https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
     */
    /* sys_dcache_flush(pv, cb); - not necessary */
    sys_icache_invalidate(pv, cb);
#else
    RT_NOREF(pVCpu, pv, cb);
#endif
}


/**
 * Frees executable memory.
 */
DECLHIDDEN(void) iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT
{
    PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
    Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
    AssertPtr(pv);
#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
    Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));

    /* Align the size as we did when allocating the block. */
    cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);

#else
    PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;
    Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
    AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);
    uint32_t const idxChunk = pHdr->idxChunk;
    AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);
    pv = pHdr;

    /* Adjust and align the size to cover the whole allocation area. */
    cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
#endif

    /* Free it / assert sanity. */
    bool           fFound  = false;
    uint32_t const cbChunk = pExecMemAllocator->cbChunk;
#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
    uint32_t const cChunks = pExecMemAllocator->cChunks;
    for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
#endif
    {
        uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
        fFound = offChunk < cbChunk;
        if (fFound)
        {
            uint32_t const idxFirst  = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
            uint32_t const cReqUnits = (uint32_t)cb       >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;

            /* Check that it's valid and free it. */
            uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
            AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
            for (uint32_t i = 1; i < cReqUnits; i++)
                AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
            ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);

#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
# ifdef RT_OS_DARWIN
            int rc = RTMemProtect(pHdr, sizeof(*pHdr), RTMEM_PROT_WRITE | RTMEM_PROT_READ);
            AssertRC(rc); RT_NOREF(pVCpu);
# endif
            pHdr->uMagic   = 0;
            pHdr->idxChunk = 0;
            pHdr->pTb      = NULL;
# ifdef RT_OS_DARWIN
            rc = RTMemProtect(pHdr, sizeof(*pHdr), RTMEM_PROT_EXEC | RTMEM_PROT_READ);
            AssertRC(rc); RT_NOREF(pVCpu);
# endif
#endif
            pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
            pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;

            /* Update the stats. */
            pExecMemAllocator->cbAllocated  -= cb;
            pExecMemAllocator->cbFree       += cb;
            pExecMemAllocator->cAllocations -= 1;
            return;
        }
    }
    AssertFailed();
}



#ifdef IN_RING3
# ifdef RT_OS_WINDOWS

/**
 * Initializes the unwind info structures for windows hosts.
 */
static int
iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
                                                     void *pvChunk, uint32_t idxChunk)
{
    RT_NOREF(pVCpu);

    /*
     * The AMD64 unwind opcodes.
     *
     * This is a program that starts with RSP after a RET instruction that
     * ends up in recompiled code, and the operations we describe here will
     * restore all non-volatile registers and bring RSP back to where our
     * RET address is.  This means it's reverse order from what happens in
     * the prologue.
     *
     * Note! Using a frame register approach here both because we have one,
     *       but mainly because the UWOP_ALLOC_LARGE argument values
     *       would be a pain to write initializers for.  On the positive
     *       side, we're impervious to changes in the stack variable
     *       area and can deal with dynamic stack allocations if necessary.
     */
    static const IMAGE_UNWIND_CODE s_aOpcodes[] =
    {
        { { 16, IMAGE_AMD64_UWOP_SET_FPREG,     0 } },              /* RSP  = RBP - FrameOffset * 10 (0x60) */
        { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL,   0 } },              /* RSP += 8; */
        { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x15 } },   /* R15  = [RSP]; RSP += 8; */
        { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x14 } },   /* R14  = [RSP]; RSP += 8; */
        { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x13 } },   /* R13  = [RSP]; RSP += 8; */
        { {  8, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x12 } },   /* R12  = [RSP]; RSP += 8; */
        { {  7, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xDI } },   /* RDI  = [RSP]; RSP += 8; */
        { {  6, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xSI } },   /* RSI  = [RSP]; RSP += 8; */
        { {  5, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xBX } },   /* RBX  = [RSP]; RSP += 8; */
        { {  4, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xBP } },   /* RBP  = [RSP]; RSP += 8; */
    };
    union
    {
        IMAGE_UNWIND_INFO Info;
        uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
    } s_UnwindInfo =
    {
        {
            /* .Version = */        1,
            /* .Flags = */          0,
            /* .SizeOfProlog = */   16, /* whatever */
            /* .CountOfCodes = */   RT_ELEMENTS(s_aOpcodes),
            /* .FrameRegister = */  X86_GREG_xBP,
            /* .FrameOffset = */    (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
        }
    };
    AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
    AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);

    /*
     * Calc how much space we need and allocate it off the exec heap.
     */
    unsigned const cFunctionEntries = 1;
    unsigned const cbUnwindInfo     = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
    unsigned const cbNeeded         = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
    PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
        = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL);
    AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
    pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;

    /*
     * Initialize the structures.
     */
    PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];

    paFunctions[0].BeginAddress      = 0;
    paFunctions[0].EndAddress        = pExecMemAllocator->cbChunk;
    paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);

    memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
    memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));

    /*
     * Register it.
     */
    uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
    AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */

    return VINF_SUCCESS;
}


# else /* !RT_OS_WINDOWS */

/**
 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
 */
DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
{
    if (iValue >= 64)
    {
        Assert(iValue < 0x2000);
        *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
        *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
    }
    else if (iValue >= 0)
        *Ptr.pb++ = (uint8_t)iValue;
    else if (iValue > -64)
        *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
    else
    {
        Assert(iValue > -0x2000);
        *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
        *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
    }
    return Ptr;
}


/**
 * Emits an ULEB128 encoded value (up to 64-bit wide).
 */
DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
{
    while (uValue >= 0x80)
    {
        *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
        uValue >>= 7;
    }
    *Ptr.pb++ = (uint8_t)uValue;
    return Ptr;
}

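/* Illustration (not from the original source): ULEB128 stores 7 bits per
   byte, least significant first, setting bit 7 on every byte except the last.
   The classic DWARF example 624485 (0x98765) encodes as 0xE5 0x8E 0x26:
       624485 & 0x7f = 0x65 -> 0xE5 (continuation bit set), value >>= 7
       4878   & 0x7f = 0x0e -> 0x8E (continuation bit set), value >>= 7
       38                   -> 0x26 (final byte)
   Small signed values like the data alignment factor -8 fit a single LEB128
   byte in the encoder above: (-8 & 0x3f) | 0x40 = 0x78. */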

/**
 * Emits a CFA rule as register @a uReg + offset @a off.
 */
DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
{
    *Ptr.pb++ = DW_CFA_def_cfa;
    Ptr = iemDwarfPutUleb128(Ptr, uReg);
    Ptr = iemDwarfPutUleb128(Ptr, off);
    return Ptr;
}


/**
 * Emits a register (@a uReg) save location:
 *      CFA + @a off * data_alignment_factor
 */
DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
{
    if (uReg < 0x40)
        *Ptr.pb++ = DW_CFA_offset | uReg;
    else
    {
        *Ptr.pb++ = DW_CFA_offset_extended;
        Ptr = iemDwarfPutUleb128(Ptr, uReg);
    }
    Ptr = iemDwarfPutUleb128(Ptr, off);
    return Ptr;
}

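/* Illustration (not from the original source): byte-level view of the two
   helpers above for the AMD64 CIE emitted further down, assuming the standard
   DWARF encodings DW_CFA_def_cfa = 0x0c and DW_CFA_offset = 0x80, and the
   System V AMD64 register numbers RBP = 6 and RA column = 16:
       iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16) -> 0x0c 0x06 0x10
       iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,  1)  -> 0x90 0x01
   i.e. "CFA = RBP + 16" and "RA saved at CFA + 1 * -8". */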

# if 0 /* unused */
/**
 * Emits a register (@a uReg) save location, using signed offset:
 *      CFA + @a offSigned * data_alignment_factor
 */
DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
{
    *Ptr.pb++ = DW_CFA_offset_extended_sf;
    Ptr = iemDwarfPutUleb128(Ptr, uReg);
    Ptr = iemDwarfPutLeb128(Ptr, offSigned);
    return Ptr;
}
# endif


/**
 * Initializes the unwind info section for non-windows hosts.
 */
static int
iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
                                                     void *pvChunk, uint32_t idxChunk)
{
    PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
    pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */

    RTPTRUNION Ptr = { pEhFrame->abEhFrame };

    /*
     * Generate the CIE first.
     */
# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
    uint8_t const iDwarfVer = 3;
# else
    uint8_t const iDwarfVer = 4;
# endif
    RTPTRUNION const PtrCie = Ptr;
    *Ptr.pu32++ = 123;                                      /* The CIE length will be determined later. */
    *Ptr.pu32++ = 0 /*UINT32_MAX*/;                         /* I'm a CIE in .eh_frame speak. */
    *Ptr.pb++   = iDwarfVer;                                /* DwARF version */
    *Ptr.pb++   = 0;                                        /* Augmentation. */
    if (iDwarfVer >= 4)
    {
        *Ptr.pb++ = sizeof(uintptr_t);                      /* Address size. */
        *Ptr.pb++ = 0;                                      /* Segment selector size. */
    }
# ifdef RT_ARCH_AMD64
    Ptr = iemDwarfPutLeb128(Ptr, 1);                        /* Code alignment factor (LEB128 = 1). */
# else
    Ptr = iemDwarfPutLeb128(Ptr, 4);                        /* Code alignment factor (LEB128 = 4). */
# endif
    Ptr = iemDwarfPutLeb128(Ptr, -8);                       /* Data alignment factor (LEB128 = -8). */
# ifdef RT_ARCH_AMD64
    Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA);          /* Return address column (ULEB128) */
# elif defined(RT_ARCH_ARM64)
    Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR);          /* Return address column (ULEB128) */
# else
#  error "port me"
# endif
    /* Initial instructions: */
# ifdef RT_ARCH_AMD64
    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   /* CFA     = RBP + 0x10 - first stack parameter */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,   1);   /* Ret RIP = [CFA + 1*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP,  2);   /* RBP     = [CFA + 2*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX,  3);   /* RBX     = [CFA + 3*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12,  4);   /* R12     = [CFA + 4*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13,  5);   /* R13     = [CFA + 5*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14,  6);   /* R14     = [CFA + 6*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15,  7);   /* R15     = [CFA + 7*-8] */
# elif defined(RT_ARCH_ARM64)
#  if 1
    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP,  16);   /* CFA     = BP + 0x10 - first stack parameter */
#  else
    Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP,  IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
#  endif
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR,   1);   /* Ret PC  = [CFA + 1*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP,   2);   /* Ret BP  = [CFA + 2*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28,  3);   /* X28     = [CFA + 3*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27,  4);   /* X27     = [CFA + 4*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26,  5);   /* X26     = [CFA + 5*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25,  6);   /* X25     = [CFA + 6*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24,  7);   /* X24     = [CFA + 7*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23,  8);   /* X23     = [CFA + 8*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22,  9);   /* X22     = [CFA + 9*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10);   /* X21     = [CFA +10*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11);   /* X20     = [CFA +11*-8] */
    Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12);   /* X19     = [CFA +12*-8] */
    AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
# else
#  error "port me"
# endif
    while ((Ptr.u - PtrCie.u) & 3)
        *Ptr.pb++ = DW_CFA_nop;
    /* Finalize the CIE size. */
    *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);

    /*
     * Generate an FDE for the whole chunk area.
     */
# ifdef IEMNATIVE_USE_LIBUNWIND
    pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
# endif
    RTPTRUNION const PtrFde = Ptr;
    *Ptr.pu32++ = 123;                                      /* The FDE length will be determined later. */
    *Ptr.pu32   = Ptr.u - PtrCie.u;                         /* Negated self relative CIE address. */
    Ptr.pu32++;
    *Ptr.pu64++ = (uintptr_t)pvChunk;                       /* Absolute start PC of this FDE. */
    *Ptr.pu64++ = pExecMemAllocator->cbChunk;               /* PC range length for this FDE. */
# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
    *Ptr.pb++ = DW_CFA_nop;
# endif
    while ((Ptr.u - PtrFde.u) & 3)
        *Ptr.pb++ = DW_CFA_nop;
    /* Finalize the FDE size. */
    *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);

    /* Terminator entry. */
    *Ptr.pu32++ = 0;
    *Ptr.pu32++ = 0;                                        /* just to be sure... */
    Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));

    /*
     * Register it.
     */
# ifdef IEMNATIVE_USE_LIBUNWIND
    __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
# else
    memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
    __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
# endif

# ifdef IEMNATIVE_USE_GDB_JIT
    /*
     * Now for telling GDB about this (experimental).
     *
     * This seems to work best with ET_DYN.
     */
    GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk,
                                                                                      sizeof(GDBJITSYMFILE), NULL);
    AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
    unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;

    RT_ZERO(*pSymFile);

    /*
     * The ELF header:
     */
    pSymFile->EHdr.e_ident[0]          = ELFMAG0;
    pSymFile->EHdr.e_ident[1]          = ELFMAG1;
    pSymFile->EHdr.e_ident[2]          = ELFMAG2;
    pSymFile->EHdr.e_ident[3]          = ELFMAG3;
    pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
    pSymFile->EHdr.e_ident[EI_CLASS]   = ELFCLASS64;
    pSymFile->EHdr.e_ident[EI_DATA]    = ELFDATA2LSB;
    pSymFile->EHdr.e_ident[EI_OSABI]   = ELFOSABI_NONE;
#  ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    pSymFile->EHdr.e_type              = ET_DYN;
#  else
    pSymFile->EHdr.e_type              = ET_REL;
#  endif
#  ifdef RT_ARCH_AMD64
    pSymFile->EHdr.e_machine           = EM_AMD64;
#  elif defined(RT_ARCH_ARM64)
    pSymFile->EHdr.e_machine           = EM_AARCH64;
#  else
#   error "port me"
#  endif
    pSymFile->EHdr.e_version           = 1; /*?*/
    pSymFile->EHdr.e_entry             = 0;
#  if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    pSymFile->EHdr.e_phoff             = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
#  else
    pSymFile->EHdr.e_phoff             = 0;
#  endif
    pSymFile->EHdr.e_shoff             = sizeof(pSymFile->EHdr);
    pSymFile->EHdr.e_flags             = 0;
    pSymFile->EHdr.e_ehsize            = sizeof(pSymFile->EHdr);
#  if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    pSymFile->EHdr.e_phentsize         = sizeof(pSymFile->aPhdrs[0]);
    pSymFile->EHdr.e_phnum             = RT_ELEMENTS(pSymFile->aPhdrs);
#  else
    pSymFile->EHdr.e_phentsize         = 0;
    pSymFile->EHdr.e_phnum             = 0;
#  endif
    pSymFile->EHdr.e_shentsize         = sizeof(pSymFile->aShdrs[0]);
    pSymFile->EHdr.e_shnum             = RT_ELEMENTS(pSymFile->aShdrs);
    pSymFile->EHdr.e_shstrndx          = 0; /* set later */

    uint32_t offStrTab = 0;
#define APPEND_STR(a_szStr) do { \
        memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
        offStrTab += sizeof(a_szStr); \
        Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    } while (0)
#define APPEND_STR_FMT(a_szStr, ...) do { \
        offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
        offStrTab++; \
        Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    } while (0)

    /*
     * Section headers.
     */
    /* Section header #0: NULL */
    unsigned i = 0;
    APPEND_STR("");
    RT_ZERO(pSymFile->aShdrs[i]);
    i++;

    /* Section header: .eh_frame */
    pSymFile->aShdrs[i].sh_name      = offStrTab;
    APPEND_STR(".eh_frame");
    pSymFile->aShdrs[i].sh_type      = SHT_PROGBITS;
    pSymFile->aShdrs[i].sh_flags     = SHF_ALLOC | SHF_EXECINSTR;
#  if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    pSymFile->aShdrs[i].sh_offset
        = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
#  else
    pSymFile->aShdrs[i].sh_addr      = (uintptr_t)&pSymFile->abEhFrame[0];
    pSymFile->aShdrs[i].sh_offset    = 0;
#  endif

    pSymFile->aShdrs[i].sh_size      = sizeof(pEhFrame->abEhFrame);
    pSymFile->aShdrs[i].sh_link      = 0;
    pSymFile->aShdrs[i].sh_info      = 0;
    pSymFile->aShdrs[i].sh_addralign = 1;
    pSymFile->aShdrs[i].sh_entsize   = 0;
    memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
    i++;

    /* Section header: .shstrtab */
    unsigned const iShStrTab = i;
    pSymFile->EHdr.e_shstrndx        = iShStrTab;
    pSymFile->aShdrs[i].sh_name      = offStrTab;
    APPEND_STR(".shstrtab");
    pSymFile->aShdrs[i].sh_type      = SHT_STRTAB;
    pSymFile->aShdrs[i].sh_flags     = SHF_ALLOC;
#  if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    pSymFile->aShdrs[i].sh_offset
        = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
#  else
    pSymFile->aShdrs[i].sh_addr      = (uintptr_t)&pSymFile->szzStrTab[0];
    pSymFile->aShdrs[i].sh_offset    = 0;
#  endif
    pSymFile->aShdrs[i].sh_size      = sizeof(pSymFile->szzStrTab);
    pSymFile->aShdrs[i].sh_link      = 0;
    pSymFile->aShdrs[i].sh_info      = 0;
    pSymFile->aShdrs[i].sh_addralign = 1;
    pSymFile->aShdrs[i].sh_entsize   = 0;
    i++;

    /* Section header: .symtab */
    pSymFile->aShdrs[i].sh_name      = offStrTab;
    APPEND_STR(".symtab");
    pSymFile->aShdrs[i].sh_type      = SHT_SYMTAB;
    pSymFile->aShdrs[i].sh_flags     = SHF_ALLOC;
    pSymFile->aShdrs[i].sh_offset
        = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
    pSymFile->aShdrs[i].sh_size      = sizeof(pSymFile->aSymbols);
    pSymFile->aShdrs[i].sh_link      = iShStrTab;
    pSymFile->aShdrs[i].sh_info      = RT_ELEMENTS(pSymFile->aSymbols);
    pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
    pSymFile->aShdrs[i].sh_entsize   = sizeof(pSymFile->aSymbols[0]);
    i++;

#  if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    /* Section header: .dynsym */
    pSymFile->aShdrs[i].sh_name      = offStrTab;
    APPEND_STR(".dynsym");
    pSymFile->aShdrs[i].sh_type      = SHT_DYNSYM;
    pSymFile->aShdrs[i].sh_flags     = SHF_ALLOC;
    pSymFile->aShdrs[i].sh_offset
        = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    pSymFile->aShdrs[i].sh_size      = sizeof(pSymFile->aDynSyms);
    pSymFile->aShdrs[i].sh_link      = iShStrTab;
    pSymFile->aShdrs[i].sh_info      = RT_ELEMENTS(pSymFile->aDynSyms);
    pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
    pSymFile->aShdrs[i].sh_entsize   = sizeof(pSymFile->aDynSyms[0]);
    i++;
#  endif

#  if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    /* Section header: .dynamic */
    pSymFile->aShdrs[i].sh_name      = offStrTab;
    APPEND_STR(".dynamic");
    pSymFile->aShdrs[i].sh_type      = SHT_DYNAMIC;
    pSymFile->aShdrs[i].sh_flags     = SHF_ALLOC;
    pSymFile->aShdrs[i].sh_offset
        = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    pSymFile->aShdrs[i].sh_size      = sizeof(pSymFile->aDyn);
    pSymFile->aShdrs[i].sh_link      = iShStrTab;
    pSymFile->aShdrs[i].sh_info      = 0;
    pSymFile->aShdrs[i].sh_addralign = 1;
    pSymFile->aShdrs[i].sh_entsize   = sizeof(pSymFile->aDyn[0]);
    i++;
#  endif

    /* Section header: .text */
    unsigned const iShText = i;
    pSymFile->aShdrs[i].sh_name      = offStrTab;
    APPEND_STR(".text");
    pSymFile->aShdrs[i].sh_type      = SHT_PROGBITS;
    pSymFile->aShdrs[i].sh_flags     = SHF_ALLOC | SHF_EXECINSTR;
#  if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    pSymFile->aShdrs[i].sh_offset
        = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
#  else
    pSymFile->aShdrs[i].sh_addr      = (uintptr_t)(pSymFile + 1);
    pSymFile->aShdrs[i].sh_offset    = 0;
#  endif
    pSymFile->aShdrs[i].sh_size      = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
    pSymFile->aShdrs[i].sh_link      = 0;
    pSymFile->aShdrs[i].sh_info      = 0;
    pSymFile->aShdrs[i].sh_addralign = 1;
    pSymFile->aShdrs[i].sh_entsize   = 0;
    i++;

    Assert(i == RT_ELEMENTS(pSymFile->aShdrs));

#  if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    /*
     * The program headers:
     */
    /* Everything in a single LOAD segment: */
    i = 0;
    pSymFile->aPhdrs[i].p_type        = PT_LOAD;
    pSymFile->aPhdrs[i].p_flags       = PF_X | PF_R;
    pSymFile->aPhdrs[i].p_offset
        = pSymFile->aPhdrs[i].p_vaddr
        = pSymFile->aPhdrs[i].p_paddr = 0;
    pSymFile->aPhdrs[i].p_filesz      /* Size of segment in file. */
        = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
    pSymFile->aPhdrs[i].p_align       = HOST_PAGE_SIZE;
    i++;
    /* The .dynamic segment. */
    pSymFile->aPhdrs[i].p_type        = PT_DYNAMIC;
    pSymFile->aPhdrs[i].p_flags       = PF_R;
    pSymFile->aPhdrs[i].p_offset
        = pSymFile->aPhdrs[i].p_vaddr
        = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    pSymFile->aPhdrs[i].p_filesz      /* Size of segment in file. */
        = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
    pSymFile->aPhdrs[i].p_align       = sizeof(pSymFile->aDyn[0].d_tag);
    i++;

    Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));

    /*
     * The dynamic section:
     */
    i = 0;
    pSymFile->aDyn[i].d_tag      = DT_SONAME;
    pSymFile->aDyn[i].d_un.d_val = offStrTab;
    APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
    i++;
    pSymFile->aDyn[i].d_tag      = DT_STRTAB;
    pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
    i++;
    pSymFile->aDyn[i].d_tag      = DT_STRSZ;
    pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
    i++;
    pSymFile->aDyn[i].d_tag      = DT_SYMTAB;
    pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    i++;
    pSymFile->aDyn[i].d_tag      = DT_SYMENT;
    pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
    i++;
    pSymFile->aDyn[i].d_tag      = DT_NULL;
    i++;
    Assert(i == RT_ELEMENTS(pSymFile->aDyn));
#  endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */

    /*
     * Symbol tables:
     */
    /** @todo gdb doesn't seem to really like this ... */
    i = 0;
    pSymFile->aSymbols[i].st_name  = 0;
    pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
    pSymFile->aSymbols[i].st_value = 0;
    pSymFile->aSymbols[i].st_size  = 0;
    pSymFile->aSymbols[i].st_info  = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
    pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
#  ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
#  endif
    i++;

    pSymFile->aSymbols[i].st_name  = 0;
    pSymFile->aSymbols[i].st_shndx = SHN_ABS;
    pSymFile->aSymbols[i].st_value = 0;
    pSymFile->aSymbols[i].st_size  = 0;
    pSymFile->aSymbols[i].st_info  = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
    pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
    i++;

    pSymFile->aSymbols[i].st_name  = offStrTab;
    APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
#  if 0
    pSymFile->aSymbols[i].st_shndx = iShText;
    pSymFile->aSymbols[i].st_value = 0;
#  else
    pSymFile->aSymbols[i].st_shndx = SHN_ABS;
    pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
#  endif
    pSymFile->aSymbols[i].st_size  = pSymFile->aShdrs[iShText].sh_size;
    pSymFile->aSymbols[i].st_info  = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
    pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
#  ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
    pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
#  endif
    i++;

    Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
    Assert(offStrTab < sizeof(pSymFile->szzStrTab));

    /*
     * The GDB JIT entry and informing GDB.
     */
    pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
#  if 1
    pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
#  else
    pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
#  endif

    RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
    RTCritSectEnter(&g_IemNativeGdbJitLock);
    pEhFrame->GdbJitEntry.pNext = NULL;
    pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
    if (__jit_debug_descriptor.pTail)
        __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
    else
        __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
    __jit_debug_descriptor.pTail     = &pEhFrame->GdbJitEntry;
    __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;

    /* Notify GDB: */
    __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
    __jit_debug_register_code();
    __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
    RTCritSectLeave(&g_IemNativeGdbJitLock);

# else  /* !IEMNATIVE_USE_GDB_JIT */
    RT_NOREF(pVCpu);
# endif /* !IEMNATIVE_USE_GDB_JIT */

    return VINF_SUCCESS;
}

# endif /* !RT_OS_WINDOWS */
#endif /* IN_RING3 */


/**
 * Adds another chunk to the executable memory allocator.
 *
 * This is used by the init code for the initial allocation and later by the
 * regular allocator function when it's out of memory.
 */
static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
{
    /* Check that we've room for growth. */
    uint32_t const idxChunk = pExecMemAllocator->cChunks;
    AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);

    /* Allocate a chunk. */
#ifdef RT_OS_DARWIN
    void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
#else
    void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
#endif
    AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);

    /*
     * Add the chunk.
     *
     * This must be done before the unwind init so windows can allocate
     * memory from the chunk when using the alternative sub-allocator.
     */
    pExecMemAllocator->aChunks[idxChunk].pvChunk      = pvChunk;
#ifdef IN_RING3
    pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
#endif
    pExecMemAllocator->aChunks[idxChunk].cFreeUnits   = pExecMemAllocator->cUnitsPerChunk;
    pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = 0;
    memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
           0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);

    pExecMemAllocator->cChunks      = idxChunk + 1;
    pExecMemAllocator->idxChunkHint = idxChunk;

    pExecMemAllocator->cbTotal     += pExecMemAllocator->cbChunk;
    pExecMemAllocator->cbFree      += pExecMemAllocator->cbChunk;

#ifdef IN_RING3
    /*
     * Initialize the unwind information (this cannot really fail atm).
     * (This sets pvUnwindInfo.)
     */
    int rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
    if (RT_SUCCESS(rc))
    { /* likely */ }
    else
    {
        /* Just in case the impossible happens, undo the above: */
        pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
        pExecMemAllocator->cbFree  -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
        pExecMemAllocator->cChunks  = idxChunk;
        memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
               0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
        pExecMemAllocator->aChunks[idxChunk].pvChunk    = NULL;
        pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;

        RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
        return rc;
    }
#endif
    return VINF_SUCCESS;
}

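/* Illustration (not from the original source): bookkeeping sizes for the 64M
   chunk that iemExecMemAllocatorInit below picks when cbMax >= 256M, using
   the 128 byte / shift-7 unit constants:
       cUnitsPerChunk          = 64M >> 7  = 524288 units
       cBitmapElementsPerChunk = 64M >> 13 = 8192 uint64_t (64KB of bitmap)
   i.e. the allocation bitmap costs one bit per 128 bytes, or 1/1024 of the
   chunk size. */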
1429/**
1430 * Initializes the executable memory allocator for native recompilation on the
1431 * calling EMT.
1432 *
1433 * @returns VBox status code.
1434 * @param pVCpu The cross context virtual CPU structure of the calling
1435 * thread.
1436 * @param cbMax The max size of the allocator.
1437 * @param cbInitial The initial allocator size.
1438 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1439 * dependent).
1440 */
1441int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT
1442{
1443 /*
1444 * Validate input.
1445 */
1446 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1447 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1448 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1449 || cbChunk == 0
1450 || ( RT_IS_POWER_OF_TWO(cbChunk)
1451 && cbChunk >= _1M
1452 && cbChunk <= _256M
1453 && cbChunk <= cbMax),
1454 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1455 VERR_OUT_OF_RANGE);
1456
1457 /*
1458 * Adjust/figure out the chunk size.
1459 */
1460 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1461 {
1462 if (cbMax >= _256M)
1463 cbChunk = _64M;
1464 else
1465 {
1466 if (cbMax < _16M)
1467 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1468 else
1469 cbChunk = (uint32_t)cbMax / 4;
1470 if (!RT_IS_POWER_OF_TWO(cbChunk))
1471 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1472 }
1473 }
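    /* Illustration (not from the original source), the heuristic above worked
       through: cbMax=512M takes the first branch and yields 64M chunks (8 of
       them); cbMax=40M yields 40M/4 = 10M, rounded up to the next power of
       two, 16M; cbMax=2M yields a single 2M chunk. */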

    if (cbChunk > cbMax)
        cbMax = cbChunk;
    else
        cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
    uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
    AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);

    /*
     * Allocate and initialize the allocator instance.
     */
    size_t const offBitmaps = RT_ALIGN_Z(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]), RT_CACHELINE_SIZE);
    size_t const cbBitmaps  = (size_t)(cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)) * cMaxChunks;
    size_t       cbNeeded   = offBitmaps + cbBitmaps;
    AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
    Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
    cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
#endif
    PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
    AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
                          VERR_NO_MEMORY);
    pExecMemAllocator->uMagic       = IEMEXECMEMALLOCATOR_MAGIC;
    pExecMemAllocator->cbChunk      = cbChunk;
    pExecMemAllocator->cMaxChunks   = cMaxChunks;
    pExecMemAllocator->cChunks      = 0;
    pExecMemAllocator->idxChunkHint = 0;
    pExecMemAllocator->cAllocations = 0;
    pExecMemAllocator->cbTotal      = 0;
    pExecMemAllocator->cbFree       = 0;
    pExecMemAllocator->cbAllocated  = 0;
    pExecMemAllocator->pbmAlloc     = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
    pExecMemAllocator->cUnitsPerChunk          = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
    memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmaps); /* Mark everything as allocated. Clear when chunks are added. */
#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
#endif
    for (uint32_t i = 0; i < cMaxChunks; i++)
    {
        pExecMemAllocator->aChunks[i].cFreeUnits  = 0;
        pExecMemAllocator->aChunks[i].idxFreeHint = 0;
        pExecMemAllocator->aChunks[i].pvChunk     = NULL;
#ifdef IN_RING0
        pExecMemAllocator->aChunks[i].hMemObj     = NIL_RTR0MEMOBJ;
#else
        pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
#endif
    }
    pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;

    /*
     * Do the initial allocations.
     */
    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
    {
        int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
        AssertLogRelRCReturn(rc, rc);
    }

    pExecMemAllocator->idxChunkHint = 0;

    /*
     * Register statistics.
     */
    PUVM const pUVM = pVCpu->pUVCpu->pUVM;
    STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
                     "Current number of allocations", "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);
    STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
                     "Currently allocated chunks", "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);
    STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
                     "Maximum number of chunks", "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);
    STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
                     "Allocation chunk size", "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);
    STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
                     "Number of bytes currently allocated", "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);
    STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
                     "Number of bytes currently free", "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);
    STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
                     "Total number of bytes", "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);
#ifdef VBOX_WITH_STATISTICS
    STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
                     "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);
#endif
#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
    STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneProf, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
                     "Pruning executable memory (alt)", "/IEM/CPU%u/re/ExecMem/Pruning", pVCpu->idCpu);
    STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneRecovered, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES_PER_CALL,
                     "Bytes recovered while pruning", "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu);
#endif

    return VINF_SUCCESS;
}

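/* Illustration (not from the original source): a hypothetical init call as
   EMT bring-up code might issue it, using the defaults described above: */
#if 0
    /* 64MB cap, 1MB initial allocation, default (cbMax dependent) chunk size: */
    int rc = iemExecMemAllocatorInit(pVCpu, _64M, _1M, UINT32_MAX);
    AssertRCReturn(rc, rc);
#endif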