VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp@ 104123

Last change on this file since 104123 was 104123, checked in by vboxsync, 9 months ago

VMM/IEM: Avoid the two RTMemProtect calls in iemExecMemAllocatorFree on darwin. bugref:10370

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 61.9 KB
Line 
1/* $Id: IEMAllN8veExecMem.cpp 104123 2024-03-30 01:12:57Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
50#include <VBox/vmm/iem.h>
51#include <VBox/vmm/cpum.h>
52#include "IEMInternal.h"
53#include <VBox/vmm/vmcc.h>
54#include <VBox/log.h>
55#include <VBox/err.h>
56#include <VBox/param.h>
57#include <iprt/assert.h>
58#include <iprt/mem.h>
59#include <iprt/string.h>
60#if defined(RT_ARCH_AMD64)
61# include <iprt/x86.h>
62#elif defined(RT_ARCH_ARM64)
63# include <iprt/armv8.h>
64#endif
65
66#ifdef RT_OS_WINDOWS
67# include <iprt/formats/pecoff.h> /* this is incomaptible with windows.h, thus: */
68extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
69extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
70#else
71# include <iprt/formats/dwarf.h>
72# if defined(RT_OS_DARWIN)
73# include <libkern/OSCacheControl.h>
74# define IEMNATIVE_USE_LIBUNWIND
75extern "C" void __register_frame(const void *pvFde);
76extern "C" void __deregister_frame(const void *pvFde);
77# else
78# ifdef DEBUG_bird /** @todo not thread safe yet */
79# define IEMNATIVE_USE_GDB_JIT
80# endif
81# ifdef IEMNATIVE_USE_GDB_JIT
82# include <iprt/critsect.h>
83# include <iprt/once.h>
84# include <iprt/formats/elf64.h>
85# endif
86extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
87extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
88# endif
89#endif
90
91#include "IEMN8veRecompiler.h"
92
93
94/*********************************************************************************************************************************
95* Executable Memory Allocator *
96*********************************************************************************************************************************/
97/** The chunk sub-allocation unit size in bytes. */
98#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
99/** The chunk sub-allocation unit size as a shift factor. */
100#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
101/** Enables adding a header to the sub-allocator allocations.
102 * This is useful for freeing up executable memory among other things. */
103#define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
104/** Use alternative pruning. */
105#define IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
106
107
108#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
109# ifdef IEMNATIVE_USE_GDB_JIT
110# define IEMNATIVE_USE_GDB_JIT_ET_DYN
111
112/** GDB JIT: Code entry. */
113typedef struct GDBJITCODEENTRY
114{
115 struct GDBJITCODEENTRY *pNext;
116 struct GDBJITCODEENTRY *pPrev;
117 uint8_t *pbSymFile;
118 uint64_t cbSymFile;
119} GDBJITCODEENTRY;
120
121/** GDB JIT: Actions. */
122typedef enum GDBJITACTIONS : uint32_t
123{
124 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
125} GDBJITACTIONS;
126
127/** GDB JIT: Descriptor. */
128typedef struct GDBJITDESCRIPTOR
129{
130 uint32_t uVersion;
131 GDBJITACTIONS enmAction;
132 GDBJITCODEENTRY *pRelevant;
133 GDBJITCODEENTRY *pHead;
134 /** Our addition: */
135 GDBJITCODEENTRY *pTail;
136} GDBJITDESCRIPTOR;
137
138/** GDB JIT: Our simple symbol file data. */
139typedef struct GDBJITSYMFILE
140{
141 Elf64_Ehdr EHdr;
142# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
143 Elf64_Shdr aShdrs[5];
144# else
145 Elf64_Shdr aShdrs[7];
146 Elf64_Phdr aPhdrs[2];
147# endif
148 /** The dwarf ehframe data for the chunk. */
149 uint8_t abEhFrame[512];
150 char szzStrTab[128];
151 Elf64_Sym aSymbols[3];
152# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
153 Elf64_Sym aDynSyms[2];
154 Elf64_Dyn aDyn[6];
155# endif
156} GDBJITSYMFILE;
157
158extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
159extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
160
161/** Init once for g_IemNativeGdbJitLock. */
162static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
163/** Init once for the critical section. */
164static RTCRITSECT g_IemNativeGdbJitLock;
165
166/** GDB reads the info here. */
167GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
168
169/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
170DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
171{
172 ASMNopPause();
173}
174
175/** @callback_method_impl{FNRTONCE} */
176static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
177{
178 RT_NOREF(pvUser);
179 return RTCritSectInit(&g_IemNativeGdbJitLock);
180}
181
182
183# endif /* IEMNATIVE_USE_GDB_JIT */
184
185/**
186 * Per-chunk unwind info for non-windows hosts.
187 */
188typedef struct IEMEXECMEMCHUNKEHFRAME
189{
190# ifdef IEMNATIVE_USE_LIBUNWIND
191 /** The offset of the FDA into abEhFrame. */
192 uintptr_t offFda;
193# else
194 /** 'struct object' storage area. */
195 uint8_t abObject[1024];
196# endif
197# ifdef IEMNATIVE_USE_GDB_JIT
198# if 0
199 /** The GDB JIT 'symbol file' data. */
200 GDBJITSYMFILE GdbJitSymFile;
201# endif
202 /** The GDB JIT list entry. */
203 GDBJITCODEENTRY GdbJitEntry;
204# endif
205 /** The dwarf ehframe data for the chunk. */
206 uint8_t abEhFrame[512];
207} IEMEXECMEMCHUNKEHFRAME;
208/** Pointer to per-chunk info info for non-windows hosts. */
209typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
210#endif
211
212
213/**
214 * An chunk of executable memory.
215 */
216typedef struct IEMEXECMEMCHUNK
217{
218 /** Number of free items in this chunk. */
219 uint32_t cFreeUnits;
220 /** Hint were to start searching for free space in the allocation bitmap. */
221 uint32_t idxFreeHint;
222 /** Pointer to the chunk. */
223 void *pvChunk;
224#ifdef IN_RING3
225 /**
226 * Pointer to the unwind information.
227 *
228 * This is used during C++ throw and longjmp (windows and probably most other
229 * platforms). Some debuggers (windbg) makes use of it as well.
230 *
231 * Windows: This is allocated from hHeap on windows because (at least for
232 * AMD64) the UNWIND_INFO structure address in the
233 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
234 *
235 * Others: Allocated from the regular heap to avoid unnecessary executable data
236 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
237 void *pvUnwindInfo;
238#elif defined(IN_RING0)
239 /** Allocation handle. */
240 RTR0MEMOBJ hMemObj;
241#endif
242} IEMEXECMEMCHUNK;
243/** Pointer to a memory chunk. */
244typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
245
246
247/**
248 * Executable memory allocator for the native recompiler.
249 */
250typedef struct IEMEXECMEMALLOCATOR
251{
252 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
253 uint32_t uMagic;
254
255 /** The chunk size. */
256 uint32_t cbChunk;
257 /** The maximum number of chunks. */
258 uint32_t cMaxChunks;
259 /** The current number of chunks. */
260 uint32_t cChunks;
261 /** Hint where to start looking for available memory. */
262 uint32_t idxChunkHint;
263 /** Statistics: Current number of allocations. */
264 uint32_t cAllocations;
265
266 /** The total amount of memory available. */
267 uint64_t cbTotal;
268 /** Total amount of free memory. */
269 uint64_t cbFree;
270 /** Total amount of memory allocated. */
271 uint64_t cbAllocated;
272
273 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
274 *
275 * Since the chunk size is a power of two and the minimum chunk size is a lot
276 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
277 * require a whole number of uint64_t elements in the allocation bitmap. So,
278 * for sake of simplicity, they are allocated as one continous chunk for
279 * simplicity/laziness. */
280 uint64_t *pbmAlloc;
281 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
282 uint32_t cUnitsPerChunk;
283 /** Number of bitmap elements per chunk (for quickly locating the bitmap
284 * portion corresponding to an chunk). */
285 uint32_t cBitmapElementsPerChunk;
286
287#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
288 /** The next chunk to prune in. */
289 uint32_t idxChunkPrune;
290 /** Where in chunk offset to start pruning at. */
291 uint32_t offChunkPrune;
292 /** Profiling the pruning code. */
293 STAMPROFILE StatPruneProf;
294 /** Number of bytes recovered by the pruning. */
295 STAMPROFILE StatPruneRecovered;
296#endif
297
298#ifdef VBOX_WITH_STATISTICS
299 STAMPROFILE StatAlloc;
300#endif
301
302
303#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
304 /** Pointer to the array of unwind info running parallel to aChunks (same
305 * allocation as this structure, located after the bitmaps).
306 * (For Windows, the structures must reside in 32-bit RVA distance to the
307 * actual chunk, so they are allocated off the chunk.) */
308 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
309#endif
310
311 /** The allocation chunks. */
312 RT_FLEXIBLE_ARRAY_EXTENSION
313 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
314} IEMEXECMEMALLOCATOR;
315/** Pointer to an executable memory allocator. */
316typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
317
318/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
319#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
320
321
322#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
323/**
324 * Allocation header.
325 */
326typedef struct IEMEXECMEMALLOCHDR
327{
328 /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
329 uint32_t uMagic;
330 /** The allocation chunk (for speeding up freeing). */
331 uint32_t idxChunk;
332 /** Pointer to the translation block the allocation belongs to.
333 * This is the whole point of the header. */
334 PIEMTB pTb;
335} IEMEXECMEMALLOCHDR;
336/** Pointer to an allocation header. */
337typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;
338/** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */
339# define IEMEXECMEMALLOCHDR_MAGIC UINT32_C(0x4d657845)
340#endif
341
342
343static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
344
345#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
346/**
347 * Frees up executable memory when we're out space.
348 *
349 * This is an alternative to iemTbAllocatorFreeupNativeSpace() that frees up
350 * space in a more linear fashion from the allocator's point of view. It may
351 * also defragment if implemented & enabled
352 */
353static void iemExecMemAllocatorPrune(PVMCPU pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
354{
355# ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
356# error "IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING requires IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER"
357# endif
358 STAM_REL_PROFILE_START(&pExecMemAllocator->StatPruneProf, a);
359
360 /*
361 * Before we can start, we must process delayed frees.
362 */
363 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
364
365 AssertCompile(RT_IS_POWER_OF_TWO(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE));
366
367 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
368 AssertReturnVoid(RT_IS_POWER_OF_TWO(cbChunk));
369 AssertReturnVoid(cbChunk >= _1M && cbChunk <= _256M); /* see iemExecMemAllocatorInit */
370
371 uint32_t const cChunks = pExecMemAllocator->cChunks;
372 AssertReturnVoid(cChunks == pExecMemAllocator->cMaxChunks);
373 AssertReturnVoid(cChunks >= 1);
374
375 Assert(!pVCpu->iem.s.pCurTbR3);
376
377 /*
378 * Decide how much to prune. The chunk is is a multiple of two, so we'll be
379 * scanning a multiple of two here as well.
380 */
381 uint32_t cbToPrune = cbChunk;
382
383 /* Never more than 25%. */
384 if (cChunks < 4)
385 cbToPrune /= cChunks == 1 ? 4 : 2;
386
387 /* Upper limit. In a debug build a 4MB limit averages out at ~0.6ms per call. */
388 if (cbToPrune > _4M)
389 cbToPrune = _4M;
390
391 /*
392 * Adjust the pruning chunk and offset accordingly.
393 */
394 uint32_t idxChunk = pExecMemAllocator->idxChunkPrune;
395 uint32_t offChunk = pExecMemAllocator->offChunkPrune;
396 offChunk &= ~(uint32_t)(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1U);
397 if (offChunk >= cbChunk)
398 {
399 offChunk = 0;
400 idxChunk += 1;
401 }
402 if (idxChunk >= cChunks)
403 {
404 offChunk = 0;
405 idxChunk = 0;
406 }
407
408 uint32_t const offPruneEnd = RT_MIN(offChunk + cbToPrune, cbChunk);
409
410 /*
411 * Do the pruning. The current approach is the sever kind.
412 */
413 uint64_t cbPruned = 0;
414 uint8_t * const pbChunk = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunk;
415 while (offChunk < offPruneEnd)
416 {
417 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];
418
419 /* Is this the start of an allocation block for TB? (We typically have
420 one allocation at the start of each chunk for the unwind info where
421 pTb is NULL.) */
422 if ( pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC
423 && pHdr->pTb != NULL
424 && pHdr->idxChunk == idxChunk)
425 {
426 PIEMTB const pTb = pHdr->pTb;
427 AssertPtr(pTb);
428
429 /* We now have to check that this isn't a old freed header, given
430 that we don't invalidate the header upon free because of darwin
431 restrictions on executable memory (iemExecMemAllocatorFree).
432 This relies upon iemTbAllocatorFreeInner resetting TB members. */
433 if ( pTb->Native.paInstructions == (PIEMNATIVEINSTR)(pHdr + 1)
434 && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
435 {
436 uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),
437 IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
438 AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */
439
440 iemTbAllocatorFree(pVCpu, pTb);
441
442 cbPruned += cbBlock;
443 offChunk += cbBlock;
444 }
445 else
446 offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
447 }
448 else
449 offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
450 }
451 STAM_REL_PROFILE_ADD_PERIOD(&pExecMemAllocator->StatPruneRecovered, cbPruned);
452
453 /*
454 * Save the current pruning point.
455 */
456 pExecMemAllocator->offChunkPrune = offChunk;
457 pExecMemAllocator->idxChunkPrune = idxChunk;
458
459 STAM_REL_PROFILE_STOP(&pExecMemAllocator->StatPruneProf, a);
460}
461#endif /* IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING */
462
463
464/**
465 * Try allocate a block of @a cReqUnits in the chunk @a idxChunk.
466 */
467static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
468 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb)
469{
470 /*
471 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
472 */
473 Assert(!(cToScan & 63));
474 Assert(!(idxFirst & 63));
475 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
476 pbmAlloc += idxFirst / 64;
477
478 /*
479 * Scan the bitmap for cReqUnits of consequtive clear bits
480 */
481 /** @todo This can probably be done more efficiently for non-x86 systems. */
482 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
483 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
484 {
485 uint32_t idxAddBit = 1;
486 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
487 idxAddBit++;
488 if (idxAddBit >= cReqUnits)
489 {
490 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
491
492 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
493 pChunk->cFreeUnits -= cReqUnits;
494 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
495
496 pExecMemAllocator->cAllocations += 1;
497 uint32_t const cbReq = cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
498 pExecMemAllocator->cbAllocated += cbReq;
499 pExecMemAllocator->cbFree -= cbReq;
500 pExecMemAllocator->idxChunkHint = idxChunk;
501
502 void * const pvMem = (uint8_t *)pChunk->pvChunk
503 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
504#ifdef RT_OS_DARWIN
505 /*
506 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
507 * on darwin. So, we mark the pages returned as read+write after alloc and
508 * expect the caller to call iemExecMemAllocatorReadyForUse when done
509 * writing to the allocation.
510 *
511 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
512 * for details.
513 */
514 /** @todo detect if this is necessary... it wasn't required on 10.15 or
515 * whatever older version it was. */
516 int rc = RTMemProtect(pvMem, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
517 AssertRC(rc);
518#endif
519
520 /*
521 * Initialize the header and return.
522 */
523# ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
524 PIEMEXECMEMALLOCHDR const pHdr = (PIEMEXECMEMALLOCHDR)pvMem;
525 pHdr->uMagic = IEMEXECMEMALLOCHDR_MAGIC;
526 pHdr->idxChunk = idxChunk;
527 pHdr->pTb = pTb;
528 return pHdr + 1;
529#else
530 RT_NOREF(pTb);
531 return pvMem;
532#endif
533 }
534
535 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
536 }
537 return NULL;
538}
539
540
541static void *
542iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq, PIEMTB pTb)
543{
544 /*
545 * Figure out how much to allocate.
546 */
547#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
548 uint32_t const cReqUnits = (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
549#else
550 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
551#endif
552 >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
553 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
554 {
555 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
556 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
557 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
558 {
559 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
560 pExecMemAllocator->cUnitsPerChunk - idxHint,
561 cReqUnits, idxChunk, pTb);
562 if (pvRet)
563 return pvRet;
564 }
565 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
566 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
567 cReqUnits, idxChunk, pTb);
568 }
569 return NULL;
570}
571
572
573/**
574 * Allocates @a cbReq bytes of executable memory.
575 *
576 * @returns Pointer to the memory, NULL if out of memory or other problem
577 * encountered.
578 * @param pVCpu The cross context virtual CPU structure of the calling
579 * thread.
580 * @param cbReq How many bytes are required.
581 * @param pTb The translation block that will be using the allocation.
582 */
583DECLHIDDEN(void *) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb) RT_NOEXCEPT
584{
585 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
586 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
587 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
588 STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);
589
590 for (unsigned iIteration = 0;; iIteration++)
591 {
592 if (cbReq <= pExecMemAllocator->cbFree)
593 {
594 uint32_t const cChunks = pExecMemAllocator->cChunks;
595 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
596 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
597 {
598 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
599 if (pvRet)
600 {
601 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
602 return pvRet;
603 }
604 }
605 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
606 {
607 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
608 if (pvRet)
609 {
610 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
611 return pvRet;
612 }
613 }
614 }
615
616 /*
617 * Can we grow it with another chunk?
618 */
619 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
620 {
621 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
622 AssertLogRelRCReturn(rc, NULL);
623
624 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
625 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
626 if (pvRet)
627 {
628 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
629 return pvRet;
630 }
631 AssertFailed();
632 }
633
634 /*
635 * Try prune native TBs once.
636 */
637 if (iIteration == 0)
638 {
639#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
640 iemExecMemAllocatorPrune(pVCpu, pExecMemAllocator);
641#else
642 /* No header included in the instruction count here. */
643 uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);
644 iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);
645#endif
646 }
647 else
648 {
649 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
650 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
651 return NULL;
652 }
653 }
654}
655
656
657/** This is a hook that we may need later for changing memory protection back
658 * to readonly+exec */
659DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT
660{
661#ifdef RT_OS_DARWIN
662 /* See iemExecMemAllocatorAllocInChunkInt for the explanation. */
663 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
664 AssertRC(rc); RT_NOREF(pVCpu);
665
666 /*
667 * Flush the instruction cache:
668 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
669 */
670 /* sys_dcache_flush(pv, cb); - not necessary */
671 sys_icache_invalidate(pv, cb);
672#else
673 RT_NOREF(pVCpu, pv, cb);
674#endif
675}
676
677
678/**
679 * Frees executable memory.
680 */
681DECLHIDDEN(void) iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT
682{
683 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
684 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
685 AssertPtr(pv);
686#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
687 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
688
689 /* Align the size as we did when allocating the block. */
690 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
691
692#else
693 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;
694 Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
695 AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);
696 uint32_t const idxChunk = pHdr->idxChunk;
697 AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);
698 pv = pHdr;
699
700 /* Adjust and align the size to cover the whole allocation area. */
701 cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
702#endif
703
704 /* Free it / assert sanity. */
705 bool fFound = false;
706 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
707#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
708 uint32_t const cChunks = pExecMemAllocator->cChunks;
709 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
710#endif
711 {
712 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
713 fFound = offChunk < cbChunk;
714 if (fFound)
715 {
716 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
717 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
718
719 /* Check that it's valid and free it. */
720 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
721 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
722 for (uint32_t i = 1; i < cReqUnits; i++)
723 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
724 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
725
726#if 0 /*def IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER - not necessary, we'll validate the header in the pruning code. */
727# ifdef RT_OS_DARWIN
728 int rc = RTMemProtect(pHdr, sizeof(*pHdr), RTMEM_PROT_WRITE | RTMEM_PROT_READ);
729 AssertRC(rc); RT_NOREF(pVCpu);
730# endif
731 pHdr->uMagic = 0;
732 pHdr->idxChunk = 0;
733 pHdr->pTb = NULL;
734# ifdef RT_OS_DARWIN
735 rc = RTMemProtect(pHdr, sizeof(*pHdr), RTMEM_PROT_EXEC | RTMEM_PROT_READ);
736 AssertRC(rc); RT_NOREF(pVCpu);
737# endif
738#endif
739 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
740 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
741
742 /* Update the stats. */
743 pExecMemAllocator->cbAllocated -= cb;
744 pExecMemAllocator->cbFree += cb;
745 pExecMemAllocator->cAllocations -= 1;
746 return;
747 }
748 }
749 AssertFailed();
750}
751
752
753
754#ifdef IN_RING3
755# ifdef RT_OS_WINDOWS
756
757/**
758 * Initializes the unwind info structures for windows hosts.
759 */
760static int
761iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
762 void *pvChunk, uint32_t idxChunk)
763{
764 RT_NOREF(pVCpu);
765
766 /*
767 * The AMD64 unwind opcodes.
768 *
769 * This is a program that starts with RSP after a RET instruction that
770 * ends up in recompiled code, and the operations we describe here will
771 * restore all non-volatile registers and bring RSP back to where our
772 * RET address is. This means it's reverse order from what happens in
773 * the prologue.
774 *
775 * Note! Using a frame register approach here both because we have one
776 * and but mainly because the UWOP_ALLOC_LARGE argument values
777 * would be a pain to write initializers for. On the positive
778 * side, we're impervious to changes in the the stack variable
779 * area can can deal with dynamic stack allocations if necessary.
780 */
781 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
782 {
783 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
784 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
785 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
786 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
787 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
788 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
789 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
790 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
791 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
792 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
793 };
794 union
795 {
796 IMAGE_UNWIND_INFO Info;
797 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
798 } s_UnwindInfo =
799 {
800 {
801 /* .Version = */ 1,
802 /* .Flags = */ 0,
803 /* .SizeOfProlog = */ 16, /* whatever */
804 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
805 /* .FrameRegister = */ X86_GREG_xBP,
806 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
807 }
808 };
809 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
810 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
811
812 /*
813 * Calc how much space we need and allocate it off the exec heap.
814 */
815 unsigned const cFunctionEntries = 1;
816 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
817 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
818 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
819 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL);
820 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
821 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
822
823 /*
824 * Initialize the structures.
825 */
826 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
827
828 paFunctions[0].BeginAddress = 0;
829 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
830 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
831
832 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
833 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
834
835 /*
836 * Register it.
837 */
838 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
839 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
840
841 return VINF_SUCCESS;
842}
843
844
845# else /* !RT_OS_WINDOWS */
846
847/**
848 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
849 */
850DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
851{
852 if (iValue >= 64)
853 {
854 Assert(iValue < 0x2000);
855 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
856 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
857 }
858 else if (iValue >= 0)
859 *Ptr.pb++ = (uint8_t)iValue;
860 else if (iValue > -64)
861 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
862 else
863 {
864 Assert(iValue > -0x2000);
865 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
866 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
867 }
868 return Ptr;
869}
870
871
872/**
873 * Emits an ULEB128 encoded value (up to 64-bit wide).
874 */
875DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
876{
877 while (uValue >= 0x80)
878 {
879 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
880 uValue >>= 7;
881 }
882 *Ptr.pb++ = (uint8_t)uValue;
883 return Ptr;
884}
885
886
887/**
888 * Emits a CFA rule as register @a uReg + offset @a off.
889 */
890DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
891{
892 *Ptr.pb++ = DW_CFA_def_cfa;
893 Ptr = iemDwarfPutUleb128(Ptr, uReg);
894 Ptr = iemDwarfPutUleb128(Ptr, off);
895 return Ptr;
896}
897
898
899/**
900 * Emits a register (@a uReg) save location:
901 * CFA + @a off * data_alignment_factor
902 */
903DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
904{
905 if (uReg < 0x40)
906 *Ptr.pb++ = DW_CFA_offset | uReg;
907 else
908 {
909 *Ptr.pb++ = DW_CFA_offset_extended;
910 Ptr = iemDwarfPutUleb128(Ptr, uReg);
911 }
912 Ptr = iemDwarfPutUleb128(Ptr, off);
913 return Ptr;
914}
915
916
917# if 0 /* unused */
918/**
919 * Emits a register (@a uReg) save location, using signed offset:
920 * CFA + @a offSigned * data_alignment_factor
921 */
922DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
923{
924 *Ptr.pb++ = DW_CFA_offset_extended_sf;
925 Ptr = iemDwarfPutUleb128(Ptr, uReg);
926 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
927 return Ptr;
928}
929# endif
930
931
932/**
933 * Initializes the unwind info section for non-windows hosts.
934 */
935static int
936iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
937 void *pvChunk, uint32_t idxChunk)
938{
939 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
940 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
941
942 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
943
944 /*
945 * Generate the CIE first.
946 */
947# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
948 uint8_t const iDwarfVer = 3;
949# else
950 uint8_t const iDwarfVer = 4;
951# endif
952 RTPTRUNION const PtrCie = Ptr;
953 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
954 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
955 *Ptr.pb++ = iDwarfVer; /* DwARF version */
956 *Ptr.pb++ = 0; /* Augmentation. */
957 if (iDwarfVer >= 4)
958 {
959 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
960 *Ptr.pb++ = 0; /* Segment selector size. */
961 }
962# ifdef RT_ARCH_AMD64
963 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
964# else
965 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
966# endif
967 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
968# ifdef RT_ARCH_AMD64
969 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
970# elif defined(RT_ARCH_ARM64)
971 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
972# else
973# error "port me"
974# endif
975 /* Initial instructions: */
976# ifdef RT_ARCH_AMD64
977 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
978 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
979 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
980 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
981 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
982 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
983 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
984 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
985# elif defined(RT_ARCH_ARM64)
986# if 1
987 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
988# else
989 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
990# endif
991 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
992 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
993 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
994 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
995 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
996 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
997 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
998 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
999 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
1000 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
1001 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
1002 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
1003 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
1004 /** @todo we we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
1005# else
1006# error "port me"
1007# endif
1008 while ((Ptr.u - PtrCie.u) & 3)
1009 *Ptr.pb++ = DW_CFA_nop;
1010 /* Finalize the CIE size. */
1011 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
1012
1013 /*
1014 * Generate an FDE for the whole chunk area.
1015 */
1016# ifdef IEMNATIVE_USE_LIBUNWIND
1017 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
1018# endif
1019 RTPTRUNION const PtrFde = Ptr;
1020 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
1021 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
1022 Ptr.pu32++;
1023 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
1024 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this PDE. */
1025# if 0 /* not requried for recent libunwind.dylib nor recent libgcc/glib. */
1026 *Ptr.pb++ = DW_CFA_nop;
1027# endif
1028 while ((Ptr.u - PtrFde.u) & 3)
1029 *Ptr.pb++ = DW_CFA_nop;
1030 /* Finalize the FDE size. */
1031 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
1032
1033 /* Terminator entry. */
1034 *Ptr.pu32++ = 0;
1035 *Ptr.pu32++ = 0; /* just to be sure... */
1036 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
1037
1038 /*
1039 * Register it.
1040 */
1041# ifdef IEMNATIVE_USE_LIBUNWIND
1042 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
1043# else
1044 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
1045 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
1046# endif
1047
1048# ifdef IEMNATIVE_USE_GDB_JIT
1049 /*
1050 * Now for telling GDB about this (experimental).
1051 *
1052 * This seems to work best with ET_DYN.
1053 */
1054 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk,
1055 sizeof(GDBJITSYMFILE), NULL);
1056 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1057 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1058
1059 RT_ZERO(*pSymFile);
1060
1061 /*
1062 * The ELF header:
1063 */
1064 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1065 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1066 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1067 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1068 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1069 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1070 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1071 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1072# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1073 pSymFile->EHdr.e_type = ET_DYN;
1074# else
1075 pSymFile->EHdr.e_type = ET_REL;
1076# endif
1077# ifdef RT_ARCH_AMD64
1078 pSymFile->EHdr.e_machine = EM_AMD64;
1079# elif defined(RT_ARCH_ARM64)
1080 pSymFile->EHdr.e_machine = EM_AARCH64;
1081# else
1082# error "port me"
1083# endif
1084 pSymFile->EHdr.e_version = 1; /*?*/
1085 pSymFile->EHdr.e_entry = 0;
1086# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1087 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1088# else
1089 pSymFile->EHdr.e_phoff = 0;
1090# endif
1091 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1092 pSymFile->EHdr.e_flags = 0;
1093 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1095 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1096 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1097# else
1098 pSymFile->EHdr.e_phentsize = 0;
1099 pSymFile->EHdr.e_phnum = 0;
1100# endif
1101 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1102 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1103 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1104
1105 uint32_t offStrTab = 0;
1106#define APPEND_STR(a_szStr) do { \
1107 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1108 offStrTab += sizeof(a_szStr); \
1109 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1110 } while (0)
1111#define APPEND_STR_FMT(a_szStr, ...) do { \
1112 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1113 offStrTab++; \
1114 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1115 } while (0)
1116
1117 /*
1118 * Section headers.
1119 */
1120 /* Section header #0: NULL */
1121 unsigned i = 0;
1122 APPEND_STR("");
1123 RT_ZERO(pSymFile->aShdrs[i]);
1124 i++;
1125
1126 /* Section header: .eh_frame */
1127 pSymFile->aShdrs[i].sh_name = offStrTab;
1128 APPEND_STR(".eh_frame");
1129 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1130 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1131# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1132 pSymFile->aShdrs[i].sh_offset
1133 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1134# else
1135 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1136 pSymFile->aShdrs[i].sh_offset = 0;
1137# endif
1138
1139 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1140 pSymFile->aShdrs[i].sh_link = 0;
1141 pSymFile->aShdrs[i].sh_info = 0;
1142 pSymFile->aShdrs[i].sh_addralign = 1;
1143 pSymFile->aShdrs[i].sh_entsize = 0;
1144 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1145 i++;
1146
1147 /* Section header: .shstrtab */
1148 unsigned const iShStrTab = i;
1149 pSymFile->EHdr.e_shstrndx = iShStrTab;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".shstrtab");
1152 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 /* Section header: .symbols */
1169 pSymFile->aShdrs[i].sh_name = offStrTab;
1170 APPEND_STR(".symtab");
1171 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1172 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1173 pSymFile->aShdrs[i].sh_offset
1174 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1175 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1176 pSymFile->aShdrs[i].sh_link = iShStrTab;
1177 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1178 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1179 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1180 i++;
1181
1182# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1183 /* Section header: .symbols */
1184 pSymFile->aShdrs[i].sh_name = offStrTab;
1185 APPEND_STR(".dynsym");
1186 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1187 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1188 pSymFile->aShdrs[i].sh_offset
1189 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1190 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1191 pSymFile->aShdrs[i].sh_link = iShStrTab;
1192 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1193 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1194 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1195 i++;
1196# endif
1197
1198# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1199 /* Section header: .dynamic */
1200 pSymFile->aShdrs[i].sh_name = offStrTab;
1201 APPEND_STR(".dynamic");
1202 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1203 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1204 pSymFile->aShdrs[i].sh_offset
1205 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1206 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1207 pSymFile->aShdrs[i].sh_link = iShStrTab;
1208 pSymFile->aShdrs[i].sh_info = 0;
1209 pSymFile->aShdrs[i].sh_addralign = 1;
1210 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1211 i++;
1212# endif
1213
1214 /* Section header: .text */
1215 unsigned const iShText = i;
1216 pSymFile->aShdrs[i].sh_name = offStrTab;
1217 APPEND_STR(".text");
1218 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1219 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1220# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1221 pSymFile->aShdrs[i].sh_offset
1222 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1223# else
1224 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1225 pSymFile->aShdrs[i].sh_offset = 0;
1226# endif
1227 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1228 pSymFile->aShdrs[i].sh_link = 0;
1229 pSymFile->aShdrs[i].sh_info = 0;
1230 pSymFile->aShdrs[i].sh_addralign = 1;
1231 pSymFile->aShdrs[i].sh_entsize = 0;
1232 i++;
1233
1234 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1235
1236# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1237 /*
1238 * The program headers:
1239 */
1240 /* Everything in a single LOAD segment: */
1241 i = 0;
1242 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1243 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1244 pSymFile->aPhdrs[i].p_offset
1245 = pSymFile->aPhdrs[i].p_vaddr
1246 = pSymFile->aPhdrs[i].p_paddr = 0;
1247 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1248 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1249 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1250 i++;
1251 /* The .dynamic segment. */
1252 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1253 pSymFile->aPhdrs[i].p_flags = PF_R;
1254 pSymFile->aPhdrs[i].p_offset
1255 = pSymFile->aPhdrs[i].p_vaddr
1256 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1257 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1258 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1259 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1260 i++;
1261
1262 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1263
1264 /*
1265 * The dynamic section:
1266 */
1267 i = 0;
1268 pSymFile->aDyn[i].d_tag = DT_SONAME;
1269 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1270 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1271 i++;
1272 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1273 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1274 i++;
1275 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1276 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1277 i++;
1278 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1279 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1280 i++;
1281 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1282 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1283 i++;
1284 pSymFile->aDyn[i].d_tag = DT_NULL;
1285 i++;
1286 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1287# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1288
1289 /*
1290 * Symbol tables:
1291 */
1292 /** @todo gdb doesn't seem to really like this ... */
1293 i = 0;
1294 pSymFile->aSymbols[i].st_name = 0;
1295 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1296 pSymFile->aSymbols[i].st_value = 0;
1297 pSymFile->aSymbols[i].st_size = 0;
1298 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1299 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1300# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1301 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1302# endif
1303 i++;
1304
1305 pSymFile->aSymbols[i].st_name = 0;
1306 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1307 pSymFile->aSymbols[i].st_value = 0;
1308 pSymFile->aSymbols[i].st_size = 0;
1309 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1310 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1311 i++;
1312
1313 pSymFile->aSymbols[i].st_name = offStrTab;
1314 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1315# if 0
1316 pSymFile->aSymbols[i].st_shndx = iShText;
1317 pSymFile->aSymbols[i].st_value = 0;
1318# else
1319 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1320 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1321# endif
1322 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1323 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1324 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1325# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1326 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1327 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1328# endif
1329 i++;
1330
1331 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1332 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1333
1334 /*
1335 * The GDB JIT entry and informing GDB.
1336 */
1337 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1338# if 1
1339 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1340# else
1341 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1342# endif
1343
1344 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1345 RTCritSectEnter(&g_IemNativeGdbJitLock);
1346 pEhFrame->GdbJitEntry.pNext = NULL;
1347 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1348 if (__jit_debug_descriptor.pTail)
1349 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1350 else
1351 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1352 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1353 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1354
1355 /* Notify GDB: */
1356 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1357 __jit_debug_register_code();
1358 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1359 RTCritSectLeave(&g_IemNativeGdbJitLock);
1360
1361# else /* !IEMNATIVE_USE_GDB_JIT */
1362 RT_NOREF(pVCpu);
1363# endif /* !IEMNATIVE_USE_GDB_JIT */
1364
1365 return VINF_SUCCESS;
1366}
1367
1368# endif /* !RT_OS_WINDOWS */
1369#endif /* IN_RING3 */
1370
1371
1372/**
1373 * Adds another chunk to the executable memory allocator.
1374 *
1375 * This is used by the init code for the initial allocation and later by the
1376 * regular allocator function when it's out of memory.
1377 */
1378static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1379{
1380 /* Check that we've room for growth. */
1381 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1382 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1383
1384 /* Allocate a chunk. */
1385#ifdef RT_OS_DARWIN
1386 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1387#else
1388 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1389#endif
1390 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1391
1392 /*
1393 * Add the chunk.
1394 *
1395 * This must be done before the unwind init so windows can allocate
1396 * memory from the chunk when using the alternative sub-allocator.
1397 */
1398 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1399#ifdef IN_RING3
1400 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1401#endif
1402 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1403 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1404 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1405 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1411 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 int rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420 { /* likely */ }
1421 else
1422 {
1423 /* Just in case the impossible happens, undo the above up: */
1424 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1425 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1426 pExecMemAllocator->cChunks = idxChunk;
1427 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1428 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1429 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1430 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1431
1432 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1433 return rc;
1434 }
1435#endif
1436 return VINF_SUCCESS;
1437}
1438
1439
1440/**
1441 * Initializes the executable memory allocator for native recompilation on the
1442 * calling EMT.
1443 *
1444 * @returns VBox status code.
1445 * @param pVCpu The cross context virtual CPU structure of the calling
1446 * thread.
1447 * @param cbMax The max size of the allocator.
1448 * @param cbInitial The initial allocator size.
1449 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1450 * dependent).
1451 */
1452int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT
1453{
1454 /*
1455 * Validate input.
1456 */
1457 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1458 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1459 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1460 || cbChunk == 0
1461 || ( RT_IS_POWER_OF_TWO(cbChunk)
1462 && cbChunk >= _1M
1463 && cbChunk <= _256M
1464 && cbChunk <= cbMax),
1465 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1466 VERR_OUT_OF_RANGE);
1467
1468 /*
1469 * Adjust/figure out the chunk size.
1470 */
1471 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1472 {
1473 if (cbMax >= _256M)
1474 cbChunk = _64M;
1475 else
1476 {
1477 if (cbMax < _16M)
1478 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1479 else
1480 cbChunk = (uint32_t)cbMax / 4;
1481 if (!RT_IS_POWER_OF_TWO(cbChunk))
1482 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1483 }
1484 }
1485
1486 if (cbChunk > cbMax)
1487 cbMax = cbChunk;
1488 else
1489 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1490 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1491 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1492
1493 /*
1494 * Allocate and initialize the allocatore instance.
1495 */
1496 size_t const offBitmaps = RT_ALIGN_Z(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]), RT_CACHELINE_SIZE);
1497 size_t const cbBitmaps = (size_t)(cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)) * cMaxChunks;
1498 size_t cbNeeded = offBitmaps + cbBitmaps;
1499 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1500 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1501#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1502 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1503 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1504#endif
1505 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1506 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1507 VERR_NO_MEMORY);
1508 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1509 pExecMemAllocator->cbChunk = cbChunk;
1510 pExecMemAllocator->cMaxChunks = cMaxChunks;
1511 pExecMemAllocator->cChunks = 0;
1512 pExecMemAllocator->idxChunkHint = 0;
1513 pExecMemAllocator->cAllocations = 0;
1514 pExecMemAllocator->cbTotal = 0;
1515 pExecMemAllocator->cbFree = 0;
1516 pExecMemAllocator->cbAllocated = 0;
1517 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1518 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1519 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1520 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmaps); /* Mark everything as allocated. Clear when chunks are added. */
1521#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1522 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1523#endif
1524 for (uint32_t i = 0; i < cMaxChunks; i++)
1525 {
1526 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1527 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1528 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1529#ifdef IN_RING0
1530 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1531#else
1532 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1533#endif
1534 }
1535 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1536
1537 /*
1538 * Do the initial allocations.
1539 */
1540 while (cbInitial < (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1541 {
1542 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1543 AssertLogRelRCReturn(rc, rc);
1544 }
1545
1546 pExecMemAllocator->idxChunkHint = 0;
1547
1548 /*
1549 * Register statistics.
1550 */
1551 PUVM const pUVM = pVCpu->pUVCpu->pUVM;
1552 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1553 "Current number of allocations", "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);
1554 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1555 "Currently allocated chunks", "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);
1556 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1557 "Maximum number of chunks", "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);
1558 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1559 "Allocation chunk size", "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);
1560 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1561 "Number of bytes current allocated", "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);
1562 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1563 "Number of bytes current free", "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);
1564 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1565 "Total number of byte", "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);
1566#ifdef VBOX_WITH_STATISTICS
1567 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1568 "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);
1569#endif
1570#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
1571 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneProf, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1572 "Pruning executable memory (alt)", "/IEM/CPU%u/re/ExecMem/Pruning", pVCpu->idCpu);
1573 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneRecovered, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES_PER_CALL,
1574 "Bytes recovered while pruning", "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu);
1575#endif
1576
1577 return VINF_SUCCESS;
1578}
1579
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette