VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp@ 104731

Last change on this file since 104731 was 104731, checked in by vboxsync, 7 months ago

VMM/IEM: Some instruction cache flushing code for linux.arm64, bugref:10391

1/* $Id: IEMAllN8veExecMem.cpp 104731 2024-05-20 16:48:55Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
50#include <VBox/vmm/iem.h>
51#include <VBox/vmm/cpum.h>
52#include "IEMInternal.h"
53#include <VBox/vmm/vmcc.h>
54#include <VBox/log.h>
55#include <VBox/err.h>
56#include <VBox/param.h>
57#include <iprt/assert.h>
58#include <iprt/mem.h>
59#include <iprt/string.h>
60#if defined(RT_ARCH_AMD64)
61# include <iprt/x86.h>
62#elif defined(RT_ARCH_ARM64)
63# include <iprt/armv8.h>
64#endif
65
66#ifdef RT_OS_WINDOWS
67 # include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
68extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
69extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
70#else
71# include <iprt/formats/dwarf.h>
72# if defined(RT_OS_DARWIN)
73# include <libkern/OSCacheControl.h>
74# define IEMNATIVE_USE_LIBUNWIND
75extern "C" void __register_frame(const void *pvFde);
76extern "C" void __deregister_frame(const void *pvFde);
77# else
78# ifdef DEBUG_bird /** @todo not thread safe yet */
79# define IEMNATIVE_USE_GDB_JIT
80# endif
81# ifdef IEMNATIVE_USE_GDB_JIT
82# include <iprt/critsect.h>
83# include <iprt/once.h>
84# include <iprt/formats/elf64.h>
85# endif
86extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
87extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
88# endif
89#endif
90
91#include "IEMN8veRecompiler.h"
92
93
94/*********************************************************************************************************************************
95* Executable Memory Allocator *
96*********************************************************************************************************************************/
97/** The chunk sub-allocation unit size in bytes. */
98#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 256
99/** The chunk sub-allocation unit size as a shift factor. */
100#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 8
101/** Enables adding a header to the sub-allocator allocations.
102 * This is useful for freeing up executable memory among other things. */
103#define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
104/** Use alternative pruning. */
105#define IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
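
/* A quick worked example of the unit math used by iemExecMemAllocatorAllocInChunk()
 * below: a 1000 byte instruction buffer plus the 16 byte allocation header (on a
 * 64-bit host) occupies 1016 bytes, which rounds up to four 256 byte units, i.e.
 * 1024 bytes:
 *      cReqUnits = (1000 + sizeof(IEMEXECMEMALLOCHDR) + 256 - 1) >> 8 = 4;
 */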
106
107
108#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
109# ifdef IEMNATIVE_USE_GDB_JIT
110# define IEMNATIVE_USE_GDB_JIT_ET_DYN
111
112/** GDB JIT: Code entry. */
113typedef struct GDBJITCODEENTRY
114{
115 struct GDBJITCODEENTRY *pNext;
116 struct GDBJITCODEENTRY *pPrev;
117 uint8_t *pbSymFile;
118 uint64_t cbSymFile;
119} GDBJITCODEENTRY;
120
121/** GDB JIT: Actions. */
122typedef enum GDBJITACTIONS : uint32_t
123{
124 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
125} GDBJITACTIONS;
126
127/** GDB JIT: Descriptor. */
128typedef struct GDBJITDESCRIPTOR
129{
130 uint32_t uVersion;
131 GDBJITACTIONS enmAction;
132 GDBJITCODEENTRY *pRelevant;
133 GDBJITCODEENTRY *pHead;
134 /** Our addition: */
135 GDBJITCODEENTRY *pTail;
136} GDBJITDESCRIPTOR;
137
138/** GDB JIT: Our simple symbol file data. */
139typedef struct GDBJITSYMFILE
140{
141 Elf64_Ehdr EHdr;
142# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
143 Elf64_Shdr aShdrs[5];
144# else
145 Elf64_Shdr aShdrs[7];
146 Elf64_Phdr aPhdrs[2];
147# endif
148 /** The dwarf ehframe data for the chunk. */
149 uint8_t abEhFrame[512];
150 char szzStrTab[128];
151 Elf64_Sym aSymbols[3];
152# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
153 Elf64_Sym aDynSyms[2];
154 Elf64_Dyn aDyn[6];
155# endif
156} GDBJITSYMFILE;
157
158extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
159extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
160
161/** Init once for g_IemNativeGdbJitLock. */
162static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
163 /** Critical section serializing GDB JIT registrations. */
164static RTCRITSECT g_IemNativeGdbJitLock;
165
166/** GDB reads the info here. */
167GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
168
169/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
170DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
171{
172 ASMNopPause();
173}
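
/* How the registration handshake works (see the tail of
 * iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk() below): a new
 * GDBJITCODEENTRY is linked into the descriptor list, enmAction is set to
 * kGdbJitaction_Register and __jit_debug_register_code() is called, at which
 * point the debugger's breakpoint fires and it rescans the entry list. */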
174
175/** @callback_method_impl{FNRTONCE} */
176static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
177{
178 RT_NOREF(pvUser);
179 return RTCritSectInit(&g_IemNativeGdbJitLock);
180}
181
182
183# endif /* IEMNATIVE_USE_GDB_JIT */
184
185/**
186 * Per-chunk unwind info for non-windows hosts.
187 */
188typedef struct IEMEXECMEMCHUNKEHFRAME
189{
190# ifdef IEMNATIVE_USE_LIBUNWIND
191 /** The offset of the FDE into abEhFrame. */
192 uintptr_t offFda;
193# else
194 /** 'struct object' storage area. */
195 uint8_t abObject[1024];
196# endif
197# ifdef IEMNATIVE_USE_GDB_JIT
198# if 0
199 /** The GDB JIT 'symbol file' data. */
200 GDBJITSYMFILE GdbJitSymFile;
201# endif
202 /** The GDB JIT list entry. */
203 GDBJITCODEENTRY GdbJitEntry;
204# endif
205 /** The dwarf ehframe data for the chunk. */
206 uint8_t abEhFrame[512];
207} IEMEXECMEMCHUNKEHFRAME;
208/** Pointer to per-chunk info for non-windows hosts. */
209typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
210#endif
211
212
213/**
214 * A chunk of executable memory.
215 */
216typedef struct IEMEXECMEMCHUNK
217{
218 /** Number of free items in this chunk. */
219 uint32_t cFreeUnits;
220 /** Hint where to start searching for free space in the allocation bitmap. */
221 uint32_t idxFreeHint;
222 /** Pointer to the chunk. */
223 void *pvChunk;
224#ifdef IN_RING3
225 /**
226 * Pointer to the unwind information.
227 *
228 * This is used during C++ throw and longjmp (windows and probably most other
229 * platforms). Some debuggers (windbg) make use of it as well.
230 *
231 * Windows: This is allocated from hHeap on windows because (at least for
232 * AMD64) the UNWIND_INFO structure address in the
233 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
234 *
235 * Others: Allocated from the regular heap to avoid unnecessary executable data
236 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
237 void *pvUnwindInfo;
238#elif defined(IN_RING0)
239 /** Allocation handle. */
240 RTR0MEMOBJ hMemObj;
241#endif
242} IEMEXECMEMCHUNK;
243/** Pointer to a memory chunk. */
244typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
245
246
247/**
248 * Executable memory allocator for the native recompiler.
249 */
250typedef struct IEMEXECMEMALLOCATOR
251{
252 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
253 uint32_t uMagic;
254
255 /** The chunk size. */
256 uint32_t cbChunk;
257 /** The maximum number of chunks. */
258 uint32_t cMaxChunks;
259 /** The current number of chunks. */
260 uint32_t cChunks;
261 /** Hint where to start looking for available memory. */
262 uint32_t idxChunkHint;
263 /** Statistics: Current number of allocations. */
264 uint32_t cAllocations;
265
266 /** The total amount of memory available. */
267 uint64_t cbTotal;
268 /** Total amount of free memory. */
269 uint64_t cbFree;
270 /** Total amount of memory allocated. */
271 uint64_t cbAllocated;
272
273 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
274 *
275 * Since the chunk size is a power of two and the minimum chunk size is a lot
276 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
277 * require a whole number of uint64_t elements in the allocation bitmap. So,
278 * for simplicity's sake they are allocated as one continuous block. (E.g. a
279 * 64 MB chunk with 256 byte units needs 262144 bits, i.e. 4096 uint64_ts.) */
280 uint64_t *pbmAlloc;
281 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
282 uint32_t cUnitsPerChunk;
283 /** Number of bitmap elements per chunk (for quickly locating the bitmap
284 * portion corresponding to a chunk). */
285 uint32_t cBitmapElementsPerChunk;
286
287#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
288 /** The next chunk to prune in. */
289 uint32_t idxChunkPrune;
290 /** Offset within the chunk to start pruning at. */
291 uint32_t offChunkPrune;
292 /** Profiling the pruning code. */
293 STAMPROFILE StatPruneProf;
294 /** Number of bytes recovered by the pruning. */
295 STAMPROFILE StatPruneRecovered;
296#endif
297
298#ifdef VBOX_WITH_STATISTICS
299 STAMPROFILE StatAlloc;
300#endif
301
302
303#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
304 /** Pointer to the array of unwind info running parallel to aChunks (same
305 * allocation as this structure, located after the bitmaps).
306 * (For Windows, the structures must reside in 32-bit RVA distance to the
307 * actual chunk, so they are allocated off the chunk.) */
308 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
309#endif
310
311 /** The allocation chunks. */
312 RT_FLEXIBLE_ARRAY_EXTENSION
313 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
314} IEMEXECMEMALLOCATOR;
315/** Pointer to an executable memory allocator. */
316typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
317
318/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
319#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
320
321
322#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
323/**
324 * Allocation header.
325 */
326typedef struct IEMEXECMEMALLOCHDR
327{
328 /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
329 uint32_t uMagic;
330 /** The allocation chunk (for speeding up freeing). */
331 uint32_t idxChunk;
332 /** Pointer to the translation block the allocation belongs to.
333 * This is the whole point of the header. */
334 PIEMTB pTb;
335} IEMEXECMEMALLOCHDR;
336/** Pointer to an allocation header. */
337typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;
338/** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */
339# define IEMEXECMEMALLOCHDR_MAGIC UINT32_C(0x4d657845)
340#endif
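
/* A sketch of the resulting sub-allocation layout on a 64-bit host:
 *
 *      +0x00   IEMEXECMEMALLOCHDR: uMagic ('ExeM'), idxChunk, pTb  (16 bytes)
 *      +0x10   native instructions - this is the pointer returned to the caller
 *      ...     padding up to the next IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE boundary
 *
 * iemExecMemAllocatorFree() recovers the header by subtracting one
 * IEMEXECMEMALLOCHDR from the caller's pointer. */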
341
342
343static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
344
345#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
346/**
347 * Frees up executable memory when we're out of space.
348 *
349 * This is an alternative to iemTbAllocatorFreeupNativeSpace() that frees up
350 * space in a more linear fashion from the allocator's point of view. It may
351 * also defragment if implemented & enabled.
352 */
353static void iemExecMemAllocatorPrune(PVMCPU pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
354{
355# ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
356# error "IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING requires IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER"
357# endif
358 STAM_REL_PROFILE_START(&pExecMemAllocator->StatPruneProf, a);
359
360 /*
361 * Before we can start, we must process delayed frees.
362 */
363 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
364
365 AssertCompile(RT_IS_POWER_OF_TWO(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE));
366
367 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
368 AssertReturnVoid(RT_IS_POWER_OF_TWO(cbChunk));
369 AssertReturnVoid(cbChunk >= _1M && cbChunk <= _256M); /* see iemExecMemAllocatorInit */
370
371 uint32_t const cChunks = pExecMemAllocator->cChunks;
372 AssertReturnVoid(cChunks == pExecMemAllocator->cMaxChunks);
373 AssertReturnVoid(cChunks >= 1);
374
375 Assert(!pVCpu->iem.s.pCurTbR3);
376
377 /*
378 * Decide how much to prune. The chunk size is a power of two, so we'll be
379 * scanning a power-of-two sized area here as well.
380 */
381 uint32_t cbToPrune = cbChunk;
382
383 /* Never more than 25%. */
384 if (cChunks < 4)
385 cbToPrune /= cChunks == 1 ? 4 : 2;
386
387 /* Upper limit. In a debug build a 4MB limit averages out at ~0.6ms per call. */
388 if (cbToPrune > _4M)
389 cbToPrune = _4M;
390
391 /*
392 * Adjust the pruning chunk and offset accordingly.
393 */
394 uint32_t idxChunk = pExecMemAllocator->idxChunkPrune;
395 uint32_t offChunk = pExecMemAllocator->offChunkPrune;
396 offChunk &= ~(uint32_t)(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1U);
397 if (offChunk >= cbChunk)
398 {
399 offChunk = 0;
400 idxChunk += 1;
401 }
402 if (idxChunk >= cChunks)
403 {
404 offChunk = 0;
405 idxChunk = 0;
406 }
407
408 uint32_t const offPruneEnd = RT_MIN(offChunk + cbToPrune, cbChunk);
409
410 /*
411 * Do the pruning. The current approach is the severe kind.
412 */
413 uint64_t cbPruned = 0;
414 uint8_t * const pbChunk = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunk;
415 while (offChunk < offPruneEnd)
416 {
417 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];
418
419 /* Is this the start of an allocation block for a TB? (We typically have
420 one allocation at the start of each chunk for the unwind info where
421 pTb is NULL.) */
422 if ( pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC
423 && pHdr->pTb != NULL
424 && pHdr->idxChunk == idxChunk)
425 {
426 PIEMTB const pTb = pHdr->pTb;
427 AssertPtr(pTb);
428
429 /* We now have to check that this isn't an old freed header, given
430 that we don't invalidate the header upon free because of darwin
431 restrictions on executable memory (iemExecMemAllocatorFree).
432 This relies upon iemTbAllocatorFreeInner resetting TB members. */
433 if ( pTb->Native.paInstructions == (PIEMNATIVEINSTR)(pHdr + 1)
434 && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
435 {
436 uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),
437 IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
438 AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */
439
440 iemTbAllocatorFree(pVCpu, pTb);
441
442 cbPruned += cbBlock;
443 offChunk += cbBlock;
444 }
445 else
446 offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
447 }
448 else
449 offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
450 }
451 STAM_REL_PROFILE_ADD_PERIOD(&pExecMemAllocator->StatPruneRecovered, cbPruned);
452
453 /*
454 * Save the current pruning point.
455 */
456 pExecMemAllocator->offChunkPrune = offChunk;
457 pExecMemAllocator->idxChunkPrune = idxChunk;
458
459 STAM_REL_PROFILE_STOP(&pExecMemAllocator->StatPruneProf, a);
460}
461#endif /* IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING */
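
/* To put the pruning window in perspective: with eight fully populated 64 MB
 * chunks the per-call budget is capped at 4 MB, so a complete round-robin
 * sweep of the 512 MB pool takes 128 calls, with idxChunkPrune/offChunkPrune
 * carrying the resume point from call to call. */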
462
463
464/**
465 * Try allocate a block of @a cReqUnits in the chunk @a idxChunk.
466 */
467static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
468 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb)
469{
470 /*
471 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
472 */
473 Assert(!(cToScan & 63));
474 Assert(!(idxFirst & 63));
475 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
476 pbmAlloc += idxFirst / 64;
477
478 /*
479 * Scan the bitmap for cReqUnits consecutive clear bits.
480 */
481 /** @todo This can probably be done more efficiently for non-x86 systems. */
482 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
483 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
484 {
485 uint32_t idxAddBit = 1;
486 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
487 idxAddBit++;
488 if (idxAddBit >= cReqUnits)
489 {
490 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
491
492 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
493 pChunk->cFreeUnits -= cReqUnits;
494 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
495
496 pExecMemAllocator->cAllocations += 1;
497 uint32_t const cbReq = cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
498 pExecMemAllocator->cbAllocated += cbReq;
499 pExecMemAllocator->cbFree -= cbReq;
500 pExecMemAllocator->idxChunkHint = idxChunk;
501
502 void * const pvMem = (uint8_t *)pChunk->pvChunk
503 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
504#ifdef RT_OS_DARWIN
505 /*
506 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
507 * on darwin. So, we mark the pages returned as read+write after alloc and
508 * expect the caller to call iemExecMemAllocatorReadyForUse when done
509 * writing to the allocation.
510 *
511 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
512 * for details.
513 */
514 /** @todo detect if this is necessary... it wasn't required on 10.15 or
515 * whatever older version it was. */
516 int rc = RTMemProtect(pvMem, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
517 AssertRC(rc);
518#endif
519
520 /*
521 * Initialize the header and return.
522 */
523 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
524 PIEMEXECMEMALLOCHDR const pHdr = (PIEMEXECMEMALLOCHDR)pvMem;
525 pHdr->uMagic = IEMEXECMEMALLOCHDR_MAGIC;
526 pHdr->idxChunk = idxChunk;
527 pHdr->pTb = pTb;
528 return pHdr + 1;
529#else
530 RT_NOREF(pTb);
531 return pvMem;
532#endif
533 }
534
535 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
536 }
537 return NULL;
538}
539
540
541static void *
542iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq, PIEMTB pTb)
543{
544 /*
545 * Figure out how much to allocate.
546 */
547#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
548 uint32_t const cReqUnits = (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
549#else
550 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
551#endif
552 >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
553 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
554 {
555 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
556 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
557 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
558 {
559 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
560 pExecMemAllocator->cUnitsPerChunk - idxHint,
561 cReqUnits, idxChunk, pTb);
562 if (pvRet)
563 return pvRet;
564 }
565 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
566 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
567 cReqUnits, idxChunk, pTb);
568 }
569 return NULL;
570}
571
572
573/**
574 * Allocates @a cbReq bytes of executable memory.
575 *
576 * @returns Pointer to the memory, NULL if out of memory or other problem
577 * encountered.
578 * @param pVCpu The cross context virtual CPU structure of the calling
579 * thread.
580 * @param cbReq How many bytes are required.
581 * @param pTb The translation block that will be using the allocation.
582 */
583DECLHIDDEN(void *) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb) RT_NOEXCEPT
584{
585 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
586 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
587 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
588 STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);
589
590 for (unsigned iIteration = 0;; iIteration++)
591 {
592 if (cbReq <= pExecMemAllocator->cbFree)
593 {
594 uint32_t const cChunks = pExecMemAllocator->cChunks;
595 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
596 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
597 {
598 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
599 if (pvRet)
600 {
601 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
602 return pvRet;
603 }
604 }
605 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
606 {
607 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
608 if (pvRet)
609 {
610 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
611 return pvRet;
612 }
613 }
614 }
615
616 /*
617 * Can we grow it with another chunk?
618 */
619 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
620 {
621 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
622 AssertLogRelRCReturn(rc, NULL);
623
624 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
625 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
626 if (pvRet)
627 {
628 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
629 return pvRet;
630 }
631 AssertFailed();
632 }
633
634 /*
635 * Try prune native TBs once.
636 */
637 if (iIteration == 0)
638 {
639#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
640 iemExecMemAllocatorPrune(pVCpu, pExecMemAllocator);
641#else
642 /* No header included in the instruction count here. */
643 uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);
644 iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);
645#endif
646 }
647 else
648 {
649 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
650 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
651 return NULL;
652 }
653 }
654}
655
656
657/** This is a hook that we may need later for changing memory protection back
658 * to readonly+exec */
659DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT
660{
661#ifdef RT_OS_DARWIN
662 /* See iemExecMemAllocatorAllocInChunkInt for the explanation. */
663 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
664 AssertRC(rc); RT_NOREF(pVCpu);
665
666 /*
667 * Flush the instruction cache:
668 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
669 */
670 /* sys_dcache_flush(pv, cb); - not necessary */
671 sys_icache_invalidate(pv, cb);
672#elif defined(RT_OS_LINUX)
673 RT_NOREF(pVCpu);
674
675 /* There is __builtin___clear_cache() but it flushes both the instruction and data cache, so do it manually. */
676 static uint32_t s_u32CtrEl0 = 0;
677 if (!s_u32CtrEl0)
678 asm volatile ("mrs %0, ctr_el0":"=r" (s_u32CtrEl0));
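    /* CTR_EL0[3:0] is IminLine: log2 of the smallest instruction cache line
       size in 4-byte words; e.g. IminLine=4 gives 4 << 4 = 64 byte lines. */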
679 uintptr_t cbICacheLine = (uintptr_t)4 << (s_u32CtrEl0 & 0xf);
680
681 uintptr_t pb = (uintptr_t)pv & ~(cbICacheLine - 1);
682 for (; pb < (uintptr_t)pv + cb; pb += cbICacheLine)
683 asm volatile ("ic ivau, %0" : : "r" (pb) : "memory");
684
685 asm volatile ("dsb ish\n\t isb\n\t" : : : "memory");
686#else
687 RT_NOREF(pVCpu, pv, cb);
688#endif
689}
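
/* Typical call sequence in the recompiler (a sketch, not verbatim source;
 * cbNeeded and the emitting step are illustrative):
 *
 *      PIEMNATIVEINSTR paInstr = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, cbNeeded, pTb);
 *      // ... emit native instructions into paInstr (still writable on darwin) ...
 *      iemExecMemAllocatorReadyForUse(pVCpu, paInstr, cbNeeded); // flip to RX + flush I-cache
 */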
690
691
692/**
693 * Frees executable memory.
694 */
695DECLHIDDEN(void) iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT
696{
697 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
698 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
699 AssertPtr(pv);
700#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
701 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
702
703 /* Align the size as we did when allocating the block. */
704 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
705
706#else
707 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;
708 Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
709 AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);
710 uint32_t const idxChunk = pHdr->idxChunk;
711 AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);
712 pv = pHdr;
713
714 /* Adjust and align the size to cover the whole allocation area. */
715 cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
716#endif
717
718 /* Free it / assert sanity. */
719 bool fFound = false;
720 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
721#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
722 uint32_t const cChunks = pExecMemAllocator->cChunks;
723 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
724#endif
725 {
726 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
727 fFound = offChunk < cbChunk;
728 if (fFound)
729 {
730 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
731 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
732
733 /* Check that it's valid and free it. */
734 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
735 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
736 for (uint32_t i = 1; i < cReqUnits; i++)
737 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
738 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
739
740#if 0 /*def IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER - not necessary, we'll validate the header in the pruning code. */
741# ifdef RT_OS_DARWIN
742 int rc = RTMemProtect(pHdr, sizeof(*pHdr), RTMEM_PROT_WRITE | RTMEM_PROT_READ);
743 AssertRC(rc); RT_NOREF(pVCpu);
744# endif
745 pHdr->uMagic = 0;
746 pHdr->idxChunk = 0;
747 pHdr->pTb = NULL;
748# ifdef RT_OS_DARWIN
749 rc = RTMemProtect(pHdr, sizeof(*pHdr), RTMEM_PROT_EXEC | RTMEM_PROT_READ);
750 AssertRC(rc); RT_NOREF(pVCpu);
751# endif
752#endif
753 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
754 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
755
756 /* Update the stats. */
757 pExecMemAllocator->cbAllocated -= cb;
758 pExecMemAllocator->cbFree += cb;
759 pExecMemAllocator->cAllocations -= 1;
760 return;
761 }
762 }
763 AssertFailed();
764}
765
766
767
768#ifdef IN_RING3
769# ifdef RT_OS_WINDOWS
770
771/**
772 * Initializes the unwind info structures for windows hosts.
773 */
774static int
775iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
776 void *pvChunk, uint32_t idxChunk)
777{
778 RT_NOREF(pVCpu);
779
780 /*
781 * The AMD64 unwind opcodes.
782 *
783 * This is a program that starts with RSP after a RET instruction that
784 * ends up in recompiled code, and the operations we describe here will
785 * restore all non-volatile registers and bring RSP back to where our
786 * RET address is. This means it's reverse order from what happens in
787 * the prologue.
788 *
789 * Note! Using a frame register approach here both because we have one
790 * and mainly because the UWOP_ALLOC_LARGE argument values
791 * would be a pain to write initializers for. On the positive
792 * side, we're impervious to changes in the stack variable
793 * area and can deal with dynamic stack allocations if necessary.
794 */
795 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
796 {
797 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 16 (0x60) */
798 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
799 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
800 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
801 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
802 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
803 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
804 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
805 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
806 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
807 };
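    /* Note: Windows expects the unwind codes sorted by decreasing prologue
       offset, hence the array runs from the SET_FPREG at offset 16 down to
       the first PUSH at offset 4. */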
808 union
809 {
810 IMAGE_UNWIND_INFO Info;
811 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
812 } s_UnwindInfo =
813 {
814 {
815 /* .Version = */ 1,
816 /* .Flags = */ 0,
817 /* .SizeOfProlog = */ 16, /* whatever */
818 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
819 /* .FrameRegister = */ X86_GREG_xBP,
820 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
821 }
822 };
823 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
824 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
825
826 /*
827 * Calc how much space we need and allocate it off the exec heap.
828 */
829 unsigned const cFunctionEntries = 1;
830 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
831 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
832 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
833 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL);
834 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
835 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
836
837 /*
838 * Initialize the structures.
839 */
840 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
841
842 paFunctions[0].BeginAddress = 0;
843 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
844 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
845
846 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
847 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
848
849 /*
850 * Register it.
851 */
852 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
853 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
854
855 return VINF_SUCCESS;
856}
857
858
859# else /* !RT_OS_WINDOWS */
860
861/**
862 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
863 */
864DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
865{
866 if (iValue >= 64)
867 {
868 Assert(iValue < 0x2000);
869 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
870 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
871 }
872 else if (iValue >= 0)
873 *Ptr.pb++ = (uint8_t)iValue;
874 else if (iValue > -64)
875 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
876 else
877 {
878 Assert(iValue > -0x2000);
879 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
880 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
881 }
882 return Ptr;
883}
884
885
886/**
887 * Emits a ULEB128 encoded value (up to 64-bit wide).
888 */
889DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
890{
891 while (uValue >= 0x80)
892 {
893 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
894 uValue >>= 7;
895 }
896 *Ptr.pb++ = (uint8_t)uValue;
897 return Ptr;
898}
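
/* Worked encodings, for reference: the unsigned routine encodes 624485
 * (0x98765) as 0xE5 0x8E 0x26 - seven bits per byte, least significant group
 * first, the high bit of each byte flagging continuation. The signed routine
 * above encodes 300 as 0xAC 0x02 and -300 as 0xD4 0x7D, the 0x40 bit of the
 * final byte carrying the sign. */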
899
900
901/**
902 * Emits a CFA rule as register @a uReg + offset @a off.
903 */
904DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
905{
906 *Ptr.pb++ = DW_CFA_def_cfa;
907 Ptr = iemDwarfPutUleb128(Ptr, uReg);
908 Ptr = iemDwarfPutUleb128(Ptr, off);
909 return Ptr;
910}
911
912
913/**
914 * Emits a register (@a uReg) save location:
915 * CFA + @a off * data_alignment_factor
916 */
917DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
918{
919 if (uReg < 0x40)
920 *Ptr.pb++ = DW_CFA_offset | uReg;
921 else
922 {
923 *Ptr.pb++ = DW_CFA_offset_extended;
924 Ptr = iemDwarfPutUleb128(Ptr, uReg);
925 }
926 Ptr = iemDwarfPutUleb128(Ptr, off);
927 return Ptr;
928}
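
/* For reference, the bytes these helpers produce for the AMD64 CIE below:
 * iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16) emits 0x0C 0x06 0x10
 * (DW_CFA_def_cfa, reg 6, offset 16), while iemDwarfPutCfaOffset(Ptr,
 * DWREG_AMD64_RA, 1) packs the register into the opcode byte and emits
 * 0x90 0x01 (DW_CFA_offset | 16, offset 1, scaled by the -8 data
 * alignment factor). */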
929
930
931# if 0 /* unused */
932/**
933 * Emits a register (@a uReg) save location, using signed offset:
934 * CFA + @a offSigned * data_alignment_factor
935 */
936DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
937{
938 *Ptr.pb++ = DW_CFA_offset_extended_sf;
939 Ptr = iemDwarfPutUleb128(Ptr, uReg);
940 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
941 return Ptr;
942}
943# endif
944
945
946/**
947 * Initializes the unwind info section for non-windows hosts.
948 */
949static int
950iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
951 void *pvChunk, uint32_t idxChunk)
952{
953 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
954 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
955
956 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
957
958 /*
959 * Generate the CIE first.
960 */
961# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
962 uint8_t const iDwarfVer = 3;
963# else
964 uint8_t const iDwarfVer = 4;
965# endif
966 RTPTRUNION const PtrCie = Ptr;
967 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
968 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
969 *Ptr.pb++ = iDwarfVer; /* DWARF version */
970 *Ptr.pb++ = 0; /* Augmentation. */
971 if (iDwarfVer >= 4)
972 {
973 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
974 *Ptr.pb++ = 0; /* Segment selector size. */
975 }
976# ifdef RT_ARCH_AMD64
977 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
978# else
979 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
980# endif
981 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
982# ifdef RT_ARCH_AMD64
983 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
984# elif defined(RT_ARCH_ARM64)
985 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
986# else
987# error "port me"
988# endif
989 /* Initial instructions: */
990# ifdef RT_ARCH_AMD64
991 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
992 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
993 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
994 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
995 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
996 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
997 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
998 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
999# elif defined(RT_ARCH_ARM64)
1000# if 1
1001 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
1002# else
1003 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
1004# endif
1005 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
1006 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
1007 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
1008 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
1009 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
1010 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
1011 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
1012 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
1013 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
1014 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
1015 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
1016 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
1017 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
1018 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE? */
1019# else
1020# error "port me"
1021# endif
1022 while ((Ptr.u - PtrCie.u) & 3)
1023 *Ptr.pb++ = DW_CFA_nop;
1024 /* Finalize the CIE size. */
1025 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
1026
1027 /*
1028 * Generate an FDE for the whole chunk area.
1029 */
1030# ifdef IEMNATIVE_USE_LIBUNWIND
1031 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
1032# endif
1033 RTPTRUNION const PtrFde = Ptr;
1034 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
1035 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
1036 Ptr.pu32++;
1037 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
1038 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
1039 # if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
1040 *Ptr.pb++ = DW_CFA_nop;
1041# endif
1042 while ((Ptr.u - PtrFde.u) & 3)
1043 *Ptr.pb++ = DW_CFA_nop;
1044 /* Finalize the FDE size. */
1045 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
1046
1047 /* Terminator entry. */
1048 *Ptr.pu32++ = 0;
1049 *Ptr.pu32++ = 0; /* just to be sure... */
1050 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
1051
1052 /*
1053 * Register it.
1054 */
1055# ifdef IEMNATIVE_USE_LIBUNWIND
1056 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
1057# else
1058 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
1059 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
1060# endif
1061
1062# ifdef IEMNATIVE_USE_GDB_JIT
1063 /*
1064 * Now for telling GDB about this (experimental).
1065 *
1066 * This seems to work best with ET_DYN.
1067 */
1068 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk,
1069 sizeof(GDBJITSYMFILE), NULL);
1070 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1071 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1072
1073 RT_ZERO(*pSymFile);
1074
1075 /*
1076 * The ELF header:
1077 */
1078 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1079 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1080 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1081 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1082 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1083 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1084 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1085 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1086# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1087 pSymFile->EHdr.e_type = ET_DYN;
1088# else
1089 pSymFile->EHdr.e_type = ET_REL;
1090# endif
1091# ifdef RT_ARCH_AMD64
1092 pSymFile->EHdr.e_machine = EM_AMD64;
1093# elif defined(RT_ARCH_ARM64)
1094 pSymFile->EHdr.e_machine = EM_AARCH64;
1095# else
1096# error "port me"
1097# endif
1098 pSymFile->EHdr.e_version = 1; /*?*/
1099 pSymFile->EHdr.e_entry = 0;
1100# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1101 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1102# else
1103 pSymFile->EHdr.e_phoff = 0;
1104# endif
1105 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1106 pSymFile->EHdr.e_flags = 0;
1107 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1108# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1109 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1110 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1111# else
1112 pSymFile->EHdr.e_phentsize = 0;
1113 pSymFile->EHdr.e_phnum = 0;
1114# endif
1115 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1116 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1117 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1118
1119 uint32_t offStrTab = 0;
1120#define APPEND_STR(a_szStr) do { \
1121 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1122 offStrTab += sizeof(a_szStr); \
1123 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1124 } while (0)
1125#define APPEND_STR_FMT(a_szStr, ...) do { \
1126 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1127 offStrTab++; \
1128 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1129 } while (0)
1130
1131 /*
1132 * Section headers.
1133 */
1134 /* Section header #0: NULL */
1135 unsigned i = 0;
1136 APPEND_STR("");
1137 RT_ZERO(pSymFile->aShdrs[i]);
1138 i++;
1139
1140 /* Section header: .eh_frame */
1141 pSymFile->aShdrs[i].sh_name = offStrTab;
1142 APPEND_STR(".eh_frame");
1143 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1144 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1145# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1146 pSymFile->aShdrs[i].sh_offset
1147 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1148# else
1149 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1150 pSymFile->aShdrs[i].sh_offset = 0;
1151# endif
1152
1153 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1154 pSymFile->aShdrs[i].sh_link = 0;
1155 pSymFile->aShdrs[i].sh_info = 0;
1156 pSymFile->aShdrs[i].sh_addralign = 1;
1157 pSymFile->aShdrs[i].sh_entsize = 0;
1158 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1159 i++;
1160
1161 /* Section header: .shstrtab */
1162 unsigned const iShStrTab = i;
1163 pSymFile->EHdr.e_shstrndx = iShStrTab;
1164 pSymFile->aShdrs[i].sh_name = offStrTab;
1165 APPEND_STR(".shstrtab");
1166 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1167 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1168# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1169 pSymFile->aShdrs[i].sh_offset
1170 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1171# else
1172 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1173 pSymFile->aShdrs[i].sh_offset = 0;
1174# endif
1175 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1176 pSymFile->aShdrs[i].sh_link = 0;
1177 pSymFile->aShdrs[i].sh_info = 0;
1178 pSymFile->aShdrs[i].sh_addralign = 1;
1179 pSymFile->aShdrs[i].sh_entsize = 0;
1180 i++;
1181
1182 /* Section header: .symtab */
1183 pSymFile->aShdrs[i].sh_name = offStrTab;
1184 APPEND_STR(".symtab");
1185 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1186 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1187 pSymFile->aShdrs[i].sh_offset
1188 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1189 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1190 pSymFile->aShdrs[i].sh_link = iShStrTab;
1191 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1192 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1193 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1194 i++;
1195
1196# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1197 /* Section header: .dynsym */
1198 pSymFile->aShdrs[i].sh_name = offStrTab;
1199 APPEND_STR(".dynsym");
1200 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1201 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1202 pSymFile->aShdrs[i].sh_offset
1203 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1204 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1205 pSymFile->aShdrs[i].sh_link = iShStrTab;
1206 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1207 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1208 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1209 i++;
1210# endif
1211
1212# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1213 /* Section header: .dynamic */
1214 pSymFile->aShdrs[i].sh_name = offStrTab;
1215 APPEND_STR(".dynamic");
1216 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1217 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1218 pSymFile->aShdrs[i].sh_offset
1219 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1220 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1221 pSymFile->aShdrs[i].sh_link = iShStrTab;
1222 pSymFile->aShdrs[i].sh_info = 0;
1223 pSymFile->aShdrs[i].sh_addralign = 1;
1224 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1225 i++;
1226# endif
1227
1228 /* Section header: .text */
1229 unsigned const iShText = i;
1230 pSymFile->aShdrs[i].sh_name = offStrTab;
1231 APPEND_STR(".text");
1232 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1233 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1234# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1235 pSymFile->aShdrs[i].sh_offset
1236 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1237# else
1238 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1239 pSymFile->aShdrs[i].sh_offset = 0;
1240# endif
1241 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1242 pSymFile->aShdrs[i].sh_link = 0;
1243 pSymFile->aShdrs[i].sh_info = 0;
1244 pSymFile->aShdrs[i].sh_addralign = 1;
1245 pSymFile->aShdrs[i].sh_entsize = 0;
1246 i++;
1247
1248 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1249
1250# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1251 /*
1252 * The program headers:
1253 */
1254 /* Everything in a single LOAD segment: */
1255 i = 0;
1256 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1257 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1258 pSymFile->aPhdrs[i].p_offset
1259 = pSymFile->aPhdrs[i].p_vaddr
1260 = pSymFile->aPhdrs[i].p_paddr = 0;
1261 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1262 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1263 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1264 i++;
1265 /* The .dynamic segment. */
1266 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1267 pSymFile->aPhdrs[i].p_flags = PF_R;
1268 pSymFile->aPhdrs[i].p_offset
1269 = pSymFile->aPhdrs[i].p_vaddr
1270 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1271 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1272 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1273 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1274 i++;
1275
1276 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1277
1278 /*
1279 * The dynamic section:
1280 */
1281 i = 0;
1282 pSymFile->aDyn[i].d_tag = DT_SONAME;
1283 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1284 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1285 i++;
1286 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1287 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1288 i++;
1289 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1290 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1291 i++;
1292 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1293 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1294 i++;
1295 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1296 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1297 i++;
1298 pSymFile->aDyn[i].d_tag = DT_NULL;
1299 i++;
1300 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1301# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1302
1303 /*
1304 * Symbol tables:
1305 */
1306 /** @todo gdb doesn't seem to really like this ... */
1307 i = 0;
1308 pSymFile->aSymbols[i].st_name = 0;
1309 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1310 pSymFile->aSymbols[i].st_value = 0;
1311 pSymFile->aSymbols[i].st_size = 0;
1312 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1313 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1314# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1315 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1316# endif
1317 i++;
1318
1319 pSymFile->aSymbols[i].st_name = 0;
1320 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1321 pSymFile->aSymbols[i].st_value = 0;
1322 pSymFile->aSymbols[i].st_size = 0;
1323 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1324 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1325 i++;
1326
1327 pSymFile->aSymbols[i].st_name = offStrTab;
1328 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1329# if 0
1330 pSymFile->aSymbols[i].st_shndx = iShText;
1331 pSymFile->aSymbols[i].st_value = 0;
1332# else
1333 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1334 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1335# endif
1336 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1337 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1338 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1339# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1340 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1341 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1342# endif
1343 i++;
1344
1345 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1346 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1347
1348 /*
1349 * The GDB JIT entry and informing GDB.
1350 */
1351 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1352# if 1
1353 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1354# else
1355 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1356# endif
1357
1358 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1359 RTCritSectEnter(&g_IemNativeGdbJitLock);
1360 pEhFrame->GdbJitEntry.pNext = NULL;
1361 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1362 if (__jit_debug_descriptor.pTail)
1363 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1364 else
1365 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1366 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1367 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1368
1369 /* Notify GDB: */
1370 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1371 __jit_debug_register_code();
1372 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1373 RTCritSectLeave(&g_IemNativeGdbJitLock);
1374
1375# else /* !IEMNATIVE_USE_GDB_JIT */
1376 RT_NOREF(pVCpu);
1377# endif /* !IEMNATIVE_USE_GDB_JIT */
1378
1379 return VINF_SUCCESS;
1380}
1381
1382# endif /* !RT_OS_WINDOWS */
1383#endif /* IN_RING3 */
1384
1385
1386/**
1387 * Adds another chunk to the executable memory allocator.
1388 *
1389 * This is used by the init code for the initial allocation and later by the
1390 * regular allocator function when it's out of memory.
1391 */
1392static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1393{
1394 /* Check that we've room for growth. */
1395 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1396 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1397
1398 /* Allocate a chunk. */
1399#ifdef RT_OS_DARWIN
1400 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1401#else
1402 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1403#endif
1404 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1405
1406 /*
1407 * Add the chunk.
1408 *
1409 * This must be done before the unwind init so windows can allocate
1410 * memory from the chunk when using the alternative sub-allocator.
1411 */
1412 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1413#ifdef IN_RING3
1414 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1415#endif
1416 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1417 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1418 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1419 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1420
1421 pExecMemAllocator->cChunks = idxChunk + 1;
1422 pExecMemAllocator->idxChunkHint = idxChunk;
1423
1424 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1425 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1426
1427#ifdef IN_RING3
1428 /*
1429 * Initialize the unwind information (this cannot really fail atm).
1430 * (This sets pvUnwindInfo.)
1431 */
1432 int rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1433 if (RT_SUCCESS(rc))
1434 { /* likely */ }
1435 else
1436 {
1437 /* Just in case the impossible happens, undo the above: */
1438 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1439 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1440 pExecMemAllocator->cChunks = idxChunk;
1441 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1442 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1443 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1444 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1445
1446 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1447 return rc;
1448 }
1449#endif
1450 return VINF_SUCCESS;
1451}
1452
1453
1454/**
1455 * Initializes the executable memory allocator for native recompilation on the
1456 * calling EMT.
1457 *
1458 * @returns VBox status code.
1459 * @param pVCpu The cross context virtual CPU structure of the calling
1460 * thread.
1461 * @param cbMax The max size of the allocator.
1462 * @param cbInitial The initial allocator size.
1463 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1464 * dependent).
1465 */
1466int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT
1467{
1468 /*
1469 * Validate input.
1470 */
1471 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1472 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1473 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1474 || cbChunk == 0
1475 || ( RT_IS_POWER_OF_TWO(cbChunk)
1476 && cbChunk >= _1M
1477 && cbChunk <= _256M
1478 && cbChunk <= cbMax),
1479 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1480 VERR_OUT_OF_RANGE);
1481
1482 /*
1483 * Adjust/figure out the chunk size.
1484 */
1485 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1486 {
1487 if (cbMax >= _256M)
1488 cbChunk = _64M;
1489 else
1490 {
1491 if (cbMax < _16M)
1492 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1493 else
1494 cbChunk = (uint32_t)cbMax / 4;
1495 if (!RT_IS_POWER_OF_TWO(cbChunk))
1496 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1497 }
1498 }
1499
1500 if (cbChunk > cbMax)
1501 cbMax = cbChunk;
1502 else
1503 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1504 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1505 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
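
    /* Example: cbMax = 40 MB isn't served by the fixed cases, so cbChunk
       starts at cbMax / 4 = 10 MB and is rounded up to the next power of two,
       16 MB, by the ASMBitLastSetU32/RT_BIT_32 pair; cbMax is then rounded up
       to 48 MB, giving cMaxChunks = 3. */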
1506
1507 /*
1508 * Allocate and initialize the allocator instance.
1509 */
1510 size_t const offBitmaps = RT_ALIGN_Z(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]), RT_CACHELINE_SIZE);
1511 size_t const cbBitmaps = (size_t)(cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)) * cMaxChunks;
1512 size_t cbNeeded = offBitmaps + cbBitmaps;
1513 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1514 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1515#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1516 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1517 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1518#endif
1519 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1520 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1521 VERR_NO_MEMORY);
1522 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1523 pExecMemAllocator->cbChunk = cbChunk;
1524 pExecMemAllocator->cMaxChunks = cMaxChunks;
1525 pExecMemAllocator->cChunks = 0;
1526 pExecMemAllocator->idxChunkHint = 0;
1527 pExecMemAllocator->cAllocations = 0;
1528 pExecMemAllocator->cbTotal = 0;
1529 pExecMemAllocator->cbFree = 0;
1530 pExecMemAllocator->cbAllocated = 0;
1531 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1532 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1533 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1534 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmaps); /* Mark everything as allocated. Clear when chunks are added. */
1535#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1536 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1537#endif
1538 for (uint32_t i = 0; i < cMaxChunks; i++)
1539 {
1540 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1541 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 /*
1563 * Register statistics.
1564 */
1565 PUVM const pUVM = pVCpu->pUVCpu->pUVM;
1566 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1567 "Current number of allocations", "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);
1568 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1569 "Currently allocated chunks", "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);
1570 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1571 "Maximum number of chunks", "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);
1572 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1573 "Allocation chunk size", "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);
1574 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1575 "Number of bytes currently allocated", "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);
1576 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1577 "Number of bytes currently free", "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);
1578 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1579 "Total number of bytes", "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);
1580#ifdef VBOX_WITH_STATISTICS
1581 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1582 "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);
1583#endif
1584#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
1585 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneProf, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1586 "Pruning executable memory (alt)", "/IEM/CPU%u/re/ExecMem/Pruning", pVCpu->idCpu);
1587 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneRecovered, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES_PER_CALL,
1588 "Bytes recovered while pruning", "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu);
1589#endif
1590
1591 return VINF_SUCCESS;
1592}
1593