source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@102065

Last change on this file: r102065, checked in by vboxsync, 15 months ago

VMM/IEM: Native translation of IEM_MC_STORE_GREG_U16. Fixed a bunch of variable, register & stack allocator issues. bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 102065 2023-11-10 16:14:51Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused configs.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128
129/*********************************************************************************************************************************
130* Defined Constants And Macros *
131*********************************************************************************************************************************/
132/** Always count instructions for now. */
133#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
134
135
136/*********************************************************************************************************************************
137* Internal Functions *
138*********************************************************************************************************************************/
139#ifdef VBOX_STRICT
140static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
141 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
142static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
143#endif
144#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
145static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
146static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
147#endif
148DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
149
150
151/*********************************************************************************************************************************
152* Executable Memory Allocator *
153*********************************************************************************************************************************/
154/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
155 * Use an alternative chunk sub-allocator that does not store internal data
156 * in the chunk.
157 *
158 * Using the RTHeapSimple is not practical on newer darwin systems where
159 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
160 * memory. We would have to change the protection of the whole chunk for
161 * every call to RTHeapSimple, which would be rather expensive.
162 *
163 * This alternative implementation restricts page protection modifications
164 * to the pages backing the executable memory we just allocated.
165 */
166#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
167/** The chunk sub-allocation unit size in bytes. */
168#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
169/** The chunk sub-allocation unit size as a shift factor. */
170#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
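/* Illustrative note (added; not part of the original source): with 128 byte
 * units (shift 7) a request is rounded up to whole units, e.g. a 200 byte
 * request needs cReqUnits = (200 + 127) >> 7 = 2 units = 256 bytes, while a
 * 128 byte request fits exactly in 1 unit. */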
171
172#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
173# ifdef IEMNATIVE_USE_GDB_JIT
174# define IEMNATIVE_USE_GDB_JIT_ET_DYN
175
176/** GDB JIT: Code entry. */
177typedef struct GDBJITCODEENTRY
178{
179 struct GDBJITCODEENTRY *pNext;
180 struct GDBJITCODEENTRY *pPrev;
181 uint8_t *pbSymFile;
182 uint64_t cbSymFile;
183} GDBJITCODEENTRY;
184
185/** GDB JIT: Actions. */
186typedef enum GDBJITACTIONS : uint32_t
187{
188 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
189} GDBJITACTIONS;
190
191/** GDB JIT: Descriptor. */
192typedef struct GDBJITDESCRIPTOR
193{
194 uint32_t uVersion;
195 GDBJITACTIONS enmAction;
196 GDBJITCODEENTRY *pRelevant;
197 GDBJITCODEENTRY *pHead;
198 /** Our addition: */
199 GDBJITCODEENTRY *pTail;
200} GDBJITDESCRIPTOR;
201
202/** GDB JIT: Our simple symbol file data. */
203typedef struct GDBJITSYMFILE
204{
205 Elf64_Ehdr EHdr;
206# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Shdr aShdrs[5];
208# else
209 Elf64_Shdr aShdrs[7];
210 Elf64_Phdr aPhdrs[2];
211# endif
212 /** The dwarf ehframe data for the chunk. */
213 uint8_t abEhFrame[512];
214 char szzStrTab[128];
215 Elf64_Sym aSymbols[3];
216# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
217 Elf64_Sym aDynSyms[2];
218 Elf64_Dyn aDyn[6];
219# endif
220} GDBJITSYMFILE;
221
222extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
223extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
224
225/** Init once for g_IemNativeGdbJitLock. */
226static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
227/** Init once for the critical section. */
228static RTCRITSECT g_IemNativeGdbJitLock;
229
230/** GDB reads the info here. */
231GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
232
233/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
234DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
235{
236 ASMNopPause();
237}
238
239/** @callback_method_impl{FNRTONCE} */
240static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
241{
242 RT_NOREF(pvUser);
243 return RTCritSectInit(&g_IemNativeGdbJitLock);
244}
245
246
247# endif /* IEMNATIVE_USE_GDB_JIT */
248
249/**
250 * Per-chunk unwind info for non-windows hosts.
251 */
252typedef struct IEMEXECMEMCHUNKEHFRAME
253{
254# ifdef IEMNATIVE_USE_LIBUNWIND
255 /** The offset of the FDE into abEhFrame. */
256 uintptr_t offFda;
257# else
258 /** 'struct object' storage area. */
259 uint8_t abObject[1024];
260# endif
261# ifdef IEMNATIVE_USE_GDB_JIT
262# if 0
263 /** The GDB JIT 'symbol file' data. */
264 GDBJITSYMFILE GdbJitSymFile;
265# endif
266 /** The GDB JIT list entry. */
267 GDBJITCODEENTRY GdbJitEntry;
268# endif
269 /** The dwarf ehframe data for the chunk. */
270 uint8_t abEhFrame[512];
271} IEMEXECMEMCHUNKEHFRAME;
272/** Pointer to per-chunk unwind info for non-windows hosts. */
273typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
274#endif
275
276
277/**
278 * A chunk of executable memory.
279 */
280typedef struct IEMEXECMEMCHUNK
281{
282#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
283 /** Number of free items in this chunk. */
284 uint32_t cFreeUnits;
285 /** Hint where to start searching for free space in the allocation bitmap. */
286 uint32_t idxFreeHint;
287#else
288 /** The heap handle. */
289 RTHEAPSIMPLE hHeap;
290#endif
291 /** Pointer to the chunk. */
292 void *pvChunk;
293#ifdef IN_RING3
294 /**
295 * Pointer to the unwind information.
296 *
297 * This is used during C++ throw and longjmp (windows and probably most other
298 * platforms). Some debuggers (windbg) make use of it as well.
299 *
300 * Windows: This is allocated from hHeap on windows because (at least for
301 * AMD64) the UNWIND_INFO structure address in the
302 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
303 *
304 * Others: Allocated from the regular heap to avoid unnecessary executable data
305 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
306 void *pvUnwindInfo;
307#elif defined(IN_RING0)
308 /** Allocation handle. */
309 RTR0MEMOBJ hMemObj;
310#endif
311} IEMEXECMEMCHUNK;
312/** Pointer to a memory chunk. */
313typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
314
315
316/**
317 * Executable memory allocator for the native recompiler.
318 */
319typedef struct IEMEXECMEMALLOCATOR
320{
321 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
322 uint32_t uMagic;
323
324 /** The chunk size. */
325 uint32_t cbChunk;
326 /** The maximum number of chunks. */
327 uint32_t cMaxChunks;
328 /** The current number of chunks. */
329 uint32_t cChunks;
330 /** Hint where to start looking for available memory. */
331 uint32_t idxChunkHint;
332 /** Statistics: Current number of allocations. */
333 uint32_t cAllocations;
334
335 /** The total amount of memory available. */
336 uint64_t cbTotal;
337 /** Total amount of free memory. */
338 uint64_t cbFree;
339 /** Total amount of memory allocated. */
340 uint64_t cbAllocated;
341
342#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
343 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
344 *
345 * Since the chunk size is a power of two and the minimum chunk size is a lot
346 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
347 * require a whole number of uint64_t elements in the allocation bitmap. So,
348 * for the sake of simplicity/laziness, they are allocated as one continuous
349 * chunk together with this structure. */
350 uint64_t *pbmAlloc;
351 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
352 uint32_t cUnitsPerChunk;
353 /** Number of bitmap elements per chunk (for quickly locating the bitmap
354 * portion corresponding to a chunk). */
355 uint32_t cBitmapElementsPerChunk;
356#else
357 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
358 * @{ */
359 /** The size of the heap internal block header. This is used to adjust the
360 * requested memory size to make sure there is exactly enough room for a header at
361 * the end of the blocks we allocate before the next 64 byte alignment line. */
362 uint32_t cbHeapBlockHdr;
363 /** The size of the initial heap allocation required to make sure the first
364 * allocation is correctly aligned. */
365 uint32_t cbHeapAlignTweak;
366 /** The alignment tweak allocation address. */
367 void *pvAlignTweak;
368 /** @} */
369#endif
370
371#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
372 /** Pointer to the array of unwind info running parallel to aChunks (same
373 * allocation as this structure, located after the bitmaps).
374 * (For Windows, the structures must reside in 32-bit RVA distance to the
375 * actual chunk, so they are allocated off the chunk.) */
376 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
377#endif
378
379 /** The allocation chunks. */
380 RT_FLEXIBLE_ARRAY_EXTENSION
381 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
382} IEMEXECMEMALLOCATOR;
383/** Pointer to an executable memory allocator. */
384typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
385
386/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
387#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
388
389
390static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
391
392
393/**
394 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
395 * the heap statistics.
396 */
397static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
398 uint32_t cbReq, uint32_t idxChunk)
399{
400 pExecMemAllocator->cAllocations += 1;
401 pExecMemAllocator->cbAllocated += cbReq;
402#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
403 pExecMemAllocator->cbFree -= cbReq;
404#else
405 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
406#endif
407 pExecMemAllocator->idxChunkHint = idxChunk;
408
409#ifdef RT_OS_DARWIN
410 /*
411 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
412 * on darwin. So, we mark the pages returned as read+write after alloc and
413 * expect the caller to call iemExecMemAllocatorReadyForUse when done
414 * writing to the allocation.
415 *
416 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
417 * for details.
418 */
419 /** @todo detect if this is necessary... it wasn't required on 10.15 or
420 * whatever older version it was. */
421 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
422 AssertRC(rc);
423#endif
424
425 return pvRet;
426}
427
428
429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
430static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
431 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
432{
433 /*
434 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
435 */
436 Assert(!(cToScan & 63));
437 Assert(!(idxFirst & 63));
438 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
439 pbmAlloc += idxFirst / 64;
440
441 /*
442 * Scan the bitmap for cReqUnits of consecutive clear bits
443 */
444 /** @todo This can probably be done more efficiently for non-x86 systems. */
445 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
446 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
447 {
448 uint32_t idxAddBit = 1;
449 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
450 idxAddBit++;
451 if (idxAddBit >= cReqUnits)
452 {
453 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
454
455 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
456 pChunk->cFreeUnits -= cReqUnits;
457 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
458
459 void * const pvRet = (uint8_t *)pChunk->pvChunk
460 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
461
462 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
463 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
464 }
465
466 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
467 }
468 return NULL;
469}
470#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
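/* Illustrative walk-through (added comment, not from the original source): for
 * cReqUnits = 2 and a bitmap whose low bits are 1,1,0,1,0,0,... (bit 0 first),
 * ASMBitFirstClear returns bit 2, the inner loop finds bit 3 set, so the outer
 * loop continues via ASMBitNextClear and lands on bit 4; bits 4 and 5 are both
 * clear, so that range is claimed with ASMBitSetRange and the matching chunk
 * offset is returned. */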
471
472
473static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
474{
475#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
476 /*
477 * Figure out how much to allocate.
478 */
479 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
480 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
481 {
482 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
483 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
484 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
485 {
486 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
487 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
488 if (pvRet)
489 return pvRet;
490 }
491 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
492 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
493 cReqUnits, idxChunk);
494 }
495#else
496 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
497 if (pvRet)
498 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
499#endif
500 return NULL;
501
502}
503
504
505/**
506 * Allocates @a cbReq bytes of executable memory.
507 *
508 * @returns Pointer to the memory, NULL if out of memory or other problem
509 * encountered.
510 * @param pVCpu The cross context virtual CPU structure of the calling
511 * thread.
512 * @param cbReq How many bytes are required.
513 */
514static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
515{
516 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
517 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
518 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
519
520 /*
521 * Adjust the request size so it'll fit the allocator alignment/whatnot.
522 *
523 * For the RTHeapSimple allocator this means to follow the logic described
524 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
525 * existing chunks if we think we've got sufficient free memory around.
526 *
527 * While for the alternative one we just align it up to a whole unit size.
528 */
529#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
530 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
531#else
532 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
533#endif
534 if (cbReq <= pExecMemAllocator->cbFree)
535 {
536 uint32_t const cChunks = pExecMemAllocator->cChunks;
537 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
538 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
539 {
540 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
541 if (pvRet)
542 return pvRet;
543 }
544 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
545 {
546 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
547 if (pvRet)
548 return pvRet;
549 }
550 }
551
552 /*
553 * Can we grow it with another chunk?
554 */
555 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
556 {
557 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
558 AssertLogRelRCReturn(rc, NULL);
559
560 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
561 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
562 if (pvRet)
563 return pvRet;
564 AssertFailed();
565 }
566
567 /* What now? Prune native translation blocks from the cache? */
568 AssertFailed();
569 return NULL;
570}
571
572
573/** Marks memory returned by iemExecMemAllocatorAlloc as ready for use,
574 * changing the protection back to readonly+exec where necessary (darwin). */
575static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
576{
577#ifdef RT_OS_DARWIN
578 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
579 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
580 AssertRC(rc); RT_NOREF(pVCpu);
581
582 /*
583 * Flush the instruction cache:
584 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
585 */
586 /* sys_dcache_flush(pv, cb); - not necessary */
587 sys_icache_invalidate(pv, cb);
588#else
589 RT_NOREF(pVCpu, pv, cb);
590#endif
591}
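/* Illustrative call sequence (added comment; pbCode/cbCode are hypothetical
 * names, the real recompiler emits directly into the allocated buffer):
 *
 *     void *pv = iemExecMemAllocatorAlloc(pVCpu, cbCode);
 *     memcpy(pv, pbCode, cbCode);                        // still writable here
 *     iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode); // read+exec, icache flushed (darwin)
 *     ...
 *     iemExecMemAllocatorFree(pVCpu, pv, cbCode);
 */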
592
593
594/**
595 * Frees executable memory.
596 */
597void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
598{
599 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
600 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
601 Assert(pv);
602#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
603 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
604#else
605 Assert(!((uintptr_t)pv & 63));
606#endif
607
608 /* Align the size as we did when allocating the block. */
609#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
610 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
611#else
612 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
613#endif
614
615 /* Free it / assert sanity. */
616#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
617 uint32_t const cChunks = pExecMemAllocator->cChunks;
618 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
619 bool fFound = false;
620 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
621 {
622 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
623 fFound = offChunk < cbChunk;
624 if (fFound)
625 {
626#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
627 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
628 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
629
630 /* Check that it's valid and free it. */
631 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
632 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
633 for (uint32_t i = 1; i < cReqUnits; i++)
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
635 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
636
637 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
638 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
639
640 /* Update the stats. */
641 pExecMemAllocator->cbAllocated -= cb;
642 pExecMemAllocator->cbFree += cb;
643 pExecMemAllocator->cAllocations -= 1;
644 return;
645#else
646 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
647 break;
648#endif
649 }
650 }
651# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
652 AssertFailed();
653# else
654 Assert(fFound);
655# endif
656#endif
657
658#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
659 /* Update stats while cb is freshly calculated. */
660 pExecMemAllocator->cbAllocated -= cb;
661 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
662 pExecMemAllocator->cAllocations -= 1;
663
664 /* Free it. */
665 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
666#endif
667}
668
669
670
671#ifdef IN_RING3
672# ifdef RT_OS_WINDOWS
673
674/**
675 * Initializes the unwind info structures for windows hosts.
676 */
677static int
678iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
679 void *pvChunk, uint32_t idxChunk)
680{
681 RT_NOREF(pVCpu);
682
683 /*
684 * The AMD64 unwind opcodes.
685 *
686 * This is a program that starts with RSP after a RET instruction that
687 * ends up in recompiled code, and the operations we describe here will
688 * restore all non-volatile registers and bring RSP back to where our
689 * RET address is. This means it's reverse order from what happens in
690 * the prologue.
691 *
692 * Note! Using a frame register approach here, both because we have one
693 * and mainly because the UWOP_ALLOC_LARGE argument values
694 * would be a pain to write initializers for. On the positive
695 * side, we're impervious to changes in the stack variable
696 * area and can deal with dynamic stack allocations if necessary.
697 */
698 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
699 {
700 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
701 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
702 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
703 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
704 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
705 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
706 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
707 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
708 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
709 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
710 };
711 union
712 {
713 IMAGE_UNWIND_INFO Info;
714 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
715 } s_UnwindInfo =
716 {
717 {
718 /* .Version = */ 1,
719 /* .Flags = */ 0,
720 /* .SizeOfProlog = */ 16, /* whatever */
721 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
722 /* .FrameRegister = */ X86_GREG_xBP,
723 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
724 }
725 };
726 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
727 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
728
729 /*
730 * Calc how much space we need and allocate it off the exec heap.
731 */
732 unsigned const cFunctionEntries = 1;
733 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
734 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
735# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
736 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
737 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
738 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
739# else
740 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
741 - pExecMemAllocator->cbHeapBlockHdr;
742 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
743 32 /*cbAlignment*/);
744# endif
745 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
746 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
747
748 /*
749 * Initialize the structures.
750 */
751 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
752
753 paFunctions[0].BeginAddress = 0;
754 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
755 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
756
757 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
758 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
759
760 /*
761 * Register it.
762 */
763 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
764 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
765
766 return VINF_SUCCESS;
767}
768
769
770# else /* !RT_OS_WINDOWS */
771
772/**
773 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
774 */
775DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
776{
777 if (iValue >= 64)
778 {
779 Assert(iValue < 0x2000);
780 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
781 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
782 }
783 else if (iValue >= 0)
784 *Ptr.pb++ = (uint8_t)iValue;
785 else if (iValue > -64)
786 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
787 else
788 {
789 Assert(iValue > -0x2000);
790 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
791 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
792 }
793 return Ptr;
794}
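/* Example encodings (added comment): 1 encodes as 0x01, the data alignment
 * factor -8 used below as 0x78, and 100 as the two bytes 0xe4 0x00, matching
 * the standard signed LEB128 forms for values in this +/-0x2000 range. */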
795
796
797/**
798 * Emits an ULEB128 encoded value (up to 64-bit wide).
799 */
800DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
801{
802 while (uValue >= 0x80)
803 {
804 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
805 uValue >>= 7;
806 }
807 *Ptr.pb++ = (uint8_t)uValue;
808 return Ptr;
809}
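/* Example encodings (added comment): 16 is the single byte 0x10, while 144
 * takes two bytes, 0x90 0x01 (low seven bits with the continuation bit set,
 * then the remaining bits). */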
810
811
812/**
813 * Emits a CFA rule as register @a uReg + offset @a off.
814 */
815DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
816{
817 *Ptr.pb++ = DW_CFA_def_cfa;
818 Ptr = iemDwarfPutUleb128(Ptr, uReg);
819 Ptr = iemDwarfPutUleb128(Ptr, off);
820 return Ptr;
821}
822
823
824/**
825 * Emits a register (@a uReg) save location:
826 * CFA + @a off * data_alignment_factor
827 */
828DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
829{
830 if (uReg < 0x40)
831 *Ptr.pb++ = DW_CFA_offset | uReg;
832 else
833 {
834 *Ptr.pb++ = DW_CFA_offset_extended;
835 Ptr = iemDwarfPutUleb128(Ptr, uReg);
836 }
837 Ptr = iemDwarfPutUleb128(Ptr, off);
838 return Ptr;
839}
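/* Illustrative reading (added comment): with the data alignment factor of -8
 * emitted in the CIE below, iemDwarfPutCfaOffset(Ptr, uReg, 2) produces
 * DW_CFA_offset | uReg followed by ULEB128(2), meaning "uReg is saved at
 * CFA + 2 * -8 = CFA - 16". */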
840
841
842# if 0 /* unused */
843/**
844 * Emits a register (@a uReg) save location, using signed offset:
845 * CFA + @a offSigned * data_alignment_factor
846 */
847DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
848{
849 *Ptr.pb++ = DW_CFA_offset_extended_sf;
850 Ptr = iemDwarfPutUleb128(Ptr, uReg);
851 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
852 return Ptr;
853}
854# endif
855
856
857/**
858 * Initializes the unwind info section for non-windows hosts.
859 */
860static int
861iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
862 void *pvChunk, uint32_t idxChunk)
863{
864 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
865 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
866
867 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
868
869 /*
870 * Generate the CIE first.
871 */
872# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
873 uint8_t const iDwarfVer = 3;
874# else
875 uint8_t const iDwarfVer = 4;
876# endif
877 RTPTRUNION const PtrCie = Ptr;
878 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
879 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
880 *Ptr.pb++ = iDwarfVer; /* DWARF version */
881 *Ptr.pb++ = 0; /* Augmentation. */
882 if (iDwarfVer >= 4)
883 {
884 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
885 *Ptr.pb++ = 0; /* Segment selector size. */
886 }
887# ifdef RT_ARCH_AMD64
888 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
889# else
890 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
891# endif
892 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
893# ifdef RT_ARCH_AMD64
894 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
895# elif defined(RT_ARCH_ARM64)
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
897# else
898# error "port me"
899# endif
900 /* Initial instructions: */
901# ifdef RT_ARCH_AMD64
902 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
910# elif defined(RT_ARCH_ARM64)
911# if 1
912 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
913# else
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
915# endif
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
928 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
929 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
930# else
931# error "port me"
932# endif
933 while ((Ptr.u - PtrCie.u) & 3)
934 *Ptr.pb++ = DW_CFA_nop;
935 /* Finalize the CIE size. */
936 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
937
938 /*
939 * Generate an FDE for the whole chunk area.
940 */
941# ifdef IEMNATIVE_USE_LIBUNWIND
942 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
943# endif
944 RTPTRUNION const PtrFde = Ptr;
945 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
946 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
947 Ptr.pu32++;
948 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
949 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
950# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
951 *Ptr.pb++ = DW_CFA_nop;
952# endif
953 while ((Ptr.u - PtrFde.u) & 3)
954 *Ptr.pb++ = DW_CFA_nop;
955 /* Finalize the FDE size. */
956 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
957
958 /* Terminator entry. */
959 *Ptr.pu32++ = 0;
960 *Ptr.pu32++ = 0; /* just to be sure... */
961 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
962
963 /*
964 * Register it.
965 */
966# ifdef IEMNATIVE_USE_LIBUNWIND
967 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
968# else
969 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
970 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
971# endif
972
973# ifdef IEMNATIVE_USE_GDB_JIT
974 /*
975 * Now for telling GDB about this (experimental).
976 *
977 * This seems to work best with ET_DYN.
978 */
979 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
980# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
981 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
982 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
983# else
984 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
985 - pExecMemAllocator->cbHeapBlockHdr;
986 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
987# endif
988 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
989 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
990
991 RT_ZERO(*pSymFile);
992
993 /*
994 * The ELF header:
995 */
996 pSymFile->EHdr.e_ident[0] = ELFMAG0;
997 pSymFile->EHdr.e_ident[1] = ELFMAG1;
998 pSymFile->EHdr.e_ident[2] = ELFMAG2;
999 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1000 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1001 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1002 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1003 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1004# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1005 pSymFile->EHdr.e_type = ET_DYN;
1006# else
1007 pSymFile->EHdr.e_type = ET_REL;
1008# endif
1009# ifdef RT_ARCH_AMD64
1010 pSymFile->EHdr.e_machine = EM_AMD64;
1011# elif defined(RT_ARCH_ARM64)
1012 pSymFile->EHdr.e_machine = EM_AARCH64;
1013# else
1014# error "port me"
1015# endif
1016 pSymFile->EHdr.e_version = 1; /*?*/
1017 pSymFile->EHdr.e_entry = 0;
1018# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1019 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1020# else
1021 pSymFile->EHdr.e_phoff = 0;
1022# endif
1023 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1024 pSymFile->EHdr.e_flags = 0;
1025 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1026# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1027 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1028 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1029# else
1030 pSymFile->EHdr.e_phentsize = 0;
1031 pSymFile->EHdr.e_phnum = 0;
1032# endif
1033 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1034 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1035 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1036
1037 uint32_t offStrTab = 0;
1038#define APPEND_STR(a_szStr) do { \
1039 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1040 offStrTab += sizeof(a_szStr); \
1041 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1042 } while (0)
1043#define APPEND_STR_FMT(a_szStr, ...) do { \
1044 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1045 offStrTab++; \
1046 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1047 } while (0)
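/* Note (added comment): APPEND_STR copies the string including its terminator,
 * so APPEND_STR(".text") advances offStrTab by sizeof(".text") = 6, while
 * APPEND_STR_FMT achieves the same via RTStrPrintf's return value plus one for
 * the terminator it already wrote. */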
1048
1049 /*
1050 * Section headers.
1051 */
1052 /* Section header #0: NULL */
1053 unsigned i = 0;
1054 APPEND_STR("");
1055 RT_ZERO(pSymFile->aShdrs[i]);
1056 i++;
1057
1058 /* Section header: .eh_frame */
1059 pSymFile->aShdrs[i].sh_name = offStrTab;
1060 APPEND_STR(".eh_frame");
1061 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1062 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1063# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1064 pSymFile->aShdrs[i].sh_offset
1065 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1066# else
1067 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1068 pSymFile->aShdrs[i].sh_offset = 0;
1069# endif
1070
1071 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1072 pSymFile->aShdrs[i].sh_link = 0;
1073 pSymFile->aShdrs[i].sh_info = 0;
1074 pSymFile->aShdrs[i].sh_addralign = 1;
1075 pSymFile->aShdrs[i].sh_entsize = 0;
1076 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1077 i++;
1078
1079 /* Section header: .shstrtab */
1080 unsigned const iShStrTab = i;
1081 pSymFile->EHdr.e_shstrndx = iShStrTab;
1082 pSymFile->aShdrs[i].sh_name = offStrTab;
1083 APPEND_STR(".shstrtab");
1084 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1085 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1086# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1087 pSymFile->aShdrs[i].sh_offset
1088 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1089# else
1090 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1091 pSymFile->aShdrs[i].sh_offset = 0;
1092# endif
1093 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1094 pSymFile->aShdrs[i].sh_link = 0;
1095 pSymFile->aShdrs[i].sh_info = 0;
1096 pSymFile->aShdrs[i].sh_addralign = 1;
1097 pSymFile->aShdrs[i].sh_entsize = 0;
1098 i++;
1099
1100 /* Section header: .symtab */
1101 pSymFile->aShdrs[i].sh_name = offStrTab;
1102 APPEND_STR(".symtab");
1103 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1104 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1105 pSymFile->aShdrs[i].sh_offset
1106 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1107 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1108 pSymFile->aShdrs[i].sh_link = iShStrTab;
1109 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1111 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1112 i++;
1113
1114# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1115 /* Section header: .dynsym */
1116 pSymFile->aShdrs[i].sh_name = offStrTab;
1117 APPEND_STR(".dynsym");
1118 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1119 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1120 pSymFile->aShdrs[i].sh_offset
1121 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1122 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1123 pSymFile->aShdrs[i].sh_link = iShStrTab;
1124 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1126 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1127 i++;
1128# endif
1129
1130# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1131 /* Section header: .dynamic */
1132 pSymFile->aShdrs[i].sh_name = offStrTab;
1133 APPEND_STR(".dynamic");
1134 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1135 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1136 pSymFile->aShdrs[i].sh_offset
1137 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1138 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1139 pSymFile->aShdrs[i].sh_link = iShStrTab;
1140 pSymFile->aShdrs[i].sh_info = 0;
1141 pSymFile->aShdrs[i].sh_addralign = 1;
1142 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1143 i++;
1144# endif
1145
1146 /* Section header: .text */
1147 unsigned const iShText = i;
1148 pSymFile->aShdrs[i].sh_name = offStrTab;
1149 APPEND_STR(".text");
1150 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1151 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1152# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1153 pSymFile->aShdrs[i].sh_offset
1154 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1155# else
1156 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1157 pSymFile->aShdrs[i].sh_offset = 0;
1158# endif
1159 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1160 pSymFile->aShdrs[i].sh_link = 0;
1161 pSymFile->aShdrs[i].sh_info = 0;
1162 pSymFile->aShdrs[i].sh_addralign = 1;
1163 pSymFile->aShdrs[i].sh_entsize = 0;
1164 i++;
1165
1166 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1167
1168# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1169 /*
1170 * The program headers:
1171 */
1172 /* Everything in a single LOAD segment: */
1173 i = 0;
1174 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1175 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1176 pSymFile->aPhdrs[i].p_offset
1177 = pSymFile->aPhdrs[i].p_vaddr
1178 = pSymFile->aPhdrs[i].p_paddr = 0;
1179 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1180 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1181 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1182 i++;
1183 /* The .dynamic segment. */
1184 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1185 pSymFile->aPhdrs[i].p_flags = PF_R;
1186 pSymFile->aPhdrs[i].p_offset
1187 = pSymFile->aPhdrs[i].p_vaddr
1188 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1189 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1190 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1191 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1192 i++;
1193
1194 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1195
1196 /*
1197 * The dynamic section:
1198 */
1199 i = 0;
1200 pSymFile->aDyn[i].d_tag = DT_SONAME;
1201 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1202 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1203 i++;
1204 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1205 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1206 i++;
1207 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1208 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1209 i++;
1210 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1211 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1212 i++;
1213 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1214 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_NULL;
1217 i++;
1218 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1219# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1220
1221 /*
1222 * Symbol tables:
1223 */
1224 /** @todo gdb doesn't seem to really like this ... */
1225 i = 0;
1226 pSymFile->aSymbols[i].st_name = 0;
1227 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1228 pSymFile->aSymbols[i].st_value = 0;
1229 pSymFile->aSymbols[i].st_size = 0;
1230 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1231 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1232# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1233 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1234# endif
1235 i++;
1236
1237 pSymFile->aSymbols[i].st_name = 0;
1238 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1239 pSymFile->aSymbols[i].st_value = 0;
1240 pSymFile->aSymbols[i].st_size = 0;
1241 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1242 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1243 i++;
1244
1245 pSymFile->aSymbols[i].st_name = offStrTab;
1246 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1247# if 0
1248 pSymFile->aSymbols[i].st_shndx = iShText;
1249 pSymFile->aSymbols[i].st_value = 0;
1250# else
1251 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1252 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1253# endif
1254 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1255 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1256 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1257# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1258 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1259 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1260# endif
1261 i++;
1262
1263 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1264 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1265
1266 /*
1267 * The GDB JIT entry and informing GDB.
1268 */
1269 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1270# if 1
1271 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1272# else
1273 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1274# endif
1275
1276 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1277 RTCritSectEnter(&g_IemNativeGdbJitLock);
1278 pEhFrame->GdbJitEntry.pNext = NULL;
1279 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1280 if (__jit_debug_descriptor.pTail)
1281 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1282 else
1283 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1284 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1285 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1286
1287 /* Notify GDB: */
1288 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1289 __jit_debug_register_code();
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1291 RTCritSectLeave(&g_IemNativeGdbJitLock);
1292
1293# else /* !IEMNATIVE_USE_GDB_JIT */
1294 RT_NOREF(pVCpu);
1295# endif /* !IEMNATIVE_USE_GDB_JIT */
1296
1297 return VINF_SUCCESS;
1298}
1299
1300# endif /* !RT_OS_WINDOWS */
1301#endif /* IN_RING3 */
1302
1303
1304/**
1305 * Adds another chunk to the executable memory allocator.
1306 *
1307 * This is used by the init code for the initial allocation and later by the
1308 * regular allocator function when it's out of memory.
1309 */
1310static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1311{
1312 /* Check that we've room for growth. */
1313 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1314 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1315
1316 /* Allocate a chunk. */
1317#ifdef RT_OS_DARWIN
1318 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1319#else
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1321#endif
1322 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1323
1324#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1325 int rc = VINF_SUCCESS;
1326#else
1327 /* Initialize the heap for the chunk. */
1328 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1329 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1330 AssertRC(rc);
1331 if (RT_SUCCESS(rc))
1332 {
1333 /*
1334 * We want the memory to be aligned on 64 byte, so the first time thru
1335 * here we do some exploratory allocations to see how we can achieve this.
1336 * On subsequent runs we only make an initial adjustment allocation, if
1337 * necessary.
1338 *
1339 * Since we own the heap implementation, we know that the internal block
1340 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1341 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1342 * to the size, align up by 64 bytes, and subtract 32 bytes.
1343 *
1344 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1345 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1346 * allocation to force subsequent allocations to return 64 byte aligned
1347 * user areas.
1348 */
1349 if (!pExecMemAllocator->cbHeapBlockHdr)
1350 {
1351 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1352 pExecMemAllocator->cbHeapAlignTweak = 64;
1353 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1354 32 /*cbAlignment*/);
1355 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1356
1357 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1358 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1359 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1360 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1361 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1362
1363 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1364 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1365 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1366 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1367 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1368
1369 RTHeapSimpleFree(hHeap, pvTest2);
1370 RTHeapSimpleFree(hHeap, pvTest1);
1371 }
1372 else
1373 {
1374 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1375 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1376 }
1377 if (RT_SUCCESS(rc))
1378#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1379 {
1380 /*
1381 * Add the chunk.
1382 *
1383 * This must be done before the unwind init so windows can allocate
1384 * memory from the chunk when using the alternative sub-allocator.
1385 */
1386 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1387#ifdef IN_RING3
1388 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1389#endif
1390#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1391 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1392#else
1393 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1394 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1395 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1396 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1397#endif
1398
1399 pExecMemAllocator->cChunks = idxChunk + 1;
1400 pExecMemAllocator->idxChunkHint = idxChunk;
1401
1402#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1403 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1404 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1405#else
1406 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1407 pExecMemAllocator->cbTotal += cbFree;
1408 pExecMemAllocator->cbFree += cbFree;
1409#endif
1410
1411#ifdef IN_RING3
1412 /*
1413 * Initialize the unwind information (this cannot really fail atm).
1414 * (This sets pvUnwindInfo.)
1415 */
1416 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1417 if (RT_SUCCESS(rc))
1418#endif
1419 {
1420 return VINF_SUCCESS;
1421 }
1422
1423#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1424 /* Just in case the impossible happens, undo the above: */
1425 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1426 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1427 pExecMemAllocator->cChunks = idxChunk;
1428 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1429 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1430 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1431 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1432#endif
1433 }
1434#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1435 }
1436#endif
1437 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1438 RT_NOREF(pVCpu);
1439 return rc;
1440}
1441
1442
1443/**
1444 * Initializes the executable memory allocator for native recompilation on the
1445 * calling EMT.
1446 *
1447 * @returns VBox status code.
1448 * @param pVCpu The cross context virtual CPU structure of the calling
1449 * thread.
1450 * @param cbMax The max size of the allocator.
1451 * @param cbInitial The initial allocator size.
1452 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1453 * dependent).
1454 */
1455int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1456{
1457 /*
1458 * Validate input.
1459 */
1460 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1461 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1462 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1463 || cbChunk == 0
1464 || ( RT_IS_POWER_OF_TWO(cbChunk)
1465 && cbChunk >= _1M
1466 && cbChunk <= _256M
1467 && cbChunk <= cbMax),
1468 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1469 VERR_OUT_OF_RANGE);
1470
1471 /*
1472 * Adjust/figure out the chunk size.
1473 */
1474 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1475 {
1476 if (cbMax >= _256M)
1477 cbChunk = _64M;
1478 else
1479 {
1480 if (cbMax < _16M)
1481 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1482 else
1483 cbChunk = (uint32_t)cbMax / 4;
1484 if (!RT_IS_POWER_OF_TWO(cbChunk))
1485 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1486 }
1487 }
1488
1489 if (cbChunk > cbMax)
1490 cbMax = cbChunk;
1491 else
1492 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1493 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1494 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
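    /* Worked example (added comment, not from the original source): cbMax=512M
       gives cbChunk=64M and cMaxChunks=8; cbMax=64M lands in the 16M..256M
       bracket, so cbChunk=64M/4=16M (already a power of two) and cMaxChunks=4. */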
1495
1496 /*
1497 * Allocate and initialize the allocator instance.
1498 */
1499 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1500#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1501 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1502 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1503 cbNeeded += cbBitmap * cMaxChunks;
1504 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1505 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1506#endif
1507#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1508 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1509 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1510#endif
1511 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1512 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1513 VERR_NO_MEMORY);
1514 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1515 pExecMemAllocator->cbChunk = cbChunk;
1516 pExecMemAllocator->cMaxChunks = cMaxChunks;
1517 pExecMemAllocator->cChunks = 0;
1518 pExecMemAllocator->idxChunkHint = 0;
1519 pExecMemAllocator->cAllocations = 0;
1520 pExecMemAllocator->cbTotal = 0;
1521 pExecMemAllocator->cbFree = 0;
1522 pExecMemAllocator->cbAllocated = 0;
1523#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1524 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1525 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1526 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1527 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1528#endif
1529#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1530 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1531#endif
1532 for (uint32_t i = 0; i < cMaxChunks; i++)
1533 {
1534#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1535 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1536 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1537#else
1538 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1539#endif
1540 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1541#ifdef IN_RING0
1542 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1543#else
1544 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1545#endif
1546 }
1547 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1548
1549 /*
1550 * Do the initial allocations.
1551 */
1552 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1553 {
1554 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1555 AssertLogRelRCReturn(rc, rc);
1556 }
1557
1558 pExecMemAllocator->idxChunkHint = 0;
1559
1560 return VINF_SUCCESS;
1561}
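
/*
 * Minimal usage sketch for the above, assuming it is called on the owning EMT
 * during initialization; the sizes are illustrative only and 0 for cbChunk
 * selects the default chunk size:
 * @code
 *      int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0);
 *      AssertLogRelRCReturn(rc, rc);
 * @endcode
 */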
1562
1563
1564/*********************************************************************************************************************************
1565* Native Recompilation *
1566*********************************************************************************************************************************/
1567
1568
1569/**
1570 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1571 */
1572IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1573{
1574 pVCpu->iem.s.cInstructions += idxInstr;
1575 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1576}
1577
1578
1579/**
1580 * Used by TB code when it wants to raise a \#GP(0).
1581 */
1582IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1583{
1584 pVCpu->iem.s.cInstructions += idxInstr;
1585 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1586#ifndef _MSC_VER
1587 return VINF_IEM_RAISED_XCPT; /* not reached */
1588#endif
1589}
1590
1591
1592/**
1593 * Reinitializes the native recompiler state.
1594 *
1595 * Called before starting a new recompile job.
1596 */
1597static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1598{
1599 pReNative->cLabels = 0;
1600 pReNative->bmLabelTypes = 0;
1601 pReNative->cFixups = 0;
1602#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1603 pReNative->pDbgInfo->cEntries = 0;
1604#endif
1605 pReNative->pTbOrg = pTb;
1606 pReNative->cCondDepth = 0;
1607 pReNative->uCondSeqNo = 0;
1608 pReNative->uCheckIrqSeqNo = 0;
1609
1610 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1611#if IEMNATIVE_HST_GREG_COUNT < 32
1612 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1613#endif
1614 ;
1615 pReNative->Core.bmHstRegsWithGstShadow = 0;
1616 pReNative->Core.bmGstRegShadows = 0;
1617 pReNative->Core.bmVars = 0;
1618 pReNative->Core.bmStack = 0;
1619 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1620 pReNative->Core.u64ArgVars = UINT64_MAX;
1621
1622 /* Full host register reinit: */
1623 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1624 {
1625 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1626 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1627 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1628 }
1629
1630 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1631 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1632#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1633 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1634#endif
1635#ifdef IEMNATIVE_REG_FIXED_TMP0
1636 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1637#endif
1638 );
1639 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1640 {
1641 fRegs &= ~RT_BIT_32(idxReg);
1642 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1643 }
1644
1645 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1646#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1647 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1648#endif
1649#ifdef IEMNATIVE_REG_FIXED_TMP0
1650 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1651#endif
1652 return pReNative;
1653}
1654
1655
1656/**
1657 * Allocates and initializes the native recompiler state.
1658 *
1659 * This is called the first time an EMT wants to recompile something.
1660 *
1661 * @returns Pointer to the new recompiler state.
1662 * @param pVCpu The cross context virtual CPU structure of the calling
1663 * thread.
1664 * @param pTb The TB that's about to be recompiled.
1665 * @thread EMT(pVCpu)
1666 */
1667static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1668{
1669 VMCPU_ASSERT_EMT(pVCpu);
1670
1671 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1672 AssertReturn(pReNative, NULL);
1673
1674 /*
1675 * Try allocate all the buffers and stuff we need.
1676 */
1677 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1678 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1679 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1680#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1681 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1682#endif
1683 if (RT_LIKELY( pReNative->pInstrBuf
1684 && pReNative->paLabels
1685 && pReNative->paFixups)
1686#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1687 && pReNative->pDbgInfo
1688#endif
1689 )
1690 {
1691 /*
1692 * Set the buffer & array sizes on success.
1693 */
1694 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1695 pReNative->cLabelsAlloc = _8K;
1696 pReNative->cFixupsAlloc = _16K;
1697#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1698 pReNative->cDbgInfoAlloc = _16K;
1699#endif
1700
1701 /*
1702 * Done, just need to save it and reinit it.
1703 */
1704 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1705 return iemNativeReInit(pReNative, pTb);
1706 }
1707
1708 /*
1709 * Failed. Cleanup and return.
1710 */
1711 AssertFailed();
1712 RTMemFree(pReNative->pInstrBuf);
1713 RTMemFree(pReNative->paLabels);
1714 RTMemFree(pReNative->paFixups);
1715#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1716 RTMemFree(pReNative->pDbgInfo);
1717#endif
1718 RTMemFree(pReNative);
1719 return NULL;
1720}
1721
1722
1723/**
1724 * Creates a label.
1725 *
1726 * If the label does not yet have a defined position,
1727 * call iemNativeLabelDefine() later to set it.
1728 *
1729 * @returns Label ID. Throws VBox status code on failure, so no need to check
1730 * the return value.
1731 * @param pReNative The native recompile state.
1732 * @param enmType The label type.
1733 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1734 * label is not yet defined (default).
1735 * @param uData Data associated with the label. Only applicable to
1736 * certain types of labels. Default is zero.
1737 */
1738DECL_HIDDEN_THROW(uint32_t)
1739iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1740 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
1741{
1742 /*
1743 * Locate existing label definition.
1744 *
1745 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1746 * and uData is zero.
1747 */
1748 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1749 uint32_t const cLabels = pReNative->cLabels;
1750 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1751#ifndef VBOX_STRICT
1752 && offWhere == UINT32_MAX
1753 && uData == 0
1754#endif
1755 )
1756 {
1757 /** @todo Since this is only used for labels with uData = 0, just use a
1758 * lookup array? */
1759 for (uint32_t i = 0; i < cLabels; i++)
1760 if ( paLabels[i].enmType == enmType
1761 && paLabels[i].uData == uData)
1762 {
1763#ifdef VBOX_STRICT
1764 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1765 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
1766#endif
1767 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
1768 return i;
1769 }
1770 }
1771
1772 /*
1773 * Make sure we've got room for another label.
1774 */
1775 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1776 { /* likely */ }
1777 else
1778 {
1779 uint32_t cNew = pReNative->cLabelsAlloc;
1780 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1781 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
1782 cNew *= 2;
1783 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1784 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1785 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
1786 pReNative->paLabels = paLabels;
1787 pReNative->cLabelsAlloc = cNew;
1788 }
1789
1790 /*
1791 * Define a new label.
1792 */
1793 paLabels[cLabels].off = offWhere;
1794 paLabels[cLabels].enmType = enmType;
1795 paLabels[cLabels].uData = uData;
1796 pReNative->cLabels = cLabels + 1;
1797
1798 Assert((unsigned)enmType < 64);
1799 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1800
1801 if (offWhere != UINT32_MAX)
1802 {
1803#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1804 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1805 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1806#endif
1807 }
1808 return cLabels;
1809}
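
/*
 * Usage sketch: create a forward label, emit code that references it via a
 * fixup, and pin it down later once the target offset is known.  The label
 * type value (enmSomeLabelType) is a placeholder for one of the
 * IEMNATIVELABELTYPE values, not an identifier from this file:
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType, UINT32_MAX, 0);
 *      // ... emit code, adding fixups that reference idxLabel ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 * @endcode
 */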
1810
1811
1812/**
1813 * Defines the location of an existing label.
1814 *
1815 * @param pReNative The native recompile state.
1816 * @param idxLabel The label to define.
1817 * @param offWhere The position.
1818 */
1819DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
1820{
1821 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
1822 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1823 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
1824 pLabel->off = offWhere;
1825#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1826 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1827 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1828#endif
1829}
1830
1831
1832/**
1833 * Looks up a label.
1834 *
1835 * @returns Label ID if found, UINT32_MAX if not.
1836 */
1837static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1838 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1839{
1840 Assert((unsigned)enmType < 64);
1841 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1842 {
1843 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1844 uint32_t const cLabels = pReNative->cLabels;
1845 for (uint32_t i = 0; i < cLabels; i++)
1846 if ( paLabels[i].enmType == enmType
1847 && paLabels[i].uData == uData
1848 && ( paLabels[i].off == offWhere
1849 || offWhere == UINT32_MAX
1850 || paLabels[i].off == UINT32_MAX))
1851 return i;
1852 }
1853 return UINT32_MAX;
1854}
1855
1856
1857/**
1858 * Adds a fixup.
1859 *
1860 * @throws VBox status code (int) on failure.
1861 * @param pReNative The native recompile state.
1862 * @param offWhere The instruction offset of the fixup location.
1863 * @param idxLabel The target label ID for the fixup.
1864 * @param enmType The fixup type.
1865 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1866 */
1867DECL_HIDDEN_THROW(void)
1868iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1869 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
1870{
1871 Assert(idxLabel <= UINT16_MAX);
1872 Assert((unsigned)enmType <= UINT8_MAX);
1873
1874 /*
1875 * Make sure we've room.
1876 */
1877 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1878 uint32_t const cFixups = pReNative->cFixups;
1879 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1880 { /* likely */ }
1881 else
1882 {
1883 uint32_t cNew = pReNative->cFixupsAlloc;
1884 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1885 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
1886 cNew *= 2;
1887 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
1888 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1889 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
1890 pReNative->paFixups = paFixups;
1891 pReNative->cFixupsAlloc = cNew;
1892 }
1893
1894 /*
1895 * Add the fixup.
1896 */
1897 paFixups[cFixups].off = offWhere;
1898 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1899 paFixups[cFixups].enmType = enmType;
1900 paFixups[cFixups].offAddend = offAddend;
1901 pReNative->cFixups = cFixups + 1;
1902}
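
/*
 * Usage sketch: tie a just-emitted branch to a (possibly still undefined)
 * label; enmSomeFixupType stands in for one of the IEMNATIVEFIXUPTYPE values.
 * The displacement is patched once the label offset is known, with the addend
 * defaulting to zero:
 * @code
 *      iemNativeAddFixup(pReNative, off, idxLabel, enmSomeFixupType, 0);
 * @endcode
 */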
1903
1904
1905/**
1906 * Slow code path for iemNativeInstrBufEnsure.
1907 */
1908DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
1909{
1910 /* Double the buffer size till we meet the request. */
1911 uint32_t cNew = pReNative->cInstrBufAlloc;
1912 AssertReturn(cNew > 0, NULL);
1913 do
1914 cNew *= 2;
1915 while (cNew < off + cInstrReq);
1916
1917 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1918#ifdef RT_ARCH_ARM64
1919 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
1920#else
1921 uint32_t const cbMaxInstrBuf = _2M;
1922#endif
1923 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
1924
1925 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1926 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
1927
1928 pReNative->cInstrBufAlloc = cNew;
1929 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1930}
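
/*
 * Growth example (illustrative numbers): starting from the initial 64 KiB
 * buffer, a request that no longer fits triggers one or more doublings (128K,
 * 256K, ...) until off + cInstrReq fits, with the hard cap above making the
 * recompile bail out via IEMNATIVE_DO_LONGJMP and VERR_IEM_INSTR_BUF_TOO_LARGE
 * once 1 MiB (ARM64) or 2 MiB is exceeded.
 */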
1931
1932#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1933
1934/**
1935 * Grows the static debug info array used during recompilation.
1936 *
1937 * @returns Pointer to the new debug info block; throws VBox status code on
1938 * failure, so no need to check the return value.
1939 */
1940DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1941{
1942 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
1943 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
1944 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
1945 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
1946 pReNative->pDbgInfo = pDbgInfo;
1947 pReNative->cDbgInfoAlloc = cNew;
1948 return pDbgInfo;
1949}
1950
1951
1952/**
1953 * Adds a new debug info uninitialized entry, returning the pointer to it.
1954 */
1955DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1956{
1957 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
1958 { /* likely */ }
1959 else
1960 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
1961 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
1962}
1963
1964
1965/**
1966 * Debug Info: Adds a native offset record, if necessary.
1967 */
1968static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1969{
1970 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
1971
1972 /*
1973 * Search backwards to see if we've got a similar record already.
1974 */
1975 uint32_t idx = pDbgInfo->cEntries;
1976 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
1977 while (idx-- > idxStop)
1978 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
1979 {
1980 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
1981 return;
1982 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
1983 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
1984 break;
1985 }
1986
1987 /*
1988 * Add it.
1989 */
1990 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
1991 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
1992 pEntry->NativeOffset.offNative = off;
1993}
1994
1995
1996/**
1997 * Debug Info: Record info about a label.
1998 */
1999static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2000{
2001 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2002 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2003 pEntry->Label.uUnused = 0;
2004 pEntry->Label.enmLabel = (uint8_t)enmType;
2005 pEntry->Label.uData = uData;
2006}
2007
2008
2009/**
2010 * Debug Info: Record info about a threaded call.
2011 */
2012static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2013{
2014 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2015 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2016 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2017 pEntry->ThreadedCall.uUnused = 0;
2018 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2019}
2020
2021
2022/**
2023 * Debug Info: Record info about a new guest instruction.
2024 */
2025static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2026{
2027 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2028 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2029 pEntry->GuestInstruction.uUnused = 0;
2030 pEntry->GuestInstruction.fExec = fExec;
2031}
2032
2033
2034/**
2035 * Debug Info: Record info about guest register shadowing.
2036 */
2037static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2038 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
2039{
2040 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2041 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2042 pEntry->GuestRegShadowing.uUnused = 0;
2043 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2044 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2045 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2046}
2047
2048#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2049
2050
2051/*********************************************************************************************************************************
2052* Register Allocator *
2053*********************************************************************************************************************************/
2054
2055/**
2056 * Register parameter indexes (indexed by argument number).
2057 */
2058DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2059{
2060 IEMNATIVE_CALL_ARG0_GREG,
2061 IEMNATIVE_CALL_ARG1_GREG,
2062 IEMNATIVE_CALL_ARG2_GREG,
2063 IEMNATIVE_CALL_ARG3_GREG,
2064#if defined(IEMNATIVE_CALL_ARG4_GREG)
2065 IEMNATIVE_CALL_ARG4_GREG,
2066# if defined(IEMNATIVE_CALL_ARG5_GREG)
2067 IEMNATIVE_CALL_ARG5_GREG,
2068# if defined(IEMNATIVE_CALL_ARG6_GREG)
2069 IEMNATIVE_CALL_ARG6_GREG,
2070# if defined(IEMNATIVE_CALL_ARG7_GREG)
2071 IEMNATIVE_CALL_ARG7_GREG,
2072# endif
2073# endif
2074# endif
2075#endif
2076};
2077
2078/**
2079 * Call register masks indexed by argument count.
2080 */
2081DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2082{
2083 0,
2084 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2085 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2086 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2087 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2088 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2089#if defined(IEMNATIVE_CALL_ARG4_GREG)
2090 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2091 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2092# if defined(IEMNATIVE_CALL_ARG5_GREG)
2093 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2094 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2095# if defined(IEMNATIVE_CALL_ARG6_GREG)
2096 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2097 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2098 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2099# if defined(IEMNATIVE_CALL_ARG7_GREG)
2100 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2101 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2102 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2103# endif
2104# endif
2105# endif
2106#endif
2107};
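
/*
 * For example, the two argument entry works out to the ARG0 and ARG1 bits:
 * @code
 *      Assert(g_afIemNativeCallRegs[2] == (RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)));
 * @endcode
 * This is what iemNativeRegAllocArgs() below uses to check in one go whether
 * all the required argument registers are still free.
 */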
2108
2109#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2110/**
2111 * BP offset of the stack argument slots.
2112 *
2113 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2114 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2115 */
2116DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2117{
2118 IEMNATIVE_FP_OFF_STACK_ARG0,
2119# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2120 IEMNATIVE_FP_OFF_STACK_ARG1,
2121# endif
2122# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2123 IEMNATIVE_FP_OFF_STACK_ARG2,
2124# endif
2125# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2126 IEMNATIVE_FP_OFF_STACK_ARG3,
2127# endif
2128};
2129AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2130#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2131
2132/**
2133 * Info about shadowed guest register values.
2134 * @see IEMNATIVEGSTREG
2135 */
2136static struct
2137{
2138 /** Offset in VMCPU. */
2139 uint32_t off;
2140 /** The field size. */
2141 uint8_t cb;
2142 /** Name (for logging). */
2143 const char *pszName;
2144} const g_aGstShadowInfo[] =
2145{
2146#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2147 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2148 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2149 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2150 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2151 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2152 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2153 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2154 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2155 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2156 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2157 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2158 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2159 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2160 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2161 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2162 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2163 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2164 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2165 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2166 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2167 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2168 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2169 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2170 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2171 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2172 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2173 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2174 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2175 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2176 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2177 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2178 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2179 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2180 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2181 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2182 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2183#undef CPUMCTX_OFF_AND_SIZE
2184};
2185AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
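
/*
 * Example lookup (sketch): the RIP entry gives the byte offset of
 * cpum.GstCtx.rip within VMCPU and its 8 byte size, which the shadow
 * load/store emitters use when accessing the guest value:
 * @code
 *      Assert(g_aGstShadowInfo[kIemNativeGstReg_Pc].cb == sizeof(uint64_t));
 * @endcode
 */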
2186
2187
2188/** Host CPU general purpose register names. */
2189DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2190{
2191#ifdef RT_ARCH_AMD64
2192 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2193#elif RT_ARCH_ARM64
2194 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2195 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2196#else
2197# error "port me"
2198#endif
2199};
2200
2201
2202DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2203 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2204{
2205 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2206
2207 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
2208 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2209 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
2210 return (uint8_t)idxReg;
2211}
2212
2213
2214/**
2215 * Tries to locate a suitable register in the given register mask.
2216 *
2217 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2218 * failed.
2219 *
2220 * @returns Host register number on success, returns UINT8_MAX on failure.
2221 */
2222static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2223{
2224 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2225 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2226 if (fRegs)
2227 {
2228 /** @todo pick better here: */
2229 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2230
2231 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2232 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2233 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2234 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2235
2236 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2237 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2238 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2239 return idxReg;
2240 }
2241 return UINT8_MAX;
2242}
2243
2244
2245/**
2246 * Locate a register, possibly freeing one up.
2247 *
2248 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2249 * failed.
2250 *
2251 * @returns Host register number on success. Returns UINT8_MAX if no register
2252 * was found; the caller is supposed to deal with this and raise an
2253 * allocation type specific status code (if desired).
2254 *
2255 * @throws VBox status code if we run into trouble spilling a variable or
2256 * recording debug info. Does NOT throw anything if we're out of
2257 * registers, though.
2258 */
2259static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2260 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2261{
2262 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2263 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2264
2265 /*
2266 * Try a freed register that's shadowing a guest register
2267 */
2268 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2269 if (fRegs)
2270 {
2271 unsigned const idxReg = (fPreferVolatile
2272 ? ASMBitFirstSetU32(fRegs)
2273 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2274 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
2275 - 1;
2276
2277 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2278 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2279 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2280 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2281
2282 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2283 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2284 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2285 return idxReg;
2286 }
2287
2288 /*
2289 * Try to free up a variable that's in a register.
2290 *
2291 * We do two rounds here: first we evacuate variables that don't need to be
2292 * saved on the stack, then in the second round we move things to the stack.
2293 */
2294 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2295 {
2296 uint32_t fVars = pReNative->Core.bmVars;
2297 while (fVars)
2298 {
2299 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2300 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2301 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2302 && (RT_BIT_32(idxReg) & fRegMask)
2303 && ( iLoop == 0
2304 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2305 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2306 {
2307 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2308 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2309 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2310 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2311 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2312
2313 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2314 {
2315 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
2316 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
2317 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeVarCalcBpDisp(pReNative, idxVar), idxReg);
2318 }
2319
2320 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2321 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2322 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2323 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2324 return idxReg;
2325 }
2326 fVars &= ~RT_BIT_32(idxVar);
2327 }
2328 }
2329
2330 return UINT8_MAX;
2331}
2332
2333
2334/**
2335 * Moves a variable to a different register or spills it onto the stack.
2336 *
2337 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2338 * kinds can easily be recreated if needed later.
2339 *
2340 * @returns The new code buffer position, UINT32_MAX on failure.
2341 * @param pReNative The native recompile state.
2342 * @param off The current code buffer position.
2343 * @param idxVar The variable index.
2344 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2345 * call-volatile registers.
2346 */
2347static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2348 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2349{
2350 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2351 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2352
2353 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
2354 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
2355 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
2356 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2357 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
2358 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
2359 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2360 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
2361
2362
2363 /** @todo Add statistics on this.*/
2364 /** @todo Implement basic variable liveness analysis (python) so variables
2365 * can be freed immediately once no longer used. This has the potential to
2366 * be trashing registers and stack for dead variables. */
2367
2368 /*
2369 * First try move it to a different register, as that's cheaper.
2370 */
2371 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2372 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2373 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
2374 if (fRegs)
2375 {
2376 /* Avoid using shadow registers, if possible. */
2377 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
2378 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
2379 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2380 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
2381
2382 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2383 Log12(("iemNativeRegMoveOrSpillStackVar: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
2384 idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
2385 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2386 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2387 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2388 if (fGstRegShadows)
2389 {
2390 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2391 while (fGstRegShadows)
2392 {
2393 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2394 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2395
2396 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2397 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2398 }
2399 }
2400
2401 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2402 pReNative->Core.bmHstRegs |= RT_BIT_32(idxRegNew);
2403 }
2404 /*
2405 * Otherwise we must spill the register onto the stack.
2406 */
2407 else
2408 {
2409 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
2410 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
2411 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
2412 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_7));
2413 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
2414
2415 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2416 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2417 }
2418
2419 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
2420 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2421 return off;
2422}
2423
2424
2425/**
2426 * Allocates a temporary host general purpose register.
2427 *
2428 * This may emit code to save register content onto the stack in order to free
2429 * up a register.
2430 *
2431 * @returns The host register number; throws VBox status code on failure,
2432 * so no need to check the return value.
2433 * @param pReNative The native recompile state.
2434 * @param poff Pointer to the variable with the code buffer position.
2435 * This will be updated if we need to move a variable from
2436 * register to stack in order to satisfy the request.
2437 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2438 * registers (@c true, default) or the other way around
2439 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2440 */
2441DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
2442{
2443 /*
2444 * Try to find a completely unused register, preferably a call-volatile one.
2445 */
2446 uint8_t idxReg;
2447 uint32_t fRegs = ~pReNative->Core.bmHstRegs
2448 & ~pReNative->Core.bmHstRegsWithGstShadow
2449 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2450 if (fRegs)
2451 {
2452 if (fPreferVolatile)
2453 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2454 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2455 else
2456 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2457 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2458 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2459 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2460 }
2461 else
2462 {
2463 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
2464 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
2465 }
2466 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2467}
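
/*
 * Usage sketch, assuming the usual emitter pattern where 'off' tracks the
 * current code buffer position:
 * @code
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      // ... emit code using idxTmpReg as scratch ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 * @endcode
 */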
2468
2469
2470/**
2471 * Allocates a temporary register for loading an immediate value into.
2472 *
2473 * This will emit code to load the immediate, unless there happens to be an
2474 * unused register with the value already loaded.
2475 *
2476 * The caller will not modify the returned register, it must be considered
2477 * read-only. Free using iemNativeRegFreeTmpImm.
2478 *
2479 * @returns The host register number; throws VBox status code on failure, so no
2480 * need to check the return value.
2481 * @param pReNative The native recompile state.
2482 * @param poff Pointer to the variable with the code buffer position.
2483 * @param uImm The immediate value that the register must hold upon
2484 * return.
2485 * @param fPreferVolatile Whether to prefer volatile over non-volatile
2486 * registers (@c true, default) or the other way around
2487 * (@c false).
2488 *
2489 * @note Reusing immediate values has not been implemented yet.
2490 */
2491DECL_HIDDEN_THROW(uint8_t)
2492iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
2493{
2494 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2495 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
2496 return idxReg;
2497}
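
/*
 * Usage sketch: the register comes back already loaded with the immediate
 * (the value 0x1000 below is just an example) and must be treated as
 * read-only until released:
 * @code
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x1000));
 *      // ... emit code that only reads idxRegImm ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 * @endcode
 */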
2498
2499
2500/**
2501 * Marks host register @a idxHstReg as containing a shadow copy of guest
2502 * register @a enmGstReg.
2503 *
2504 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2505 * host register before calling.
2506 */
2507DECL_FORCE_INLINE(void)
2508iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2509{
2510 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
2511
2512 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
2513 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2514 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
2515 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2516#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2517 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2518 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2519#else
2520 RT_NOREF(off);
2521#endif
2522}
2523
2524
2525/**
2526 * Clear any guest register shadow claims from @a idxHstReg.
2527 *
2528 * The register does not need to be shadowing any guest registers.
2529 */
2530DECL_FORCE_INLINE(void)
2531iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2532{
2533 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2534 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2535 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2536 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2537
2538#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2539 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2540 if (fGstRegs)
2541 {
2542 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2543 while (fGstRegs)
2544 {
2545 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2546 fGstRegs &= ~RT_BIT_64(iGstReg);
2547 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2548 }
2549 }
2550#else
2551 RT_NOREF(off);
2552#endif
2553
2554 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2555 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2556 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2557}
2558
2559
2560/**
2561 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2562 * to @a idxRegTo.
2563 */
2564DECL_FORCE_INLINE(void)
2565iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2566 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2567{
2568 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2569 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
2570 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows);
2571 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2572 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
2573
2574 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2575 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2576 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
2577#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2578 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2579 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2580#else
2581 RT_NOREF(off);
2582#endif
2583}
2584
2585
2586/**
2587 * Allocates a temporary host general purpose register for keeping a guest
2588 * register value.
2589 *
2590 * Since we may already have a register holding the guest register value,
2591 * code will be emitted to do the loading if that's not the case. Code may also
2592 * be emitted if we have to free up a register to satisfy the request.
2593 *
2594 * @returns The host register number; throws VBox status code on failure, so no
2595 * need to check the return value.
2596 * @param pReNative The native recompile state.
2597 * @param poff Pointer to the variable with the code buffer
2598 * position. This will be updated if we need to move a
2599 * variable from register to stack in order to satisfy
2600 * the request.
2601 * @param enmGstReg The guest register that is to be used.
2602 * @param enmIntendedUse How the caller will be using the host register.
2603 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
2604 */
2605DECL_HIDDEN_THROW(uint8_t)
2606iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2607 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse)
2608{
2609 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2610#ifdef LOG_ENABLED
2611 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2612#endif
2613
2614 /*
2615 * First check if the guest register value is already in a host register.
2616 */
2617 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2618 {
2619 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2620 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2621 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2622 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2623
2624 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2625 {
2626 /*
2627 * If the register will trash the guest shadow copy, try to find a
2628 * completely unused register we can use instead. If that fails,
2629 * we need to disassociate the host reg from the guest reg.
2630 */
2631 /** @todo would be nice to know if preserving the register is in any way helpful. */
2632 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2633 && ( ~pReNative->Core.bmHstRegs
2634 & ~pReNative->Core.bmHstRegsWithGstShadow
2635 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2636 {
2637 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2638
2639 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2640
2641 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2642 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2643 g_apszIemNativeHstRegNames[idxRegNew]));
2644 idxReg = idxRegNew;
2645 }
2646 else
2647 {
2648 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2649 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2650 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2651 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2652 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2653 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2654 else
2655 {
2656 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2657 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2658 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2659 }
2660 }
2661 }
2662 else
2663 {
2664 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2665 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2666
2667 /*
2668 * Allocate a new register, copy the value and, if updating, the
2669 * guest shadow copy assignment to the new register.
2670 */
2671 /** @todo share register for readonly access. */
2672 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2673
2674 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
2675
2676 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2677 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2678 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2679 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2680 else
2681 {
2682 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2683 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2684 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2685 g_apszIemNativeHstRegNames[idxRegNew]));
2686 }
2687 idxReg = idxRegNew;
2688 }
2689
2690#ifdef VBOX_STRICT
2691 /* Strict builds: Check that the value is correct. */
2692 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2693#endif
2694
2695 return idxReg;
2696 }
2697
2698 /*
2699 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
2700 */
2701 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2702
2703 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
2704
2705 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2706 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
2707 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2708 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2709
2710 return idxRegNew;
2711}
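
/*
 * Usage sketch: grab the shadow copy of guest RAX for updating.  The enum
 * arithmetic mirrors how g_aGstShadowInfo is indexed above and is meant as an
 * illustration, not necessarily the exact spelling used elsewhere:
 * @code
 *      uint8_t const idxGprReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                                kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying idxGprReg; the shadow association is kept ...
 *      iemNativeRegFreeTmp(pReNative, idxGprReg);
 * @endcode
 */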
2712
2713
2714/**
2715 * Allocates a temporary host general purpose register that already holds the
2716 * given guest register value.
2717 *
2718 * The use case for this function is places where the shadowing state cannot be
2719 * modified due to branching and such. This will fail if we don't have a
2720 * current shadow copy handy or if it's incompatible. The only code that will
2721 * be emitted here is value checking code in strict builds.
2722 *
2723 * The intended use can only be readonly!
2724 *
2725 * @returns The host register number, UINT8_MAX if not present.
2726 * @param pReNative The native recompile state.
2727 * @param poff Pointer to the instruction buffer offset.
2728 * Will be updated in strict builds if a register is
2729 * found.
2730 * @param enmGstReg The guest register that is to be used (read-only).
2731 * @note In strict builds, this may throw instruction buffer growth failures.
2732 * Non-strict builds will not throw anything.
2733 * @sa iemNativeRegAllocTmpForGuestReg
2734 */
2735DECL_HIDDEN_THROW(uint8_t)
2736iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
2737{
2738 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2739
2740 /*
2741 * First check if the guest register value is already in a host register.
2742 */
2743 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
2744 {
2745 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2746 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2747 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2748 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2749
2750 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
2751 {
2752 /*
2753 * We only do readonly use here, so easy compared to the other
2754 * variant of this code.
2755 */
2756 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
2757 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2758 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2759 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
2760 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2761
2762#ifdef VBOX_STRICT
2763 /* Strict builds: Check that the value is correct. */
2764 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
2765#else
2766 RT_NOREF(poff);
2767#endif
2768 return idxReg;
2769 }
2770 }
2771
2772 return UINT8_MAX;
2773}
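
/*
 * Usage sketch: unlike iemNativeRegAllocTmpForGuestReg() this never emits a
 * load, so the UINT8_MAX case must be handled by the caller:
 * @code
 *      uint8_t const idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
 *      if (idxRegEfl != UINT8_MAX)
 *      {
 *          // ... read-only use of idxRegEfl ...
 *          iemNativeRegFreeTmp(pReNative, idxRegEfl);
 *      }
 * @endcode
 */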
2774
2775
2776DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
2777
2778
2779/**
2780 * Allocates argument registers for a function call.
2781 *
2782 * @returns New code buffer offset on success; throws VBox status code on failure, so no
2783 * need to check the return value.
2784 * @param pReNative The native recompile state.
2785 * @param off The current code buffer offset.
2786 * @param cArgs The number of arguments the function call takes.
2787 */
2788DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2789{
2790 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
2791 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
2792 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2793 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2794
2795 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2796 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2797 else if (cArgs == 0)
2798 return off;
2799
2800 /*
2801 * Do we get lucky and all registers are free and not shadowing anything?
2802 */
2803 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2804 for (uint32_t i = 0; i < cArgs; i++)
2805 {
2806 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2807 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2808 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2809 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2810 }
2811 /*
2812 * Okay, not lucky so we have to free up the registers.
2813 */
2814 else
2815 for (uint32_t i = 0; i < cArgs; i++)
2816 {
2817 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2818 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
2819 {
2820 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2821 {
2822 case kIemNativeWhat_Var:
2823 {
2824 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2825 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
2826 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2827 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2828 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2829
2830 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2831 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2832 else
2833 {
2834 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2835 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2836 }
2837 break;
2838 }
2839
2840 case kIemNativeWhat_Tmp:
2841 case kIemNativeWhat_Arg:
2842 case kIemNativeWhat_rc:
2843 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
2844 default:
2845 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
2846 }
2847
2848 }
2849 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2850 {
2851 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2852 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2853 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2854 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2855 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2856 }
2857 else
2858 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
2859 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2860 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
2861 }
2862 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2863 return off;
2864}
2865
2866
2867DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
2868
2869
2870#if 0
2871/**
2872 * Frees a register assignment of any type.
2873 *
2874 * @param pReNative The native recompile state.
2875 * @param idxHstReg The register to free.
2876 *
2877 * @note Does not update variables.
2878 */
2879DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2880{
2881 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2882 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2883 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2884 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2885 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2886 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2887 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2888 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2889 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2890 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
2891 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
2892 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
2893 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2894 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2895
2896 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2897 /* no flushing, right:
2898 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2899 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2900 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
2901 */
2902}
2903#endif
2904
2905
2906/**
2907 * Frees a temporary register.
2908 *
2909 * Any shadow copies of guest registers assigned to the host register will not
2910 * be flushed by this operation.
2911 */
2912DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2913{
2914 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
2915 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2916 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2917 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2918 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
2919}
2920
2921
2922/**
2923 * Frees a temporary immediate register.
2924 *
2925 * It is assumed that the call has not modified the register, so it still holds
2926 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
2927 */
2928DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2929{
2930 iemNativeRegFreeTmp(pReNative, idxHstReg);
2931}
2932
2933
2934/**
2935 * Called right before emitting a call instruction to move anything important
2936 * out of call-volatile registers, free and flush the call-volatile registers,
2937 * optionally freeing argument variables.
2938 *
2939 * @returns New code buffer offset; throws VBox status code on failure.
2940 * @param pReNative The native recompile state.
2941 * @param off The code buffer offset.
2942 * @param cArgs The number of arguments the function call takes.
2943 *                      It is presumed that the host register part of these has
2944 * been allocated as such already and won't need moving,
2945 * just freeing.
2946 */
2947DECL_HIDDEN_THROW(uint32_t)
2948iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
2949{
2950 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
2951
2952 /*
2953 * Move anything important out of volatile registers.
2954 */
2955 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2956 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2957 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2958#ifdef IEMNATIVE_REG_FIXED_TMP0
2959 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2960#endif
2961 & ~g_afIemNativeCallRegs[cArgs];
2962
2963 fRegsToMove &= pReNative->Core.bmHstRegs;
2964 if (!fRegsToMove)
2965 { /* likely */ }
2966 else
2967 {
2968 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
2969 while (fRegsToMove != 0)
2970 {
2971 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2972 fRegsToMove &= ~RT_BIT_32(idxReg);
2973
2974 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
2975 {
2976 case kIemNativeWhat_Var:
2977 {
2978 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
2979 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
2980 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
2981 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
2982 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
2983 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
2984 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2985 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2986 else
2987 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2988 continue;
2989 }
2990
2991 case kIemNativeWhat_Arg:
2992 AssertMsgFailed(("What?!?: %u\n", idxReg));
2993 continue;
2994
2995 case kIemNativeWhat_rc:
2996 case kIemNativeWhat_Tmp:
2997 AssertMsgFailed(("Missing free: %u\n", idxReg));
2998 continue;
2999
3000 case kIemNativeWhat_FixedTmp:
3001 case kIemNativeWhat_pVCpuFixed:
3002 case kIemNativeWhat_pCtxFixed:
3003 case kIemNativeWhat_FixedReserved:
3004 case kIemNativeWhat_Invalid:
3005 case kIemNativeWhat_End:
3006 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3007 }
3008 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3009 }
3010 }
3011
3012 /*
3013 * Do the actual freeing.
3014 */
3015 if (pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3016 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n", pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3017 pReNative->Core.bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3018
3019 /* If there are guest register shadows in any call-volatile register, we
3020       have to clear the corresponding guest register masks for each register. */
3021 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3022 if (fHstRegsWithGstShadow)
3023 {
3024 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3025 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
3026 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
3027 do
3028 {
3029 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
3030 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3031
3032 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
3033 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3034 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3035 } while (fHstRegsWithGstShadow != 0);
3036 }
3037
3038 return off;
3039}
3040
3041
3042/**
3043 * Flushes a set of guest register shadow copies.
3044 *
3045 * This is usually done after calling a threaded function or a C-implementation
3046 * of an instruction.
3047 *
3048 * @param pReNative The native recompile state.
3049 * @param fGstRegs Set of guest registers to flush.
3050 */
3051DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
3052{
3053 /*
3054 * Reduce the mask by what's currently shadowed
3055 */
3056 fGstRegs &= pReNative->Core.bmGstRegShadows;
3057 if (fGstRegs)
3058 {
3059 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n",
3060 fGstRegs, pReNative->Core.bmGstRegShadows, pReNative->Core.bmGstRegShadows & ~fGstRegs));
3061 pReNative->Core.bmGstRegShadows &= ~fGstRegs;
3062 if (pReNative->Core.bmGstRegShadows)
3063 {
3064 /*
3065 * Partial.
3066 */
3067 do
3068 {
3069 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3070 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3071 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3072 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3073 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3074
3075 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
3076 fGstRegs &= ~fInThisHstReg;
3077 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows &= ~fInThisHstReg;
3078 if (!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3079 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3080 } while (fGstRegs != 0);
3081 }
3082 else
3083 {
3084 /*
3085 * Clear all.
3086 */
3087 do
3088 {
3089 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3090 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3091 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3092 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3093 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3094
3095 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3096 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3097 } while (fGstRegs != 0);
3098 pReNative->Core.bmHstRegsWithGstShadow = 0;
3099 }
3100 }
3101}
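/* Worked example of the bookkeeping above (values made up for illustration):
   say host register 3 currently shadows guest RAX and EFLAGS, i.e. both bits
   are set in aHstRegs[3].fGstRegShadows.  Flushing just RAX clears the RAX bit
   from that mask and from bmGstRegShadows, but leaves RT_BIT_32(3) set in
   bmHstRegsWithGstShadow because the EFLAGS shadow remains; only when the last
   shadow of a host register goes away is its bmHstRegsWithGstShadow bit
   cleared as well. */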
3102
3103
3104/**
3105 * Flushes any delayed guest register writes.
3106 *
3107 * This must be called prior to calling CImpl functions and any helpers that use
3108 * the guest state (like raising exceptions) and such.
3109 *
3110 * This optimization has not yet been implemented. The first target would be
3111 * RIP updates, since these are the most common ones.
3112 */
3113DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3114{
3115 RT_NOREF(pReNative, off);
3116 return off;
3117}
3118
3119
3120#ifdef VBOX_STRICT
3121/**
3122 * Does internal register allocator sanity checks.
3123 */
3124static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
3125{
3126 /*
3127 * Iterate host registers building a guest shadowing set.
3128 */
3129 uint64_t bmGstRegShadows = 0;
3130 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
3131 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
3132 while (bmHstRegsWithGstShadow)
3133 {
3134 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
3135 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3136 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3137
3138 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3139 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
3140 bmGstRegShadows |= fThisGstRegShadows;
3141 while (fThisGstRegShadows)
3142 {
3143 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
3144 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
3145 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
3146 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
3147 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
3148 }
3149 }
3150 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
3151 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
3152 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
3153
3154 /*
3155 * Now the other way around, checking the guest to host index array.
3156 */
3157 bmHstRegsWithGstShadow = 0;
3158 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
3159 while (bmGstRegShadows)
3160 {
3161 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
3162 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
3163 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
3164
3165 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3166 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
3167 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
3168 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
3169 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3170 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
3171 }
3172 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
3173 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
3174 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
3175}
3176#endif
3177
3178
3179/*********************************************************************************************************************************
3180* Code Emitters (larger snippets) *
3181*********************************************************************************************************************************/
3182
3183/**
3184 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3185 * extending to 64-bit width.
3186 *
3187 * @returns New code buffer offset; throws VBox status code on failure.
3188 * @param   pReNative   The native recompile state.
3189 * @param off The current code buffer position.
3190 * @param idxHstReg The host register to load the guest register value into.
3191 * @param enmGstReg The guest register to load.
3192 *
3193 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3194 * that is something the caller needs to do if applicable.
3195 */
3196DECL_HIDDEN_THROW(uint32_t)
3197iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
3198{
3199 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3200 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3201
3202 switch (g_aGstShadowInfo[enmGstReg].cb)
3203 {
3204 case sizeof(uint64_t):
3205 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3206 case sizeof(uint32_t):
3207 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3208 case sizeof(uint16_t):
3209 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3210#if 0 /* not present in the table. */
3211 case sizeof(uint8_t):
3212 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3213#endif
3214 default:
3215 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3216 }
3217}
3218
3219
3220#ifdef VBOX_STRICT
3221/**
3222 * Emits code that checks that the content of register @a idxReg is the same
3223 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
3224 * instruction if that's not the case.
3225 *
3226 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3227 * Trashes EFLAGS on AMD64.
3228 */
3229static uint32_t
3230iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
3231{
3232# ifdef RT_ARCH_AMD64
3233 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3234
3235 /* cmp reg, [mem] */
3236 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3237 {
3238 if (idxReg >= 8)
3239 pbCodeBuf[off++] = X86_OP_REX_R;
3240 pbCodeBuf[off++] = 0x38;
3241 }
3242 else
3243 {
3244 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3245 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3246 else
3247 {
3248 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3249 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3250 else
3251 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
3252 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
3253 if (idxReg >= 8)
3254 pbCodeBuf[off++] = X86_OP_REX_R;
3255 }
3256 pbCodeBuf[off++] = 0x39;
3257 }
3258 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3259
3260 /* je/jz +1 */
3261 pbCodeBuf[off++] = 0x74;
3262 pbCodeBuf[off++] = 0x01;
3263
3264 /* int3 */
3265 pbCodeBuf[off++] = 0xcc;
3266
3267 /* For values smaller than the register size, we must check that the rest
3268 of the register is all zeros. */
3269 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3270 {
3271 /* test reg64, imm32 */
3272 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3273 pbCodeBuf[off++] = 0xf7;
3274 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3275 pbCodeBuf[off++] = 0;
3276 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3277 pbCodeBuf[off++] = 0xff;
3278 pbCodeBuf[off++] = 0xff;
3279
3280 /* je/jz +1 */
3281 pbCodeBuf[off++] = 0x74;
3282 pbCodeBuf[off++] = 0x01;
3283
3284 /* int3 */
3285 pbCodeBuf[off++] = 0xcc;
3286 }
3287 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3288 {
3289 /* rol reg64, 32 */
3290 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3291 pbCodeBuf[off++] = 0xc1;
3292 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3293 pbCodeBuf[off++] = 32;
3294
3295 /* test reg32, ffffffffh */
3296 if (idxReg >= 8)
3297 pbCodeBuf[off++] = X86_OP_REX_B;
3298 pbCodeBuf[off++] = 0xf7;
3299 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3300 pbCodeBuf[off++] = 0xff;
3301 pbCodeBuf[off++] = 0xff;
3302 pbCodeBuf[off++] = 0xff;
3303 pbCodeBuf[off++] = 0xff;
3304
3305 /* je/jz +1 */
3306 pbCodeBuf[off++] = 0x74;
3307 pbCodeBuf[off++] = 0x01;
3308
3309 /* int3 */
3310 pbCodeBuf[off++] = 0xcc;
3311
3312 /* rol reg64, 32 */
3313 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3314 pbCodeBuf[off++] = 0xc1;
3315 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3316 pbCodeBuf[off++] = 32;
3317 }
3318
3319# elif defined(RT_ARCH_ARM64)
3320 /* mov TMP0, [gstreg] */
3321 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3322
3323 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3324 /* sub tmp0, tmp0, idxReg */
3325 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3326 /* cbz tmp0, +1 */
3327 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
3328 /* brk #0x1000+enmGstReg */
3329 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3330
3331# else
3332# error "Port me!"
3333# endif
3334 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3335 return off;
3336}
3337#endif /* VBOX_STRICT */
3338
3339
3340
3341/**
3342 * Emits code for checking the return code of a call and rcPassUp, returning
3343 * from the code if either are non-zero.
3344 */
3345DECL_HIDDEN_THROW(uint32_t)
3346iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
3347{
3348#ifdef RT_ARCH_AMD64
3349 /*
3350 * AMD64: eax = call status code.
3351 */
3352
3353 /* edx = rcPassUp */
3354 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3355# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3356 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3357# endif
3358
3359 /* edx = eax | rcPassUp */
3360 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3361 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3362 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3363 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3364
3365 /* Jump to non-zero status return path. */
3366 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3367
3368 /* done. */
3369
3370#elif RT_ARCH_ARM64
3371 /*
3372 * ARM64: w0 = call status code.
3373 */
3374 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3375 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3376
3377 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3378
3379 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3380
3381 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3382 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
3383 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
3384
3385#else
3386# error "port me"
3387#endif
3388 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3389 return off;
3390}
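/* On AMD64 the emitted sequence boils down to something like the following
   (a sketch; the register choices are fixed by the code above, the label name
   is symbolic):
        mov     edx, [rbx + offsetof(VMCPUCC, iem.s.rcPassUp)]
        or      edx, eax                ; eax = status code returned by the call
        jnz     .NonZeroRetOrPassUp
   On ARM64 the same check or's w0 and w3 into w4 and does a cbnz to the
   NonZeroRetOrPassUp label. */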
3391
3392
3393/**
3394 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3395 * raising a \#GP(0) if it isn't.
3396 *
3397 * @returns New code buffer offset, UINT32_MAX on failure.
3398 * @param pReNative The native recompile state.
3399 * @param off The code buffer offset.
3400 * @param idxAddrReg The host register with the address to check.
3401 * @param idxInstr The current instruction.
3402 */
3403DECL_HIDDEN_THROW(uint32_t)
3404iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
3405{
3406 RT_NOREF(idxInstr);
3407
3408 /*
3409 * Make sure we don't have any outstanding guest register writes as we may
3410     * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
3411 */
3412 off = iemNativeRegFlushPendingWrites(pReNative, off);
3413
3414#ifdef RT_ARCH_AMD64
3415 /*
3416 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3417 * return raisexcpt();
3418     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3419 */
3420 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3421
3422 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3423 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3424 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3425 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3426
3427# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3428 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3429# else
3430 uint32_t const offFixup = off;
3431 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3432 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3433 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3434 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3435# endif
3436
3437 iemNativeRegFreeTmp(pReNative, iTmpReg);
3438
3439#elif defined(RT_ARCH_ARM64)
3440 /*
3441 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3442 * return raisexcpt();
3443 * ----
3444 * mov x1, 0x800000000000
3445 * add x1, x0, x1
3446 * cmp xzr, x1, lsr 48
3447 * and either:
3448 * b.ne .Lraisexcpt
3449 * or:
3450 * b.eq .Lnoexcept
3451 * movz x1, #instruction-number
3452 * b .Lraisexcpt
3453 * .Lnoexcept:
3454 */
3455 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3456
3457 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3458 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3459 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, idxAddrReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3460
3461# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3462 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3463# else
3464 uint32_t const offFixup = off;
3465 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3466 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3467 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3468 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3469# endif
3470
3471 iemNativeRegFreeTmp(pReNative, iTmpReg);
3472
3473#else
3474# error "Port me"
3475#endif
3476 return off;
3477}
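/* Why the 32-bit trick above works (worked example): a 64-bit address is
   canonical when bits 63:47 all equal bit 47, i.e. the upper 32 bits are in
   [0x00000000..0x00007fff] or [0xffff8000..0xffffffff].  Adding 0x8000 with
   32-bit wrap-around maps both ranges into [0x0000..0xffff], so shifting right
   by 16 yields zero.  A non-canonical address such as 0x0000800000000000 has
   upper half 0x00008000; 0x00008000 + 0x8000 = 0x00010000, and
   0x00010000 >> 16 = 1, which takes the #GP(0) path. */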
3478
3479
3480/**
3481 * Emits code to check if the content of @a idxAddrReg is within the limit of
3482 * idxSegReg, raising a \#GP(0) if it isn't.
3483 *
3484 * @returns New code buffer offset; throws VBox status code on error.
3485 * @param pReNative The native recompile state.
3486 * @param off The code buffer offset.
3487 * @param idxAddrReg The host register (32-bit) with the address to
3488 * check.
3489 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3490 * against.
3491 * @param idxInstr The current instruction.
3492 */
3493DECL_HIDDEN_THROW(uint32_t)
3494iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3495 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3496{
3497 /*
3498 * Make sure we don't have any outstanding guest register writes as we may
3499 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
3500     * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
3501 off = iemNativeRegFlushPendingWrites(pReNative, off);
3502
3503 /** @todo implement expand down/whatnot checking */
3504 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
3505
3506 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3507 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3508 kIemNativeGstRegUse_ForUpdate);
3509
3510 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3511
3512#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3513 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3514 RT_NOREF(idxInstr);
3515#else
3516 uint32_t const offFixup = off;
3517 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3518 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3519 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3520 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3521#endif
3522
3523 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3524 return off;
3525}
3526
3527
3528/**
3529 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
3530 *
3531 * @returns The flush mask.
3532 * @param fCImpl The IEM_CIMPL_F_XXX flags.
3533 * @param fGstShwFlush The starting flush mask.
3534 */
3535DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
3536{
3537 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
3538 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
3539 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
3540 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
3541 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
3542 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
3543 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
3544 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
3545 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
3546 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
3547 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
3548 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
3549 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3550 return fGstShwFlush;
3551}
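/* Example of the mapping (illustrative): a far branch that also modifies the
   flags, i.e. fCImpl = IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_RFLAGS, yields a
   flush mask covering the CS selector/base/limit shadows plus EFLAGS, on top
   of whatever the caller already passed in fGstShwFlush. */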
3552
3553
3554/**
3555 * Emits a call to a CImpl function or something similar.
3556 */
3557static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush,
3558 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3559 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3560{
3561 /*
3562     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
3563 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
3564 */
3565 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
3566 fGstShwFlush
3567 | RT_BIT_64(kIemNativeGstReg_Pc)
3568 | RT_BIT_64(kIemNativeGstReg_EFlags));
3569 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3570
3571 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3572
3573 /*
3574 * Load the parameters.
3575 */
3576#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3577    /* Special case for the hidden VBOXSTRICTRC pointer. */
3578 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3579 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3580 if (cAddParams > 0)
3581 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3582 if (cAddParams > 1)
3583 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3584 if (cAddParams > 2)
3585 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3586 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3587
3588#else
3589 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3590 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3591 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3592 if (cAddParams > 0)
3593 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3594 if (cAddParams > 1)
3595 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3596 if (cAddParams > 2)
3597# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3598 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3599# else
3600 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3601# endif
3602#endif
3603
3604 /*
3605 * Make the call.
3606 */
3607 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3608
3609#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3610 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3611#endif
3612
3613 /*
3614 * Check the status code.
3615 */
3616 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3617}
3618
3619
3620/**
3621 * Emits a call to a threaded worker function.
3622 */
3623static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3624{
3625 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3626 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
3627 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3628
3629#ifdef RT_ARCH_AMD64
3630 /* Load the parameters and emit the call. */
3631# ifdef RT_OS_WINDOWS
3632# ifndef VBOXSTRICTRC_STRICT_ENABLED
3633 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3634 if (cParams > 0)
3635 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3636 if (cParams > 1)
3637 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3638 if (cParams > 2)
3639 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3640# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3641 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3642 if (cParams > 0)
3643 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3644 if (cParams > 1)
3645 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3646 if (cParams > 2)
3647 {
3648 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3649 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3650 }
3651 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3652# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3653# else
3654 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3655 if (cParams > 0)
3656 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3657 if (cParams > 1)
3658 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3659 if (cParams > 2)
3660 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3661# endif
3662
3663 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3664
3665# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3666 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3667# endif
3668
3669#elif RT_ARCH_ARM64
3670    /*
3671     * ARM64: Load the parameters and emit the call.
3672     */
3673 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3674 if (cParams > 0)
3675 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3676 if (cParams > 1)
3677 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3678 if (cParams > 2)
3679 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3680
3681 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3682
3683#else
3684# error "port me"
3685#endif
3686
3687 /*
3688 * Check the status code.
3689 */
3690 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3691
3692 return off;
3693}
3694
3695
3696/**
3697 * Emits the code at the RaiseGP0 label.
3698 */
3699static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3700{
3701 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3702 if (idxLabel != UINT32_MAX)
3703 {
3704 iemNativeLabelDefine(pReNative, idxLabel, off);
3705
3706 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3707 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3708#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3709 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3710#endif
3711 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3712
3713 /* jump back to the return sequence. */
3714 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3715 }
3716 return off;
3717}
3718
3719
3720/**
3721 * Emits the code at the ReturnWithFlags label (returns
3722 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
3723 */
3724static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3725{
3726 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
3727 if (idxLabel != UINT32_MAX)
3728 {
3729 iemNativeLabelDefine(pReNative, idxLabel, off);
3730
3731 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
3732
3733 /* jump back to the return sequence. */
3734 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3735 }
3736 return off;
3737}
3738
3739
3740/**
3741 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
3742 */
3743static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3744{
3745 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
3746 if (idxLabel != UINT32_MAX)
3747 {
3748 iemNativeLabelDefine(pReNative, idxLabel, off);
3749
3750 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
3751
3752 /* jump back to the return sequence. */
3753 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3754 }
3755 return off;
3756}
3757
3758
3759/**
3760 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3761 */
3762static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3763{
3764 /*
3765 * Generate the rc + rcPassUp fiddling code if needed.
3766 */
3767 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3768 if (idxLabel != UINT32_MAX)
3769 {
3770 iemNativeLabelDefine(pReNative, idxLabel, off);
3771
3772 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3773#ifdef RT_ARCH_AMD64
3774# ifdef RT_OS_WINDOWS
3775# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3776 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3777# endif
3778 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3779 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3780# else
3781 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3782 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3783# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3784 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3785# endif
3786# endif
3787# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3788 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3789# endif
3790
3791#else
3792 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3793 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3794 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3795#endif
3796
3797 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3798 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3799 }
3800 return off;
3801}
3802
3803
3804/**
3805 * Emits a standard epilog.
3806 */
3807static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
3808{
3809 *pidxReturnLabel = UINT32_MAX;
3810
3811 /*
3812 * Successful return, so clear the return register (eax, w0).
3813 */
3814    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
3815
3816 /*
3817 * Define label for common return point.
3818 */
3819 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
3820 *pidxReturnLabel = idxReturn;
3821
3822 /*
3823 * Restore registers and return.
3824 */
3825#ifdef RT_ARCH_AMD64
3826 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3827
3828 /* Reposition esp at the r15 restore point. */
3829 pbCodeBuf[off++] = X86_OP_REX_W;
3830 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3831 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3832 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3833
3834 /* Pop non-volatile registers and return */
3835 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3836 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3837 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3838 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3839 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3840 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3841 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3842 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3843# ifdef RT_OS_WINDOWS
3844 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3845 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3846# endif
3847 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3848 pbCodeBuf[off++] = 0xc9; /* leave */
3849 pbCodeBuf[off++] = 0xc3; /* ret */
3850 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3851
3852#elif RT_ARCH_ARM64
3853 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3854
3855 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3856 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3857 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3858 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3859 IEMNATIVE_FRAME_VAR_SIZE / 8);
3860 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3861 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3862 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3863 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3864 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3865 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3866 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3867 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3868 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3869 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3870 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3871 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3872
3873 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3874 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3875 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3876 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3877
3878 /* retab / ret */
3879# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3880 if (1)
3881 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3882 else
3883# endif
3884 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3885
3886#else
3887# error "port me"
3888#endif
3889 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3890
3891 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3892}
3893
3894
3895/**
3896 * Emits a standard prolog.
3897 */
3898static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3899{
3900#ifdef RT_ARCH_AMD64
3901 /*
3902 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3903 * reserving 64 bytes for stack variables plus 4 non-register argument
3904     * slots. Fixed register assignment: xBX = pVCpu;
3905 *
3906 * Since we always do the same register spilling, we can use the same
3907 * unwind description for all the code.
3908 */
3909 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3910 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3911 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3912 pbCodeBuf[off++] = 0x8b;
3913 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3914 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3915 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3916# ifdef RT_OS_WINDOWS
3917 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3918 pbCodeBuf[off++] = 0x8b;
3919 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3920 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3921 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3922# else
3923 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3924 pbCodeBuf[off++] = 0x8b;
3925 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3926# endif
3927 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3928 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3929 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3930 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3931 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3932 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3933 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3934 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3935
3936 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3937 X86_GREG_xSP,
3938 IEMNATIVE_FRAME_ALIGN_SIZE
3939 + IEMNATIVE_FRAME_VAR_SIZE
3940 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3941 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3942 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3943 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3944 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3945
3946#elif RT_ARCH_ARM64
3947 /*
3948 * We set up a stack frame exactly like on x86, only we have to push the
3949     * return address ourselves here. We save all non-volatile registers.
3950 */
3951 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3952
3953# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
3954                      * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
3955                      * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
3956                      * in any way conditional, so we just emit these instructions now and hope for the best... */
3957 /* pacibsp */
3958 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3959# endif
3960
3961 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
3962 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3963 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3964 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3965 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3966 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3967 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3968 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3969 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3970 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3971 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3972 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3973 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3974 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3975 /* Save the BP and LR (ret address) registers at the top of the frame. */
3976 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3977 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3978 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3979 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3980 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3981 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3982
3983 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3984 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3985
3986 /* mov r28, r0 */
3987 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3988 /* mov r27, r1 */
3989 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3990
3991#else
3992# error "port me"
3993#endif
3994 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3995 return off;
3996}
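/* Resulting AMD64 frame layout as set up above (sketch, higher addresses at
   the top; the rsi/rdi saves and the shadow argument area exist on Windows
   only):
        [rbp+08h]   return address
        [rbp+00h]   saved rbp
        [rbp-08h]   saved rbx             (rbx then holds pVCpu)
        [rbp-...]   saved rsi, rdi        (Windows only)
        [rbp-...]   saved r12..r15
        [rsp+...]   alignment padding, variable area, stack args, shadow args
   The ARM64 prolog mirrors this with an stp/ldp saved-register block of
   IEMNATIVE_FRAME_SAVE_REG_SIZE bytes followed by the variable area. */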
3997
3998
3999
4000
4001/*********************************************************************************************************************************
4002* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
4003*********************************************************************************************************************************/
4004
4005#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
4006 { \
4007 Assert(pReNative->Core.bmVars == 0); \
4008 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
4009 Assert(pReNative->Core.bmStack == 0); \
4010 pReNative->fMc = (a_fMcFlags); \
4011 pReNative->fCImpl = (a_fCImplFlags); \
4012 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
4013
4014/** We have to get to the end in recompilation mode, as otherwise we won't
4015 * generate code for all the IEM_MC_IF_XXX branches. */
4016#define IEM_MC_END() \
4017 iemNativeVarFreeAll(pReNative); \
4018 } return off
4019
4020
4021
4022/*********************************************************************************************************************************
4023* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
4024*********************************************************************************************************************************/
4025
4026#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
4027 pReNative->fMc = 0; \
4028 pReNative->fCImpl = (a_fFlags); \
4029 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
4030
4031
4032#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4033 pReNative->fMc = 0; \
4034 pReNative->fCImpl = (a_fFlags); \
4035 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
4036
4037DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4038 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4039 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
4040{
4041 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
4042}
4043
4044
4045#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4046 pReNative->fMc = 0; \
4047 pReNative->fCImpl = (a_fFlags); \
4048 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4049 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
4050
4051DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4052 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4053 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
4054{
4055 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
4056}
4057
4058
4059#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4060 pReNative->fMc = 0; \
4061 pReNative->fCImpl = (a_fFlags); \
4062 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
4063 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
4064
4065DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4066 uint8_t idxInstr, uint64_t a_fGstShwFlush,
4067 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
4068 uint64_t uArg2)
4069{
4070 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
4071}
4072
4073
4074
4075/*********************************************************************************************************************************
4076* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
4077*********************************************************************************************************************************/
4078
4079/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
4080 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
4081DECL_INLINE_THROW(uint32_t)
4082iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4083{
4084 /*
4085     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
4086 * return with special status code and make the execution loop deal with
4087 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
4088 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
4089 * could continue w/o interruption, it probably will drop into the
4090     * debugger, so it's not worth the effort of trying to service it here and we
4091 * just lump it in with the handling of the others.
4092 *
4093 * To simplify the code and the register state management even more (wrt
4094 * immediate in AND operation), we always update the flags and skip the
4095     * conditional jump associated with the extra check.
4096 */
4097 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
4098 <= UINT32_MAX);
4099 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4100 kIemNativeGstRegUse_ForUpdate);
4101 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
4102 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
4103 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
4104 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
4105 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
4106
4107 /* Free but don't flush the EFLAGS register. */
4108 iemNativeRegFreeTmp(pReNative, idxEflReg);
4109
4110 return off;
4111}
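/* In pseudo-code the check emitted above amounts to (a sketch using the C
   names rather than the emitted instructions):
       eflags = pVCpu->cpum.GstCtx.eflags;              // via the shadow register
       if (eflags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
           return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;    // via the ReturnWithFlags label
       eflags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
       pVCpu->cpum.GstCtx.eflags = eflags;
*/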
4112
4113
4114#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
4115 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4116
4117#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
4118 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
4119 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4120
4121/** Same as iemRegAddToRip64AndFinishingNoFlags. */
4122DECL_INLINE_THROW(uint32_t)
4123iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4124{
4125 /* Allocate a temporary PC register. */
4126 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4127
4128 /* Perform the addition and store the result. */
4129 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
4130 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4131
4132 /* Free but don't flush the PC register. */
4133 iemNativeRegFreeTmp(pReNative, idxPcReg);
4134
4135 return off;
4136}
4137
4138
4139#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
4140 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4141
4142#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
4143 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
4144 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4145
4146/** Same as iemRegAddToEip32AndFinishingNoFlags. */
4147DECL_INLINE_THROW(uint32_t)
4148iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4149{
4150 /* Allocate a temporary PC register. */
4151 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4152
4153 /* Perform the addition and store the result. */
4154 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4155 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4156
4157 /* Free but don't flush the PC register. */
4158 iemNativeRegFreeTmp(pReNative, idxPcReg);
4159
4160 return off;
4161}
4162
4163
4164#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
4165 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
4166
4167#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
4168 IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
4169 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4170
4171/** Same as iemRegAddToIp16AndFinishingNoFlags. */
4172DECL_INLINE_THROW(uint32_t)
4173iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
4174{
4175 /* Allocate a temporary PC register. */
4176 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4177
4178 /* Perform the addition and store the result. */
4179 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
4180 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4181 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4182
4183 /* Free but don't flush the PC register. */
4184 iemNativeRegFreeTmp(pReNative, idxPcReg);
4185
4186 return off;
4187}
4188
4189
4190
4191/*********************************************************************************************************************************
4192* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
4193*********************************************************************************************************************************/
4194
4195#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
4196 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4197 (a_enmEffOpSize), pCallEntry->idxInstr)
4198
4199#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4200 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
4201 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4202
4203#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
4204 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4205 IEMMODE_16BIT, pCallEntry->idxInstr)
4206
4207#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
4208 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
4209 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4210
4211#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
4212 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4213 IEMMODE_64BIT, pCallEntry->idxInstr)
4214
4215#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
4216 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
4217 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4218
4219/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
4220 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
4221 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
4222DECL_INLINE_THROW(uint32_t)
4223iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4224 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4225{
4226 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
4227
4228 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4229 off = iemNativeRegFlushPendingWrites(pReNative, off);
4230
4231 /* Allocate a temporary PC register. */
4232 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4233
4234 /* Perform the addition. */
4235 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
4236
4237 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
4238 {
4239 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
4240 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
4241 }
4242 else
4243 {
4244 /* Just truncate the result to 16-bit IP. */
4245 Assert(enmEffOpSize == IEMMODE_16BIT);
4246 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4247 }
4248 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4249
4250 /* Free but don't flush the PC register. */
4251 iemNativeRegFreeTmp(pReNative, idxPcReg);
4252
4253 return off;
4254}
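
/*
 * Worked example (illustrative): a 2 byte 'jmp rel8' with a displacement of
 * +0x10 in 64-bit code.  IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64 invokes
 * the emitter above with cbInstr=2, offDisp=0x10 and IEMMODE_64BIT, producing
 * native code that:
 *      1. flushes pending guest register writes so CPUMCTX is correct in
 *         case we raise #GP(0),
 *      2. loads the guest RIP into a temporary host register,
 *      3. adds 0x10 + 2 = 0x12 to it,
 *      4. checks that the result is canonical, exiting the TB and raising
 *         #GP(0) if it isn't, and
 *      5. stores the new value back to cpum.GstCtx.rip.
 * With a 16-bit operand size step 4 is instead a truncation to 16 bits
 * (clearing bits 16 thru 63).
 */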
4255
4256
4257#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
4258 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
4259 (a_enmEffOpSize), pCallEntry->idxInstr)
4260
4261#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
4262 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
4263 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4264
4265#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
4266 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
4267 IEMMODE_16BIT, pCallEntry->idxInstr)
4268
4269#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
4270 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
4271 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4272
4273#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
4274 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
4275 IEMMODE_32BIT, pCallEntry->idxInstr)
4276
4277#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
4278 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
4279 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4280
4281/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
4282 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
4283 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
4284DECL_INLINE_THROW(uint32_t)
4285iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
4286 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
4287{
4288 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
4289
4290 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4291 off = iemNativeRegFlushPendingWrites(pReNative, off);
4292
4293 /* Allocate a temporary PC register. */
4294 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4295
4296 /* Perform the addition. */
4297 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4298
4299 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4300 if (enmEffOpSize == IEMMODE_16BIT)
4301 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4302
4303 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4304 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4305
4306 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4307
4308 /* Free but don't flush the PC register. */
4309 iemNativeRegFreeTmp(pReNative, idxPcReg);
4310
4311 return off;
4312}
4313
4314
4315#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4316 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
4317
4318#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
4319 IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
4320 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4321
4322#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4323 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
4324
4325#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
4326 IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
4327 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4328
4329#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4330 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
4331
4332#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
4333 IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
4334 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
4335
4336/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4337DECL_INLINE_THROW(uint32_t)
4338iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4339 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4340{
4341 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4342 off = iemNativeRegFlushPendingWrites(pReNative, off);
4343
4344 /* Allocate a temporary PC register. */
4345 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4346
4347 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4348 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4349 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4350 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4351 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4352
4353 /* Free but don't flush the PC register. */
4354 iemNativeRegFreeTmp(pReNative, idxPcReg);
4355
4356 return off;
4357}
4358
4359
4360
4361/*********************************************************************************************************************************
4362* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
4363*********************************************************************************************************************************/
4364
4365/**
4366 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4367 *
4368 * @returns Pointer to the condition stack entry on success; throws
4369 *          VERR_IEM_COND_TOO_DEEPLY_NESTED (via longjmp) if nested too deeply.
4370 */
4371DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4372{
4373 uint32_t const idxStack = pReNative->cCondDepth;
4374 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
4375
4376 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4377 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4378
4379 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4380 pEntry->fInElse = false;
4381 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4382 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4383
4384 return pEntry;
4385}
4386
4387
4388/**
4389 * Start of the if-block, snapshotting the register and variable state.
4390 */
4391DECL_INLINE_THROW(void)
4392iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
4393{
4394 Assert(offIfBlock != UINT32_MAX);
4395 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4396 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4397 Assert(!pEntry->fInElse);
4398
4399 /* Define the start of the IF block if requested or for disassembly purposes. */
4400 if (idxLabelIf != UINT32_MAX)
4401 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
4402#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4403 else
4404 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
4405#else
4406 RT_NOREF(offIfBlock);
4407#endif
4408
4409 /* Copy the initial state so we can restore it in the 'else' block. */
4410 pEntry->InitialState = pReNative->Core;
4411}
4412
4413
4414#define IEM_MC_ELSE() } while (0); \
4415 off = iemNativeEmitElse(pReNative, off); \
4416 do {
4417
4418/** Emits code related to IEM_MC_ELSE. */
4419DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4420{
4421 /* Check sanity and get the conditional stack entry. */
4422 Assert(off != UINT32_MAX);
4423 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4424 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4425 Assert(!pEntry->fInElse);
4426
4427 /* Jump to the endif */
4428 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4429
4430 /* Define the else label and enter the else part of the condition. */
4431 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4432 pEntry->fInElse = true;
4433
4434 /* Snapshot the core state so we can do a merge at the endif and restore
4435 the snapshot we took at the start of the if-block. */
4436 pEntry->IfFinalState = pReNative->Core;
4437 pReNative->Core = pEntry->InitialState;
4438
4439 return off;
4440}
4441
4442
4443#define IEM_MC_ENDIF() } while (0); \
4444 off = iemNativeEmitEndIf(pReNative, off)
4445
4446/** Emits code related to IEM_MC_ENDIF. */
4447DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4448{
4449 /* Check sanity and get the conditional stack entry. */
4450 Assert(off != UINT32_MAX);
4451 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4452 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4453
4454 /*
4455 * Now we have to find common ground between the current core state and that of
4456 * the other branch (if-block end, or the pre-if state when there is no else).
4457 * Use the smallest common denominator and just drop anything that differs.
4458 */
4459 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
4460 * which is why we're doing this at the end of the else-block.
4461 * But we'd need more info about the future for that to be worth the effort. */
4462 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
4463 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
4464 {
4465 /* shadow guest stuff first. */
4466 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
4467 if (fGstRegs)
4468 {
4469 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
4470 do
4471 {
4472 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4473 fGstRegs &= ~RT_BIT_64(idxGstReg);
4474
4475 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4476 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
4477 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
4478 {
4479 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
4480 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4481 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
4482 }
4483 } while (fGstRegs);
4484 }
4485 else
4486 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
4487
4488 /* Check variables next. For now we must require them to be identical
4489 or stuff we can recreate. */
4490 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
4491 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
4492 if (fVars)
4493 {
4494 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
4495 do
4496 {
4497 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
4498 fVars &= ~RT_BIT_32(idxVar);
4499
4500 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
4501 {
4502 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
4503 continue;
4504 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4505 {
4506 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4507 if (idxHstReg != UINT8_MAX)
4508 {
4509 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4510 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4511 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
4512 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4513 }
4514 continue;
4515 }
4516 }
4517 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
4518 continue;
4519
4520 /* Irreconcilable, so drop it. */
4521 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
4522 if (idxHstReg != UINT8_MAX)
4523 {
4524 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4525 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4526 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
4527 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
4528 }
4529 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
4530 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
4531 } while (fVars);
4532 }
4533
4534 /* Finally, check that the host register allocations matches. */
4535 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
4536 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
4537 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
4538 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
4539 }
4540
4541 /*
4542 * Define the endif label and maybe the else one if we're still in the 'if' part.
4543 */
4544 if (!pEntry->fInElse)
4545 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4546 else
4547 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4548 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4549
4550 /* Pop the conditional stack. */
4551 pReNative->cCondDepth -= 1;
4552
4553 return off;
4554}
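
/*
 * Illustrative sketch (not compiled) of what an IEM_MC conditional turns into
 * with the helpers above.  For instance:
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF)
 *          ... then code ...
 *      IEM_MC_ELSE()
 *          ... else code ...
 *      IEM_MC_ENDIF();
 *
 * expands (roughly) to:
 *
 *      off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF); // push cond entry, test ZF,
 *      do {                                                           // jump to 'else' label if clear,
 *          ... then code ...                                          // snapshot the core state.
 *      } while (0);
 *      off = iemNativeEmitElse(pReNative, off);                       // jmp endif; define 'else' label;
 *      do {                                                           // save if-final state and restore
 *          ... else code ...                                          // the initial snapshot.
 *      } while (0);
 *      off = iemNativeEmitEndIf(pReNative, off);                      // reconcile/merge the two states,
 *                                                                     // define 'endif' label, pop entry.
 */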
4555
4556
4557#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
4558 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
4559 do {
4560
4561/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
4562DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4563{
4564 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4565
4566 /* Get the eflags. */
4567 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4568 kIemNativeGstRegUse_ReadOnly);
4569
4570 /* Test and jump. */
4571 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4572
4573 /* Free but don't flush the EFlags register. */
4574 iemNativeRegFreeTmp(pReNative, idxEflReg);
4575
4576 /* Make a copy of the core state now as we start the if-block. */
4577 iemNativeCondStartIfBlock(pReNative, off);
4578
4579 return off;
4580}
4581
4582
4583#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
4584 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
4585 do {
4586
4587/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
4588DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
4589{
4590 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4591
4592 /* Get the eflags. */
4593 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4594 kIemNativeGstRegUse_ReadOnly);
4595
4596 /* Test and jump. */
4597 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
4598
4599 /* Free but don't flush the EFlags register. */
4600 iemNativeRegFreeTmp(pReNative, idxEflReg);
4601
4602 /* Make a copy of the core state now as we start the if-block. */
4603 iemNativeCondStartIfBlock(pReNative, off);
4604
4605 return off;
4606}
4607
4608
4609#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4610 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4611 do {
4612
4613/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4614DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4615{
4616 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4617
4618 /* Get the eflags. */
4619 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4620 kIemNativeGstRegUse_ReadOnly);
4621
4622 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4623 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4624
4625 /* Test and jump. */
4626 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4627
4628 /* Free but don't flush the EFlags register. */
4629 iemNativeRegFreeTmp(pReNative, idxEflReg);
4630
4631 /* Make a copy of the core state now as we start the if-block. */
4632 iemNativeCondStartIfBlock(pReNative, off);
4633
4634 return off;
4635}
4636
4637
4638#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
4639 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
4640 do {
4641
4642/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
4643DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4644{
4645 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4646
4647 /* Get the eflags. */
4648 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4649 kIemNativeGstRegUse_ReadOnly);
4650
4651 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4652 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4653
4654 /* Test and jump. */
4655 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4656
4657 /* Free but don't flush the EFlags register. */
4658 iemNativeRegFreeTmp(pReNative, idxEflReg);
4659
4660 /* Make a copy of the core state now as we start the if-block. */
4661 iemNativeCondStartIfBlock(pReNative, off);
4662
4663 return off;
4664}
4665
4666
4667#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
4668 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
4669 do {
4670
4671#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
4672 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
4673 do {
4674
4675/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
4676DECL_INLINE_THROW(uint32_t)
4677iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4678 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4679{
4680 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4681
4682 /* Get the eflags. */
4683 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4684 kIemNativeGstRegUse_ReadOnly);
4685
4686 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4687 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4688
4689 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4690 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4691 Assert(iBitNo1 != iBitNo2);
4692
4693#ifdef RT_ARCH_AMD64
4694 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
4695
4696 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4697 if (iBitNo1 > iBitNo2)
4698 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4699 else
4700 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4701 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4702
4703#elif defined(RT_ARCH_ARM64)
4704 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4705 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4706
4707 /* and tmpreg, eflreg, #1<<iBitNo1 */
4708 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4709
4710 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4711 if (iBitNo1 > iBitNo2)
4712 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4713 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4714 else
4715 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4716 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4717
4718 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4719
4720#else
4721# error "Port me"
4722#endif
4723
4724 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4725 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4726 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4727
4728 /* Free but don't flush the EFlags and tmp registers. */
4729 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4730 iemNativeRegFreeTmp(pReNative, idxEflReg);
4731
4732 /* Make a copy of the core state now as we start the if-block. */
4733 iemNativeCondStartIfBlock(pReNative, off);
4734
4735 return off;
4736}
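
/*
 * Worked example (illustrative): IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF)
 * as used by the signed conditions.  SF is bit 7 and OF is bit 11, so
 * iBitNo1=7 < iBitNo2=11 and the emitter above produces (in pseudo code):
 *
 *      tmp  = efl & X86_EFL_SF;    // isolate SF (bit 7)
 *      tmp <<= 11 - 7;             // move it up to bit 11
 *      tmp ^= efl;                 // bit 11 of tmp is now SF ^ OF
 *      test bit 11 of tmp          // set:   SF != OF
 *                                  // clear: SF == OF
 *
 * For the _NE variant (fInverted=true) we jump to the else label when the bit
 * is clear, for the _EQ variant (fInverted=false) when it is set.
 */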
4737
4738
4739#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
4740 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
4741 do {
4742
4743#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
4744 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
4745 do {
4746
4747/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
4748 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
4749DECL_INLINE_THROW(uint32_t)
4750iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
4751 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
4752{
4753 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4754
4755 /* We need an if-block label for the non-inverted variant. */
4756 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
4757 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
4758
4759 /* Get the eflags. */
4760 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4761 kIemNativeGstRegUse_ReadOnly);
4762
4763 /* Translate the flag masks to bit numbers. */
4764 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4765 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4766
4767 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
4768 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
4769 Assert(iBitNo1 != iBitNo);
4770
4771 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
4772 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
4773 Assert(iBitNo2 != iBitNo);
4774 Assert(iBitNo2 != iBitNo1);
4775
4776#ifdef RT_ARCH_AMD64
4777 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
4778#elif defined(RT_ARCH_ARM64)
4779 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4780#endif
4781
4782 /* Check for the lone bit first. */
4783 if (!fInverted)
4784 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4785 else
4786 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
4787
4788 /* Then extract and compare the other two bits. */
4789#ifdef RT_ARCH_AMD64
4790 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4791 if (iBitNo1 > iBitNo2)
4792 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
4793 else
4794 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
4795 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
4796
4797#elif defined(RT_ARCH_ARM64)
4798 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4799
4800 /* and tmpreg, eflreg, #1<<iBitNo1 */
4801 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
4802
4803 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
4804 if (iBitNo1 > iBitNo2)
4805 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4806 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
4807 else
4808 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
4809 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
4810
4811 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4812
4813#else
4814# error "Port me"
4815#endif
4816
4817 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
4818 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
4819 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
4820
4821 /* Free but don't flush the EFlags and tmp registers. */
4822 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4823 iemNativeRegFreeTmp(pReNative, idxEflReg);
4824
4825 /* Make a copy of the core state now as we start the if-block. */
4826 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
4827
4828 return off;
4829}
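
/*
 * Worked example (illustrative): IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(
 * X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF), i.e. the 'greater' condition
 * ZF==0 && SF==OF.  With fInverted=false the emitter first tests ZF and jumps
 * straight to the else label if it is set, then reuses the SF^OF shift+xor
 * trick from iemNativeEmitIfEflagsTwoBitsEqual and jumps to the else label if
 * the two bits differ.  The inverted form (IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE)
 * instead jumps directly into the if-block when the lone bit is set - which
 * is why it needs the extra idxLabelIf label - and otherwise falls through to
 * the two-bit compare, branching to the else label when the bits are equal.
 */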
4830
4831
4832#define IEM_MC_IF_CX_IS_NZ() \
4833 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
4834 do {
4835
4836/** Emits code for IEM_MC_IF_CX_IS_NZ. */
4837DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4838{
4839 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4840
4841 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4842 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4843 kIemNativeGstRegUse_ReadOnly);
4844 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4845 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4846
4847 iemNativeCondStartIfBlock(pReNative, off);
4848 return off;
4849}
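
/*
 * Note: the UINT16_MAX mask above is what makes this a CX check rather than
 * an RCX/ECX one - only bits 0 thru 15 of the guest register are tested, so
 * 'CX is not zero' becomes 'any of the low 16 bits set'.  The ECX/RCX
 * variants below use a plain zero test on the (32-bit or 64-bit) register
 * instead.
 */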
4850
4851
4852#define IEM_MC_IF_ECX_IS_NZ() \
4853 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
4854 do {
4855
4856#define IEM_MC_IF_RCX_IS_NZ() \
4857 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
4858 do {
4859
4860/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
4861DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
4862{
4863 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4864
4865 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4866 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4867 kIemNativeGstRegUse_ReadOnly);
4868 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4869 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4870
4871 iemNativeCondStartIfBlock(pReNative, off);
4872 return off;
4873}
4874
4875
4876#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4877 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
4878 do {
4879
4880#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4881 off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
4882 do {
4883
4884/** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4885DECL_INLINE_THROW(uint32_t)
4886iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
4887{
4888 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4889
4890 /* We have to load both RCX and EFLAGS before we can start branching,
4891 otherwise we'll end up in the else-block with an inconsistent
4892 register allocator state.
4893 Doing EFLAGS first as it's more likely to be loaded, right? */
4894 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4895 kIemNativeGstRegUse_ReadOnly);
4896 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4897 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4898 kIemNativeGstRegUse_ReadOnly);
4899
4900 /** @todo we could reduce this to a single branch instruction by spending a
4901 * temporary register and some setnz stuff. Not sure if loops are
4902 * worth it. */
4903 /* Check CX. */
4904 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
4905
4906 /* Check the EFlags bit. */
4907 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4908 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4909 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4910 !fCheckIfSet /*fJmpIfSet*/);
4911
4912 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4913 iemNativeRegFreeTmp(pReNative, idxEflReg);
4914
4915 iemNativeCondStartIfBlock(pReNative, off);
4916 return off;
4917}
4918
4919
4920#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4921 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
4922 do {
4923
4924#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4925 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
4926 do {
4927
4928#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
4929 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
4930 do {
4931
4932#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
4933 off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
4934 do {
4935
4936/** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
4937 * IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
4938 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
4939 * IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
4940DECL_INLINE_THROW(uint32_t)
4941iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4942 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
4943{
4944 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
4945
4946 /* We have to load both RCX and EFLAGS before we can start branching,
4947 otherwise we'll end up in the else-block with an inconsistent
4948 register allocator state.
4949 Doing EFLAGS first as it's more likely to be loaded, right? */
4950 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4951 kIemNativeGstRegUse_ReadOnly);
4952 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
4953 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xCX),
4954 kIemNativeGstRegUse_ReadOnly);
4955
4956 /** @todo we could reduce this to a single branch instruction by spending a
4957 * temporary register and some setnz stuff. Not sure if loops are
4958 * worth it. */
4959 /* Check RCX/ECX. */
4960 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
4961
4962 /* Check the EFlags bit. */
4963 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4964 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4965 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
4966 !fCheckIfSet /*fJmpIfSet*/);
4967
4968 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
4969 iemNativeRegFreeTmp(pReNative, idxEflReg);
4970
4971 iemNativeCondStartIfBlock(pReNative, off);
4972 return off;
4973}
4974
4975
4976
4977/*********************************************************************************************************************************
4978* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
4979*********************************************************************************************************************************/
4980/** Number of hidden arguments for CIMPL calls.
4981 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
4982#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4983# define IEM_CIMPL_HIDDEN_ARGS 3
4984#else
4985# define IEM_CIMPL_HIDDEN_ARGS 2
4986#endif
4987
4988#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
4989 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
4990
4991#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
4992 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
4993
4994#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
4995 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
4996
4997#define IEM_MC_LOCAL(a_Type, a_Name) \
4998 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
4999
5000#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
5001 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
5002
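/*
 * Illustrative sketch (not compiled, variable names are made up): what the
 * declaration macros above turn into inside a recompiled MC block:
 *
 *      IEM_MC_ARG(uint8_t, iEffSeg, 0);         // uint8_t const iEffSeg  = iemNativeArgAlloc(pReNative, 0, sizeof(uint8_t));
 *      IEM_MC_ARG_CONST(uint8_t, cShift, 1, 1); // uint8_t const cShift   = iemNativeArgAllocConst(pReNative, 1, sizeof(uint8_t), 1);
 *      IEM_MC_LOCAL(uint16_t, u16Value);        // uint8_t const u16Value = iemNativeVarAlloc(pReNative, sizeof(uint16_t));
 *
 * Note that the resulting C variable only holds an index into
 * pReNative->Core.aVars; the actual value lives in a host register and/or a
 * stack slot managed by the variable allocator below.
 */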
5003
5004/**
5005 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
5006 */
5007DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
5008{
5009 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
5010 return IEM_CIMPL_HIDDEN_ARGS;
5011 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
5012 return 1;
5013 return 0;
5014}
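
/*
 * Example (illustrative): for a CIMPL call the explicit IEM_MC_ARG numbers
 * are offset by IEM_CIMPL_HIDDEN_ARGS, so IEM_MC_ARG(..., 0) really becomes
 * host call argument #2 - or #3 on Windows/AMD64 with strict VBOXSTRICTRC,
 * where the return value is presumably passed via a hidden buffer pointer.
 * The hidden slots are reserved for the fixed parameters (pVCpu and the
 * instruction length) that the call emitter loads itself; AIMPL-with-FXSTATE
 * calls only reserve one hidden slot for the FXSTATE pointer.
 */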
5015
5016
5017/**
5018 * Internal work that allocates a variable with kind set to
5019 * kIemNativeVarKind_Invalid and no current stack allocation.
5020 *
5021 * The kind will either be set by the caller or later when the variable is first
5022 * assigned a value.
5023 */
5024static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5025{
5026 Assert(cbType > 0 && cbType <= 64);
5027 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
5028 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
5029 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
5030 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
5031 pReNative->Core.aVars[idxVar].cbVar = cbType;
5032 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
5033 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5034 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
5035 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
5036 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
5037 pReNative->Core.aVars[idxVar].u.uValue = 0;
5038 return idxVar;
5039}
5040
5041
5042/**
5043 * Internal work that allocates an argument variable w/o setting enmKind.
5044 */
5045static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5046{
5047 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
5048 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5049 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
5050
5051 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5052 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
5053 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
5054 return idxVar;
5055}
5056
5057
5058/**
5059 * Changes the variable to a stack variable.
5060 *
5061 * Currently this is only possible to do the first time the variable is used;
5062 * switching later can be implemented but hasn't been done.
5063 *
5064 * @param pReNative The recompiler state.
5065 * @param idxVar The variable.
5066 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5067 */
5068static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5069{
5070 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5071 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
5072 {
5073 /* We could in theory transition from immediate to stack as well, but it
5074 would involve the caller doing work storing the value on the stack. So,
5075 till that's required we only allow transition from invalid. */
5076 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5077 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5078 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
5079
5080 if (pReNative->Core.aVars[idxVar].idxStackSlot == UINT8_MAX)
5081 {
5082 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
5083 {
5084 unsigned const iSlot = ASMBitFirstSetU32(~pReNative->Core.bmStack) - 1;
5085 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5086 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
5087 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5088 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
5089 return;
5090 }
5091 /* cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;*/
5092 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
5093 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
5094 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
5095 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
5096 uint32_t bmStack = ~pReNative->Core.bmStack;
5097 while (bmStack != UINT32_MAX)
5098 {
5099 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
5100 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5101 if (!(iSlot & fBitAlignMask))
5102 {
5103 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
5104 {
5105 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
5106 pReNative->Core.aVars[idxVar].idxStackSlot = iSlot;
5107 Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
5108 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
5109 return;
5110 }
5111 }
5112 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
5113 }
5114 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
5115 }
5116 }
5117}
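
/*
 * Worked example (illustrative) for the multi-slot path above: a 32 byte
 * variable gives fBitAlignMask=3 and fBitAllocMask=0xf, i.e. it needs four
 * consecutive qword slots starting at a slot index that is a multiple of
 * four.  The common case (cbVar <= 8) never enters the loop and simply takes
 * the first free slot.
 */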
5118
5119
5120/**
5121 * Changes it to a variable with a constant value.
5122 *
5123 * This does not require stack storage as we know the value and can always
5124 * reload it, unless of course it's referenced.
5125 *
5126 * @param pReNative The recompiler state.
5127 * @param idxVar The variable.
5128 * @param uValue The immediate value.
5129 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5130 */
5131static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
5132{
5133 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5134 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
5135 {
5136 /* Only simple transitions for now. */
5137 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5138 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5139 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
5140 }
5141 pReNative->Core.aVars[idxVar].u.uValue = uValue;
5142}
5143
5144
5145/**
5146 * Changes the variable to a reference (pointer) to @a idxOtherVar.
5147 *
5148 * @param pReNative The recompiler state.
5149 * @param idxVar The variable.
5150 * @param idxOtherVar The variable to take the (stack) address of.
5151 *
5152 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
5153 */
5154static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
5155{
5156 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5157 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
5158
5159 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
5160 {
5161 /* Only simple transitions for now. */
5162 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
5163 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
5164 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
5165 }
5166 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
5167
5168 /* Update the other variable, ensure it's a stack variable. */
5169 /** @todo handle variables with const values... that'll go boom now. */
5170 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
5171 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
5172}
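
/*
 * Note: the referenced variable is forced onto the stack above because its
 * stack slot is the only stable address we can hand out - a host register
 * assignment may change or be spilled at any point.  That is presumably also
 * what the @todo about const values is about: an immediate currently has no
 * stack slot whose address could be taken.
 */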
5173
5174
5175DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
5176{
5177 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5178}
5179
5180
5181DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
5182{
5183 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
5184 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5185 return idxVar;
5186}
5187
5188
5189DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
5190{
5191 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
5192 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
5193 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
5194 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
5195
5196 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
5197 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
5198 return idxArgVar;
5199}
5200
5201
5202DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
5203{
5204 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5205 iemNativeVarSetKindToStack(pReNative, idxVar);
5206 return idxVar;
5207}
5208
5209
5210DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
5211{
5212 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
5213 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
5214 return idxVar;
5215}
5216
5217
5218/**
5219 * Makes sure variable @a idxVar has a register assigned to it.
5220 *
5221 * @returns The host register number.
5222 * @param pReNative The recompiler state.
5223 * @param idxVar The variable.
5224 * @param poff Pointer to the instruction buffer offset.
5225 * In case a register needs to be freed up.
5226 */
5227DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocRegister(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
5228{
5229 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
5230
5231 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
5232 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5233 return idxReg;
5234
5235 /*
5236 * We have to allocate a register for the variable, even if it's a stack one,
5237 * as we don't know if modifications are being made to it before it's
5238 * finalized (todo: analyze and insert hints about that?).
5239 *
5240 * If we can, we try to get the correct register for argument variables. This
5241 * assumes that most argument variables are fetched as close as possible
5242 * to the actual call, so that there aren't any interfering hidden calls
5243 * (memory accesses, etc.) in between.
5244 *
5245 * If we cannot, or it's a regular (non-argument) variable, we make sure no
5246 * argument registers that will be used by this MC block are allocated here,
5247 * and we always prefer non-volatile registers to avoid having to spill
5248 * stuff for an internal call.
5249 */
5250 /** @todo Detect too early argument value fetches and warn about hidden
5251 * calls causing less optimal code to be generated in the python script. */
5252
5253 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5254 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
5255 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
5256 idxReg = g_aidxIemNativeCallRegs[uArgNo];
5257 else
5258 {
5259 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
5260 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
5261 & ~pReNative->Core.bmHstRegsWithGstShadow
5262 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
5263 & fNotArgsMask;
5264 if (fRegs)
5265 {
5266 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
5267 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5268 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
5269 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
5270 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
5271 }
5272 else
5273 {
5274 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
5275 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
5276 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
5277 }
5278 }
5279 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
5280 pReNative->Core.aVars[idxVar].idxReg = idxReg;
5281 return idxReg;
5282}
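
/*
 * In short, the preference order used above when binding a register to a
 * variable is:
 *      1. the calling convention register matching the variable's argument
 *         number, if it is an argument and that register is currently free,
 *      2. any free register that is neither fixed, shadowing a guest
 *         register, nor needed for this MC block's call arguments -
 *         preferring the non-volatile ones,
 *      3. whatever iemNativeRegAllocFindFree can free up (which may emit
 *         spill/flush code, hence the poff parameter).
 */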
5283
5284
5285/**
5286 * Worker that frees the stack slots for variable @a idxVar if any allocated.
5287 *
5288 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
5289 */
5290DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5291{
5292 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
5293 Assert(idxStackSlot == UINT8_MAX || idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
5294 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
5295 {
5296 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
5297 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
5298 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
5299 Assert(cSlots > 0);
5300 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
5301 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
5302 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
5303 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
5304 }
5305}
5306
5307
5308/**
5309 * Worker that frees a single variable.
5310 *
5311 * ASSUMES that @a idxVar is valid.
5312 */
5313DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
5314{
5315 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
5316 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
5317
5318 /* Free the host register first if any assigned. */
5319 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5320 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5321 {
5322 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
5323 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
5324 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5325 }
5326
5327 /* Free argument mapping. */
5328 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5329 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
5330 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
5331
5332 /* Free the stack slots. */
5333 iemNativeVarFreeStackSlots(pReNative, idxVar);
5334
5335 /* Free the actual variable. */
5336 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
5337 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5338}
5339
5340
5341/**
5342 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
5343 */
5344DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
5345{
5346 while (bmVars != 0)
5347 {
5348 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
5349 bmVars &= ~RT_BIT_32(idxVar);
5350
5351#if 1 /** @todo optimize by simplifying this later... */
5352 iemNativeVarFreeOneWorker(pReNative, idxVar);
5353#else
5354 /* Only need to free the host register, the rest is done as bulk updates below. */
5355 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
5356 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5357 {
5358 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
5359 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
5360 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5361 }
5362#endif
5363 }
5364#if 0 /** @todo optimize by simplifying this later... */
5365 pReNative->Core.bmVars = 0;
5366 pReNative->Core.bmStack = 0;
5367 pReNative->Core.u64ArgVars = UINT64_MAX;
5368#endif
5369}
5370
5371
5372/**
5373 * This is called by IEM_MC_END() to clean up all variables.
5374 */
5375DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
5376{
5377 uint32_t const bmVars = pReNative->Core.bmVars;
5378 if (bmVars != 0)
5379 iemNativeVarFreeAllSlow(pReNative, bmVars);
5380 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
5381 Assert(pReNative->Core.bmStack == 0);
5382}
5383
5384
5385/*********************************************************************************************************************************
5386* Emitters for IEM_MC_CALL_CIMPL_XXX *
5387*********************************************************************************************************************************/
5388
5389/**
5390 * Emits code to load a reference to the given guest register into @a idxGprDst.
5391 */
5392DECL_INLINE_THROW(uint32_t)
5393iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
5394 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
5395{
5396 /*
5397 * Get the offset relative to the CPUMCTX structure.
5398 */
5399 uint32_t offCpumCtx;
5400 switch (enmClass)
5401 {
5402 case kIemNativeGstRegRef_Gpr:
5403 Assert(idxRegInClass < 16);
5404 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
5405 break;
5406
5407 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
5408 Assert(idxRegInClass < 4);
5409 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
5410 break;
5411
5412 case kIemNativeGstRegRef_EFlags:
5413 Assert(idxRegInClass == 0);
5414 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
5415 break;
5416
5417 case kIemNativeGstRegRef_MxCsr:
5418 Assert(idxRegInClass == 0);
5419 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
5420 break;
5421
5422 case kIemNativeGstRegRef_FpuReg:
5423 Assert(idxRegInClass < 8);
5424 AssertFailed(); /** @todo what kind of indexing? */
5425 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5426 break;
5427
5428 case kIemNativeGstRegRef_MReg:
5429 Assert(idxRegInClass < 8);
5430 AssertFailed(); /** @todo what kind of indexing? */
5431 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
5432 break;
5433
5434 case kIemNativeGstRegRef_XReg:
5435 Assert(idxRegInClass < 16);
5436 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
5437 break;
5438
5439 default:
5440 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
5441 }
5442
5443 /*
5444 * Load the value into the destination register.
5445 */
5446#ifdef RT_ARCH_AMD64
5447 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
5448
5449#elif defined(RT_ARCH_ARM64)
5450 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5451 Assert(offCpumCtx < 4096);
5452 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
5453
5454#else
5455# error "Port me!"
5456#endif
5457
5458 return off;
5459}
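
/*
 * Illustrative example: a reference to AH (e.g. for an IEM_MC_REF_GREG_U8 of
 * a high byte register) would presumably end up here with
 * enmClass=kIemNativeGstRegRef_GprHighByte and idxRegInClass=0, yielding the
 * address of cpum.GstCtx.aGRegs[0].bHi - i.e. a pointer straight into the
 * guest context that the called C/assembly worker can dereference.
 */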
5460
5461
5462/**
5463 * Common code for CIMPL and AIMPL calls.
5464 *
5465 * These are calls that uses argument variables and such. They should not be
5466 * confused with internal calls required to implement an MC operation,
5467 * like a TLB load and similar.
5468 *
5469 * Upon return all that is left to do is to load any hidden arguments and
5470 * perform the call. All argument variables are freed.
5471 *
5472 * @returns New code buffer offset; throws VBox status code on error.
5473 * @param pReNative The native recompile state.
5474 * @param off The code buffer offset.
5475 * @param cArgs The total number of arguments (includes hidden
5476 * count).
5477 * @param cHiddenArgs The number of hidden arguments. The hidden
5478 * arguments must not have any variable declared for
5479 * them, whereas all the regular arguments must
5480 * (tstIEMCheckMc ensures this).
5481 */
5482DECL_HIDDEN_THROW(uint32_t)
5483iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
5484{
5485#ifdef VBOX_STRICT
5486 /*
5487 * Assert sanity.
5488 */
5489 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5490 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
5491 for (unsigned i = 0; i < cHiddenArgs; i++)
5492 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
5493 for (unsigned i = cHiddenArgs; i < cArgs; i++)
5494 {
5495 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
5496 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
5497 }
5498#endif
5499
5500 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
5501
5502 /*
5503 * First, go over the host registers that will be used for arguments and make
5504 * sure they either hold the desired argument or are free.
5505 */
5506 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
5507 for (uint32_t i = 0; i < cRegArgs; i++)
5508 {
5509 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5510 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5511 {
5512 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
5513 {
5514 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
5515 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5516 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
5517 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
5518 if (uArgNo == i)
5519 { /* perfect */ }
5520 else
5521 {
5522 /* The variable allocator logic should make sure this is impossible. */
5523 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
5524
5525 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5526 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5527 else
5528 {
5529 /* just free it, can be reloaded if used again */
5530 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5531 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
5532 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
5533 }
5534 }
5535 }
5536 else
5537 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
5538 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
5539 }
5540 }
5541
5542 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
5543
5544 /*
5545 * Make sure the argument variables are loaded into their respective registers.
5546 *
5547 * We can optimize this by ASSUMING that any register allocations are for
5548 * registers that have already been loaded and are ready. The previous step
5549 * saw to that.
5550 */
5551 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
5552 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5553 {
5554 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
5555 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
5556 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
5557 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
5558 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
5559 else
5560 {
5561 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5562 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5563 {
5564 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
5565 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
5566 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
5567 | RT_BIT_32(idxArgReg);
5568 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
5569 }
5570 else
5571 {
5572 /* Use ARG0 as temp for stuff we need registers for. */
5573 switch (pReNative->Core.aVars[idxVar].enmKind)
5574 {
5575 case kIemNativeVarKind_Stack:
5576 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5577 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5578 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeVarCalcBpDisp(pReNative, idxVar));
5579 continue;
5580
5581 case kIemNativeVarKind_Immediate:
5582 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
5583 continue;
5584
5585 case kIemNativeVarKind_VarRef:
5586 {
5587 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5588 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5589 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5590 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5591 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg,
5592 iemNativeStackCalcBpDisp(pReNative->Core.aVars[idxOtherVar].idxStackSlot));
5593 continue;
5594 }
5595
5596 case kIemNativeVarKind_GstRegRef:
5597 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
5598 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5599 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5600 continue;
5601
5602 case kIemNativeVarKind_Invalid:
5603 case kIemNativeVarKind_End:
5604 break;
5605 }
5606 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5607 }
5608 }
5609 }
5610#ifdef VBOX_STRICT
5611 else
5612 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
5613 {
5614 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
5615 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
5616 }
5617#endif
5618
5619#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
5620 /*
5621 * If there are any stack arguments, make sure they are in their place as well.
5622 *
5623 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since the
5624 * caller will load it later and it must be free (see the first loop).
5625 */
5626 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
5627 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
5628 {
5629 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5630 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
5631 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5632 {
5633 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
5634 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
5635 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
5636 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5637 }
5638 else
5639 {
5640 /* Use ARG0 as temp for stuff we need registers for. */
5641 switch (pReNative->Core.aVars[idxVar].enmKind)
5642 {
5643 case kIemNativeVarKind_Stack:
5644 AssertStmt(pReNative->Core.aVars[idxVar].idxStackSlot != UINT8_MAX,
5645 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5646 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
5647 iemNativeVarCalcBpDisp(pReNative, idxVar));
5648 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5649 continue;
5650
5651 case kIemNativeVarKind_Immediate:
5652 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
5653 continue;
5654
5655 case kIemNativeVarKind_VarRef:
5656 {
5657 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
5658 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
5659 AssertStmt(pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX,
5660 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
5661 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5662 iemNativeStackCalcBpDisp(pReNative->Core.aVars[idxOtherVar].idxStackSlot));
5663 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5664 continue;
5665 }
5666
5667 case kIemNativeVarKind_GstRegRef:
5668 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
5669 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
5670 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
5671 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
5672 continue;
5673
5674 case kIemNativeVarKind_Invalid:
5675 case kIemNativeVarKind_End:
5676 break;
5677 }
5678 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
5679 }
5680 }
5681#else
5682 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
5683#endif
5684
5685 /*
5686 * Free all argument variables (simplified).
5687 * Their lifetime always expires with the call they are for.
5688 */
5689 /** @todo Make the python script check that arguments aren't used after
5690 * IEM_MC_CALL_XXXX. */
5691 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
5692 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
5693 * an argument value. There is also some FPU stuff. */
5694 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
5695 {
5696 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
5697 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
5698
5699 /* no need to free registers: */
5700 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
5701 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
5702 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
5703 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
5704 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
5705 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
5706
5707 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
5708 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
5709 iemNativeVarFreeStackSlots(pReNative, idxVar);
5710 }
5711 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
5712
5713 /*
5714 * Flush volatile registers as we make the call.
5715 */
5716 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
5717
5718 return off;
5719}
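/*
 * Typical usage sketch (a rough illustration mirroring iemNativeEmitCallCImplCommon
 * below; pfnWorker is a placeholder name, not an actual symbol):
 *     off = iemNativeEmitCallCommon(pReNative, off, cArgs + cHiddenArgs, cHiddenArgs);
 *     // ... load the hidden arguments into IEMNATIVE_CALL_ARG0_GREG, ARG1, ... here ...
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnWorker);
 */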
5720
5721
5722/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
5723DECL_HIDDEN_THROW(uint32_t)
5724iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
5725 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
5726
5727{
5728 /*
5729 * Do all the call setup and cleanup.
5730 */
5731 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
5732
5733 /*
5734 * Load the two hidden arguments.
5735 */
5736#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5737 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5738 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5739 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
5740#else
5741 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5742 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
5743#endif
5744
5745 /*
5746 * Make the call and check the return code.
5747 *
5748 * Shadow PC copies are always flushed here, other stuff depends on flags.
5749 * Segment and general purpose registers are explicitly flushed via the
5750 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
5751 * macros.
5752 */
5753 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
5754#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
5755 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5756#endif
5757 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
5758 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
5759 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5760 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5761
5762 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5763}
5764
5765
5766#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
5767 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
5768
5769/** Emits code for IEM_MC_CALL_CIMPL_1. */
5770DECL_INLINE_THROW(uint32_t)
5771iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5772 uintptr_t pfnCImpl, uint8_t idxArg0)
5773{
5774 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5775 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5776 RT_NOREF_PV(idxArg0);
5777
5778 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
5779}
5780
5781
5782#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
5783 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
5784
5785/** Emits code for IEM_MC_CALL_CIMPL_2. */
5786DECL_INLINE_THROW(uint32_t)
5787iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5788 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
5789{
5790 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5791 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5792 RT_NOREF_PV(idxArg0);
5793
5794 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5795 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5796 RT_NOREF_PV(idxArg1);
5797
5798 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
5799}
5800
5801
5802#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
5803 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
5804 (uintptr_t)a_pfnCImpl, a0, a1, a2)
5805
5806/** Emits code for IEM_MC_CALL_CIMPL_3. */
5807DECL_INLINE_THROW(uint32_t)
5808iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5809 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
5810{
5811pReNative->pInstrBuf[off++] = 0xcc; /* int3 - debug breakpoint */
5812 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5813 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5814 RT_NOREF_PV(idxArg0);
5815
5816 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5817 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5818 RT_NOREF_PV(idxArg1);
5819
5820 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5821 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5822 RT_NOREF_PV(idxArg2);
5823
5824 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
5825}
5826
5827
5828#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
5829 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
5830 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
5831
5832/** Emits code for IEM_MC_CALL_CIMPL_4. */
5833DECL_INLINE_THROW(uint32_t)
5834iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5835 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
5836{
5837pReNative->pInstrBuf[off++] = 0xcc; /* int3 - debug breakpoint */
5838 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5839 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5840 RT_NOREF_PV(idxArg0);
5841
5842 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5843 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5844 RT_NOREF_PV(idxArg1);
5845
5846 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5847 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5848 RT_NOREF_PV(idxArg2);
5849
5850 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5851 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5852 RT_NOREF_PV(idxArg3);
5853
5854 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
5855}
5856
5857
5858#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
5859 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
5860 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
5861
5862 /** Emits code for IEM_MC_CALL_CIMPL_5. */
5863DECL_INLINE_THROW(uint32_t)
5864iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
5865 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
5866{
5867pReNative->pInstrBuf[off++] = 0xcc; /* int3 - debug breakpoint */
5868 Assert(idxArg0 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg0)));
5869 Assert(pReNative->Core.aVars[idxArg0].uArgNo == 0 + IEM_CIMPL_HIDDEN_ARGS);
5870 RT_NOREF_PV(idxArg0);
5871
5872 Assert(idxArg1 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg1)));
5873 Assert(pReNative->Core.aVars[idxArg1].uArgNo == 1 + IEM_CIMPL_HIDDEN_ARGS);
5874 RT_NOREF_PV(idxArg1);
5875
5876 Assert(idxArg2 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg2)));
5877 Assert(pReNative->Core.aVars[idxArg2].uArgNo == 2 + IEM_CIMPL_HIDDEN_ARGS);
5878 RT_NOREF_PV(idxArg2);
5879
5880 Assert(idxArg3 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg3)));
5881 Assert(pReNative->Core.aVars[idxArg3].uArgNo == 3 + IEM_CIMPL_HIDDEN_ARGS);
5882 RT_NOREF_PV(idxArg3);
5883
5884 Assert(idxArg4 < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxArg4)));
5885 Assert(pReNative->Core.aVars[idxArg4].uArgNo == 4 + IEM_CIMPL_HIDDEN_ARGS);
5886 RT_NOREF_PV(idxArg4);
5887
5888 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
5889}
5890
5891
5892/** Recompiler debugging: Flush guest register shadow copies. */
5893#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
5894
5895
5896
5897
5898/*********************************************************************************************************************************
5899* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
5900*********************************************************************************************************************************/
5901
5902#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
5903 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg)
5904
5905/** Emits code for IEM_MC_FETCH_GREG_U16. */
5906DECL_INLINE_THROW(uint32_t)
5907iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
5908{
5909 Assert(idxDstVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxDstVar)));
5910 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint16_t));
5911
5912 /*
5913 * We can either just load the low 16 bits of the GPR into a host register
5914 * for the variable, or we can do so via a shadow copy host register. The
5915 * latter will avoid having to reload it if it's being stored later, but
5916 * will waste a host register if it isn't touched again. Since we don't
5917 * know what's going to happen, we choose the latter for now.
5918 */
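    /* I.e. (sketch): grab a host register shadowing the guest GPR, copy its low 16 bits
       into the variable's own register, and leave the shadow copy in place for any later
       access to the same guest register. */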
5919 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5920 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
5921 kIemNativeGstRegUse_ReadOnly);
5922
5923 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5924 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxDstVar, &off);
5925 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
5926
5927 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
5928 return off;
5929}
5930
5931
5932
5933/*********************************************************************************************************************************
5934* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
5935*********************************************************************************************************************************/
5936
5937#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
5938 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
5939
5940/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
5941DECL_INLINE_THROW(uint32_t)
5942iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
5943{
5944 Assert(iGRegEx < 20);
5945 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5946 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + (iGRegEx & 15)),
5947 kIemNativeGstRegUse_ForUpdate);
5948#ifdef RT_ARCH_AMD64
5949 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5950
5951 /* To the lowest byte of the register: mov r8, imm8 */
5952 if (iGRegEx < 16)
5953 {
5954 if (idxGstTmpReg >= 8)
5955 pbCodeBuf[off++] = X86_OP_REX_B;
5956 else if (idxGstTmpReg >= 4)
5957 pbCodeBuf[off++] = X86_OP_REX;
5958 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5959 pbCodeBuf[off++] = u8Value;
5960 }
5961 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
5962 else if (idxGstTmpReg < 4)
5963 {
5964 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
5965 pbCodeBuf[off++] = u8Value;
5966 }
5967 else
5968 {
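        /* Only the second byte (bits 15:8) of RAX/RCX/RDX/RBX can be addressed directly
           as AH/CH/DH/BH, and never together with a REX prefix; for any other host
           register we rotate the 64-bit value right by 8, patch the now-lowest byte
           and rotate it back again. */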
5969 /* ror reg64, 8 */
5970 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5971 pbCodeBuf[off++] = 0xc1;
5972 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5973 pbCodeBuf[off++] = 8;
5974
5975 /* mov reg8, imm8 */
5976 if (idxGstTmpReg >= 8)
5977 pbCodeBuf[off++] = X86_OP_REX_B;
5978 else if (idxGstTmpReg >= 4)
5979 pbCodeBuf[off++] = X86_OP_REX;
5980 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
5981 pbCodeBuf[off++] = u8Value;
5982
5983 /* rol reg64, 8 */
5984 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
5985 pbCodeBuf[off++] = 0xc1;
5986 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5987 pbCodeBuf[off++] = 8;
5988 }
5989
5990#elif defined(RT_ARCH_ARM64)
5991 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
5992 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5993 if (iGRegEx < 16)
5994 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
5995 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
5996 else
5997 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
5998 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
5999 iemNativeRegFreeTmp(pReNative, idxImmReg);
6000
6001#else
6002# error "Port me!"
6003#endif
6004
6005 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6006
6007 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
6008
6009 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6010 return off;
6011}
6012
6013
6014#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
6015 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
6016
6017 /** Emits code for the constant value variant of IEM_MC_STORE_GREG_U16 (IEM_MC_STORE_GREG_U16_CONST). */
6018DECL_INLINE_THROW(uint32_t)
6019iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
6020{
6021pReNative->pInstrBuf[off++] = 0xcc; /* int3 - debug breakpoint */
6022 Assert(iGReg < 16);
6023 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6024 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
6025 kIemNativeGstRegUse_ForUpdate);
6026#ifdef RT_ARCH_AMD64
6027 /* mov reg16, imm16 */
6028 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6029 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6030 if (idxGstTmpReg >= 8)
6031 pbCodeBuf[off++] = X86_OP_REX_B;
6032 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
6033 pbCodeBuf[off++] = RT_BYTE1(uValue);
6034 pbCodeBuf[off++] = RT_BYTE2(uValue);
6035
6036#elif defined(RT_ARCH_ARM64)
6037 /* movk xdst, #uValue, lsl #0 */
6038 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6039 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
6040
6041#else
6042# error "Port me!"
6043#endif
6044
6045 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6046
6047 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg & 15]));
6048 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6049 return off;
6050}
6051
6052
6053/** Emits code for IEM_MC_STORE_GREG_U16. */
6054DECL_INLINE_THROW(uint32_t)
6055iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
6056{
6057 Assert(iGReg < 16);
6058 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
6059
6060 /*
6061 * If it's a constant value (unlikely) we treat this as an
6062 * IEM_MC_STORE_GREG_U16_CONST statement.
6063 */
6064 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
6065 { /* likely */ }
6066 else
6067 {
6068 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind != kIemNativeVarKind_Immediate,
6069 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6070 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
6071 }
6072
6073 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6074 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
6075 kIemNativeGstRegUse_ForUpdate);
6076
6077#ifdef RT_ARCH_AMD64
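    /* Note: a 16-bit mov leaves bits 63:16 of the destination register untouched, which
       is exactly what we need when updating only the low word of the 64-bit guest
       register shadow. */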
6078 /* mov reg16, reg16 or [mem16] */
6079 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
6080 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6081 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6082 {
6083 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
6084 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
6085 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
6086 pbCodeBuf[off++] = 0x8b;
6087 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
6088 }
6089 else
6090 {
6091 AssertStmt(pReNative->Core.aVars[idxValueVar].idxStackSlot != UINT8_MAX,
6092 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_4));
6093 if (idxGstTmpReg >= 8)
6094 pbCodeBuf[off++] = X86_OP_REX_R;
6095 pbCodeBuf[off++] = 0x8b;
6096 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeVarCalcBpDisp(pReNative, idxValueVar), pReNative);
6097 }
6098
6099#elif defined(RT_ARCH_ARM64)
6100 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
6101 uint8_t const idxVarReg = iemNativeVarAllocRegister(pReNative, idxValueVar, &off);
6102 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6103 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
6104
6105#else
6106# error "Port me!"
6107#endif
6108
6109 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6110
6111 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg & 15]));
6112 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6113 return off;
6114}
6115
6116
6117
6118/*********************************************************************************************************************************
6119* General purpose register manipulation (add, sub). *
6120*********************************************************************************************************************************/
6121
6122#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
6123 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
6124
6125/** Emits code for IEM_MC_SUB_GREG_U16. */
6126DECL_INLINE_THROW(uint32_t)
6127iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
6128{
6129 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6130 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
6131 kIemNativeGstRegUse_ForUpdate);
6132
6133#ifdef RT_ARCH_AMD64
6134 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6135 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6136 if (idxGstTmpReg >= 8)
6137 pbCodeBuf[off++] = X86_OP_REX_B;
6138 if (uSubtrahend == 1)
6139 {
6140 pbCodeBuf[off++] = 0xff; /* dec */
6141 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
6142 }
6143 else
6144 {
6145 pbCodeBuf[off++] = 0x81;
6146 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6147 pbCodeBuf[off++] = uSubtrahend;
6148 pbCodeBuf[off++] = 0;
6149 }
6150
6151#else
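    /* ARM64 (sketch): subtract into a temporary register and merge only bits 15:0 back
       into the guest register shadow with BFI, preserving the upper bits as 16-bit
       operand size semantics require. */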
6152 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6153 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6154
6155 /* sub tmp, gstgrp, uSubtrahend */
6156 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
6157
6158 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
6159 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
6160
6161 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6162#endif
6163
6164 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6165
6166 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6167
6168 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6169 return off;
6170}
6171
6172
6173#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
6174 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
6175
6176#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
6177 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
6178
6179/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
6180DECL_INLINE_THROW(uint32_t)
6181iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
6182{
6183 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6184 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGReg),
6185 kIemNativeGstRegUse_ForUpdate);
6186
6187#ifdef RT_ARCH_AMD64
6188 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6189 if (f64Bit)
6190 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
6191 else if (idxGstTmpReg >= 8)
6192 pbCodeBuf[off++] = X86_OP_REX_B;
6193 if (uSubtrahend == 1)
6194 {
6195 /* dec */
6196 pbCodeBuf[off++] = 0xff;
6197 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
6198 }
6199 else if (uSubtrahend < 128)
6200 {
6201 pbCodeBuf[off++] = 0x83; /* sub */
6202 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6203 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
6204 }
6205 else
6206 {
6207 pbCodeBuf[off++] = 0x81; /* sub */
6208 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
6209 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
6210 pbCodeBuf[off++] = 0;
6211 pbCodeBuf[off++] = 0;
6212 pbCodeBuf[off++] = 0;
6213 }
6214
6215#else
6216 /* sub tmp, gstgrp, uSubtrahend */
6217 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6218 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
6219
6220#endif
6221
6222 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6223
6224 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
6225
6226 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
6227 return off;
6228}
6229
6230
6231
6232/*********************************************************************************************************************************
6233* Builtin functions *
6234*********************************************************************************************************************************/
6235
6236/**
6237 * Built-in function that calls a C-implementation function taking zero arguments.
6238 */
6239static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
6240{
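    /* The call entry parameters are consumed as follows: auParams[0] holds the
       C-implementation function pointer, auParams[1] the instruction length and
       auParams[2] the guest shadow register flush mask. */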
6241 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
6242 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
6243 uint64_t const fGstShwFlush = pCallEntry->auParams[2];
6244 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
6245}
6246
6247
6248/**
6249 * Built-in function that checks for pending interrupts that can be delivered or
6250 * forced action flags.
6251 *
6252 * This triggers after the completion of an instruction, so EIP is already at
6253 * the next instruction. If an IRQ or important FF is pending, this will return
6254 * a non-zero status that stops TB execution.
6255 */
6256static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
6257{
6258 RT_NOREF(pCallEntry);
6259
6260 /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
6261 and I'm too lazy to create a 'Fixed' version of that one. */
6262 uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
6263 UINT32_MAX, pReNative->uCheckIrqSeqNo++);
6264
6265 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
6266
6267 /* Again, we need to load the extended EFLAGS before we actually need them
6268 in case we jump. We couldn't use iemNativeRegAllocTmpForGuestReg if we
6269 loaded them inside the check, as the shadow state would not be correct
6270 when the code branches before the load. Ditto PC. */
6271 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6272 kIemNativeGstRegUse_ReadOnly);
6273
6274 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
6275
6276 uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6277
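    /*
     * Rough C-level sketch of the checks emitted below (simplified; fEfl is the extended
     * EFLAGS value which also carries the CPUMCTX_INHIBIT_SHADOW bits, uPc the current
     * guest RIP):
     *
     *     uint64_t fFFs = pVCpu->fLocalForcedActions
     *                   & (VMCPU_FF_ALL_MASK & ~(  VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
     *                                            | VMCPU_FF_TLB_FLUSH | VMCPU_FF_UNHALT));
     *     if (fFFs)
     *     {
     *         if (fFFs & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
     *             return VINF_IEM_REEXEC_BREAK;
     *         if (   (fEfl & X86_EFL_IF)
     *             && (   !(fEfl & CPUMCTX_INHIBIT_SHADOW)
     *                 || pVCpu->cpum.GstCtx.uRipInhibitInt != uPc))
     *             return VINF_IEM_REEXEC_BREAK;
     *     }
     *     if (pVCpu->CTX_SUFF(pVM)->fGlobalForcedActions & VM_FF_ALL_MASK)
     *         return VINF_IEM_REEXEC_BREAK;
     */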
6278 /*
6279 * Start by checking the local forced actions of the EMT we're on for IRQs
6280 * and other FFs that need servicing.
6281 */
6282 /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
6283 /* Load FFs into idxTmpReg and AND with all relevant flags. */
6284 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
6285 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
6286 VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
6287 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
6288 | VMCPU_FF_TLB_FLUSH
6289 | VMCPU_FF_UNHALT ),
6290 true /*fSetFlags*/);
6291 /* If we end up with ZERO in idxTmpReg there is nothing to do. */
6292 uint32_t const offFixupJumpToVmCheck1 = off;
6293 off = iemNativeEmitJzToFixed(pReNative, off, 0);
6294
6295 /* Some relevant FFs are set, but if it's only APIC and/or PIC being set,
6296 these may be suppressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
6297 off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
6298 ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
6299 /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
6300 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
6301
6302 /* So, it's only interrupt related FFs and we need to see if IRQs are being
6303 suppressed by the CPU or not. */
6304 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
6305 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
6306 idxLabelReturnBreak);
6307
6308 /* We've got shadow flags set, so we must check that the PC they are valid
6309 for matches our current PC value. */
6310 /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
6311 * a register. */
6312 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
6313 off = iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, idxTmpReg, idxPcReg, idxLabelReturnBreak);
6314
6315 /*
6316 * Now check the force flags of the VM.
6317 */
6318 iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
6319 iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
6320 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
6321 off = iemNativeEmitLoadGpr32ByGpr(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
6322 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
6323 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabelReturnBreak);
6324
6325 /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
6326
6327 /*
6328 * We're good, no IRQs or FFs pending.
6329 */
6330 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6331 iemNativeRegFreeTmp(pReNative, idxEflReg);
6332 iemNativeRegFreeTmp(pReNative, idxPcReg);
6333
6334 return off;
6335}
6336
6337
6338/**
6339 * Built-in function checks if IEMCPU::fExec has the expected value.
6340 */
6341static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
6342{
6343 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
6344 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6345
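    /* In C terms this is roughly (sketch):
           if ((pVCpu->iem.s.fExec & IEMTB_F_KEY_MASK) != (fExpectedExec & IEMTB_F_KEY_MASK))
               return VINF_IEM_REEXEC_BREAK; */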
6346 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6347 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
6348 off = iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
6349 kIemNativeLabelType_ReturnBreak);
6350 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6351 return off;
6352}
6353
6354
6355
6356/*********************************************************************************************************************************
6357* The native code generator functions for each MC block. *
6358*********************************************************************************************************************************/
6359
6360
6361/*
6362 * Include g_apfnIemNativeRecompileFunctions and associated functions.
6363 *
6364 * This should probably live in its own file later, but let's see what the
6365 * compile times turn out to be first.
6366 */
6367#include "IEMNativeFunctions.cpp.h"
6368
6369
6370
6371/*********************************************************************************************************************************
6372* Recompiler Core. *
6373*********************************************************************************************************************************/
6374
6375
6376/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
6377static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
6378{
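    /* The opcode bytes are handed to the disassembler up front (see the
       DISInstrWithPrefetchedBytes calls below), so this callback merely zero-pads
       whatever is requested beyond them and reports that no more data is available. */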
6379 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
6380 pDis->cbCachedInstr += cbMaxRead;
6381 RT_NOREF(cbMinRead);
6382 return VERR_NO_DATA;
6383}
6384
6385
6386/**
6387 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
6388 * @returns pszBuf.
6389 * @param fFlags The flags.
6390 * @param pszBuf The output buffer.
6391 * @param cbBuf The output buffer size. At least 32 bytes.
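 * @note    Typical usage (minimal sketch):
 * @code
 *          char szBuf[64];
 *          Log(("TB flags: %s\n", iemTbFlagsToString(pTb->fFlags, szBuf, sizeof(szBuf))));
 * @endcode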
6392 */
6393DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
6394{
6395 Assert(cbBuf >= 32);
6396 static RTSTRTUPLE const s_aModes[] =
6397 {
6398 /* [00] = */ { RT_STR_TUPLE("16BIT") },
6399 /* [01] = */ { RT_STR_TUPLE("32BIT") },
6400 /* [02] = */ { RT_STR_TUPLE("!2!") },
6401 /* [03] = */ { RT_STR_TUPLE("!3!") },
6402 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
6403 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
6404 /* [06] = */ { RT_STR_TUPLE("!6!") },
6405 /* [07] = */ { RT_STR_TUPLE("!7!") },
6406 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
6407 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
6408 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
6409 /* [0b] = */ { RT_STR_TUPLE("!b!") },
6410 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
6411 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
6412 /* [0e] = */ { RT_STR_TUPLE("!e!") },
6413 /* [0f] = */ { RT_STR_TUPLE("!f!") },
6414 /* [10] = */ { RT_STR_TUPLE("!10!") },
6415 /* [11] = */ { RT_STR_TUPLE("!11!") },
6416 /* [12] = */ { RT_STR_TUPLE("!12!") },
6417 /* [13] = */ { RT_STR_TUPLE("!13!") },
6418 /* [14] = */ { RT_STR_TUPLE("!14!") },
6419 /* [15] = */ { RT_STR_TUPLE("!15!") },
6420 /* [16] = */ { RT_STR_TUPLE("!16!") },
6421 /* [17] = */ { RT_STR_TUPLE("!17!") },
6422 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
6423 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
6424 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
6425 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
6426 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
6427 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
6428 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
6429 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
6430 };
6431 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
6432 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
6433 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
6434
6435 pszBuf[off++] = ' ';
6436 pszBuf[off++] = 'C';
6437 pszBuf[off++] = 'P';
6438 pszBuf[off++] = 'L';
6439 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
6440 Assert(off < 32);
6441
6442 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
6443
6444 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
6445 {
6446 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
6447 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
6448 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
6449 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
6450 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
6451 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
6452 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
6453 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
6454 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
6455 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
6456 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
6457 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
6458 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
6459 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
6460 };
6461 if (fFlags)
6462 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
6463 if (s_aFlags[i].fFlag & fFlags)
6464 {
6465 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
6466 pszBuf[off++] = ' ';
6467 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
6468 off += s_aFlags[i].cchName;
6469 fFlags &= ~s_aFlags[i].fFlag;
6470 if (!fFlags)
6471 break;
6472 }
6473 pszBuf[off] = '\0';
6474
6475 return pszBuf;
6476}
6477
6478
6479DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
6480{
6481 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
6482
6483 char szDisBuf[512];
6484 DISSTATE Dis;
6485 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
6486 uint32_t const cNative = pTb->Native.cInstructions;
6487 uint32_t offNative = 0;
6488#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6489 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
6490#endif
6491 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6492 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6493 : DISCPUMODE_64BIT;
6494#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6495 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
6496#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6497 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
6498#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
6499# error "Port me"
6500#else
6501 csh hDisasm = ~(size_t)0;
6502# if defined(RT_ARCH_AMD64)
6503 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
6504# elif defined(RT_ARCH_ARM64)
6505 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
6506# else
6507# error "Port me"
6508# endif
6509 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
6510#endif
6511
6512 /*
6513 * Print TB info.
6514 */
6515 pHlp->pfnPrintf(pHlp,
6516 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
6517 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
6518 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
6519 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
6520#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6521 if (pDbgInfo && pDbgInfo->cEntries > 1)
6522 {
6523 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
6524
6525 /*
6526 * This disassembly is driven by the debug info which follows the native
6527 * code and indicates where the next guest instruction starts, where labels
6528 * are and other such things.
6529 */
6530 uint32_t idxThreadedCall = 0;
6531 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
6532 uint8_t idxRange = UINT8_MAX;
6533 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
6534 uint32_t offRange = 0;
6535 uint32_t offOpcodes = 0;
6536 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
6537 uint32_t const cDbgEntries = pDbgInfo->cEntries;
6538 uint32_t iDbgEntry = 1;
6539 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
6540
6541 while (offNative < cNative)
6542 {
6543 /* If we're at or have passed the point where the next chunk of debug
6544 info starts, process it. */
6545 if (offDbgNativeNext <= offNative)
6546 {
6547 offDbgNativeNext = UINT32_MAX;
6548 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
6549 {
6550 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
6551 {
6552 case kIemTbDbgEntryType_GuestInstruction:
6553 {
6554 /* Did the exec flag change? */
6555 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
6556 {
6557 pHlp->pfnPrintf(pHlp,
6558 " fExec change %#08x -> %#08x %s\n",
6559 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6560 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
6561 szDisBuf, sizeof(szDisBuf)));
6562 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
6563 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
6564 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
6565 : DISCPUMODE_64BIT;
6566 }
6567
6568 /* New opcode range? We need to fend off a spurious debug info entry here for cases
6569 where the compilation was aborted before the opcode was recorded and the actual
6570 instruction was translated to a threaded call. This may happen when we run out
6571 of ranges, or when some complicated interrupts/FFs are found to be pending or
6572 similar. So, we just deal with it here rather than in the compiler code as it
6573 is a lot simpler to do up here. */
6574 if ( idxRange == UINT8_MAX
6575 || idxRange >= cRanges
6576 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
6577 {
6578 idxRange += 1;
6579 if (idxRange < cRanges)
6580 offRange = 0;
6581 else
6582 continue;
6583 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
6584 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
6585 + (pTb->aRanges[idxRange].idxPhysPage == 0
6586 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6587 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
6588 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6589 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
6590 pTb->aRanges[idxRange].idxPhysPage);
6591 }
6592
6593 /* Disassemble the instruction. */
6594 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
6595 uint32_t cbInstr = 1;
6596 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6597 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
6598 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6599 if (RT_SUCCESS(rc))
6600 {
6601 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6602 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6603 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6604 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6605
6606 static unsigned const s_offMarker = 55;
6607 static char const s_szMarker[] = " ; <--- guest";
6608 if (cch < s_offMarker)
6609 {
6610 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
6611 cch = s_offMarker;
6612 }
6613 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
6614 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
6615
6616 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
6617 }
6618 else
6619 {
6620 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
6621 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
6622 cbInstr = 1;
6623 }
6624 GCPhysPc += cbInstr;
6625 offOpcodes += cbInstr;
6626 offRange += cbInstr;
6627 continue;
6628 }
6629
6630 case kIemTbDbgEntryType_ThreadedCall:
6631 pHlp->pfnPrintf(pHlp,
6632 " Call #%u to %s (%u args)%s\n",
6633 idxThreadedCall,
6634 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6635 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
6636 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? " - recompiled" : "");
6637 idxThreadedCall++;
6638 continue;
6639
6640 case kIemTbDbgEntryType_GuestRegShadowing:
6641 {
6642 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
6643 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
6644 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
6645 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
6646 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6647 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
6648 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
6649 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
6650 else
6651 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
6652 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
6653 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
6654 continue;
6655 }
6656
6657 case kIemTbDbgEntryType_Label:
6658 {
6659 const char *pszName = "what_the_fudge";
6660 const char *pszComment = "";
6661 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
6662 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
6663 {
6664 case kIemNativeLabelType_Return:
6665 pszName = "Return";
6666 break;
6667 case kIemNativeLabelType_ReturnBreak:
6668 pszName = "ReturnBreak";
6669 break;
6670 case kIemNativeLabelType_ReturnWithFlags:
6671 pszName = "ReturnWithFlags";
6672 break;
6673 case kIemNativeLabelType_NonZeroRetOrPassUp:
6674 pszName = "NonZeroRetOrPassUp";
6675 break;
6676 case kIemNativeLabelType_RaiseGp0:
6677 pszName = "RaiseGp0";
6678 break;
6679 case kIemNativeLabelType_If:
6680 pszName = "If";
6681 fNumbered = true;
6682 break;
6683 case kIemNativeLabelType_Else:
6684 pszName = "Else";
6685 fNumbered = true;
6686 pszComment = " ; regs state restored pre-if-block";
6687 break;
6688 case kIemNativeLabelType_Endif:
6689 pszName = "Endif";
6690 fNumbered = true;
6691 break;
6692 case kIemNativeLabelType_CheckIrq:
6693 pszName = "CheckIrq_CheckVM";
6694 fNumbered = true;
6695 break;
6696 case kIemNativeLabelType_Invalid:
6697 case kIemNativeLabelType_End:
6698 break;
6699 }
6700 if (fNumbered)
6701 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
6702 else
6703 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
6704 continue;
6705 }
6706
6707 case kIemTbDbgEntryType_NativeOffset:
6708 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
6709 Assert(offDbgNativeNext > offNative);
6710 break;
6711
6712 default:
6713 AssertFailed();
6714 }
6715 iDbgEntry++;
6716 break;
6717 }
6718 }
6719
6720 /*
6721 * Disassemble the next native instruction.
6722 */
6723 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
6724# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6725 uint32_t cbInstr = sizeof(paNative[0]);
6726 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
6727 if (RT_SUCCESS(rc))
6728 {
6729# if defined(RT_ARCH_AMD64)
6730 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6731 {
6732 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6733 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6734 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)%s\n",
6735 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6736 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6737 uInfo & 0x8000 ? " - recompiled" : "");
6738 else
6739 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
6740 }
6741 else
6742# endif
6743 {
6744# ifdef RT_ARCH_AMD64
6745 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6746 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6747 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6748 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6749# elif defined(RT_ARCH_ARM64)
6750 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
6751 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6752 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6753# else
6754# error "Port me"
6755# endif
6756 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
6757 }
6758 }
6759 else
6760 {
6761# if defined(RT_ARCH_AMD64)
6762 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6763 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
6764# elif defined(RT_ARCH_ARM64)
6765 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
6766# else
6767# error "Port me"
6768# endif
6769 cbInstr = sizeof(paNative[0]);
6770 }
6771 offNative += cbInstr / sizeof(paNative[0]);
6772
6773# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6774 cs_insn *pInstr;
6775 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
6776 (uintptr_t)pNativeCur, 1, &pInstr);
6777 if (cInstrs > 0)
6778 {
6779 Assert(cInstrs == 1);
6780# if defined(RT_ARCH_AMD64)
6781 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
6782 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
6783# else
6784 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
6785 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
6786# endif
6787 offNative += pInstr->size / sizeof(*pNativeCur);
6788 cs_free(pInstr, cInstrs);
6789 }
6790 else
6791 {
6792# if defined(RT_ARCH_AMD64)
6793 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
6794 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
6795# else
6796 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
6797# endif
6798 offNative++;
6799 }
6800# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6801 }
6802 }
6803 else
6804#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
6805 {
6806 /*
6807 * No debug info, just disassemble the x86 code and then the native code.
6808 *
6809 * First the guest code:
6810 */
6811 for (unsigned i = 0; i < pTb->cRanges; i++)
6812 {
6813 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
6814 + (pTb->aRanges[i].idxPhysPage == 0
6815 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
6816 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
6817 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
6818 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
6819 unsigned off = pTb->aRanges[i].offOpcodes;
6820 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
6821 while (off < cbOpcodes)
6822 {
6823 uint32_t cbInstr = 1;
6824 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
6825 &pTb->pabOpcodes[off], cbOpcodes - off,
6826 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
6827 if (RT_SUCCESS(rc))
6828 {
6829 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6830 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6831 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6832 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6833 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
6834 GCPhysPc += cbInstr;
6835 off += cbInstr;
6836 }
6837 else
6838 {
6839 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
6840 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
6841 break;
6842 }
6843 }
6844 }
6845
6846 /*
6847 * Then the native code:
6848 */
6849 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
6850 while (offNative < cNative)
6851 {
6852 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
6853# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6854 uint32_t cbInstr = sizeof(paNative[0]);
6855 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
6856 if (RT_SUCCESS(rc))
6857 {
6858# if defined(RT_ARCH_AMD64)
6859 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
6860 {
6861 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
6862 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
6863 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)%s\n",
6864 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
6865 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
6866 uInfo & 0x8000 ? " - recompiled" : "");
6867 else
6868 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
6869 }
6870 else
6871# endif
6872 {
6873# ifdef RT_ARCH_AMD64
6874 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
6875 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
6876 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6877 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6878# elif defined(RT_ARCH_ARM64)
6879 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
6880 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
6881 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
6882# else
6883# error "Port me"
6884# endif
6885 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
6886 }
6887 }
6888 else
6889 {
6890# if defined(RT_ARCH_AMD64)
6891 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
6892 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
6893# else
6894 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
6895# endif
6896 cbInstr = sizeof(paNative[0]);
6897 }
6898 offNative += cbInstr / sizeof(paNative[0]);
6899
6900# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6901 cs_insn *pInstr;
6902 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
6903 (uintptr_t)pNativeCur, 1, &pInstr);
6904 if (cInstrs > 0)
6905 {
6906 Assert(cInstrs == 1);
6907# if defined(RT_ARCH_AMD64)
6908 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
6909 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
6910# else
6911 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
6912 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
6913# endif
6914 offNative += pInstr->size / sizeof(*pNativeCur);
6915 cs_free(pInstr, cInstrs);
6916 }
6917 else
6918 {
6919# if defined(RT_ARCH_AMD64)
6920 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
6921 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
6922# else
6923 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
6924# endif
6925 offNative++;
6926 }
6927# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
6928 }
6929 }
6930
6931#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
6932 /* Cleanup. */
6933 cs_close(&hDisasm);
6934#endif
6935}
6936
6937
6938/**
6939 * Recompiles the given threaded TB into a native one.
6940 *
6941 * In case of failure the translation block will be returned as-is.
6942 *
6943 * @returns pTb.
6944 * @param pVCpu The cross context virtual CPU structure of the calling
6945 * thread.
6946 * @param pTb The threaded translation to recompile to native.
6947 */
6948DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
6949{
6950 /*
6951 * The first time thru, we allocate the recompiler state, the other times
6952 * we just need to reset it before using it again.
6953 */
6954 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
6955 if (RT_LIKELY(pReNative))
6956 iemNativeReInit(pReNative, pTb);
6957 else
6958 {
6959 pReNative = iemNativeInit(pVCpu, pTb);
6960 AssertReturn(pReNative, pTb);
6961 }
6962
6963 /*
6964 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
6965 * for aborting if an error happens.
6966 */
6967 uint32_t cCallsLeft = pTb->Thrd.cCalls;
6968#ifdef LOG_ENABLED
6969 uint32_t const cCallsOrg = cCallsLeft;
6970#endif
6971 uint32_t off = 0;
6972 int rc = VINF_SUCCESS;
6973 IEMNATIVE_TRY_SETJMP(pReNative, rc)
6974 {
6975 /*
6976 * Emit prolog code (fixed).
6977 */
6978 off = iemNativeEmitProlog(pReNative, off);
6979
6980 /*
6981 * Convert the calls to native code.
6982 */
6983#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6984 int32_t iGstInstr = -1;
6985 uint32_t fExec = pTb->fFlags;
6986#endif
6987 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
6988 while (cCallsLeft-- > 0)
6989 {
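 /* Look up the native recompilation handler for this threaded call; a NULL entry means
    the function has not been ported yet and we fall back to emitting a call to the
    threaded helper further down. */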
6990 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
6991
6992 /*
6993 * Debug info and assembly markup.
6994 */
6995#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6996 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
6997 fExec = pCallEntry->auParams[0];
6998 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6999 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
7000 {
7001 if (iGstInstr < (int32_t)pTb->cInstructions)
7002 iemNativeDbgInfoAddGuestInstruction(pReNative, fExec);
7003 else
7004 Assert(iGstInstr == pTb->cInstructions);
7005 iGstInstr = pCallEntry->idxInstr;
7006 }
7007 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
7008#endif
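 /* In strict builds, emit a marker instruction encoding the call index (bit 15 set when
    recompiled natively) and the threaded function, so the disassembler above can
    annotate the native code. */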
7009#if defined(VBOX_STRICT)
7010 off = iemNativeEmitMarker(pReNative, off,
7011 RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
7012 pCallEntry->enmFunction));
7013#endif
7014#if defined(VBOX_STRICT)
7015 iemNativeRegAssertSanity(pReNative);
7016#endif
7017
7018 /*
7019 * Actual work.
7020 */
7021 Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
7022 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "" : "(todo)"));
7023 if (pfnRecom) /** @todo stats on this. */
7024 {
7025 //STAM_COUNTER_INC()
7026 off = pfnRecom(pReNative, off, pCallEntry);
7027 }
7028 else
7029 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
7030 Assert(off <= pReNative->cInstrBufAlloc);
7031 Assert(pReNative->cCondDepth == 0);
7032
7033 /*
7034 * Advance.
7035 */
7036 pCallEntry++;
7037 }
7038
7039 /*
7040 * Emit the epilog code.
7041 */
7042 uint32_t idxReturnLabel;
7043 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
7044
7045 /*
7046 * Generate special jump labels.
7047 */
7048 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
7049 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
7050 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
7051 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
7052 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
7053 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
7054 }
7055 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
7056 {
7057 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
7058 return pTb;
7059 }
7060 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
7061 Assert(off <= pReNative->cInstrBufAlloc);
7062
7063 /*
7064 * Make sure all labels have been defined.
7065 */
7066 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
7067#ifdef VBOX_STRICT
7068 uint32_t const cLabels = pReNative->cLabels;
7069 for (uint32_t i = 0; i < cLabels; i++)
7070 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
7071#endif
7072
7073 /*
7074 * Allocate executable memory, copy over the code we've generated.
7075 */
7076 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
7077 if (pTbAllocator->pDelayedFreeHead)
7078 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
7079
7080 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
7081 AssertReturn(paFinalInstrBuf, pTb);
7082 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
7083
7084 /*
7085 * Apply fixups.
7086 */
7087 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
7088 uint32_t const cFixups = pReNative->cFixups;
7089 for (uint32_t i = 0; i < cFixups; i++)
7090 {
7091 Assert(paFixups[i].off < off);
7092 Assert(paFixups[i].idxLabel < cLabels);
7093 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
7094 switch (paFixups[i].enmType)
7095 {
7096#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
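 /* 32-bit signed displacement in bytes from the fixup location to the label,
    adjusted by offAddend. */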
7097 case kIemNativeFixupType_Rel32:
7098 Assert(paFixups[i].off + 4 <= off);
7099 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
7100 continue;
7101
7102#elif defined(RT_ARCH_ARM64)
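 /* 26-bit signed branch immediate at bits [25:0] (B/BL encoding); the displacement
    is in 4-byte instruction units. */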
7103 case kIemNativeFixupType_RelImm26At0:
7104 {
7105 Assert(paFixups[i].off < off);
7106 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
7107 Assert(offDisp >= -262144 && offDisp < 262144);
7108 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
7109 continue;
7110 }
7111
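 /* 19-bit signed immediate at bits [23:5], as used by conditional branches and CBZ/CBNZ. */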
7112 case kIemNativeFixupType_RelImm19At5:
7113 {
7114 Assert(paFixups[i].off < off);
7115 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
7116 Assert(offDisp >= -262144 && offDisp < 262144);
7117 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
7118 continue;
7119 }
7120
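 /* 14-bit signed immediate at bits [18:5], as used by TBZ/TBNZ. */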
7121 case kIemNativeFixupType_RelImm14At5:
7122 {
7123 Assert(paFixups[i].off < off);
7124 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
7125 Assert(offDisp >= -8192 && offDisp < 8192);
7126 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
7127 continue;
7128 }
7129
7130#endif
7131 case kIemNativeFixupType_Invalid:
7132 case kIemNativeFixupType_End:
7133 break;
7134 }
7135 AssertFailed();
7136 }
7137
7138 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
7139
7140 /*
7141 * Convert the translation block.
7142 */
7143 //RT_BREAKPOINT();
7144 RTMemFree(pTb->Thrd.paCalls);
7145 pTb->Native.paInstructions = paFinalInstrBuf;
7146 pTb->Native.cInstructions = off;
7147 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
7148#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7149 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
7150 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
7151#endif
7152
7153 Assert(pTbAllocator->cThreadedTbs > 0);
7154 pTbAllocator->cThreadedTbs -= 1;
7155 pTbAllocator->cNativeTbs += 1;
7156 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
7157
7158#ifdef LOG_ENABLED
7159 /*
7160 * Disassemble to the log if enabled.
7161 */
7162 if (LogIs3Enabled())
7163 {
7164 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
7165 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
7166 }
7167#endif
7168
7169 return pTb;
7170}
7171