VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103583

Last change on this file since 103583 was 103555, checked in by vboxsync, 13 months ago

VMM/IEM: Added native translation for IEM_MC_AND_LOCAL_U8/16/32/64 and IEM_MC_OR_LOCAL_U8/16/32/64. Annotate disassembly (on amd64) with VMCPU member names. bugref:10376

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 637.1 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103555 2024-02-24 02:14:09Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
136#endif
137#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
138static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
139static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
140#endif
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
142DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
143 IEMNATIVEGSTREG enmGstReg, uint32_t off);
144DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
145
146
147/*********************************************************************************************************************************
148* Executable Memory Allocator *
149*********************************************************************************************************************************/
150/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151 * Use an alternative chunk sub-allocator that does not store internal data
152 * in the chunk.
153 *
154 * Using RTHeapSimple is not practical on newer darwin systems where
155 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
156 * memory. We would have to change the protection of the whole chunk for
157 * every call to RTHeapSimple, which would be rather expensive.
158 *
159 * This alternative implementation restricts page protection modifications
160 * to the pages backing the executable memory we just allocated.
161 */
162#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
163/** The chunk sub-allocation unit size in bytes. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
165/** The chunk sub-allocation unit size as a shift factor. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
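/*
 * Illustrative note (added for exposition, not part of the original source):
 * with the 128 byte unit size above, a request is rounded up to whole units
 * before the allocation bitmap is consulted.  A hypothetical 300 byte request
 * would thus take (300 + 127) >> 7 = 3 units, i.e. 384 bytes of the chunk, and
 * occupy three consecutive bits in the bitmap.
 */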
167
168#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
169# ifdef IEMNATIVE_USE_GDB_JIT
170# define IEMNATIVE_USE_GDB_JIT_ET_DYN
171
172/** GDB JIT: Code entry. */
173typedef struct GDBJITCODEENTRY
174{
175 struct GDBJITCODEENTRY *pNext;
176 struct GDBJITCODEENTRY *pPrev;
177 uint8_t *pbSymFile;
178 uint64_t cbSymFile;
179} GDBJITCODEENTRY;
180
181/** GDB JIT: Actions. */
182typedef enum GDBJITACTIONS : uint32_t
183{
184 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
185} GDBJITACTIONS;
186
187/** GDB JIT: Descriptor. */
188typedef struct GDBJITDESCRIPTOR
189{
190 uint32_t uVersion;
191 GDBJITACTIONS enmAction;
192 GDBJITCODEENTRY *pRelevant;
193 GDBJITCODEENTRY *pHead;
194 /** Our addition: */
195 GDBJITCODEENTRY *pTail;
196} GDBJITDESCRIPTOR;
197
198/** GDB JIT: Our simple symbol file data. */
199typedef struct GDBJITSYMFILE
200{
201 Elf64_Ehdr EHdr;
202# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
203 Elf64_Shdr aShdrs[5];
204# else
205 Elf64_Shdr aShdrs[7];
206 Elf64_Phdr aPhdrs[2];
207# endif
208 /** The dwarf ehframe data for the chunk. */
209 uint8_t abEhFrame[512];
210 char szzStrTab[128];
211 Elf64_Sym aSymbols[3];
212# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
213 Elf64_Sym aDynSyms[2];
214 Elf64_Dyn aDyn[6];
215# endif
216} GDBJITSYMFILE;
217
218extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
219extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
220
221/** Init once for g_IemNativeGdbJitLock. */
222static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
223/** Init once for the critical section. */
224static RTCRITSECT g_IemNativeGdbJitLock;
225
226/** GDB reads the info here. */
227GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
228
229/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
230DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
231{
232 ASMNopPause();
233}
234
235/** @callback_method_impl{FNRTONCE} */
236static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
237{
238 RT_NOREF(pvUser);
239 return RTCritSectInit(&g_IemNativeGdbJitLock);
240}
241
242
243# endif /* IEMNATIVE_USE_GDB_JIT */
244
245/**
246 * Per-chunk unwind info for non-windows hosts.
247 */
248typedef struct IEMEXECMEMCHUNKEHFRAME
249{
250# ifdef IEMNATIVE_USE_LIBUNWIND
251 /** The offset of the FDA into abEhFrame. */
252 uintptr_t offFda;
253# else
254 /** 'struct object' storage area. */
255 uint8_t abObject[1024];
256# endif
257# ifdef IEMNATIVE_USE_GDB_JIT
258# if 0
259 /** The GDB JIT 'symbol file' data. */
260 GDBJITSYMFILE GdbJitSymFile;
261# endif
262 /** The GDB JIT list entry. */
263 GDBJITCODEENTRY GdbJitEntry;
264# endif
265 /** The dwarf ehframe data for the chunk. */
266 uint8_t abEhFrame[512];
267} IEMEXECMEMCHUNKEHFRAME;
268/** Pointer to per-chunk unwind info for non-windows hosts. */
269typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
270#endif
271
272
273/**
274 * A chunk of executable memory.
275 */
276typedef struct IEMEXECMEMCHUNK
277{
278#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
279 /** Number of free items in this chunk. */
280 uint32_t cFreeUnits;
281 /** Hint where to start searching for free space in the allocation bitmap. */
282 uint32_t idxFreeHint;
283#else
284 /** The heap handle. */
285 RTHEAPSIMPLE hHeap;
286#endif
287 /** Pointer to the chunk. */
288 void *pvChunk;
289#ifdef IN_RING3
290 /**
291 * Pointer to the unwind information.
292 *
293 * This is used during C++ throw and longjmp (windows and probably most other
294 * platforms). Some debuggers (windbg) make use of it as well.
295 *
296 * Windows: This is allocated from hHeap on windows because (at least for
297 * AMD64) the UNWIND_INFO structure address in the
298 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
299 *
300 * Others: Allocated from the regular heap to avoid unnecessary executable data
301 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
302 void *pvUnwindInfo;
303#elif defined(IN_RING0)
304 /** Allocation handle. */
305 RTR0MEMOBJ hMemObj;
306#endif
307} IEMEXECMEMCHUNK;
308/** Pointer to a memory chunk. */
309typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
310
311
312/**
313 * Executable memory allocator for the native recompiler.
314 */
315typedef struct IEMEXECMEMALLOCATOR
316{
317 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
318 uint32_t uMagic;
319
320 /** The chunk size. */
321 uint32_t cbChunk;
322 /** The maximum number of chunks. */
323 uint32_t cMaxChunks;
324 /** The current number of chunks. */
325 uint32_t cChunks;
326 /** Hint where to start looking for available memory. */
327 uint32_t idxChunkHint;
328 /** Statistics: Current number of allocations. */
329 uint32_t cAllocations;
330
331 /** The total amount of memory available. */
332 uint64_t cbTotal;
333 /** Total amount of free memory. */
334 uint64_t cbFree;
335 /** Total amount of memory allocated. */
336 uint64_t cbAllocated;
337
338#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
339 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
340 *
341 * Since the chunk size is a power of two and the minimum chunk size is a lot
342 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
343 * require a whole number of uint64_t elements in the allocation bitmap. So,
344 * for the sake of simplicity, they are allocated as one contiguous
345 * block. */
346 uint64_t *pbmAlloc;
347 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
348 uint32_t cUnitsPerChunk;
349 /** Number of bitmap elements per chunk (for quickly locating the bitmap
350 * portion corresponding to a chunk). */
351 uint32_t cBitmapElementsPerChunk;
352#else
353 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
354 * @{ */
355 /** The size of the heap internal block header. This is used to adjust the
356 * request memory size to make sure there is exactly enough room for a header at
357 * the end of the blocks we allocate before the next 64 byte alignment line. */
358 uint32_t cbHeapBlockHdr;
359 /** The size of initial heap allocation required to make sure the first
360 * allocation is correctly aligned. */
361 uint32_t cbHeapAlignTweak;
362 /** The alignment tweak allocation address. */
363 void *pvAlignTweak;
364 /** @} */
365#endif
366
367#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
368 /** Pointer to the array of unwind info running parallel to aChunks (same
369 * allocation as this structure, located after the bitmaps).
370 * (For Windows, the structures must reside in 32-bit RVA distance to the
371 * actual chunk, so they are allocated off the chunk.) */
372 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
373#endif
374
375 /** The allocation chunks. */
376 RT_FLEXIBLE_ARRAY_EXTENSION
377 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
378} IEMEXECMEMALLOCATOR;
379/** Pointer to an executable memory allocator. */
380typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
381
382/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
383#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
384
385
386static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
387
388
389/**
390 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
391 * the heap statistics.
392 */
393static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
394 uint32_t cbReq, uint32_t idxChunk)
395{
396 pExecMemAllocator->cAllocations += 1;
397 pExecMemAllocator->cbAllocated += cbReq;
398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
399 pExecMemAllocator->cbFree -= cbReq;
400#else
401 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
402#endif
403 pExecMemAllocator->idxChunkHint = idxChunk;
404
405#ifdef RT_OS_DARWIN
406 /*
407 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
408 * on darwin. So, we mark the pages returned as read+write after alloc and
409 * expect the caller to call iemExecMemAllocatorReadyForUse when done
410 * writing to the allocation.
411 *
412 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
413 * for details.
414 */
415 /** @todo detect if this is necessary... it wasn't required on 10.15 or
416 * whatever older version it was. */
417 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
418 AssertRC(rc);
419#endif
420
421 return pvRet;
422}
423
424
425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
426static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
427 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
428{
429 /*
430 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
431 */
432 Assert(!(cToScan & 63));
433 Assert(!(idxFirst & 63));
434 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
435 pbmAlloc += idxFirst / 64;
436
437 /*
438 * Scan the bitmap for cReqUnits consecutive clear bits
439 */
440 /** @todo This can probably be done more efficiently for non-x86 systems. */
441 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
442 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
443 {
444 uint32_t idxAddBit = 1;
445 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
446 idxAddBit++;
447 if (idxAddBit >= cReqUnits)
448 {
449 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
450
451 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
452 pChunk->cFreeUnits -= cReqUnits;
453 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
454
455 void * const pvRet = (uint8_t *)pChunk->pvChunk
456 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
457
458 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
459 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
460 }
461
462 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
463 }
464 return NULL;
465}
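/*
 * Illustrative note (added for exposition): with cReqUnits = 3 and the low
 * bitmap bits reading 1,0,0,1,1,0,0,0,... (LSB first), the scan above first
 * finds clear bit 1, extends the run to bit 2, stops at the set bit 3, skips
 * forward with ASMBitNextClear to bit 5, and then succeeds with the run of
 * three clear bits 5..7.
 */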
466#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
467
468
469static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
470{
471#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
472 /*
473 * Figure out how much to allocate.
474 */
475 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
476 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
477 {
478 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
479 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
480 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
481 {
482 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
483 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
484 if (pvRet)
485 return pvRet;
486 }
487 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
488 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
489 cReqUnits, idxChunk);
490 }
491#else
492 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
493 if (pvRet)
494 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
495#endif
496 return NULL;
497
498}
499
500
501/**
502 * Allocates @a cbReq bytes of executable memory.
503 *
504 * @returns Pointer to the memory, NULL if out of memory or other problem
505 * encountered.
506 * @param pVCpu The cross context virtual CPU structure of the calling
507 * thread.
508 * @param cbReq How many bytes are required.
509 */
510static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
511{
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
513 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
514 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
515
516
517 for (unsigned iIteration = 0;; iIteration++)
518 {
519 /*
520 * Adjust the request size so it'll fit the allocator alignment/whatnot.
521 *
522 * For the RTHeapSimple allocator this means to follow the logic described
523 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
524 * existing chunks if we think we've got sufficient free memory around.
525 *
526 * While for the alternative one we just align it up to a whole unit size.
527 */
528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
529 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
530#else
531 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
532#endif
533 if (cbReq <= pExecMemAllocator->cbFree)
534 {
535 uint32_t const cChunks = pExecMemAllocator->cChunks;
536 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
537 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
544 {
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 }
549 }
550
551 /*
552 * Can we grow it with another chunk?
553 */
554 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
555 {
556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
557 AssertLogRelRCReturn(rc, NULL);
558
559 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
560 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
561 if (pvRet)
562 return pvRet;
563 AssertFailed();
564 }
565
566 /*
567 * Try prune native TBs once.
568 */
569 if (iIteration == 0)
570 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
571 else
572 {
573 /** @todo stats... */
574 return NULL;
575 }
576 }
577
578}
579
580
581/** This is a hook that we may need later for changing memory protection back
582 * to readonly+exec */
583static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
584{
585#ifdef RT_OS_DARWIN
586 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
587 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
588 AssertRC(rc); RT_NOREF(pVCpu);
589
590 /*
591 * Flush the instruction cache:
592 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
593 */
594 /* sys_dcache_flush(pv, cb); - not necessary */
595 sys_icache_invalidate(pv, cb);
596#else
597 RT_NOREF(pVCpu, pv, cb);
598#endif
599}
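#if 0
/* Illustrative sketch only (added for exposition, not part of the original source):
   the intended call sequence around the allocator helpers in this file, in particular
   the darwin W^X dance where the memory is writable right after allocation and must be
   handed to iemExecMemAllocatorReadyForUse() before it may be executed. */
static void iemExecMemExampleLifecycle(PVMCPUCC pVCpu, const uint8_t *pbCode, size_t cbCode)
{
    /* 1. Allocate executable-heap memory (returned read+write on darwin). */
    void *pv = iemExecMemAllocatorAlloc(pVCpu, (uint32_t)cbCode);
    if (pv)
    {
        /* 2. Emit/copy the native code while the pages are writable. */
        memcpy(pv, pbCode, cbCode);

        /* 3. Flip to read+exec and invalidate the instruction cache where needed. */
        iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode);

        /* ... execute the translation block ... */

        /* 4. Return the memory when the TB is retired (see iemExecMemAllocatorFree below). */
        iemExecMemAllocatorFree(pVCpu, pv, cbCode);
    }
}
#endif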
600
601
602/**
603 * Frees executable memory.
604 */
605void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
606{
607 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
608 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
609 Assert(pv);
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
612#else
613 Assert(!((uintptr_t)pv & 63));
614#endif
615
616 /* Align the size as we did when allocating the block. */
617#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
618 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
619#else
620 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
621#endif
622
623 /* Free it / assert sanity. */
624#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
625 uint32_t const cChunks = pExecMemAllocator->cChunks;
626 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
627 bool fFound = false;
628 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
629 {
630 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
631 fFound = offChunk < cbChunk;
632 if (fFound)
633 {
634#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
635 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
636 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
637
638 /* Check that it's valid and free it. */
639 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
641 for (uint32_t i = 1; i < cReqUnits; i++)
642 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
643 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
644
645 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
646 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
647
648 /* Update the stats. */
649 pExecMemAllocator->cbAllocated -= cb;
650 pExecMemAllocator->cbFree += cb;
651 pExecMemAllocator->cAllocations -= 1;
652 return;
653#else
654 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
655 break;
656#endif
657 }
658 }
659# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
660 AssertFailed();
661# else
662 Assert(fFound);
663# endif
664#endif
665
666#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
667 /* Update stats while cb is freshly calculated. */
668 pExecMemAllocator->cbAllocated -= cb;
669 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
670 pExecMemAllocator->cAllocations -= 1;
671
672 /* Free it. */
673 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
674#endif
675}
676
677
678
679#ifdef IN_RING3
680# ifdef RT_OS_WINDOWS
681
682/**
683 * Initializes the unwind info structures for windows hosts.
684 */
685static int
686iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
687 void *pvChunk, uint32_t idxChunk)
688{
689 RT_NOREF(pVCpu);
690
691 /*
692 * The AMD64 unwind opcodes.
693 *
694 * This is a program that starts with RSP after a RET instruction that
695 * ends up in recompiled code, and the operations we describe here will
696 * restore all non-volatile registers and bring RSP back to where our
697 * RET address is. This means it's reverse order from what happens in
698 * the prologue.
699 *
700 * Note! Using a frame register approach here both because we have one
701 * and mainly because the UWOP_ALLOC_LARGE argument values
702 * would be a pain to write initializers for. On the positive
703 * side, we're impervious to changes in the stack variable
704 * area and can deal with dynamic stack allocations if necessary.
705 */
706 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
707 {
708 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 0x10 (0x60) */
709 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
710 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
711 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
712 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
713 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
714 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
715 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
716 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
717 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
718 };
719 union
720 {
721 IMAGE_UNWIND_INFO Info;
722 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
723 } s_UnwindInfo =
724 {
725 {
726 /* .Version = */ 1,
727 /* .Flags = */ 0,
728 /* .SizeOfProlog = */ 16, /* whatever */
729 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
730 /* .FrameRegister = */ X86_GREG_xBP,
731 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
732 }
733 };
734 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
735 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
736
737 /*
738 * Calc how much space we need and allocate it off the exec heap.
739 */
740 unsigned const cFunctionEntries = 1;
741 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
742 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
743# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
744 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
745 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
746 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
747# else
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
749 - pExecMemAllocator->cbHeapBlockHdr;
750 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
751 32 /*cbAlignment*/);
752# endif
753 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
754 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
755
756 /*
757 * Initialize the structures.
758 */
759 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
760
761 paFunctions[0].BeginAddress = 0;
762 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
763 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
764
765 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
766 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
767
768 /*
769 * Register it.
770 */
771 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
772 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
773
774 return VINF_SUCCESS;
775}
776
777
778# else /* !RT_OS_WINDOWS */
779
780/**
781 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
782 */
783DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
784{
785 if (iValue >= 64)
786 {
787 Assert(iValue < 0x2000);
788 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
789 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
790 }
791 else if (iValue >= 0)
792 *Ptr.pb++ = (uint8_t)iValue;
793 else if (iValue > -64)
794 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
795 else
796 {
797 Assert(iValue > -0x2000);
798 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
799 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
800 }
801 return Ptr;
802}
803
804
805/**
806 * Emits an ULEB128 encoded value (up to 64-bit wide).
807 */
808DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
809{
810 while (uValue >= 0x80)
811 {
812 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
813 uValue >>= 7;
814 }
815 *Ptr.pb++ = (uint8_t)uValue;
816 return Ptr;
817}
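/*
 * Illustrative note (added for exposition): a few encodings as produced by the
 * two helpers above.  The signed iemDwarfPutLeb128 emits the data alignment
 * factor -8 used below as the single byte 0x78 (0x38 | 0x40) and -300 as the
 * two bytes 0xd4 0x7d, while the unsigned iemDwarfPutUleb128 emits 1 as 0x01
 * and 300 as 0xac 0x02 (low seven bits first, continuation bit 0x80 on all but
 * the last byte).
 */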
818
819
820/**
821 * Emits a CFA rule as register @a uReg + offset @a off.
822 */
823DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
824{
825 *Ptr.pb++ = DW_CFA_def_cfa;
826 Ptr = iemDwarfPutUleb128(Ptr, uReg);
827 Ptr = iemDwarfPutUleb128(Ptr, off);
828 return Ptr;
829}
830
831
832/**
833 * Emits a register (@a uReg) save location:
834 * CFA + @a off * data_alignment_factor
835 */
836DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
837{
838 if (uReg < 0x40)
839 *Ptr.pb++ = DW_CFA_offset | uReg;
840 else
841 {
842 *Ptr.pb++ = DW_CFA_offset_extended;
843 Ptr = iemDwarfPutUleb128(Ptr, uReg);
844 }
845 Ptr = iemDwarfPutUleb128(Ptr, off);
846 return Ptr;
847}
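/*
 * Illustrative note (added for exposition, assuming the usual AMD64 DWARF
 * register numbering where RBP is register 6): the call
 * iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) further down emits the two
 * bytes 0x86 0x02 (DW_CFA_offset | 6, then ULEB128(2)), which with the data
 * alignment factor of -8 reads as "RBP was saved at CFA - 16".
 */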
848
849
850# if 0 /* unused */
851/**
852 * Emits a register (@a uReg) save location, using signed offset:
853 * CFA + @a offSigned * data_alignment_factor
854 */
855DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
856{
857 *Ptr.pb++ = DW_CFA_offset_extended_sf;
858 Ptr = iemDwarfPutUleb128(Ptr, uReg);
859 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
860 return Ptr;
861}
862# endif
863
864
865/**
866 * Initializes the unwind info section for non-windows hosts.
867 */
868static int
869iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
870 void *pvChunk, uint32_t idxChunk)
871{
872 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
873 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
874
875 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
876
877 /*
878 * Generate the CIE first.
879 */
880# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
881 uint8_t const iDwarfVer = 3;
882# else
883 uint8_t const iDwarfVer = 4;
884# endif
885 RTPTRUNION const PtrCie = Ptr;
886 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
887 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
888 *Ptr.pb++ = iDwarfVer; /* DWARF version */
889 *Ptr.pb++ = 0; /* Augmentation. */
890 if (iDwarfVer >= 4)
891 {
892 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
893 *Ptr.pb++ = 0; /* Segment selector size. */
894 }
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
897# else
898 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
899# endif
900 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
901# ifdef RT_ARCH_AMD64
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
903# elif defined(RT_ARCH_ARM64)
904 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
905# else
906# error "port me"
907# endif
908 /* Initial instructions: */
909# ifdef RT_ARCH_AMD64
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
918# elif defined(RT_ARCH_ARM64)
919# if 1
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
921# else
922 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
923# endif
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
936 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
937 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
938# else
939# error "port me"
940# endif
941 while ((Ptr.u - PtrCie.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the CIE size. */
944 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
945
946 /*
947 * Generate an FDE for the whole chunk area.
948 */
949# ifdef IEMNATIVE_USE_LIBUNWIND
950 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
951# endif
952 RTPTRUNION const PtrFde = Ptr;
953 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
954 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
955 Ptr.pu32++;
956 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
957 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
958# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
959 *Ptr.pb++ = DW_CFA_nop;
960# endif
961 while ((Ptr.u - PtrFde.u) & 3)
962 *Ptr.pb++ = DW_CFA_nop;
963 /* Finalize the FDE size. */
964 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
965
966 /* Terminator entry. */
967 *Ptr.pu32++ = 0;
968 *Ptr.pu32++ = 0; /* just to be sure... */
969 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
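/*
 * Illustrative note (added for exposition): the buffer built above ends up
 * holding three records back to back - a CIE (length, id 0, version, alignment
 * factors, return address column and the initial CFA/register rules), a single
 * FDE covering the entire chunk (length, CIE back-pointer, start address and
 * range), and a zero terminator entry - which is the shape .eh_frame consumers
 * such as libunwind and libgcc expect.
 */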
970
971 /*
972 * Register it.
973 */
974# ifdef IEMNATIVE_USE_LIBUNWIND
975 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
976# else
977 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
978 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
979# endif
980
981# ifdef IEMNATIVE_USE_GDB_JIT
982 /*
983 * Now for telling GDB about this (experimental).
984 *
985 * This seems to work best with ET_DYN.
986 */
987 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
988# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
989 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
990 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
991# else
992 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
993 - pExecMemAllocator->cbHeapBlockHdr;
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
995# endif
996 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
997 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
998
999 RT_ZERO(*pSymFile);
1000
1001 /*
1002 * The ELF header:
1003 */
1004 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1005 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1006 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1007 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1008 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1009 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1010 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1011 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1012# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1013 pSymFile->EHdr.e_type = ET_DYN;
1014# else
1015 pSymFile->EHdr.e_type = ET_REL;
1016# endif
1017# ifdef RT_ARCH_AMD64
1018 pSymFile->EHdr.e_machine = EM_AMD64;
1019# elif defined(RT_ARCH_ARM64)
1020 pSymFile->EHdr.e_machine = EM_AARCH64;
1021# else
1022# error "port me"
1023# endif
1024 pSymFile->EHdr.e_version = 1; /*?*/
1025 pSymFile->EHdr.e_entry = 0;
1026# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1027 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1028# else
1029 pSymFile->EHdr.e_phoff = 0;
1030# endif
1031 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1032 pSymFile->EHdr.e_flags = 0;
1033 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1035 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1036 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1037# else
1038 pSymFile->EHdr.e_phentsize = 0;
1039 pSymFile->EHdr.e_phnum = 0;
1040# endif
1041 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1042 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1043 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1044
1045 uint32_t offStrTab = 0;
1046#define APPEND_STR(a_szStr) do { \
1047 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1048 offStrTab += sizeof(a_szStr); \
1049 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1050 } while (0)
1051#define APPEND_STR_FMT(a_szStr, ...) do { \
1052 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1053 offStrTab++; \
1054 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1055 } while (0)
1056
1057 /*
1058 * Section headers.
1059 */
1060 /* Section header #0: NULL */
1061 unsigned i = 0;
1062 APPEND_STR("");
1063 RT_ZERO(pSymFile->aShdrs[i]);
1064 i++;
1065
1066 /* Section header: .eh_frame */
1067 pSymFile->aShdrs[i].sh_name = offStrTab;
1068 APPEND_STR(".eh_frame");
1069 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1070 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1071# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1072 pSymFile->aShdrs[i].sh_offset
1073 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1074# else
1075 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1076 pSymFile->aShdrs[i].sh_offset = 0;
1077# endif
1078
1079 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1080 pSymFile->aShdrs[i].sh_link = 0;
1081 pSymFile->aShdrs[i].sh_info = 0;
1082 pSymFile->aShdrs[i].sh_addralign = 1;
1083 pSymFile->aShdrs[i].sh_entsize = 0;
1084 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1085 i++;
1086
1087 /* Section header: .shstrtab */
1088 unsigned const iShStrTab = i;
1089 pSymFile->EHdr.e_shstrndx = iShStrTab;
1090 pSymFile->aShdrs[i].sh_name = offStrTab;
1091 APPEND_STR(".shstrtab");
1092 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1093 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1095 pSymFile->aShdrs[i].sh_offset
1096 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1097# else
1098 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1099 pSymFile->aShdrs[i].sh_offset = 0;
1100# endif
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1102 pSymFile->aShdrs[i].sh_link = 0;
1103 pSymFile->aShdrs[i].sh_info = 0;
1104 pSymFile->aShdrs[i].sh_addralign = 1;
1105 pSymFile->aShdrs[i].sh_entsize = 0;
1106 i++;
1107
1108 /* Section header: .symtab */
1109 pSymFile->aShdrs[i].sh_name = offStrTab;
1110 APPEND_STR(".symtab");
1111 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1112 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1113 pSymFile->aShdrs[i].sh_offset
1114 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1115 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_link = iShStrTab;
1117 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1118 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1119 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1120 i++;
1121
1122# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1123 /* Section header: .dynsym */
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".dynsym");
1126 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1128 pSymFile->aShdrs[i].sh_offset
1129 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1130 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_link = iShStrTab;
1132 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1133 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1134 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1135 i++;
1136# endif
1137
1138# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1139 /* Section header: .dynamic */
1140 pSymFile->aShdrs[i].sh_name = offStrTab;
1141 APPEND_STR(".dynamic");
1142 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1143 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1144 pSymFile->aShdrs[i].sh_offset
1145 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1146 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1147 pSymFile->aShdrs[i].sh_link = iShStrTab;
1148 pSymFile->aShdrs[i].sh_info = 0;
1149 pSymFile->aShdrs[i].sh_addralign = 1;
1150 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1151 i++;
1152# endif
1153
1154 /* Section header: .text */
1155 unsigned const iShText = i;
1156 pSymFile->aShdrs[i].sh_name = offStrTab;
1157 APPEND_STR(".text");
1158 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1159 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1161 pSymFile->aShdrs[i].sh_offset
1162 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1163# else
1164 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1165 pSymFile->aShdrs[i].sh_offset = 0;
1166# endif
1167 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1168 pSymFile->aShdrs[i].sh_link = 0;
1169 pSymFile->aShdrs[i].sh_info = 0;
1170 pSymFile->aShdrs[i].sh_addralign = 1;
1171 pSymFile->aShdrs[i].sh_entsize = 0;
1172 i++;
1173
1174 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1175
1176# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1177 /*
1178 * The program headers:
1179 */
1180 /* Everything in a single LOAD segment: */
1181 i = 0;
1182 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1183 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1184 pSymFile->aPhdrs[i].p_offset
1185 = pSymFile->aPhdrs[i].p_vaddr
1186 = pSymFile->aPhdrs[i].p_paddr = 0;
1187 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1188 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1189 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1190 i++;
1191 /* The .dynamic segment. */
1192 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1193 pSymFile->aPhdrs[i].p_flags = PF_R;
1194 pSymFile->aPhdrs[i].p_offset
1195 = pSymFile->aPhdrs[i].p_vaddr
1196 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1197 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1198 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1199 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1200 i++;
1201
1202 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1203
1204 /*
1205 * The dynamic section:
1206 */
1207 i = 0;
1208 pSymFile->aDyn[i].d_tag = DT_SONAME;
1209 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1210 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1219 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1220 i++;
1221 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1222 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1223 i++;
1224 pSymFile->aDyn[i].d_tag = DT_NULL;
1225 i++;
1226 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1227# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1228
1229 /*
1230 * Symbol tables:
1231 */
1232 /** @todo gdb doesn't seem to really like this ... */
1233 i = 0;
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1241 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1242# endif
1243 i++;
1244
1245 pSymFile->aSymbols[i].st_name = 0;
1246 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1247 pSymFile->aSymbols[i].st_value = 0;
1248 pSymFile->aSymbols[i].st_size = 0;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251 i++;
1252
1253 pSymFile->aSymbols[i].st_name = offStrTab;
1254 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1255# if 0
1256 pSymFile->aSymbols[i].st_shndx = iShText;
1257 pSymFile->aSymbols[i].st_value = 0;
1258# else
1259 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1260 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1261# endif
1262 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1263 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1264 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1265# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1266 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1267 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1268# endif
1269 i++;
1270
1271 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1272 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1273
1274 /*
1275 * The GDB JIT entry and informing GDB.
1276 */
1277 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1278# if 1
1279 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1280# else
1281 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1282# endif
1283
1284 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1285 RTCritSectEnter(&g_IemNativeGdbJitLock);
1286 pEhFrame->GdbJitEntry.pNext = NULL;
1287 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1288 if (__jit_debug_descriptor.pTail)
1289 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1290 else
1291 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1292 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1293 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1294
1295 /* Notify GDB: */
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1297 __jit_debug_register_code();
1298 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1299 RTCritSectLeave(&g_IemNativeGdbJitLock);
1300
1301# else /* !IEMNATIVE_USE_GDB_JIT */
1302 RT_NOREF(pVCpu);
1303# endif /* !IEMNATIVE_USE_GDB_JIT */
1304
1305 return VINF_SUCCESS;
1306}
1307
1308# endif /* !RT_OS_WINDOWS */
1309#endif /* IN_RING3 */
1310
1311
1312/**
1313 * Adds another chunk to the executable memory allocator.
1314 *
1315 * This is used by the init code for the initial allocation and later by the
1316 * regular allocator function when it's out of memory.
1317 */
1318static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1319{
1320 /* Check that we've room for growth. */
1321 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1322 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1323
1324 /* Allocate a chunk. */
1325#ifdef RT_OS_DARWIN
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1327#else
1328 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1329#endif
1330 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1331
1332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1333 int rc = VINF_SUCCESS;
1334#else
1335 /* Initialize the heap for the chunk. */
1336 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1337 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1338 AssertRC(rc);
1339 if (RT_SUCCESS(rc))
1340 {
1341 /*
1342 * We want the memory to be aligned on 64 byte, so the first time thru
1343 * here we do some exploratory allocations to see how we can achieve this.
1344 * On subsequent runs we only make an initial adjustment allocation, if
1345 * necessary.
1346 *
1347 * Since we own the heap implementation, we know that the internal block
1348 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1349 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1350 * to the size, align up by 64 bytes, and subtract 32 bytes.
1351 *
1352 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1353 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1354 * allocation to force subsequent allocations to return 64 byte aligned
1355 * user areas.
1356 */
1357 if (!pExecMemAllocator->cbHeapBlockHdr)
1358 {
1359 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1360 pExecMemAllocator->cbHeapAlignTweak = 64;
1361 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1362 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1364
1365 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1372 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1373 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1374 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1375 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1376
1377 RTHeapSimpleFree(hHeap, pvTest2);
1378 RTHeapSimpleFree(hHeap, pvTest1);
1379 }
1380 else
1381 {
1382 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1383 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1384 }
1385 if (RT_SUCCESS(rc))
1386#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1387 {
1388 /*
1389 * Add the chunk.
1390 *
1391 * This must be done before the unwind init so windows can allocate
1392 * memory from the chunk when using the alternative sub-allocator.
1393 */
1394 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1395#ifdef IN_RING3
1396 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1397#endif
1398#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1399 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1400#else
1401 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1402 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1403 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1404 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1405#endif
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1411 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1412 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1413#else
1414 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1415 pExecMemAllocator->cbTotal += cbFree;
1416 pExecMemAllocator->cbFree += cbFree;
1417#endif
1418
1419#ifdef IN_RING3
1420 /*
1421 * Initialize the unwind information (this cannot really fail atm).
1422 * (This sets pvUnwindInfo.)
1423 */
1424 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1425 if (RT_SUCCESS(rc))
1426#endif
1427 {
1428 return VINF_SUCCESS;
1429 }
1430
1431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 /* Just in case the impossible happens, undo the above: */
1433 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1434 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1435 pExecMemAllocator->cChunks = idxChunk;
1436 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1437 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1438 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1439 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1440#endif
1441 }
1442#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1443 }
1444#endif
1445 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1446 RT_NOREF(pVCpu);
1447 return rc;
1448}
1449
1450
1451/**
1452 * Initializes the executable memory allocator for native recompilation on the
1453 * calling EMT.
1454 *
1455 * @returns VBox status code.
1456 * @param pVCpu The cross context virtual CPU structure of the calling
1457 * thread.
1458 * @param cbMax The max size of the allocator.
1459 * @param cbInitial The initial allocator size.
1460 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1461 * dependent).
1462 */
1463int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1464{
1465 /*
1466 * Validate input.
1467 */
1468 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1469 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1470 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1471 || cbChunk == 0
1472 || ( RT_IS_POWER_OF_TWO(cbChunk)
1473 && cbChunk >= _1M
1474 && cbChunk <= _256M
1475 && cbChunk <= cbMax),
1476 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1477 VERR_OUT_OF_RANGE);
1478
1479 /*
1480 * Adjust/figure out the chunk size.
1481 */
1482 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1483 {
1484 if (cbMax >= _256M)
1485 cbChunk = _64M;
1486 else
1487 {
1488 if (cbMax < _16M)
1489 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1490 else
1491 cbChunk = (uint32_t)cbMax / 4;
1492 if (!RT_IS_POWER_OF_TWO(cbChunk))
1493 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1494 }
1495 }
1496
1497 if (cbChunk > cbMax)
1498 cbMax = cbChunk;
1499 else
1500 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1501 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1502 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1503
1504 /*
1505 * Allocate and initialize the allocator instance.
1506 */
1507 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1508#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
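 /* The allocation bitmap uses one bit per allocation unit, so each chunk needs cbChunk >> (unit shift + 3) bytes of bitmap; the +3 converts bits to bytes. */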
1509 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1510 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1511 cbNeeded += cbBitmap * cMaxChunks;
1512 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1513 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1514#endif
1515#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1516 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1517 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1518#endif
1519 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1520 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1521 VERR_NO_MEMORY);
1522 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1523 pExecMemAllocator->cbChunk = cbChunk;
1524 pExecMemAllocator->cMaxChunks = cMaxChunks;
1525 pExecMemAllocator->cChunks = 0;
1526 pExecMemAllocator->idxChunkHint = 0;
1527 pExecMemAllocator->cAllocations = 0;
1528 pExecMemAllocator->cbTotal = 0;
1529 pExecMemAllocator->cbFree = 0;
1530 pExecMemAllocator->cbAllocated = 0;
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
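 /* Bitmap bookkeeping: one bit per allocation unit, 64 units per uint64_t bitmap element (hence the additional shift by 6 below). */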
1532 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1533 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1534 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1535 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1536#endif
1537#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1538 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1539#endif
1540 for (uint32_t i = 0; i < cMaxChunks; i++)
1541 {
1542#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1543 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1544 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1545#else
1546 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1547#endif
1548 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1549#ifdef IN_RING0
1550 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1551#else
1552 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1553#endif
1554 }
1555 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1556
1557 /*
1558 * Do the initial allocations.
1559 */
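 /* Each iteration adds one chunk of cbChunk bytes until at least cbInitial bytes have been committed. */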
1560 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1561 {
1562 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1563 AssertLogRelRCReturn(rc, rc);
1564 }
1565
1566 pExecMemAllocator->idxChunkHint = 0;
1567
1568 return VINF_SUCCESS;
1569}
1570
1571
1572/*********************************************************************************************************************************
1573* Native Recompilation *
1574*********************************************************************************************************************************/
1575
1576
1577/**
1578 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1581{
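 /* Credit the instructions executed in this TB before translating the status code; VINF_IEM_REEXEC_BREAK only means 'stop executing this TB' and is therefore not an error. */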
1582 pVCpu->iem.s.cInstructions += idxInstr;
1583 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1584}
1585
1586
1587/**
1588 * Used by TB code when it wants to raise a \#GP(0).
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1591{
1592 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1593#ifndef _MSC_VER
1594 return VINF_IEM_RAISED_XCPT; /* not reached */
1595#endif
1596}
1597
1598
1599/**
1600 * Used by TB code when detecting opcode changes.
1601 * @see iemThreadeFuncWorkerObsoleteTb
1602 */
1603IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1604{
1605 /* We set fSafeToFree to false because we're being called in the context
1606 of a TB callback function, which for native TBs means we cannot release
1607 the executable memory until we've returned all the way back to iemTbExec,
1608 as that return path goes via the native code generated for the TB. */
1609 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1610 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1611 return VINF_IEM_REEXEC_BREAK;
1612}
1613
1614
1615/**
1616 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1619{
1620 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1621 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1622 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1623 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1624 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1625 return VINF_IEM_REEXEC_BREAK;
1626}
1627
1628
1629/**
1630 * Used by TB code when we missed a PC check after a branch.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1633{
1634 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1635 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1636 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1637 pVCpu->iem.s.pbInstrBuf));
1638 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1639 return VINF_IEM_REEXEC_BREAK;
1640}
1641
1642
1643
1644/*********************************************************************************************************************************
1645* Helpers: Segmented memory fetches and stores. *
1646*********************************************************************************************************************************/
1647
1648/**
1649 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1650 */
1651IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1652{
1653#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1654 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1655#else
1656 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1657#endif
1658}
1659
1660
1661/**
1662 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1663 * to 16 bits.
1664 */
1665IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1666{
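 /* The cast chain sign-extends the byte to 16 bits and then zero-extends the result to 64 bits; the other _Sx_ helpers below follow the same pattern. */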
1667#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1668 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1669#else
1670 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1671#endif
1672}
1673
1674
1675/**
1676 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1677 * to 32 bits.
1678 */
1679IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1680{
1681#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1682 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1683#else
1684 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1685#endif
1686}
1687
1688/**
1689 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1690 * to 64 bits.
1691 */
1692IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1693{
1694#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1695 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1696#else
1697 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1698#endif
1699}
1700
1701
1702/**
1703 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1704 */
1705IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1706{
1707#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1708 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1709#else
1710 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1711#endif
1712}
1713
1714
1715/**
1716 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1717 * to 32 bits.
1718 */
1719IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1720{
1721#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1722 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1723#else
1724 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1725#endif
1726}
1727
1728
1729/**
1730 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1731 * to 64 bits.
1732 */
1733IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1734{
1735#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1736 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1737#else
1738 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1739#endif
1740}
1741
1742
1743/**
1744 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1745 */
1746IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1747{
1748#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1749 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1750#else
1751 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1752#endif
1753}
1754
1755
1756/**
1757 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1758 * to 64 bits.
1759 */
1760IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1761{
1762#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1763 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1764#else
1765 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1766#endif
1767}
1768
1769
1770/**
1771 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1772 */
1773IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1774{
1775#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1776 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1777#else
1778 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1779#endif
1780}
1781
1782
1783/**
1784 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1785 */
1786IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1787{
1788#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1789 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1790#else
1791 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1792#endif
1793}
1794
1795
1796/**
1797 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1798 */
1799IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1800{
1801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1802 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1803#else
1804 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1805#endif
1806}
1807
1808
1809/**
1810 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1815 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1816#else
1817 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1828 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1829#else
1830 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1831#endif
1832}
1833
1834
1835
1836/**
1837 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1838 */
1839IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1840{
1841#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1842 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1843#else
1844 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1845#endif
1846}
1847
1848
1849/**
1850 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1851 */
1852IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1853{
1854#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1855 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1856#else
1857 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1858#endif
1859}
1860
1861
1862/**
1863 * Used by TB code to store a 32-bit selector value onto a generic stack.
1864 *
1865 * Intel CPUs don't write a whole dword, hence this special function.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1870 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1871#else
1872 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1873#endif
1874}
1875
1876
1877/**
1878 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1883 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1884#else
1885 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1886#endif
1887}
1888
1889
1890/**
1891 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1892 */
1893IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1894{
1895#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1896 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1897#else
1898 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1899#endif
1900}
1901
1902
1903/**
1904 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1905 */
1906IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1907{
1908#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1909 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1910#else
1911 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1912#endif
1913}
1914
1915
1916/**
1917 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1922 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1923#else
1924 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1925#endif
1926}
1927
1928
1929
1930/*********************************************************************************************************************************
1931* Helpers: Flat memory fetches and stores. *
1932*********************************************************************************************************************************/
1933
1934/**
1935 * Used by TB code to load unsigned 8-bit data w/ flat address.
1936 * @note Zero extending the value to 64-bit to simplify assembly.
1937 */
1938IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1939{
1940#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1941 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1942#else
1943 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1944#endif
1945}
1946
1947
1948/**
1949 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1950 * to 16 bits.
1951 * @note Zero extending the value to 64-bit to simplify assembly.
1952 */
1953IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1954{
1955#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1956 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1957#else
1958 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1959#endif
1960}
1961
1962
1963/**
1964 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1965 * to 32 bits.
1966 * @note Zero extending the value to 64-bit to simplify assembly.
1967 */
1968IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1969{
1970#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1971 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1972#else
1973 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1974#endif
1975}
1976
1977
1978/**
1979 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1980 * to 64 bits.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1983{
1984#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1985 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1986#else
1987 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1988#endif
1989}
1990
1991
1992/**
1993 * Used by TB code to load unsigned 16-bit data w/ flat address.
1994 * @note Zero extending the value to 64-bit to simplify assembly.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1999 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2000#else
2001 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2002#endif
2003}
2004
2005
2006/**
2007 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2008 * to 32 bits.
2009 * @note Zero extending the value to 64-bit to simplify assembly.
2010 */
2011IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2012{
2013#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2014 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2015#else
2016 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2017#endif
2018}
2019
2020
2021/**
2022 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2023 * to 64 bits.
2024 * @note Zero extending the value to 64-bit to simplify assembly.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2027{
2028#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2029 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2030#else
2031 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2032#endif
2033}
2034
2035
2036/**
2037 * Used by TB code to load unsigned 32-bit data w/ flat address.
2038 * @note Zero extending the value to 64-bit to simplify assembly.
2039 */
2040IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2041{
2042#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2043 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2044#else
2045 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2046#endif
2047}
2048
2049
2050/**
2051 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2052 * to 64 bits.
2053 * @note Zero extending the value to 64-bit to simplify assembly.
2054 */
2055IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2056{
2057#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2058 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2059#else
2060 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2061#endif
2062}
2063
2064
2065/**
2066 * Used by TB code to load unsigned 64-bit data w/ flat address.
2067 */
2068IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2069{
2070#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2071 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2072#else
2073 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2074#endif
2075}
2076
2077
2078/**
2079 * Used by TB code to store unsigned 8-bit data w/ flat address.
2080 */
2081IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2082{
2083#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2084 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2085#else
2086 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2087#endif
2088}
2089
2090
2091/**
2092 * Used by TB code to store unsigned 16-bit data w/ flat address.
2093 */
2094IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2095{
2096#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2097 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2098#else
2099 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2100#endif
2101}
2102
2103
2104/**
2105 * Used by TB code to store unsigned 32-bit data w/ flat address.
2106 */
2107IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2108{
2109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2110 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2111#else
2112 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2113#endif
2114}
2115
2116
2117/**
2118 * Used by TB code to store unsigned 64-bit data w/ flat address.
2119 */
2120IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2121{
2122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2123 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2124#else
2125 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2126#endif
2127}
2128
2129
2130
2131/**
2132 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2133 */
2134IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2135{
2136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2137 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2138#else
2139 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2140#endif
2141}
2142
2143
2144/**
2145 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2146 */
2147IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2148{
2149#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2150 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2151#else
2152 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2153#endif
2154}
2155
2156
2157/**
2158 * Used by TB code to store a segment selector value onto a flat stack.
2159 *
2160 * Intel CPUs don't write a whole dword, hence this special function.
2161 */
2162IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2163{
2164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2165 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2166#else
2167 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2168#endif
2169}
2170
2171
2172/**
2173 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2174 */
2175IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2176{
2177#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2178 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2179#else
2180 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2181#endif
2182}
2183
2184
2185/**
2186 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2187 */
2188IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2189{
2190#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2191 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2192#else
2193 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2194#endif
2195}
2196
2197
2198/**
2199 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2200 */
2201IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2202{
2203#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2204 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2205#else
2206 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2207#endif
2208}
2209
2210
2211/**
2212 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2217 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2218#else
2219 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2220#endif
2221}
2222
2223
2224
2225/*********************************************************************************************************************************
2226* Helpers: Segmented memory mapping. *
2227*********************************************************************************************************************************/
2228
2229/**
2230 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2231 * segmentation.
2232 */
2233IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2234 RTGCPTR GCPtrMem, uint8_t iSegReg))
2235{
2236#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2237 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2238#else
2239 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2240#endif
2241}
2242
2243
2244/**
2245 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2246 */
2247IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2248 RTGCPTR GCPtrMem, uint8_t iSegReg))
2249{
2250#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2251 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2252#else
2253 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2254#endif
2255}
2256
2257
2258/**
2259 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2260 */
2261IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2262 RTGCPTR GCPtrMem, uint8_t iSegReg))
2263{
2264#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2265 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2266#else
2267 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2268#endif
2269}
2270
2271
2272/**
2273 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2274 */
2275IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2276 RTGCPTR GCPtrMem, uint8_t iSegReg))
2277{
2278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2279 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2280#else
2281 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2282#endif
2283}
2284
2285
2286/**
2287 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2288 * segmentation.
2289 */
2290IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2291 RTGCPTR GCPtrMem, uint8_t iSegReg))
2292{
2293#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2294 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2295#else
2296 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2297#endif
2298}
2299
2300
2301/**
2302 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2303 */
2304IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2305 RTGCPTR GCPtrMem, uint8_t iSegReg))
2306{
2307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2308 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2309#else
2310 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2311#endif
2312}
2313
2314
2315/**
2316 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2317 */
2318IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2319 RTGCPTR GCPtrMem, uint8_t iSegReg))
2320{
2321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2322 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2323#else
2324 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2325#endif
2326}
2327
2328
2329/**
2330 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2331 */
2332IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2333 RTGCPTR GCPtrMem, uint8_t iSegReg))
2334{
2335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2336 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2337#else
2338 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2339#endif
2340}
2341
2342
2343/**
2344 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2345 * segmentation.
2346 */
2347IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2348 RTGCPTR GCPtrMem, uint8_t iSegReg))
2349{
2350#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2351 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2352#else
2353 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2354#endif
2355}
2356
2357
2358/**
2359 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2360 */
2361IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2362 RTGCPTR GCPtrMem, uint8_t iSegReg))
2363{
2364#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2365 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2366#else
2367 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2368#endif
2369}
2370
2371
2372/**
2373 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2374 */
2375IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2376 RTGCPTR GCPtrMem, uint8_t iSegReg))
2377{
2378#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2379 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2380#else
2381 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2382#endif
2383}
2384
2385
2386/**
2387 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2390 RTGCPTR GCPtrMem, uint8_t iSegReg))
2391{
2392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2393 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2394#else
2395 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2396#endif
2397}
2398
2399
2400/**
2401 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2402 * segmentation.
2403 */
2404IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2405 RTGCPTR GCPtrMem, uint8_t iSegReg))
2406{
2407#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2408 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2409#else
2410 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2411#endif
2412}
2413
2414
2415/**
2416 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2417 */
2418IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2419 RTGCPTR GCPtrMem, uint8_t iSegReg))
2420{
2421#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2422 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2423#else
2424 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2425#endif
2426}
2427
2428
2429/**
2430 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2431 */
2432IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2433 RTGCPTR GCPtrMem, uint8_t iSegReg))
2434{
2435#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2436 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2437#else
2438 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2439#endif
2440}
2441
2442
2443/**
2444 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2445 */
2446IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2447 RTGCPTR GCPtrMem, uint8_t iSegReg))
2448{
2449#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2450 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2451#else
2452 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2453#endif
2454}
2455
2456
2457/**
2458 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2459 */
2460IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2461 RTGCPTR GCPtrMem, uint8_t iSegReg))
2462{
2463#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2464 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2465#else
2466 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2467#endif
2468}
2469
2470
2471/**
2472 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2473 */
2474IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2475 RTGCPTR GCPtrMem, uint8_t iSegReg))
2476{
2477#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2478 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2479#else
2480 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2481#endif
2482}
2483
2484
2485/**
2486 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2487 * segmentation.
2488 */
2489IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2490 RTGCPTR GCPtrMem, uint8_t iSegReg))
2491{
2492#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2493 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2494#else
2495 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2496#endif
2497}
2498
2499
2500/**
2501 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2502 */
2503IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2504 RTGCPTR GCPtrMem, uint8_t iSegReg))
2505{
2506#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2507 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2508#else
2509 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2510#endif
2511}
2512
2513
2514/**
2515 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2516 */
2517IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2518 RTGCPTR GCPtrMem, uint8_t iSegReg))
2519{
2520#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2521 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2522#else
2523 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2524#endif
2525}
2526
2527
2528/**
2529 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2530 */
2531IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2532 RTGCPTR GCPtrMem, uint8_t iSegReg))
2533{
2534#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2535 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2536#else
2537 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2538#endif
2539}
2540
2541
2542/*********************************************************************************************************************************
2543* Helpers: Flat memory mapping. *
2544*********************************************************************************************************************************/
2545
2546/**
2547 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2548 * address.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2551{
2552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2553 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2554#else
2555 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2556#endif
2557}
2558
2559
2560/**
2561 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2562 */
2563IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2564{
2565#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2566 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2567#else
2568 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2569#endif
2570}
2571
2572
2573/**
2574 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2575 */
2576IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2577{
2578#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2579 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2580#else
2581 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2582#endif
2583}
2584
2585
2586/**
2587 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2588 */
2589IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2590{
2591#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2592 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2593#else
2594 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2595#endif
2596}
2597
2598
2599/**
2600 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2601 * address.
2602 */
2603IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2604{
2605#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2606 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2607#else
2608 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2609#endif
2610}
2611
2612
2613/**
2614 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2615 */
2616IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2617{
2618#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2619 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2620#else
2621 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2622#endif
2623}
2624
2625
2626/**
2627 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2628 */
2629IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2630{
2631#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2632 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2633#else
2634 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2635#endif
2636}
2637
2638
2639/**
2640 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2641 */
2642IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2643{
2644#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2645 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2646#else
2647 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2648#endif
2649}
2650
2651
2652/**
2653 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2654 * address.
2655 */
2656IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2657{
2658#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2659 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2660#else
2661 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2662#endif
2663}
2664
2665
2666/**
2667 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2668 */
2669IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2670{
2671#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2672 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2673#else
2674 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2675#endif
2676}
2677
2678
2679/**
2680 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2681 */
2682IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2683{
2684#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2685 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2686#else
2687 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2688#endif
2689}
2690
2691
2692/**
2693 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2694 */
2695IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2696{
2697#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2698 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2699#else
2700 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2701#endif
2702}
2703
2704
2705/**
2706 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2707 * address.
2708 */
2709IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2710{
2711#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2712 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2713#else
2714 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2715#endif
2716}
2717
2718
2719/**
2720 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2721 */
2722IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2723{
2724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2725 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2726#else
2727 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2728#endif
2729}
2730
2731
2732/**
2733 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2734 */
2735IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2736{
2737#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2738 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2739#else
2740 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2741#endif
2742}
2743
2744
2745/**
2746 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2747 */
2748IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2749{
2750#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2751 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2752#else
2753 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2754#endif
2755}
2756
2757
2758/**
2759 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2760 */
2761IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2762{
2763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2764 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2765#else
2766 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2767#endif
2768}
2769
2770
2771/**
2772 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2773 */
2774IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2775{
2776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2777 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2778#else
2779 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2780#endif
2781}
2782
2783
2784/**
2785 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2786 * address.
2787 */
2788IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2789{
2790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2791 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2792#else
2793 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2794#endif
2795}
2796
2797
2798/**
2799 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2800 */
2801IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2802{
2803#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2804 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2805#else
2806 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2807#endif
2808}
2809
2810
2811/**
2812 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2813 */
2814IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2815{
2816#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2817 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2818#else
2819 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2820#endif
2821}
2822
2823
2824/**
2825 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2826 */
2827IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2828{
2829#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2830 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2831#else
2832 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2833#endif
2834}
2835
2836
2837/*********************************************************************************************************************************
2838* Helpers: Commit, rollback & unmap *
2839*********************************************************************************************************************************/
2840
2841/**
2842 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2843 */
2844IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2845{
2846 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2847}
2848
2849
2850/**
2851 * Used by TB code to commit and unmap a read-write memory mapping.
2852 */
2853IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2854{
2855 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2856}
2857
2858
2859/**
2860 * Used by TB code to commit and unmap a write-only memory mapping.
2861 */
2862IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2863{
2864 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2865}
2866
2867
2868/**
2869 * Used by TB code to commit and unmap a read-only memory mapping.
2870 */
2871IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2872{
2873 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2874}
2875
2876
2877/**
2878 * Reinitializes the native recompiler state.
2879 *
2880 * Called before starting a new recompile job.
2881 */
2882static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2883{
2884 pReNative->cLabels = 0;
2885 pReNative->bmLabelTypes = 0;
2886 pReNative->cFixups = 0;
2887#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2888 pReNative->pDbgInfo->cEntries = 0;
2889#endif
2890 pReNative->pTbOrg = pTb;
2891 pReNative->cCondDepth = 0;
2892 pReNative->uCondSeqNo = 0;
2893 pReNative->uCheckIrqSeqNo = 0;
2894 pReNative->uTlbSeqNo = 0;
2895
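 /* Mark the fixed registers, and any register numbers beyond IEMNATIVE_HST_GREG_COUNT, as permanently taken in the host register bitmap. */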
2896 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2897#if IEMNATIVE_HST_GREG_COUNT < 32
2898 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2899#endif
2900 ;
2901 pReNative->Core.bmHstRegsWithGstShadow = 0;
2902 pReNative->Core.bmGstRegShadows = 0;
2903 pReNative->Core.bmVars = 0;
2904 pReNative->Core.bmStack = 0;
2905 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2906 pReNative->Core.u64ArgVars = UINT64_MAX;
2907
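 /* aidxUniqueLabels has one entry per single-instance label type, i.e. everything below kIemNativeLabelType_FirstWithMultipleInstances. */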
2908 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 9);
2909 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2910 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2911 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2912 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2913 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2914 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2915 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2916 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2917 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2918
2919 /* Full host register reinit: */
2920 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2921 {
2922 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2923 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2924 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2925 }
2926
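 /* Flag the remaining registers in the fixed mask as reserved; the well-known fixed registers get their specific purpose assigned right below. */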
2927 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2928 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2929#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2930 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2931#endif
2932#ifdef IEMNATIVE_REG_FIXED_TMP0
2933 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2934#endif
2935 );
2936 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2937 {
2938 fRegs &= ~RT_BIT_32(idxReg);
2939 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2940 }
2941
2942 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2943#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2944 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2945#endif
2946#ifdef IEMNATIVE_REG_FIXED_TMP0
2947 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2948#endif
2949 return pReNative;
2950}
2951
2952
2953/**
2954 * Allocates and initializes the native recompiler state.
2955 *
2956 * This is called the first time an EMT wants to recompile something.
2957 *
2958 * @returns Pointer to the new recompiler state.
2959 * @param pVCpu The cross context virtual CPU structure of the calling
2960 * thread.
2961 * @param pTb The TB that's about to be recompiled.
2962 * @thread EMT(pVCpu)
2963 */
2964static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2965{
2966 VMCPU_ASSERT_EMT(pVCpu);
2967
2968 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2969 AssertReturn(pReNative, NULL);
2970
2971 /*
2972 * Try allocate all the buffers and stuff we need.
2973 */
2974 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2975 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2976 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2977#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2978 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2979#endif
2980 if (RT_LIKELY( pReNative->pInstrBuf
2981 && pReNative->paLabels
2982 && pReNative->paFixups)
2983#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2984 && pReNative->pDbgInfo
2985#endif
2986 )
2987 {
2988 /*
2989 * Set the buffer & array sizes on success.
2990 */
2991 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2992 pReNative->cLabelsAlloc = _8K;
2993 pReNative->cFixupsAlloc = _16K;
2994#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2995 pReNative->cDbgInfoAlloc = _16K;
2996#endif
2997
2998 /* Other constant stuff: */
2999 pReNative->pVCpu = pVCpu;
3000
3001 /*
3002 * Done, just need to save it and reinit it.
3003 */
3004 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3005 return iemNativeReInit(pReNative, pTb);
3006 }
3007
3008 /*
3009 * Failed. Cleanup and return.
3010 */
3011 AssertFailed();
3012 RTMemFree(pReNative->pInstrBuf);
3013 RTMemFree(pReNative->paLabels);
3014 RTMemFree(pReNative->paFixups);
3015#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3016 RTMemFree(pReNative->pDbgInfo);
3017#endif
3018 RTMemFree(pReNative);
3019 return NULL;
3020}
3021
3022
3023/**
3024 * Creates a label.
3025 *
3026 * If the label does not yet have a defined position,
3027 * call iemNativeLabelDefine() later to set it.
3028 *
3029 * @returns Label ID. Throws VBox status code on failure, so no need to check
3030 * the return value.
3031 * @param pReNative The native recompile state.
3032 * @param enmType The label type.
3033 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3034 * label is not yet defined (default).
3035 * @param uData Data associated with the label. Only applicable to
3036 * certain types of labels. Default is zero.
3037 */
3038DECL_HIDDEN_THROW(uint32_t)
3039iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3040 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3041{
3042 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3043
3044 /*
3045 * Locate existing label definition.
3046 *
3047 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3048 * and uData is zero.
3049 */
3050 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3051 uint32_t const cLabels = pReNative->cLabels;
3052 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3053#ifndef VBOX_STRICT
3054 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3055 && offWhere == UINT32_MAX
3056 && uData == 0
3057#endif
3058 )
3059 {
3060#ifndef VBOX_STRICT
3061 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3062 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3063 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3064 if (idxLabel < pReNative->cLabels)
3065 return idxLabel;
3066#else
3067 for (uint32_t i = 0; i < cLabels; i++)
3068 if ( paLabels[i].enmType == enmType
3069 && paLabels[i].uData == uData)
3070 {
3071 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3072 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3073 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3074 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3075 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3076 return i;
3077 }
3078 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3079 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3080#endif
3081 }
3082
3083 /*
3084 * Make sure we've got room for another label.
3085 */
3086 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3087 { /* likely */ }
3088 else
3089 {
3090 uint32_t cNew = pReNative->cLabelsAlloc;
3091 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3092 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3093 cNew *= 2;
3094 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3095 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3096 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3097 pReNative->paLabels = paLabels;
3098 pReNative->cLabelsAlloc = cNew;
3099 }
3100
3101 /*
3102 * Define a new label.
3103 */
3104 paLabels[cLabels].off = offWhere;
3105 paLabels[cLabels].enmType = enmType;
3106 paLabels[cLabels].uData = uData;
3107 pReNative->cLabels = cLabels + 1;
3108
3109 Assert((unsigned)enmType < 64);
3110 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3111
3112 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3113 {
3114 Assert(uData == 0);
3115 pReNative->aidxUniqueLabels[enmType] = cLabels;
3116 }
3117
3118 if (offWhere != UINT32_MAX)
3119 {
3120#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3121 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3122 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3123#endif
3124 }
3125 return cLabels;
3126}
3127
3128
3129/**
3130 * Defines the location of an existing label.
3131 *
3132 * @param pReNative The native recompile state.
3133 * @param idxLabel The label to define.
3134 * @param offWhere The position.
3135 */
3136DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3137{
3138 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3139 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3140 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3141 pLabel->off = offWhere;
3142#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3143 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3144 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3145#endif
3146}
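
/*
 * A minimal usage sketch for the forward-declaration pattern served by
 * iemNativeLabelCreate + iemNativeLabelDefine.  The label type name
 * kIemNativeLabelType_Return is an assumption picked for illustration; any
 * unique type below kIemNativeLabelType_FirstWithMultipleInstances works the
 * same way:
 *
 *      // Request the label before its position is known (offWhere defaults
 *      // to UINT32_MAX), e.g. when a forward jump to it is being emitted:
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return);
 *      // ... emit the jump and register a fixup against idxLabel ...
 *      // Once code generation reaches the target position 'off', pin it down:
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */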
3147
3148
3149/**
3150 * Looks up a label.
3151 *
3152 * @returns Label ID if found, UINT32_MAX if not.
3153 */
3154static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3155 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3156{
3157 Assert((unsigned)enmType < 64);
3158 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3159 {
3160 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3161 return pReNative->aidxUniqueLabels[enmType];
3162
3163 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3164 uint32_t const cLabels = pReNative->cLabels;
3165 for (uint32_t i = 0; i < cLabels; i++)
3166 if ( paLabels[i].enmType == enmType
3167 && paLabels[i].uData == uData
3168 && ( paLabels[i].off == offWhere
3169 || offWhere == UINT32_MAX
3170 || paLabels[i].off == UINT32_MAX))
3171 return i;
3172 }
3173 return UINT32_MAX;
3174}
3175
3176
3177/**
3178 * Adds a fixup.
3179 *
3180 * @throws VBox status code (int) on failure.
3181 * @param pReNative The native recompile state.
3182 * @param offWhere The instruction offset of the fixup location.
3183 * @param idxLabel The target label ID for the fixup.
3184 * @param enmType The fixup type.
3185 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3186 */
3187DECL_HIDDEN_THROW(void)
3188iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3189 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3190{
3191 Assert(idxLabel <= UINT16_MAX);
3192 Assert((unsigned)enmType <= UINT8_MAX);
3193
3194 /*
3195 * Make sure we've got room.
3196 */
3197 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3198 uint32_t const cFixups = pReNative->cFixups;
3199 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3200 { /* likely */ }
3201 else
3202 {
3203 uint32_t cNew = pReNative->cFixupsAlloc;
3204 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3205 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3206 cNew *= 2;
3207 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3208 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3209 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3210 pReNative->paFixups = paFixups;
3211 pReNative->cFixupsAlloc = cNew;
3212 }
3213
3214 /*
3215 * Add the fixup.
3216 */
3217 paFixups[cFixups].off = offWhere;
3218 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3219 paFixups[cFixups].enmType = enmType;
3220 paFixups[cFixups].offAddend = offAddend;
3221 pReNative->cFixups = cFixups + 1;
3222}
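
/*
 * Sketch of how a fixup typically pairs with a yet-undefined label.  The
 * fixup type kIemNativeFixupType_Rel32 and the -4 addend are assumptions
 * matching the usual AMD64 rel32 branch encoding; the real emitters elsewhere
 * in this file supply the exact values:
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
 *      // ... emit a jmp/jcc with a 32-bit displacement placeholder and note
 *      //     the code buffer offset of that displacement in offFixup ...
 *      iemNativeAddFixup(pReNative, offFixup, idxLabel, kIemNativeFixupType_Rel32, -4);
 *      // The real displacement is patched in once the label has been defined
 *      // and the fixup list is processed.
 */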
3223
3224
3225/**
3226 * Slow code path for iemNativeInstrBufEnsure.
3227 */
3228DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3229{
3230 /* Double the buffer size till we meet the request. */
3231 uint32_t cNew = pReNative->cInstrBufAlloc;
3232 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3233 do
3234 cNew *= 2;
3235 while (cNew < off + cInstrReq);
3236
3237 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3238#ifdef RT_ARCH_ARM64
3239 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3240#else
3241 uint32_t const cbMaxInstrBuf = _2M;
3242#endif
3243 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3244
3245 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3246 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3247
3248#ifdef VBOX_STRICT
3249 pReNative->offInstrBufChecked = off + cInstrReq;
3250#endif
3251 pReNative->cInstrBufAlloc = cNew;
3252 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3253}
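
/*
 * Worked example of the doubling above: starting from cInstrBufAlloc = 0x4000
 * with a request where off + cInstrReq = 0x9000, the loop doubles 0x4000 ->
 * 0x8000 -> 0x10000 and stops, after which the reallocation succeeds as long
 * as the resulting byte size stays within the 1MB (ARM64) / 2MB (AMD64) cap
 * checked above.  (Figures are illustrative only.)
 */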
3254
3255#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3256
3257/**
3258 * Grows the static debug info array used during recompilation.
3259 *
3260 * @returns Pointer to the new debug info block; throws VBox status code on
3261 * failure, so no need to check the return value.
3262 */
3263DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3264{
3265 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3266 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3267 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3268 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3269 pReNative->pDbgInfo = pDbgInfo;
3270 pReNative->cDbgInfoAlloc = cNew;
3271 return pDbgInfo;
3272}
3273
3274
3275/**
3276 * Adds a new uninitialized debug info entry, returning a pointer to it.
3277 */
3278DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3279{
3280 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3281 { /* likely */ }
3282 else
3283 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3284 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3285}
3286
3287
3288/**
3289 * Debug Info: Adds a native offset record, if necessary.
3290 */
3291static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3292{
3293 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3294
3295 /*
3296 * Search backwards to see if we've got a similar record already.
3297 */
3298 uint32_t idx = pDbgInfo->cEntries;
3299 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3300 while (idx-- > idxStop)
3301 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3302 {
3303 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3304 return;
3305 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3306 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3307 break;
3308 }
3309
3310 /*
3311 * Add it.
3312 */
3313 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3314 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3315 pEntry->NativeOffset.offNative = off;
3316}
3317
3318
3319/**
3320 * Debug Info: Record info about a label.
3321 */
3322static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3323{
3324 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3325 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3326 pEntry->Label.uUnused = 0;
3327 pEntry->Label.enmLabel = (uint8_t)enmType;
3328 pEntry->Label.uData = uData;
3329}
3330
3331
3332/**
3333 * Debug Info: Record info about a threaded call.
3334 */
3335static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3336{
3337 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3338 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3339 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3340 pEntry->ThreadedCall.uUnused = 0;
3341 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3342}
3343
3344
3345/**
3346 * Debug Info: Record info about a new guest instruction.
3347 */
3348static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3349{
3350 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3351 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3352 pEntry->GuestInstruction.uUnused = 0;
3353 pEntry->GuestInstruction.fExec = fExec;
3354}
3355
3356
3357/**
3358 * Debug Info: Record info about guest register shadowing.
3359 */
3360static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3361 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3362{
3363 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3364 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3365 pEntry->GuestRegShadowing.uUnused = 0;
3366 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3367 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3368 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3369}
3370
3371#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3372
3373
3374/*********************************************************************************************************************************
3375* Register Allocator *
3376*********************************************************************************************************************************/
3377
3378/**
3379 * Register parameter indexes (indexed by argument number).
3380 */
3381DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3382{
3383 IEMNATIVE_CALL_ARG0_GREG,
3384 IEMNATIVE_CALL_ARG1_GREG,
3385 IEMNATIVE_CALL_ARG2_GREG,
3386 IEMNATIVE_CALL_ARG3_GREG,
3387#if defined(IEMNATIVE_CALL_ARG4_GREG)
3388 IEMNATIVE_CALL_ARG4_GREG,
3389# if defined(IEMNATIVE_CALL_ARG5_GREG)
3390 IEMNATIVE_CALL_ARG5_GREG,
3391# if defined(IEMNATIVE_CALL_ARG6_GREG)
3392 IEMNATIVE_CALL_ARG6_GREG,
3393# if defined(IEMNATIVE_CALL_ARG7_GREG)
3394 IEMNATIVE_CALL_ARG7_GREG,
3395# endif
3396# endif
3397# endif
3398#endif
3399};
3400
3401/**
3402 * Call register masks indexed by argument count.
3403 */
3404DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3405{
3406 0,
3407 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3408 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3409 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3410 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3411 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3412#if defined(IEMNATIVE_CALL_ARG4_GREG)
3413 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3414 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3415# if defined(IEMNATIVE_CALL_ARG5_GREG)
3416 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3417 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3418# if defined(IEMNATIVE_CALL_ARG6_GREG)
3419 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3420 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3421 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3422# if defined(IEMNATIVE_CALL_ARG7_GREG)
3423 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3424 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3425 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3426# endif
3427# endif
3428# endif
3429#endif
3430};
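
/*
 * Illustrative use of the two tables above: for a helper call taking three
 * arguments, the register for argument #2 (zero-based) is
 * g_aidxIemNativeCallRegs[2], while g_afIemNativeCallRegs[3] gives the mask
 * of all three argument registers, e.g. for checking what must be flushed or
 * avoided around the call.  (Sketch only; the actual call emitters elsewhere
 * in this file are the real consumers.)
 */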
3431
3432#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3433/**
3434 * BP offset of the stack argument slots.
3435 *
3436 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3437 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3438 */
3439DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3440{
3441 IEMNATIVE_FP_OFF_STACK_ARG0,
3442# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3443 IEMNATIVE_FP_OFF_STACK_ARG1,
3444# endif
3445# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3446 IEMNATIVE_FP_OFF_STACK_ARG2,
3447# endif
3448# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3449 IEMNATIVE_FP_OFF_STACK_ARG3,
3450# endif
3451};
3452AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3453#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3454
3455/**
3456 * Info about shadowed guest register values.
3457 * @see IEMNATIVEGSTREG
3458 */
3459static struct
3460{
3461 /** Offset in VMCPU. */
3462 uint32_t off;
3463 /** The field size. */
3464 uint8_t cb;
3465 /** Name (for logging). */
3466 const char *pszName;
3467} const g_aGstShadowInfo[] =
3468{
3469#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3470 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3471 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3472 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3473 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3474 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3475 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3476 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3477 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3478 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3479 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3480 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3481 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3482 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3483 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3484 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3485 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3486 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3487 /* [kIemNativeGstReg_LivenessPadding17] = */ { UINT32_MAX / 4, 0, "pad17", },
3488 /* [kIemNativeGstReg_LivenessPadding18] = */ { UINT32_MAX / 4, 0, "pad18", },
3489 /* [kIemNativeGstReg_LivenessPadding19] = */ { UINT32_MAX / 4, 0, "pad19", },
3490 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3491 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3492 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3493 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3494 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3495 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3496 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3497 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3498 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3499 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3500 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3501 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3502 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3503 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3504 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3505 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3506 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3507 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3508 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3509 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3510 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3511 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3512 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3513 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3514 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3515#undef CPUMCTX_OFF_AND_SIZE
3516};
3517AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
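
/*
 * Sketch of how g_aGstShadowInfo is meant to be consumed (illustrative; the
 * load/store emitters used later are the real users): to access the shadowed
 * copy of guest RAX, an emitter reads
 * g_aGstShadowInfo[kIemNativeGstReg_GprFirst + X86_GREG_xAX].off as the byte
 * offset into VMCPU and .cb (8 for a GPR) as the access size, while .pszName
 * ("rax") is only used for logging.
 */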
3518
3519
3520/** Host CPU general purpose register names. */
3521DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3522{
3523#ifdef RT_ARCH_AMD64
3524 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3525#elif defined(RT_ARCH_ARM64)
3526 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3527 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3528#else
3529# error "port me"
3530#endif
3531};
3532
3533
3534DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3535 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3536{
3537 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3538
3539 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3540 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3541 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3542 return (uint8_t)idxReg;
3543}
3544
3545
3546#if 0 /* unused */
3547/**
3548 * Tries to locate a suitable register in the given register mask.
3549 *
3550 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3551 * failed.
3552 *
3553 * @returns Host register number on success, returns UINT8_MAX on failure.
3554 */
3555static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3556{
3557 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3558 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3559 if (fRegs)
3560 {
3561 /** @todo pick better here: */
3562 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3563
3564 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3565 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3566 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3567 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3568
3569 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3570 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3571 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3572 return idxReg;
3573 }
3574 return UINT8_MAX;
3575}
3576#endif /* unused */
3577
3578
3579/**
3580 * Locate a register, possibly freeing one up.
3581 *
3582 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3583 * failed.
3584 *
3585 * @returns Host register number on success. Returns UINT8_MAX if no registers
3586 * found, the caller is supposed to deal with this and raise an
3587 * allocation type specific status code (if desired).
3588 *
3589 * @throws VBox status code if we run into trouble spilling a variable or
3590 * recording debug info. Does NOT throw anything if we're out of
3591 * registers, though.
3592 */
3593static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3594 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3595{
3596 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3597 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3598 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3599
3600 /*
3601 * Try a freed register that's shadowing a guest register.
3602 */
3603 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3604 if (fRegs)
3605 {
3606 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3607
3608#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3609 /*
3610 * When we have liveness information, we use it to kick out all shadowed
3611 * guest registers that will not be needed any more in this TB. If we're
3612 * lucky, this may prevent us from ending up here again.
3613 *
3614 * Note! We must consider the previous entry here so we don't free
3615 * anything that the current threaded function requires (current
3616 * entry is produced by the next threaded function).
3617 */
3618 uint32_t const idxCurCall = pReNative->idxCurCall;
3619 if (idxCurCall > 0)
3620 {
3621 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3622
3623# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3624 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3625 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3626 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3627# else
3628 /* Construct a mask of the registers not in the read or write state.
3629 Note! We could skip writes, if they aren't from us, as this is just
3630 a hack to prevent trashing registers that have just been written
3631 or will be written when we retire the current instruction. */
3632 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3633 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3634 & IEMLIVENESSBIT_MASK;
3635# endif
3636 /* Merge EFLAGS. */
3637 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3638 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3639 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3640 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3641 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3642
3643 /* If it matches any shadowed registers. */
3644 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3645 {
3646 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3647 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3648 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3649
3650 /* See if we've got any unshadowed registers we can return now. */
3651 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3652 if (fUnshadowedRegs)
3653 {
3654 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3655 return (fPreferVolatile
3656 ? ASMBitFirstSetU32(fUnshadowedRegs)
3657 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3658 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3659 - 1;
3660 }
3661 }
3662 }
3663#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3664
3665 unsigned const idxReg = (fPreferVolatile
3666 ? ASMBitFirstSetU32(fRegs)
3667 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3668 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3669 - 1;
3670
3671 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3672 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3673 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3674 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3675
3676 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3677 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3678 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3679 return idxReg;
3680 }
3681
3682 /*
3683 * Try free up a variable that's in a register.
3684 *
3685 * We do two rounds here, first evacuating variables we don't need to be
3686 * saved on the stack, then in the second round moving things to the stack.
3687 */
3688 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3689 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3690 {
3691 uint32_t fVars = pReNative->Core.bmVars;
3692 while (fVars)
3693 {
3694 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3695 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3696 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3697 && (RT_BIT_32(idxReg) & fRegMask)
3698 && ( iLoop == 0
3699 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3700 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3701 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3702 {
3703 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3704 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3705 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3706 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3707 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3708 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3709
3710 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3711 {
3712 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3713 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3714 }
3715
3716 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3717 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3718
3719 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3720 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3721 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3722 return idxReg;
3723 }
3724 fVars &= ~RT_BIT_32(idxVar);
3725 }
3726 }
3727
3728 return UINT8_MAX;
3729}
3730
3731
3732/**
3733 * Reassigns a variable to a different register specified by the caller.
3734 *
3735 * @returns The new code buffer position.
3736 * @param pReNative The native recompile state.
3737 * @param off The current code buffer position.
3738 * @param idxVar The variable index.
3739 * @param idxRegOld The old host register number.
3740 * @param idxRegNew The new host register number.
3741 * @param pszCaller The caller for logging.
3742 */
3743static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3744 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3745{
3746 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3747 RT_NOREF(pszCaller);
3748
3749 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3750
3751 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3752 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3753 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3754 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3755
3756 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3757 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3758 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3759 if (fGstRegShadows)
3760 {
3761 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3762 | RT_BIT_32(idxRegNew);
3763 while (fGstRegShadows)
3764 {
3765 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3766 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3767
3768 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3769 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3770 }
3771 }
3772
3773 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3774 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3775 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3776 return off;
3777}
3778
3779
3780/**
3781 * Moves a variable to a different register or spills it onto the stack.
3782 *
3783 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3784 * kinds can easily be recreated if needed later.
3785 *
3786 * @returns The new code buffer position.
3787 * @param pReNative The native recompile state.
3788 * @param off The current code buffer position.
3789 * @param idxVar The variable index.
3790 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3791 * call-volatile registers.
3792 */
3793static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3794 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3795{
3796 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3797 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3798 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
3799
3800 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3801 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3802 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3803 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3804 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3805 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3806 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3807 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3808 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3809
3810
3811 /** @todo Add statistics on this.*/
3812 /** @todo Implement basic variable liveness analysis (python) so variables
3813 * can be freed immediately once no longer used. Without this we risk
3814 * trashing registers and stack slots for dead variables.
3815 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3816
3817 /*
3818 * First try move it to a different register, as that's cheaper.
3819 */
3820 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3821 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3822 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3823 if (fRegs)
3824 {
3825 /* Avoid using shadow registers, if possible. */
3826 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3827 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3828 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3829 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3830 }
3831
3832 /*
3833 * Otherwise we must spill the register onto the stack.
3834 */
3835 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3836 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3837 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3838 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3839
3840 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3841 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3842 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3843 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3844 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3845 return off;
3846}
3847
3848
3849/**
3850 * Allocates a temporary host general purpose register.
3851 *
3852 * This may emit code to save register content onto the stack in order to free
3853 * up a register.
3854 *
3855 * @returns The host register number; throws VBox status code on failure,
3856 * so no need to check the return value.
3857 * @param pReNative The native recompile state.
3858 * @param poff Pointer to the variable with the code buffer position.
3859 * This will be update if we need to move a variable from
3860 * register to stack in order to satisfy the request.
3861 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3862 * registers (@c true, default) or the other way around
3863 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3864 */
3865DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3866{
3867 /*
3868 * Try find a completely unused register, preferably a call-volatile one.
3869 */
3870 uint8_t idxReg;
3871 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3872 & ~pReNative->Core.bmHstRegsWithGstShadow
3873 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3874 if (fRegs)
3875 {
3876 if (fPreferVolatile)
3877 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3878 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3879 else
3880 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3881 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3882 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3883 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3884 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3885 }
3886 else
3887 {
3888 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3889 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3890 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3891 }
3892 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3893}
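
/*
 * Minimal usage sketch for the allocator above.  iemNativeRegFreeTmp is
 * assumed to be the matching release helper (not shown here), named for what
 * a caller of this allocator would need:
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234));
 *      // ... emit code using idxTmpReg as scratch ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */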
3894
3895
3896/**
3897 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
3898 * registers.
3899 *
3900 * @returns The host register number; throws VBox status code on failure,
3901 * so no need to check the return value.
3902 * @param pReNative The native recompile state.
3903 * @param poff Pointer to the variable with the code buffer position.
3904 * This will be updated if we need to move a variable from
3905 * register to stack in order to satisfy the request.
3906 * @param fRegMask Mask of acceptable registers.
3907 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3908 * registers (@c true, default) or the other way around
3909 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3910 */
3911DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3912 bool fPreferVolatile /*= true*/)
3913{
3914 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3915 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3916
3917 /*
3918 * Try find a completely unused register, preferably a call-volatile one.
3919 */
3920 uint8_t idxReg;
3921 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3922 & ~pReNative->Core.bmHstRegsWithGstShadow
3923 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3924 & fRegMask;
3925 if (fRegs)
3926 {
3927 if (fPreferVolatile)
3928 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3929 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3930 else
3931 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3932 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3933 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3934 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3935 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3936 }
3937 else
3938 {
3939 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3940 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3941 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3942 }
3943 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3944}
3945
3946
3947/**
3948 * Allocates a temporary register for loading an immediate value into.
3949 *
3950 * This will emit code to load the immediate, unless there happens to be an
3951 * unused register with the value already loaded.
3952 *
3953 * The caller will not modify the returned register, it must be considered
3954 * read-only. Free using iemNativeRegFreeTmpImm.
3955 *
3956 * @returns The host register number; throws VBox status code on failure, so no
3957 * need to check the return value.
3958 * @param pReNative The native recompile state.
3959 * @param poff Pointer to the variable with the code buffer position.
3960 * @param uImm The immediate value that the register must hold upon
3961 * return.
3962 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3963 * registers (@c true, default) or the other way around
3964 * (@c false).
3965 *
3966 * @note Reusing immediate values has not been implemented yet.
3967 */
3968DECL_HIDDEN_THROW(uint8_t)
3969iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3970{
3971 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3972 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3973 return idxReg;
3974}
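
/*
 * Usage sketch, releasing the register via the iemNativeRegFreeTmpImm helper
 * named in the doc comment above:
 *
 *      uint8_t const idxRegMask = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *      // ... use idxRegMask strictly as a read-only source operand ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegMask);
 */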
3975
3976#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3977
3978# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3979/**
3980 * Helper for iemNativeLivenessGetStateByGstReg.
3981 *
3982 * @returns IEMLIVENESS_STATE_XXX
3983 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
3984 * ORed together.
3985 */
3986DECL_FORCE_INLINE(uint32_t)
3987iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
3988{
3989 /* INPUT trumps anything else. */
3990 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
3991 return IEMLIVENESS_STATE_INPUT;
3992
3993 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
3994 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
3995 {
3996 /* If not all sub-fields are clobbered they must be considered INPUT. */
3997 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
3998 return IEMLIVENESS_STATE_INPUT;
3999 return IEMLIVENESS_STATE_CLOBBERED;
4000 }
4001
4002 /* XCPT_OR_CALL trumps UNUSED. */
4003 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4004 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4005
4006 return IEMLIVENESS_STATE_UNUSED;
4007}
4008# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4009
4010
4011DECL_FORCE_INLINE(uint32_t)
4012iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4013{
4014# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4015 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4016 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4017# else
4018 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4019 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4020 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4021 | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
4022# endif
4023}
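
/*
 * Worked example of the bit gathering above: Bit0 carries the low bit and
 * Bit1 the high bit of the two-bit state, so a register whose Bit0 is set and
 * Bit1 is clear decodes to 1 (IEMLIVENESS_STATE_UNUSED per the AssertCompile
 * further up), while Bit1 set and Bit0 clear decodes to 2
 * (IEMLIVENESS_STATE_XCPT_OR_CALL).  The extended layout widens this to four
 * bits by folding in Bit2 and Bit3 the same way.
 */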
4024
4025
4026DECL_FORCE_INLINE(uint32_t)
4027iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4028{
4029 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4030 if (enmGstReg == kIemNativeGstReg_EFlags)
4031 {
4032 /* Merge the eflags states to one. */
4033# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4034 uRet = RT_BIT_32(uRet);
4035 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4036 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4037 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4038 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4039 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4040 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4041 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4042# else
4043 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4044 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4045 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4046 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4047 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4048 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4049 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4050# endif
4051 }
4052 return uRet;
4053}
4054
4055
4056# ifdef VBOX_STRICT
4057/** For assertions only; the caller checks that idxCurCall isn't zero. */
4058DECL_FORCE_INLINE(uint32_t)
4059iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4060{
4061 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4062}
4063# endif /* VBOX_STRICT */
4064
4065#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4066
4067/**
4068 * Marks host register @a idxHstReg as containing a shadow copy of guest
4069 * register @a enmGstReg.
4070 *
4071 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4072 * host register before calling.
4073 */
4074DECL_FORCE_INLINE(void)
4075iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4076{
4077 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4078 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4079 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4080
4081 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4082 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4083 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4084 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4085#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4086 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4087 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4088#else
4089 RT_NOREF(off);
4090#endif
4091}
4092
4093
4094/**
4095 * Clear any guest register shadow claims from @a idxHstReg.
4096 *
4097 * The register does not need to be shadowing any guest registers.
4098 */
4099DECL_FORCE_INLINE(void)
4100iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4101{
4102 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4103 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4104 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4105 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4106 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4107
4108#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4109 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4110 if (fGstRegs)
4111 {
4112 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4113 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4114 while (fGstRegs)
4115 {
4116 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4117 fGstRegs &= ~RT_BIT_64(iGstReg);
4118 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4119 }
4120 }
4121#else
4122 RT_NOREF(off);
4123#endif
4124
4125 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4126 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4127 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4128}
4129
4130
4131/**
4132 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4133 * and global overview flags.
4134 */
4135DECL_FORCE_INLINE(void)
4136iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4137{
4138 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4139 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4140 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4141 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4142 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4143 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4144 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4145
4146#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4147 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4148 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4149#else
4150 RT_NOREF(off);
4151#endif
4152
4153 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4154 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4155 if (!fGstRegShadowsNew)
4156 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4157 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4158}
4159
4160
4161#if 0 /* unused */
4162/**
4163 * Clear any guest register shadow claim for @a enmGstReg.
4164 */
4165DECL_FORCE_INLINE(void)
4166iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4167{
4168 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4169 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4170 {
4171 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4172 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4173 }
4174}
4175#endif
4176
4177
4178/**
4179 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4180 * as the new shadow of it.
4181 *
4182 * Unlike the other guest reg shadow helpers, this does the logging for you.
4183 * However, the liveness state is not asserted here; the caller must do
4184 * that.
4185 */
4186DECL_FORCE_INLINE(void)
4187iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4188 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4189{
4190 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4191 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4192 {
4193 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4194 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4195 if (idxHstRegOld == idxHstRegNew)
4196 return;
4197 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4198 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4199 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4200 }
4201 else
4202 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4203 g_aGstShadowInfo[enmGstReg].pszName));
4204 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4205}
4206
4207
4208/**
4209 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4210 * to @a idxRegTo.
4211 */
4212DECL_FORCE_INLINE(void)
4213iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4214 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4215{
4216 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4217 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4218 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4219 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4220 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4221 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4222 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4223 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4224 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4225
4226 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4227 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4228 if (!fGstRegShadowsFrom)
4229 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4230 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4231 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4232 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4233#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4234 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4235 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4236#else
4237 RT_NOREF(off);
4238#endif
4239}
4240
4241
4242/**
4243 * Allocates a temporary host general purpose register for keeping a guest
4244 * register value.
4245 *
4246 * Since we may already have a register holding the guest register value,
4247 * code will be emitted to do the loading if that's not the case. Code may also
4248 * be emitted if we have to free up a register to satisfy the request.
4249 *
4250 * @returns The host register number; throws VBox status code on failure, so no
4251 * need to check the return value.
4252 * @param pReNative The native recompile state.
4253 * @param poff Pointer to the variable with the code buffer
4254 * position. This will be updated if we need to move a
4255 * variable from register to stack in order to satisfy
4256 * the request.
4257 * @param enmGstReg The guest register that is to be updated.
4258 * @param enmIntendedUse How the caller will be using the host register.
4259 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4260 * register is okay (default). The ASSUMPTION here is
4261 * that the caller has already flushed all volatile
4262 * registers, so this is only applied if we allocate a
4263 * new register.
4264 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4265 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4266 */
4267DECL_HIDDEN_THROW(uint8_t)
4268iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4269 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4270 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4271{
4272 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4273#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4274 AssertMsg( fSkipLivenessAssert
4275 || pReNative->idxCurCall == 0
4276 || enmGstReg == kIemNativeGstReg_Pc
4277 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4278 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4279 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4280 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4281 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4282 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4283#endif
4284 RT_NOREF(fSkipLivenessAssert);
4285#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4286 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4287#endif
4288 uint32_t const fRegMask = !fNoVolatileRegs
4289 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4290 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4291
4292 /*
4293 * First check if the guest register value is already in a host register.
4294 */
4295 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4296 {
4297 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4298 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4299 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4300 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4301
4302 /* It's not supposed to be allocated... */
4303 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4304 {
4305 /*
4306 * If the register will trash the guest shadow copy, try find a
4307 * completely unused register we can use instead. If that fails,
4308 * we need to disassociate the host reg from the guest reg.
4309 */
4310 /** @todo would be nice to know if preserving the register is in any way helpful. */
4311 /* If the purpose is calculations, try duplicate the register value as
4312 we'll be clobbering the shadow. */
4313 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4314 && ( ~pReNative->Core.bmHstRegs
4315 & ~pReNative->Core.bmHstRegsWithGstShadow
4316 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4317 {
4318 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4319
4320 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4321
4322 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4323 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4324 g_apszIemNativeHstRegNames[idxRegNew]));
4325 idxReg = idxRegNew;
4326 }
4327 /* If the current register matches the restrictions, go ahead and allocate
4328 it for the caller. */
4329 else if (fRegMask & RT_BIT_32(idxReg))
4330 {
4331 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4332 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4333 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4334 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4335 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4336 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4337 else
4338 {
4339 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4340 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4341 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4342 }
4343 }
4344 /* Otherwise, allocate a register that satisfies the caller and transfer
4345 the shadowing if compatible with the intended use. (This basically
4346 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4347 else
4348 {
4349 Assert(fNoVolatileRegs);
4350 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4351 !fNoVolatileRegs
4352 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4353 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4354 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4355 {
4356 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4357 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4358 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4359 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4360 }
4361 else
4362 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4363 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4364 g_apszIemNativeHstRegNames[idxRegNew]));
4365 idxReg = idxRegNew;
4366 }
4367 }
4368 else
4369 {
4370 /*
4371 * Oops. Shadowed guest register already allocated!
4372 *
4373 * Allocate a new register, copy the value and, if updating, the
4374 * guest shadow copy assignment to the new register.
4375 */
4376 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4377 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4378 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4379 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4380
4381 /** @todo share register for readonly access. */
4382 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4383 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4384
4385 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4386 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4387
4388 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4389 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4390 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4391 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4392 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4393 else
4394 {
4395 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4396 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4397 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4398 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4399 }
4400 idxReg = idxRegNew;
4401 }
4402 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4403
4404#ifdef VBOX_STRICT
4405 /* Strict builds: Check that the value is correct. */
4406 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4407#endif
4408
4409 return idxReg;
4410 }
4411
4412 /*
4413 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4414 */
4415 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4416
4417 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4418 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4419
4420 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4421 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4422 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4423 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4424
4425 return idxRegNew;
4426}
4427
4428
4429/**
4430 * Allocates a temporary host general purpose register that already holds the
4431 * given guest register value.
4432 *
4433 * The use case for this function is places where the shadowing state cannot be
4434 * modified due to branching and such. This will fail if we don't have a
4435 * current shadow copy handy or if it's incompatible. The only code that will
4436 * be emitted here is value checking code in strict builds.
4437 *
4438 * The intended use can only be readonly!
4439 *
4440 * @returns The host register number, UINT8_MAX if not present.
4441 * @param pReNative The native recompile state.
4442 * @param poff Pointer to the instruction buffer offset.
4443 * Will be updated in strict builds if a register is
4444 * found.
4445 * @param enmGstReg The guest register that is to be used (read-only).
4446 * @note In strict builds, this may throw instruction buffer growth failures.
4447 * Non-strict builds will not throw anything.
4448 * @sa iemNativeRegAllocTmpForGuestReg
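 *
 * A minimal usage sketch (hypothetical caller; pReNative and off are the usual
 * recompiler state and code buffer offset):
 * @code
 *     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *     if (idxPcReg != UINT8_MAX)
 *     {
 *         // ... use the shadowed PC value in idxPcReg ...
 *         iemNativeRegFreeTmp(pReNative, idxPcReg);
 *     }
 * @endcode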
4449 */
4450DECL_HIDDEN_THROW(uint8_t)
4451iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4452{
4453 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4454#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4455 AssertMsg( pReNative->idxCurCall == 0
4456 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4457 || enmGstReg == kIemNativeGstReg_Pc,
4458 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4459#endif
4460
4461 /*
4462 * First check if the guest register value is already in a host register.
4463 */
4464 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4465 {
4466 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4467 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4468 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4469 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4470
4471 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4472 {
4473 /*
4474 * We only do readonly use here, so easy compared to the other
4475 * variant of this code.
4476 */
4477 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4478 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4479 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4480 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4481 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4482
4483#ifdef VBOX_STRICT
4484 /* Strict builds: Check that the value is correct. */
4485 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4486#else
4487 RT_NOREF(poff);
4488#endif
4489 return idxReg;
4490 }
4491 }
4492
4493 return UINT8_MAX;
4494}
4495
4496
4497DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
4498
4499
4500/**
4501 * Allocates argument registers for a function call.
4502 *
4503 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4504 * need to check the return value.
4505 * @param pReNative The native recompile state.
4506 * @param off The current code buffer offset.
4507 * @param cArgs The number of arguments the function call takes.
4508 */
4509DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4510{
4511 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4512 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4513 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4514 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4515
4516 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4517 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4518 else if (cArgs == 0)
4519 return off;
4520
4521 /*
4522 * Do we get lucky and all registers are free and not shadowing anything?
4523 */
4524 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4525 for (uint32_t i = 0; i < cArgs; i++)
4526 {
4527 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4528 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4529 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4530 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4531 }
4532 /*
4533 * Okay, not lucky so we have to free up the registers.
4534 */
4535 else
4536 for (uint32_t i = 0; i < cArgs; i++)
4537 {
4538 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4539 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4540 {
4541 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4542 {
4543 case kIemNativeWhat_Var:
4544 {
4545 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4546 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
4547 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4548 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4549 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4550
4551 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4552 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4553 else
4554 {
4555 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4556 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4557 }
4558 break;
4559 }
4560
4561 case kIemNativeWhat_Tmp:
4562 case kIemNativeWhat_Arg:
4563 case kIemNativeWhat_rc:
4564 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4565 default:
4566 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4567 }
4568
4569 }
4570 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4571 {
4572 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4573 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4574 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4575 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4576 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4577 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4578 }
4579 else
4580 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4581 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4582 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4583 }
4584 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4585 return off;
4586}
4587
4588
4589DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4590
4591
4592#if 0
4593/**
4594 * Frees a register assignment of any type.
4595 *
4596 * @param pReNative The native recompile state.
4597 * @param idxHstReg The register to free.
4598 *
4599 * @note Does not update variables.
4600 */
4601DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4602{
4603 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4604 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4605 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4606 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4607 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4608 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4609 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4610 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4611 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4612 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4613 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4614 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4615 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4616 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4617
4618 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4619 /* no flushing, right:
4620 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4621 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4622 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4623 */
4624}
4625#endif
4626
4627
4628/**
4629 * Frees a temporary register.
4630 *
4631 * Any shadow copies of guest registers assigned to the host register will not
4632 * be flushed by this operation.
4633 */
4634DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4635{
4636 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4637 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4638 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4639 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4640 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4641}
4642
4643
4644/**
4645 * Frees a temporary immediate register.
4646 *
4647 * It is assumed that the caller has not modified the register, so it still holds
4648 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4649 */
4650DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4651{
4652 iemNativeRegFreeTmp(pReNative, idxHstReg);
4653}
4654
4655
4656/**
4657 * Frees a register assigned to a variable.
4658 *
4659 * The register will be disassociated from the variable.
4660 */
4661DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4662{
4663 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4664 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4665 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4666 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4667 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4668
4669 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4670 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4671 if (!fFlushShadows)
4672 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
4673 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4674 else
4675 {
4676 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4677 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4678 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4679 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4680 uint64_t fGstRegShadows = fGstRegShadowsOld;
4681 while (fGstRegShadows)
4682 {
4683 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4684 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4685
4686 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4687 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4688 }
4689 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
4690 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4691 }
4692}
4693
4694
4695/**
4696 * Called right before emitting a call instruction to move anything important
4697 * out of call-volatile registers, free and flush the call-volatile registers,
4698 * optionally freeing argument variables.
4699 *
4700 * @returns New code buffer offset; throws VBox status code on failure.
4701 * @param pReNative The native recompile state.
4702 * @param off The code buffer offset.
4703 * @param cArgs The number of arguments the function call takes.
4704 * It is presumed that the host register part of these has
4705 * been allocated as such already and won't need moving,
4706 * just freeing.
4707 * @param fKeepVars Mask of variables that should keep their register
4708 * assignments. Caller must take care to handle these.
4709 */
4710DECL_HIDDEN_THROW(uint32_t)
4711iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4712{
4713 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4714
4715 /* fKeepVars will reduce this mask. */
4716 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4717
4718 /*
4719 * Move anything important out of volatile registers.
4720 */
4721 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4722 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4723 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4724#ifdef IEMNATIVE_REG_FIXED_TMP0
4725 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4726#endif
4727 & ~g_afIemNativeCallRegs[cArgs];
4728
4729 fRegsToMove &= pReNative->Core.bmHstRegs;
4730 if (!fRegsToMove)
4731 { /* likely */ }
4732 else
4733 {
4734 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4735 while (fRegsToMove != 0)
4736 {
4737 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4738 fRegsToMove &= ~RT_BIT_32(idxReg);
4739
4740 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4741 {
4742 case kIemNativeWhat_Var:
4743 {
4744 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4745 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
4746 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4747 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4748 if (!(RT_BIT_32(idxVar) & fKeepVars))
4749 {
4750 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
4751 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
4752 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4753 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4754 else
4755 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4756 }
4757 else
4758 fRegsToFree &= ~RT_BIT_32(idxReg);
4759 continue;
4760 }
4761
4762 case kIemNativeWhat_Arg:
4763 AssertMsgFailed(("What?!?: %u\n", idxReg));
4764 continue;
4765
4766 case kIemNativeWhat_rc:
4767 case kIemNativeWhat_Tmp:
4768 AssertMsgFailed(("Missing free: %u\n", idxReg));
4769 continue;
4770
4771 case kIemNativeWhat_FixedTmp:
4772 case kIemNativeWhat_pVCpuFixed:
4773 case kIemNativeWhat_pCtxFixed:
4774 case kIemNativeWhat_FixedReserved:
4775 case kIemNativeWhat_Invalid:
4776 case kIemNativeWhat_End:
4777 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4778 }
4779 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4780 }
4781 }
4782
4783 /*
4784 * Do the actual freeing.
4785 */
4786 if (pReNative->Core.bmHstRegs & fRegsToFree)
4787 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4788 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4789 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4790
4791 /* If there are guest register shadows in any call-volatile register, we
4792 have to clear the corresponding guest register masks for each register. */
4793 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4794 if (fHstRegsWithGstShadow)
4795 {
4796 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4797 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4798 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4799 do
4800 {
4801 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4802 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4803
4804 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4805 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4806 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4807 } while (fHstRegsWithGstShadow != 0);
4808 }
4809
4810 return off;
4811}
4812
4813
4814/**
4815 * Flushes a set of guest register shadow copies.
4816 *
4817 * This is usually done after calling a threaded function or a C-implementation
4818 * of an instruction.
4819 *
4820 * @param pReNative The native recompile state.
4821 * @param fGstRegs Set of guest registers to flush.
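 *
 * Illustrative (hypothetical) call flushing the PC and EFlags shadows:
 * @code
 *     iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
 * @endcode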
4822 */
4823DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4824{
4825 /*
4826 * Reduce the mask by what's currently shadowed
4827 */
4828 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4829 fGstRegs &= bmGstRegShadowsOld;
4830 if (fGstRegs)
4831 {
4832 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4833 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4834 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4835 if (bmGstRegShadowsNew)
4836 {
4837 /*
4838 * Partial.
4839 */
4840 do
4841 {
4842 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4843 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4844 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4845 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4846 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4847
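 /* Clear, in one go, every guest register in the flush mask that this host register shadows. */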
4848 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4849 fGstRegs &= ~fInThisHstReg;
4850 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4851 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4852 if (!fGstRegShadowsNew)
4853 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4854 } while (fGstRegs != 0);
4855 }
4856 else
4857 {
4858 /*
4859 * Clear all.
4860 */
4861 do
4862 {
4863 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4864 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4865 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4866 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4867 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4868
4869 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4870 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4871 } while (fGstRegs != 0);
4872 pReNative->Core.bmHstRegsWithGstShadow = 0;
4873 }
4874 }
4875}
4876
4877
4878/**
4879 * Flushes guest register shadow copies held by a set of host registers.
4880 *
4881 * This is used with the TLB lookup code for ensuring that we don't carry on
4882 * with any guest shadows in volatile registers, as these will get corrupted by
4883 * a TLB miss.
4884 *
4885 * @param pReNative The native recompile state.
4886 * @param fHstRegs Set of host registers to flush guest shadows for.
4887 */
4888DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4889{
4890 /*
4891 * Reduce the mask by what's currently shadowed.
4892 */
4893 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4894 fHstRegs &= bmHstRegsWithGstShadowOld;
4895 if (fHstRegs)
4896 {
4897 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4898 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4899 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4900 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4901 if (bmHstRegsWithGstShadowNew)
4902 {
4903 /*
4904 * Partial (likely).
4905 */
4906 uint64_t fGstShadows = 0;
4907 do
4908 {
4909 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4910 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4911 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4912 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4913
4914 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4915 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4916 fHstRegs &= ~RT_BIT_32(idxHstReg);
4917 } while (fHstRegs != 0);
4918 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4919 }
4920 else
4921 {
4922 /*
4923 * Clear all.
4924 */
4925 do
4926 {
4927 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4928 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4929 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4930 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4931
4932 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4933 fHstRegs &= ~RT_BIT_32(idxHstReg);
4934 } while (fHstRegs != 0);
4935 pReNative->Core.bmGstRegShadows = 0;
4936 }
4937 }
4938}
4939
4940
4941/**
4942 * Restores guest shadow copies in volatile registers.
4943 *
4944 * This is used after calling a helper function (think TLB miss) to restore the
4945 * register state of volatile registers.
4946 *
4947 * @param pReNative The native recompile state.
4948 * @param off The code buffer offset.
4949 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4950 * be active (allocated) w/o asserting. Hack.
4951 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4952 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4953 */
4954DECL_HIDDEN_THROW(uint32_t)
4955iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4956{
4957 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4958 if (fHstRegs)
4959 {
4960 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4961 do
4962 {
4963 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4964
4965 /* It's not fatal if a register is active holding a variable that
4966 is shadowing a guest register, ASSUMING all pending guest register
4967 writes were flushed prior to the helper call. However, we'll be
4968 emitting duplicate restores, so it wastes code space. */
4969 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4970 RT_NOREF(fHstRegsActiveShadows);
4971
4972 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4973 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4974 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4975 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4976
4977 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
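 /* If the host register shadows several guest registers they all hold the same value, so
    reloading any one of them (here the first) restores the host register. */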
4978 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4979
4980 fHstRegs &= ~RT_BIT_32(idxHstReg);
4981 } while (fHstRegs != 0);
4982 }
4983 return off;
4984}
4985
4986
4987/**
4988 * Flushes delayed write of a specific guest register.
4989 *
4990 * This must be called prior to calling CImpl functions and any helpers that use
4991 * the guest state (like raising exceptions) and such.
4992 *
4993 * This optimization has not yet been implemented. The first target would be
4994 * RIP updates, since these are the most common ones.
4995 */
4996DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4997 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
4998{
4999 RT_NOREF(pReNative, enmClass, idxReg);
5000 return off;
5001}
5002
5003
5004/**
5005 * Flushes any delayed guest register writes.
5006 *
5007 * This must be called prior to calling CImpl functions and any helpers that use
5008 * the guest state (like raising exceptions) and such.
5009 *
5010 * This optimization has not yet been implemented. The first target would be
5011 * RIP updates, since these are the most common ones.
5012 */
5013DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5014{
5015 RT_NOREF(pReNative, off);
5016 return off;
5017}
5018
5019
5020#ifdef VBOX_STRICT
5021/**
5022 * Does internal register allocator sanity checks.
5023 */
5024static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5025{
5026 /*
5027 * Iterate host registers building a guest shadowing set.
5028 */
5029 uint64_t bmGstRegShadows = 0;
5030 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5031 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5032 while (bmHstRegsWithGstShadow)
5033 {
5034 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5035 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5036 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5037
5038 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5039 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5040 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5041 bmGstRegShadows |= fThisGstRegShadows;
5042 while (fThisGstRegShadows)
5043 {
5044 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5045 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5046 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5047 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5048 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5049 }
5050 }
5051 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5052 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5053 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5054
5055 /*
5056 * Now the other way around, checking the guest to host index array.
5057 */
5058 bmHstRegsWithGstShadow = 0;
5059 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5060 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5061 while (bmGstRegShadows)
5062 {
5063 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5064 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5065 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5066
5067 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5068 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5069 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5070 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5071 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5072 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5073 }
5074 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5075 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5076 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5077}
5078#endif
5079
5080
5081/*********************************************************************************************************************************
5082* Code Emitters (larger snippets) *
5083*********************************************************************************************************************************/
5084
5085/**
5086 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5087 * extending to 64-bit width.
5088 *
5089 * @returns New code buffer offset on success; throws VBox status code on failure.
5090 * @param pReNative The native recompile state.
5091 * @param off The current code buffer position.
5092 * @param idxHstReg The host register to load the guest register value into.
5093 * @param enmGstReg The guest register to load.
5094 *
5095 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5096 * that is something the caller needs to do if applicable.
5097 */
5098DECL_HIDDEN_THROW(uint32_t)
5099iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5100{
5101 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
5102 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5103
5104 switch (g_aGstShadowInfo[enmGstReg].cb)
5105 {
5106 case sizeof(uint64_t):
5107 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5108 case sizeof(uint32_t):
5109 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5110 case sizeof(uint16_t):
5111 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5112#if 0 /* not present in the table. */
5113 case sizeof(uint8_t):
5114 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5115#endif
5116 default:
5117 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5118 }
5119}
5120
5121
5122#ifdef VBOX_STRICT
5123/**
5124 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5125 *
5126 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5127 * Trashes EFLAGS on AMD64.
5128 */
5129static uint32_t
5130iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5131{
5132# ifdef RT_ARCH_AMD64
5133 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5134
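 /* Approach: rotate the upper 32 bits down, test them, trap (int3) if any bit is set, then rotate back. */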
5135 /* rol reg64, 32 */
5136 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5137 pbCodeBuf[off++] = 0xc1;
5138 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5139 pbCodeBuf[off++] = 32;
5140
5141 /* test reg32, ffffffffh */
5142 if (idxReg >= 8)
5143 pbCodeBuf[off++] = X86_OP_REX_B;
5144 pbCodeBuf[off++] = 0xf7;
5145 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5146 pbCodeBuf[off++] = 0xff;
5147 pbCodeBuf[off++] = 0xff;
5148 pbCodeBuf[off++] = 0xff;
5149 pbCodeBuf[off++] = 0xff;
5150
5151 /* je/jz +1 */
5152 pbCodeBuf[off++] = 0x74;
5153 pbCodeBuf[off++] = 0x01;
5154
5155 /* int3 */
5156 pbCodeBuf[off++] = 0xcc;
5157
5158 /* rol reg64, 32 */
5159 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5160 pbCodeBuf[off++] = 0xc1;
5161 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5162 pbCodeBuf[off++] = 32;
5163
5164# elif defined(RT_ARCH_ARM64)
5165 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5166 /* lsr tmp0, reg64, #32 */
5167 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5168 /* cbz tmp0, +1 */
5169 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5170 /* brk #0x1100 */
5171 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5172
5173# else
5174# error "Port me!"
5175# endif
5176 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5177 return off;
5178}
5179#endif /* VBOX_STRICT */
5180
5181
5182#ifdef VBOX_STRICT
5183/**
5184 * Emits code that checks that the content of register @a idxReg is the same
5185 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5186 * instruction if that's not the case.
5187 *
5188 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5189 * Trashes EFLAGS on AMD64.
5190 */
5191static uint32_t
5192iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5193{
5194# ifdef RT_ARCH_AMD64
5195 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5196
5197 /* cmp reg, [mem] */
5198 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5199 {
5200 if (idxReg >= 8)
5201 pbCodeBuf[off++] = X86_OP_REX_R;
5202 pbCodeBuf[off++] = 0x38;
5203 }
5204 else
5205 {
5206 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5207 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5208 else
5209 {
5210 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5211 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5212 else
5213 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5214 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5215 if (idxReg >= 8)
5216 pbCodeBuf[off++] = X86_OP_REX_R;
5217 }
5218 pbCodeBuf[off++] = 0x39;
5219 }
5220 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5221
5222 /* je/jz +1 */
5223 pbCodeBuf[off++] = 0x74;
5224 pbCodeBuf[off++] = 0x01;
5225
5226 /* int3 */
5227 pbCodeBuf[off++] = 0xcc;
5228
5229 /* For values smaller than the register size, we must check that the rest
5230 of the register is all zeros. */
5231 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5232 {
5233 /* test reg64, imm32 */
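 /* Note: test r/m64 sign-extends the imm32, so 0xffffff00 (8-bit shadow) / 0xffff0000 (16-bit shadow)
    ends up checking bits 8..63 / 16..63 respectively. */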
5234 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5235 pbCodeBuf[off++] = 0xf7;
5236 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5237 pbCodeBuf[off++] = 0;
5238 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5239 pbCodeBuf[off++] = 0xff;
5240 pbCodeBuf[off++] = 0xff;
5241
5242 /* je/jz +1 */
5243 pbCodeBuf[off++] = 0x74;
5244 pbCodeBuf[off++] = 0x01;
5245
5246 /* int3 */
5247 pbCodeBuf[off++] = 0xcc;
5248 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5249 }
5250 else
5251 {
5252 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5253 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5254 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5255 }
5256
5257# elif defined(RT_ARCH_ARM64)
5258 /* mov TMP0, [gstreg] */
5259 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5260
5261 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5262 /* sub tmp0, tmp0, idxReg */
5263 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5264 /* cbz tmp0, +1 */
5265 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5266 /* brk #0x1000+enmGstReg */
5267 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5268 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5269
5270# else
5271# error "Port me!"
5272# endif
5273 return off;
5274}
5275#endif /* VBOX_STRICT */
5276
5277
5278#ifdef VBOX_STRICT
5279/**
5280 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
5281 * important bits.
5282 *
5283 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5284 * Trashes EFLAGS on AMD64.
5285 */
5286static uint32_t
5287iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5288{
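 /* Load iem.s.fExec, mask it down to the key bits and compare against the expected value; trap on mismatch. */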
5289 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5290 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5291 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5292 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5293
5294# ifdef RT_ARCH_AMD64
5295 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5296
5297 /* je/jz +1 */
5298 pbCodeBuf[off++] = 0x74;
5299 pbCodeBuf[off++] = 0x01;
5300
5301 /* int3 */
5302 pbCodeBuf[off++] = 0xcc;
5303
5304# elif defined(RT_ARCH_ARM64)
5305 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5306
5307 /* b.eq +1 */
5308 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5309 /* brk #0x2000 */
5310 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5311
5312# else
5313# error "Port me!"
5314# endif
5315 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5316
5317 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5318 return off;
5319}
5320#endif /* VBOX_STRICT */
5321
5322
5323/**
5324 * Emits code for checking the return code of a call and rcPassUp, returning
5325 * from the code if either is non-zero.
5326 */
5327DECL_HIDDEN_THROW(uint32_t)
5328iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5329{
5330#ifdef RT_ARCH_AMD64
5331 /*
5332 * AMD64: eax = call status code.
5333 */
5334
5335 /* edx = rcPassUp */
5336 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5337# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5338 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5339# endif
5340
5341 /* edx = eax | rcPassUp */
5342 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5343 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5344 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5345 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5346
5347 /* Jump to non-zero status return path. */
5348 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5349
5350 /* done. */
5351
5352#elif RT_ARCH_ARM64
5353 /*
5354 * ARM64: w0 = call status code.
5355 */
5356# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5357 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5358# endif
5359 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5360
5361 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5362
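 /* w4 = rcPassUp (w3) | call status (w0); branch to the NonZeroRetOrPassUp label if the result is non-zero. */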
5363 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5364
5365 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5366 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5367 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5368
5369#else
5370# error "port me"
5371#endif
5372 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5373 RT_NOREF_PV(idxInstr);
5374 return off;
5375}
5376
5377
5378/**
5379 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5380 * raising a \#GP(0) if it isn't.
5381 *
5382 * @returns New code buffer offset; throws VBox status code on failure.
5383 * @param pReNative The native recompile state.
5384 * @param off The code buffer offset.
5385 * @param idxAddrReg The host register with the address to check.
5386 * @param idxInstr The current instruction.
5387 */
5388DECL_HIDDEN_THROW(uint32_t)
5389iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5390{
5391 /*
5392 * Make sure we don't have any outstanding guest register writes as we may
5393 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5394 */
5395 off = iemNativeRegFlushPendingWrites(pReNative, off);
5396
5397#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5398 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5399#else
5400 RT_NOREF(idxInstr);
5401#endif
5402
5403#ifdef RT_ARCH_AMD64
5404 /*
5405 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5406 * return raisexcpt();
5407 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5408 */
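 /* Why this works: for a canonical address bits 63:47 are all equal, so the top dword is either
    0x00000000..0x00007fff or 0xffff8000..0xffffffff; adding 0x8000 (with 32-bit wrap-around)
    maps both ranges into 0x0000..0xffff, and only then does the final shift by 16 yield zero. */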
5409 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5410
5411 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5412 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5413 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5414 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5415 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5416
5417 iemNativeRegFreeTmp(pReNative, iTmpReg);
5418
5419#elif defined(RT_ARCH_ARM64)
5420 /*
5421 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5422 * return raisexcpt();
5423 * ----
5424 * mov x1, 0x800000000000
5425 * add x1, x0, x1
5426 * cmp xzr, x1, lsr 48
5427 * b.ne .Lraisexcpt
5428 */
5429 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5430
5431 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5432 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5433 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5434 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5435
5436 iemNativeRegFreeTmp(pReNative, iTmpReg);
5437
5438#else
5439# error "Port me"
5440#endif
5441 return off;
5442}
5443
5444
5445/**
5446 * Emits code to check that the content of @a idxAddrReg is within the limit
5447 * of CS, raising a \#GP(0) if it isn't.
5448 *
5449 * @returns New code buffer offset; throws VBox status code on error.
5450 * @param pReNative The native recompile state.
5451 * @param off The code buffer offset.
5452 * @param idxAddrReg The host register (32-bit) with the address to
5453 * check.
5454 * @param idxInstr The current instruction.
5455 */
5456DECL_HIDDEN_THROW(uint32_t)
5457iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5458 uint8_t idxAddrReg, uint8_t idxInstr)
5459{
5460 /*
5461 * Make sure we don't have any outstanding guest register writes as we may
5462 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5463 */
5464 off = iemNativeRegFlushPendingWrites(pReNative, off);
5465
5466#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5467 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5468#else
5469 RT_NOREF(idxInstr);
5470#endif
5471
5472 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5473 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
5474 kIemNativeGstRegUse_ReadOnly);
5475
5476 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
5477 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5478
5479 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
5480 return off;
5481}
5482
5483
5484/**
5485 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
5486 *
5487 * @returns The flush mask.
5488 * @param fCImpl The IEM_CIMPL_F_XXX flags.
5489 * @param fGstShwFlush The starting flush mask.
5490 */
5491DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
5492{
5493 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
5494 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
5495 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
5496 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
5497 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
5498 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
5499 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
5500 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
5501 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
5502 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
5503 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
5504 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
5505 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5506 return fGstShwFlush;
5507}
5508
5509
5510/**
5511 * Emits a call to a CImpl function or something similar.
5512 */
5513DECL_HIDDEN_THROW(uint32_t)
5514iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5515 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5516{
5517 /*
5518 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5519 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5520 */
5521 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5522 fGstShwFlush
5523 | RT_BIT_64(kIemNativeGstReg_Pc)
5524 | RT_BIT_64(kIemNativeGstReg_EFlags));
5525 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5526
5527 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5528
5529 /*
5530 * Load the parameters.
5531 */
5532#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
5533 /* Special-case the hidden VBOXSTRICTRC pointer. */
5534 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5535 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5536 if (cAddParams > 0)
5537 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
5538 if (cAddParams > 1)
5539 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
5540 if (cAddParams > 2)
5541 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
5542 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5543
5544#else
5545 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5546 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5547 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5548 if (cAddParams > 0)
5549 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
5550 if (cAddParams > 1)
5551 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
5552 if (cAddParams > 2)
5553# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
5554 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
5555# else
5556 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
5557# endif
5558#endif
5559
5560 /*
5561 * Make the call.
5562 */
5563 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
5564
5565#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5566 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5567#endif
5568
5569 /*
5570 * Check the status code.
5571 */
5572 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5573}
5574
5575
5576/**
5577 * Emits a call to a threaded worker function.
5578 */
5579DECL_HIDDEN_THROW(uint32_t)
5580iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5581{
5582 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
5583 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5584
5585#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5586 /* The threaded function may throw / long jmp, so set current instruction
5587 number if we're counting. */
5588 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5589#endif
5590
5591 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
5592
5593#ifdef RT_ARCH_AMD64
5594 /* Load the parameters and emit the call. */
5595# ifdef RT_OS_WINDOWS
5596# ifndef VBOXSTRICTRC_STRICT_ENABLED
5597 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5598 if (cParams > 0)
5599 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
5600 if (cParams > 1)
5601 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
5602 if (cParams > 2)
5603 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
5604# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
5605 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
5606 if (cParams > 0)
5607 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
5608 if (cParams > 1)
5609 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
5610 if (cParams > 2)
5611 {
5612 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
5613 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
5614 }
5615 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5616# endif /* VBOXSTRICTRC_STRICT_ENABLED */
5617# else
5618 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5619 if (cParams > 0)
5620 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
5621 if (cParams > 1)
5622 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
5623 if (cParams > 2)
5624 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
5625# endif
5626
5627 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5628
5629# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5630 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5631# endif
5632
5633#elif RT_ARCH_ARM64
5634 /*
5635 * ARM64:
5636 */
5637 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5638 if (cParams > 0)
5639 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
5640 if (cParams > 1)
5641 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
5642 if (cParams > 2)
5643 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
5644
5645 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5646
5647#else
5648# error "port me"
5649#endif
5650
5651 /*
5652 * Check the status code.
5653 */
5654 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
5655
5656 return off;
5657}
5658
5659#ifdef VBOX_WITH_STATISTICS
5660/**
5661 * Emits code to update the threaded call statistics.
5662 */
5663DECL_INLINE_THROW(uint32_t)
5664iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5665{
5666 /*
5667 * Update threaded function stats.
5668 */
5669 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
5670 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
5671# if defined(RT_ARCH_ARM64)
5672 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
5673 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
5674 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
5675 iemNativeRegFreeTmp(pReNative, idxTmp1);
5676 iemNativeRegFreeTmp(pReNative, idxTmp2);
5677# else
5678 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
5679# endif
5680 return off;
5681}
5682#endif /* VBOX_WITH_STATISTICS */
5683
5684
5685/**
5686 * Emits the code at the CheckBranchMiss label.
5687 */
5688static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5689{
5690 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
5691 if (idxLabel != UINT32_MAX)
5692 {
5693 iemNativeLabelDefine(pReNative, idxLabel, off);
5694
5695 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5696 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5697 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5698
5699 /* jump back to the return sequence. */
5700 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5701 }
5702 return off;
5703}
5704
5705
5706/**
5707 * Emits the code at the NeedCsLimChecking label.
5708 */
5709static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5710{
5711 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5712 if (idxLabel != UINT32_MAX)
5713 {
5714 iemNativeLabelDefine(pReNative, idxLabel, off);
5715
5716 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5717 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5718 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5719
5720 /* jump back to the return sequence. */
5721 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5722 }
5723 return off;
5724}
5725
5726
5727/**
5728 * Emits the code at the ObsoleteTb label.
5729 */
5730static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5731{
5732 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5733 if (idxLabel != UINT32_MAX)
5734 {
5735 iemNativeLabelDefine(pReNative, idxLabel, off);
5736
5737 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5738 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5739 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5740
5741 /* jump back to the return sequence. */
5742 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5743 }
5744 return off;
5745}
5746
5747
5748/**
5749 * Emits the code at the RaiseGP0 label.
5750 */
5751static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5752{
5753 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5754 if (idxLabel != UINT32_MAX)
5755 {
5756 iemNativeLabelDefine(pReNative, idxLabel, off);
5757
5758 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5759 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5760 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5761
5762 /* jump back to the return sequence. */
5763 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5764 }
5765 return off;
5766}
5767
5768
5769/**
5770 * Emits the code at the ReturnWithFlags label (returns
5771 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
5772 */
5773static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5774{
5775 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
5776 if (idxLabel != UINT32_MAX)
5777 {
5778 iemNativeLabelDefine(pReNative, idxLabel, off);
5779
5780 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
5781
5782 /* jump back to the return sequence. */
5783 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5784 }
5785 return off;
5786}
5787
5788
5789/**
5790 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
5791 */
5792static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5793{
5794 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
5795 if (idxLabel != UINT32_MAX)
5796 {
5797 iemNativeLabelDefine(pReNative, idxLabel, off);
5798
5799 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
5800
5801 /* jump back to the return sequence. */
5802 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5803 }
5804 return off;
5805}
5806
5807
5808/**
5809 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
5810 */
5811static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5812{
5813 /*
5814 * Generate the rc + rcPassUp fiddling code if needed.
5815 */
5816 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5817 if (idxLabel != UINT32_MAX)
5818 {
5819 iemNativeLabelDefine(pReNative, idxLabel, off);
5820
5821 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
5822#ifdef RT_ARCH_AMD64
5823# ifdef RT_OS_WINDOWS
5824# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5825 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
5826# endif
5827 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5828 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
5829# else
5830 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5831 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
5832# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5833 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
5834# endif
5835# endif
5836# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5837 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
5838# endif
5839
5840#else
5841 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
5842 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5843 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
5844#endif
5845
5846 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
5847 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5848 }
5849 return off;
5850}
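/*
 * Conceptual sketch (not emitted verbatim): the fiddling code above shuffles
 * the fixed pVCpu register and the current return value into the first two
 * argument registers of the host ABI (rcx/rdx on Win64, rdi/rsi on SysV
 * AMD64, x0/x1 on ARM64) and then effectively does:
 *
 *      rc = iemNativeHlpExecStatusCodeFiddling(pVCpu, rc, idxInstr);
 *      goto ReturnLabel;
 *
 * so the rcPassUp merging is handled in C code rather than inline.
 */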
5851
5852
5853/**
5854 * Emits a standard epilog.
5855 */
5856static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
5857{
5858 *pidxReturnLabel = UINT32_MAX;
5859
5860 /*
5861 * Successful return, so clear the return register (eax, w0).
5862 */
5863 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
5864
5865 /*
5866 * Define label for common return point.
5867 */
5868 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
5869 *pidxReturnLabel = idxReturn;
5870
5871 /*
5872 * Restore registers and return.
5873 */
5874#ifdef RT_ARCH_AMD64
5875 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5876
5877 /* Reposition rsp at the r15 restore point. */
5878 pbCodeBuf[off++] = X86_OP_REX_W;
5879 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
5880 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
5881 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
5882
5883 /* Pop non-volatile registers and return */
5884 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
5885 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
5886 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
5887 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
5888 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
5889 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
5890 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5891 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5892# ifdef RT_OS_WINDOWS
5893 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5894 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5895# endif
5896 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5897 pbCodeBuf[off++] = 0xc9; /* leave */
5898 pbCodeBuf[off++] = 0xc3; /* ret */
5899 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5900
5901#elif RT_ARCH_ARM64
5902 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5903
5904 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
5905 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5906 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5907 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5908 IEMNATIVE_FRAME_VAR_SIZE / 8);
5909 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5910 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5911 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5912 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5913 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5914 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5915 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5916 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5917 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5918 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5919 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5920 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5921
5922 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5923 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5924 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5925 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5926
5927 /* retab / ret */
5928# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5929 if (1)
5930 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
5931 else
5932# endif
5933 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
5934
5935#else
5936# error "port me"
5937#endif
5938 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5939
5940 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
5941}
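/*
 * For reference, a rough picture of what the AMD64 epilog emitted above looks
 * like (the gcc/Windows difference only affects the lea displacement and the
 * rsi/rdi pops):
 *
 *      lea     rsp, [rbp + IEMNATIVE_FP_OFF_LAST_PUSH] ; i.e. rbp - (5 or 7) * 8
 *      pop     r15 / r14 / r13 / r12
 *      pop     rdi / rsi                               ; Windows only
 *      pop     rbx
 *      leave
 *      ret
 *      int3                                            ; poison
 */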
5942
5943
5944/**
5945 * Emits a standard prolog.
5946 */
5947static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5948{
5949#ifdef RT_ARCH_AMD64
5950 /*
5951 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
5952 * reserving 64 bytes for stack variables plus 4 non-register argument
5953 * slots. Fixed register assignment: xBX = pVCpu;
5954 *
5955 * Since we always do the same register spilling, we can use the same
5956 * unwind description for all the code.
5957 */
5958 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5959 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
5960 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
5961 pbCodeBuf[off++] = 0x8b;
5962 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
5963 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
5964 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
5965# ifdef RT_OS_WINDOWS
5966 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
5967 pbCodeBuf[off++] = 0x8b;
5968 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
5969 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
5970 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
5971# else
5972 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
5973 pbCodeBuf[off++] = 0x8b;
5974 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
5975# endif
5976 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
5977 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
5978 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
5979 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
5980 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
5981 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
5982 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
5983 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
5984
5985# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
5986 /* Save the frame pointer. */
5987 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
5988# endif
5989
5990 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
5991 X86_GREG_xSP,
5992 IEMNATIVE_FRAME_ALIGN_SIZE
5993 + IEMNATIVE_FRAME_VAR_SIZE
5994 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
5995 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
5996 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
5997 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
5998 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
5999
6000#elif RT_ARCH_ARM64
6001 /*
6002 * We set up a stack frame exactly like on x86, only we have to push the
6003 * return address ourselves here. We save all non-volatile registers.
6004 */
6005 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6006
6007# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
6008 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6009 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
6010 * in any way conditional, so we just emit this instruction now and hope for the best... */
6011 /* pacibsp */
6012 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6013# endif
6014
6015 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
6016 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6017 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6018 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6019 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6020 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6021 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6022 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6023 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6024 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6025 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6026 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6027 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6028 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6029 /* Save the BP and LR (ret address) registers at the top of the frame. */
6030 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6031 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6032 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6033 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6034 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6035 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6036
6037 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6038 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6039
6040 /* mov r28, r0 */
6041 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6042 /* mov r27, r1 */
6043 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6044
6045# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6046 /* Save the frame pointer. */
6047 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6048 ARMV8_A64_REG_X2);
6049# endif
6050
6051#else
6052# error "port me"
6053#endif
6054 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6055 return off;
6056}
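/*
 * Rough sketch of the resulting AMD64 frame (offsets relative to rbp; the
 * exact sizes come from the IEMNATIVE_FRAME_* constants):
 *
 *      rbp+08h  return address
 *      rbp+00h  saved rbp
 *      rbp-08h  saved rbx                  ; the fixed pVCpu register
 *      ...      saved rsi/rdi (Windows), r12 thru r15
 *      rsp      variable area + stack/shadow argument slots
 *
 * The ARM64 prolog builds the same shape by hand: x19..x28 plus bp/lr are
 * saved in one block and the variable area is then allocated from SP.
 */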
6057
6058
6059
6060
6061/*********************************************************************************************************************************
6062* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
6063*********************************************************************************************************************************/
6064
6065#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
6066 { \
6067 Assert(pReNative->Core.bmVars == 0); \
6068 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
6069 Assert(pReNative->Core.bmStack == 0); \
6070 pReNative->fMc = (a_fMcFlags); \
6071 pReNative->fCImpl = (a_fCImplFlags); \
6072 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
6073
6074/** We have to get to the end in recompilation mode, as otherwise we won't
6075 * generate code for all the IEM_MC_IF_XXX branches. */
6076#define IEM_MC_END() \
6077 iemNativeVarFreeAll(pReNative); \
6078 } return off
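/*
 * Hypothetical usage sketch: the generated per-instruction emitter bodies
 * (produced by the threaded-function python generator) wrap their statements
 * in these macros roughly like so, so that falling out of IEM_MC_END returns
 * the current code buffer offset:
 *
 *      IEM_MC_BEGIN(0, 1, IEM_MC_F_MIN_386, 0);
 *      IEM_MC_LOCAL(uint32_t, uValue);
 *      ...
 *      IEM_MC_END();
 */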
6079
6080
6081
6082/*********************************************************************************************************************************
6083* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
6084*********************************************************************************************************************************/
6085
6086#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
6087 pReNative->fMc = 0; \
6088 pReNative->fCImpl = (a_fFlags); \
6089 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
6090
6091
6092#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6093 pReNative->fMc = 0; \
6094 pReNative->fCImpl = (a_fFlags); \
6095 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
6096
6097DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6098 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6099 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
6100{
6101 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
6102}
6103
6104
6105#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6106 pReNative->fMc = 0; \
6107 pReNative->fCImpl = (a_fFlags); \
6108 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6109 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
6110
6111DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6112 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6113 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
6114{
6115 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
6116}
6117
6118
6119#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6120 pReNative->fMc = 0; \
6121 pReNative->fCImpl = (a_fFlags); \
6122 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6123 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
6124
6125DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6126 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6127 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
6128 uint64_t uArg2)
6129{
6130 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
6131}
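/*
 * Conceptually (illustrative only), each IEM_MC_DEFER_TO_CIMPL_N_RET_THREADED
 * expansion ends the MC block early and emits a call that behaves like:
 *
 *      <flush the guest register shadows indicated by a_fGstShwFlush>
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, a2);    // unused args dropped
 *      <check the status code and return>
 *
 * The heavy lifting is done by iemNativeEmitCImplCall().
 */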
6132
6133
6134
6135/*********************************************************************************************************************************
6136* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
6137*********************************************************************************************************************************/
6138
6139/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
6140 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
6141DECL_INLINE_THROW(uint32_t)
6142iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6143{
6144 /*
6145 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
6146 * return with a special status code and make the execution loop deal with
6147 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
6148 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
6149 * could continue w/o interruption, it probably will drop into the
6150 * debugger, so it's not worth the effort of trying to service it here and we
6151 * just lump it in with the handling of the others.
6152 *
6153 * To simplify the code and the register state management even more (wrt the
6154 * immediate in the AND operation), we always update the flags and skip the
6155 * conditional jump associated with the extra check.
6156 */
6157 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
6158 <= UINT32_MAX);
6159#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6160 AssertMsg( pReNative->idxCurCall == 0
6161 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
6162 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
6163#endif
6164
6165 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6166 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
6167 true /*fSkipLivenessAssert*/);
6168 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
6169 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
6170 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
6171 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
6172 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6173
6174 /* Free but don't flush the EFLAGS register. */
6175 iemNativeRegFreeTmp(pReNative, idxEflReg);
6176
6177 return off;
6178}
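/*
 * Rough C equivalent of the check emitted above (cf. iemRegFinishClearingRF):
 *
 *      uint32_t fEfl = pVCpu->cpum.GstCtx.eflags.u;
 *      if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          goto ReturnWithFlags;               // VINF_IEM_REEXEC_FINISH_WITH_FLAGS
 *      pVCpu->cpum.GstCtx.eflags.u = fEfl & ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 */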
6179
6180
6181/** Handles the normal-status part of the finish MCs: a no-op for VINF_SUCCESS,
6181 * otherwise a jump to the ReturnBreak label. */
6182template<int const a_rcNormal>
6183DECL_FORCE_INLINE(uint32_t)
6184iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6185{
6186 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
6187 if (a_rcNormal != VINF_SUCCESS)
6188 {
6189#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6190 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6191#else
6192 RT_NOREF_PV(idxInstr);
6193#endif
6194 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
6195 }
6196 return off;
6197}
6198
6199
6200#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
6201 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6202 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6203
6204#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6205 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6206 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6207 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6208
6209/** Same as iemRegAddToRip64AndFinishingNoFlags. */
6210DECL_INLINE_THROW(uint32_t)
6211iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6212{
6213 /* Allocate a temporary PC register. */
6214 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6215
6216 /* Perform the addition and store the result. */
6217 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
6218 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6219
6220 /* Free but don't flush the PC register. */
6221 iemNativeRegFreeTmp(pReNative, idxPcReg);
6222
6223 return off;
6224}
6225
6226
6227#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
6228 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6229 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6230
6231#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6232 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6233 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6234 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6235
6236/** Same as iemRegAddToEip32AndFinishingNoFlags. */
6237DECL_INLINE_THROW(uint32_t)
6238iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6239{
6240 /* Allocate a temporary PC register. */
6241 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6242
6243 /* Perform the addition and store the result. */
6244 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6245 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6246
6247 /* Free but don't flush the PC register. */
6248 iemNativeRegFreeTmp(pReNative, idxPcReg);
6249
6250 return off;
6251}
6252
6253
6254#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
6255 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6256 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6257
6258#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6259 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6260 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6261 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6262
6263/** Same as iemRegAddToIp16AndFinishingNoFlags. */
6264DECL_INLINE_THROW(uint32_t)
6265iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6266{
6267 /* Allocate a temporary PC register. */
6268 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6269
6270 /* Perform the addition and store the result. */
6271 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6272 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6273 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6274
6275 /* Free but don't flush the PC register. */
6276 iemNativeRegFreeTmp(pReNative, idxPcReg);
6277
6278 return off;
6279}
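/*
 * Summary of what the three PC-advance emitters above amount to (sketch):
 *
 *      PC64:  pVCpu->cpum.GstCtx.rip = rip + cbInstr;              // no wrap checks
 *      PC32:  pVCpu->cpum.GstCtx.rip = (uint32_t)(eip + cbInstr);  // 32-bit add zero-extends
 *      PC16:  pVCpu->cpum.GstCtx.rip = (uint16_t)(ip  + cbInstr);  // explicit 16-bit truncation
 */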
6280
6281
6282
6283/*********************************************************************************************************************************
6284* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
6285*********************************************************************************************************************************/
6286
6287#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6288 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6289 (a_enmEffOpSize), pCallEntry->idxInstr); \
6290 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6291
6292#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6293 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6294 (a_enmEffOpSize), pCallEntry->idxInstr); \
6295 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6296 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6297
6298#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
6299 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6300 IEMMODE_16BIT, pCallEntry->idxInstr); \
6301 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6302
6303#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6304 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6305 IEMMODE_16BIT, pCallEntry->idxInstr); \
6306 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6307 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6308
6309#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
6310 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6311 IEMMODE_64BIT, pCallEntry->idxInstr); \
6312 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6313
6314#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6315 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6316 IEMMODE_64BIT, pCallEntry->idxInstr); \
6317 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6318 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6319
6320/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
6321 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
6322 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
6323DECL_INLINE_THROW(uint32_t)
6324iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6325 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6326{
6327 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
6328
6329 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6330 off = iemNativeRegFlushPendingWrites(pReNative, off);
6331
6332 /* Allocate a temporary PC register. */
6333 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6334
6335 /* Perform the addition. */
6336 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
6337
6338 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
6339 {
6340 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6341 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6342 }
6343 else
6344 {
6345 /* Just truncate the result to 16-bit IP. */
6346 Assert(enmEffOpSize == IEMMODE_16BIT);
6347 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6348 }
6349 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6350
6351 /* Free but don't flush the PC register. */
6352 iemNativeRegFreeTmp(pReNative, idxPcReg);
6353
6354 return off;
6355}
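/*
 * Sketch of the 64-bit relative jump logic emitted above:
 *
 *      uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
 *      if (enmEffOpSize == IEMMODE_64BIT)
 *          <raise #GP(0) via the RaiseGp0 tail label if uNewRip isn't canonical>
 *      else
 *          uNewRip = (uint16_t)uNewRip;        // 16-bit operand size in 64-bit mode
 *      pVCpu->cpum.GstCtx.rip = uNewRip;
 */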
6356
6357
6358#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6359 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6360 (a_enmEffOpSize), pCallEntry->idxInstr); \
6361 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6362
6363#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6364 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6365 (a_enmEffOpSize), pCallEntry->idxInstr); \
6366 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6367 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6368
6369#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
6370 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6371 IEMMODE_16BIT, pCallEntry->idxInstr); \
6372 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6373
6374#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6375 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6376 IEMMODE_16BIT, pCallEntry->idxInstr); \
6377 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6378 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6379
6380#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
6381 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6382 IEMMODE_32BIT, pCallEntry->idxInstr); \
6383 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6384
6385#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6386 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6387 IEMMODE_32BIT, pCallEntry->idxInstr); \
6388 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6389 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6390
6391/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
6392 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
6393 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
6394DECL_INLINE_THROW(uint32_t)
6395iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6396 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6397{
6398 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
6399
6400 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6401 off = iemNativeRegFlushPendingWrites(pReNative, off);
6402
6403 /* Allocate a temporary PC register. */
6404 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6405
6406 /* Perform the addition. */
6407 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6408
6409 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
6410 if (enmEffOpSize == IEMMODE_16BIT)
6411 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6412
6413 /* Perform limit checking, potentially raising #GP(0) and exiting the TB. */
6414/** @todo we can skip this in 32-bit FLAT mode. */
6415 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6416
6417 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6418
6419 /* Free but don't flush the PC register. */
6420 iemNativeRegFreeTmp(pReNative, idxPcReg);
6421
6422 return off;
6423}
6424
6425
6426#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
6427 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6428 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6429
6430#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
6431 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6432 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6433 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6434
6435#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
6436 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6437 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6438
6439#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6440 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6441 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6442 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6443
6444#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
6445 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6446 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6447
6448#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6449 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6450 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6451 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6452
6453/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
6454DECL_INLINE_THROW(uint32_t)
6455iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6456 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
6457{
6458 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6459 off = iemNativeRegFlushPendingWrites(pReNative, off);
6460
6461 /* Allocate a temporary PC register. */
6462 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6463
6464 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
6465 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6466 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6467 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6468 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6469
6470 /* Free but don't flush the PC register. */
6471 iemNativeRegFreeTmp(pReNative, idxPcReg);
6472
6473 return off;
6474}
6475
6476
6477
6478/*********************************************************************************************************************************
6479* Emitters for changing PC/RIP/EIP/IP with a indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
6480*********************************************************************************************************************************/
6481
6482/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
6483#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
6484 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6485
6486/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
6487#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
6488 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6489
6490/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
6491#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
6492 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6493
6494/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
6495 * clears flags. */
6496#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
6497 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
6498 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6499
6500/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
6501 * clears flags. */
6502#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
6503 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
6504 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6505
6506/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
6507 * clears flags. */
6508#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
6509 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
6510 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6511
6512#undef IEM_MC_SET_RIP_U16_AND_FINISH
6513
6514
6515/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
6516#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
6517 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6518
6519/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
6520#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
6521 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6522
6523/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
6524 * clears flags. */
6525#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
6526 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
6527 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6528
6529/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
6530 * and clears flags. */
6531#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
6532 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
6533 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6534
6535#undef IEM_MC_SET_RIP_U32_AND_FINISH
6536
6537
6538/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
6539#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
6540 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
6541
6542/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
6543 * and clears flags. */
6544#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
6545 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
6546 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6547
6548#undef IEM_MC_SET_RIP_U64_AND_FINISH
6549
6550
6551/** Same as iemRegRipJumpU16AndFinishNoFlags,
6552 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
6553DECL_INLINE_THROW(uint32_t)
6554iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
6555 uint8_t idxInstr, uint8_t cbVar)
6556{
6557 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
6558 Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
6559
6560 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6561 off = iemNativeRegFlushPendingWrites(pReNative, off);
6562
6563 /* Get a register with the new PC loaded from idxVarPc.
6564 Note! This ASSUMES that the high bits of the GPR are zeroed. */
6565 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
6566
6567 /* Check limit (may #GP(0) + exit TB). */
6568 if (!f64Bit)
6569/** @todo we can skip this test in FLAT 32-bit mode. */
6570 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6571 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6572 else if (cbVar > sizeof(uint32_t))
6573 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6574
6575 /* Store the result. */
6576 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6577
6578 iemNativeVarRegisterRelease(pReNative, idxVarPc);
6579 /** @todo implicitly free the variable? */
6580
6581 return off;
6582}
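/*
 * Sketch of the indirect jump logic above (uNewPc stands in for the value of
 * the idxVarPc variable):
 *
 *      if (!f64Bit)
 *          <check uNewPc against the CS limit, #GP(0) + exit TB on failure>
 *      else if (cbVar > sizeof(uint32_t))
 *          <check that uNewPc is canonical, #GP(0) + exit TB on failure>
 *      pVCpu->cpum.GstCtx.rip = uNewPc;        // high bits assumed zero for 16/32-bit
 */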
6583
6584
6585
6586/*********************************************************************************************************************************
6587* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
6588*********************************************************************************************************************************/
6589
6590/**
6591 * Pushes an IEM_MC_IF_XXX onto the condition stack.
6592 *
6593 * @returns Pointer to the condition stack entry on success; longjmps on
6594 * failure (too deeply nested).
6595 */
6596DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
6597{
6598 uint32_t const idxStack = pReNative->cCondDepth;
6599 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
6600
6601 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
6602 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
6603
6604 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
6605 pEntry->fInElse = false;
6606 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
6607 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
6608
6609 return pEntry;
6610}
6611
6612
6613/**
6614 * Start of the if-block, snapshotting the register and variable state.
6615 */
6616DECL_INLINE_THROW(void)
6617iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
6618{
6619 Assert(offIfBlock != UINT32_MAX);
6620 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6621 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6622 Assert(!pEntry->fInElse);
6623
6624 /* Define the start of the IF block if requested or for disassembly purposes. */
6625 if (idxLabelIf != UINT32_MAX)
6626 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
6627#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6628 else
6629 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
6630#else
6631 RT_NOREF(offIfBlock);
6632#endif
6633
6634 /* Copy the initial state so we can restore it in the 'else' block. */
6635 pEntry->InitialState = pReNative->Core;
6636}
6637
6638
6639#define IEM_MC_ELSE() } while (0); \
6640 off = iemNativeEmitElse(pReNative, off); \
6641 do {
6642
6643/** Emits code related to IEM_MC_ELSE. */
6644DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6645{
6646 /* Check sanity and get the conditional stack entry. */
6647 Assert(off != UINT32_MAX);
6648 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6649 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6650 Assert(!pEntry->fInElse);
6651
6652 /* Jump to the endif */
6653 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
6654
6655 /* Define the else label and enter the else part of the condition. */
6656 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6657 pEntry->fInElse = true;
6658
6659 /* Snapshot the core state so we can do a merge at the endif and restore
6660 the snapshot we took at the start of the if-block. */
6661 pEntry->IfFinalState = pReNative->Core;
6662 pReNative->Core = pEntry->InitialState;
6663
6664 return off;
6665}
6666
6667
6668#define IEM_MC_ENDIF() } while (0); \
6669 off = iemNativeEmitEndIf(pReNative, off)
6670
6671/** Emits code related to IEM_MC_ENDIF. */
6672DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6673{
6674 /* Check sanity and get the conditional stack entry. */
6675 Assert(off != UINT32_MAX);
6676 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6677 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6678
6679 /*
6680 * Now we have to find common ground between the current core state and
6681 * the snapshotted state of the other branch. Use the smallest common
6682 * denominator and just drop anything that isn't the same in both states.
6683 */
6684 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
6685 * which is why we're doing this at the end of the else-block.
6686 * But we'd need more info about the future for that to be worth the effort. */
6687 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
6688 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
6689 {
6690 /* shadow guest stuff first. */
6691 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
6692 if (fGstRegs)
6693 {
6694 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
6695 do
6696 {
6697 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
6698 fGstRegs &= ~RT_BIT_64(idxGstReg);
6699
6700 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6701 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
6702 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
6703 {
6704 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
6705 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
6706 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
6707 }
6708 } while (fGstRegs);
6709 }
6710 else
6711 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
6712
6713 /* Check variables next. For now we must require them to be identical
6714 or stuff we can recreate. */
6715 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
6716 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
6717 if (fVars)
6718 {
6719 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
6720 do
6721 {
6722 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
6723 fVars &= ~RT_BIT_32(idxVar);
6724
6725 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
6726 {
6727 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
6728 continue;
6729 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6730 {
6731 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6732 if (idxHstReg != UINT8_MAX)
6733 {
6734 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6735 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6736 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
6737 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6738 }
6739 continue;
6740 }
6741 }
6742 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
6743 continue;
6744
6745 /* Irreconcilable, so drop it. */
6746 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6747 if (idxHstReg != UINT8_MAX)
6748 {
6749 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6750 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6751 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
6752 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6753 }
6754 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
6755 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6756 } while (fVars);
6757 }
6758
6759 /* Finally, check that the host register allocations matches. */
6760 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
6761 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
6762 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
6763 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
6764 }
6765
6766 /*
6767 * Define the endif label and maybe the else one if we're still in the 'if' part.
6768 */
6769 if (!pEntry->fInElse)
6770 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6771 else
6772 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
6773 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
6774
6775 /* Pop the conditional stack. */
6776 pReNative->cCondDepth -= 1;
6777
6778 return off;
6779}
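/*
 * Illustrative picture of the code shape produced by the IF/ELSE/ENDIF
 * emitters (label names are made up; real labels are keyed by the condition
 * sequence number):
 *
 *          <test>, jcc .Lelse_N        ; emitted by the IEM_MC_IF_XXX emitter
 *          <if-block code>
 *          jmp   .Lendif_N             ; emitted by iemNativeEmitElse()
 *      .Lelse_N:
 *          <else-block code>
 *      .Lendif_N:
 *
 * The register/variable state is snapshotted when the if-block starts, and the
 * two branches are reconciled here at the endif by dropping anything that
 * differs (shadow copies, variable registers), longjmp'ing if that fails.
 */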
6780
6781
6782#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
6783 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
6784 do {
6785
6786/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
6787DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6788{
6789 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6790
6791 /* Get the eflags. */
6792 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6793 kIemNativeGstRegUse_ReadOnly);
6794
6795 /* Test and jump. */
6796 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6797
6798 /* Free but don't flush the EFlags register. */
6799 iemNativeRegFreeTmp(pReNative, idxEflReg);
6800
6801 /* Make a copy of the core state now as we start the if-block. */
6802 iemNativeCondStartIfBlock(pReNative, off);
6803
6804 return off;
6805}
6806
6807
6808#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
6809 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
6810 do {
6811
6812/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
6813DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6814{
6815 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6816
6817 /* Get the eflags. */
6818 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6819 kIemNativeGstRegUse_ReadOnly);
6820
6821 /* Test and jump. */
6822 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6823
6824 /* Free but don't flush the EFlags register. */
6825 iemNativeRegFreeTmp(pReNative, idxEflReg);
6826
6827 /* Make a copy of the core state now as we start the if-block. */
6828 iemNativeCondStartIfBlock(pReNative, off);
6829
6830 return off;
6831}
6832
6833
6834#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
6835 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
6836 do {
6837
6838/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
6839DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6840{
6841 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6842
6843 /* Get the eflags. */
6844 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6845 kIemNativeGstRegUse_ReadOnly);
6846
6847 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6848 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6849
6850 /* Test and jump. */
6851 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6852
6853 /* Free but don't flush the EFlags register. */
6854 iemNativeRegFreeTmp(pReNative, idxEflReg);
6855
6856 /* Make a copy of the core state now as we start the if-block. */
6857 iemNativeCondStartIfBlock(pReNative, off);
6858
6859 return off;
6860}
6861
6862
6863#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
6864 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
6865 do {
6866
6867/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
6868DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6869{
6870 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6871
6872 /* Get the eflags. */
6873 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6874 kIemNativeGstRegUse_ReadOnly);
6875
6876 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6877 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6878
6879 /* Test and jump. */
6880 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6881
6882 /* Free but don't flush the EFlags register. */
6883 iemNativeRegFreeTmp(pReNative, idxEflReg);
6884
6885 /* Make a copy of the core state now as we start the if-block. */
6886 iemNativeCondStartIfBlock(pReNative, off);
6887
6888 return off;
6889}
6890
6891
6892#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
6893 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
6894 do {
6895
6896#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
6897 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
6898 do {
6899
6900/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
6901DECL_INLINE_THROW(uint32_t)
6902iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6903 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6904{
6905 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6906
6907 /* Get the eflags. */
6908 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6909 kIemNativeGstRegUse_ReadOnly);
6910
6911 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6912 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6913
6914 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6915 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6916 Assert(iBitNo1 != iBitNo2);
6917
6918#ifdef RT_ARCH_AMD64
6919 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
6920
6921 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6922 if (iBitNo1 > iBitNo2)
6923 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6924 else
6925 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6926 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6927
6928#elif defined(RT_ARCH_ARM64)
6929 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6930 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6931
6932 /* and tmpreg, eflreg, #1<<iBitNo1 */
6933 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6934
6935 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6936 if (iBitNo1 > iBitNo2)
6937 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6938 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6939 else
6940 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6941 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6942
6943 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6944
6945#else
6946# error "Port me"
6947#endif
6948
6949 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6950 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6951 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6952
6953 /* Free but don't flush the EFlags and tmp registers. */
6954 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6955 iemNativeRegFreeTmp(pReNative, idxEflReg);
6956
6957 /* Make a copy of the core state now as we start the if-block. */
6958 iemNativeCondStartIfBlock(pReNative, off);
6959
6960 return off;
6961}
6962
6963
6964#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
6965 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
6966 do {
6967
6968#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
6969 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
6970 do {
6971
6972/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
6973 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
6974DECL_INLINE_THROW(uint32_t)
6975iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
6976 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6977{
6978 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6979
6980 /* We need an if-block label for the inverted variant (it jumps straight to the if-block when the lone bit is set). */
6981 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
6982 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
6983
6984 /* Get the eflags. */
6985 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6986 kIemNativeGstRegUse_ReadOnly);
6987
6988 /* Translate the flag masks to bit numbers. */
6989 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6990 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6991
6992 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6993 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6994 Assert(iBitNo1 != iBitNo);
6995
6996 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6997 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6998 Assert(iBitNo2 != iBitNo);
6999 Assert(iBitNo2 != iBitNo1);
7000
7001#ifdef RT_ARCH_AMD64
7002 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
7003#elif defined(RT_ARCH_ARM64)
7004 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7005#endif
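 /* Note: the temporary register is allocated before any branch is emitted so
    that the register allocator state is identical on the if and else paths. */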
7006
7007 /* Check for the lone bit first. */
7008 if (!fInverted)
7009 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7010 else
7011 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
7012
7013 /* Then extract and compare the other two bits. */
7014#ifdef RT_ARCH_AMD64
7015 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7016 if (iBitNo1 > iBitNo2)
7017 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7018 else
7019 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7020 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7021
7022#elif defined(RT_ARCH_ARM64)
7023 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7024
7025 /* and tmpreg, eflreg, #1<<iBitNo1 */
7026 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7027
7028 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7029 if (iBitNo1 > iBitNo2)
7030 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7031 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7032 else
7033 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7034 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7035
7036 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7037
7038#else
7039# error "Port me"
7040#endif
7041
7042 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7043 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7044 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7045
7046 /* Free but don't flush the EFlags and tmp registers. */
7047 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7048 iemNativeRegFreeTmp(pReNative, idxEflReg);
7049
7050 /* Make a copy of the core state now as we start the if-block. */
7051 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
7052
7053 return off;
7054}
7055
7056
7057#define IEM_MC_IF_CX_IS_NZ() \
7058 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
7059 do {
7060
7061/** Emits code for IEM_MC_IF_CX_IS_NZ. */
7062DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7063{
7064 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7065
7066 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7067 kIemNativeGstRegUse_ReadOnly);
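 /* Only the low 16 bits (CX) matter here, hence the UINT16_MAX test mask. */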
7068 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
7069 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7070
7071 iemNativeCondStartIfBlock(pReNative, off);
7072 return off;
7073}
7074
7075
7076#define IEM_MC_IF_ECX_IS_NZ() \
7077 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
7078 do {
7079
7080#define IEM_MC_IF_RCX_IS_NZ() \
7081 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
7082 do {
7083
7084/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
7085DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7086{
7087 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7088
7089 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7090 kIemNativeGstRegUse_ReadOnly);
7091 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
7092 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7093
7094 iemNativeCondStartIfBlock(pReNative, off);
7095 return off;
7096}
7097
7098
7099#define IEM_MC_IF_CX_IS_NOT_ONE() \
7100 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
7101 do {
7102
7103/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
7104DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7105{
7106 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7107
7108 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7109 kIemNativeGstRegUse_ReadOnly);
7110#ifdef RT_ARCH_AMD64
7111 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7112#else
7113 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7114 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7115 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7116#endif
7117 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7118
7119 iemNativeCondStartIfBlock(pReNative, off);
7120 return off;
7121}
7122
7123
7124#define IEM_MC_IF_ECX_IS_NOT_ONE() \
7125 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
7126 do {
7127
7128#define IEM_MC_IF_RCX_IS_NOT_ONE() \
7129 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
7130 do {
7131
7132/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
7133DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7134{
7135 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7136
7137 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7138 kIemNativeGstRegUse_ReadOnly);
7139 if (f64Bit)
7140 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7141 else
7142 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7143 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7144
7145 iemNativeCondStartIfBlock(pReNative, off);
7146 return off;
7147}
7148
7149
7150#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7151 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
7152 do {
7153
7154#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7155 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
7156 do {
7157
7158/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
7159 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7160DECL_INLINE_THROW(uint32_t)
7161iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
7162{
7163 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7164
7165 /* We have to load both RCX and EFLAGS before we can start branching,
7166 otherwise we'll end up in the else-block with an inconsistent
7167 register allocator state.
7168 Doing EFLAGS first as it's more likely to be loaded, right? */
7169 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7170 kIemNativeGstRegUse_ReadOnly);
7171 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7172 kIemNativeGstRegUse_ReadOnly);
7173
7174 /** @todo we could reduce this to a single branch instruction by spending a
7175 * temporary register and some setnz stuff. Not sure if loops are
7176 * worth it. */
7177 /* Check CX. */
7178#ifdef RT_ARCH_AMD64
7179 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7180#else
7181 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7182 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7183 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7184#endif
7185
7186 /* Check the EFlags bit. */
7187 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7188 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7189 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7190 !fCheckIfSet /*fJmpIfSet*/);
7191
7192 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7193 iemNativeRegFreeTmp(pReNative, idxEflReg);
7194
7195 iemNativeCondStartIfBlock(pReNative, off);
7196 return off;
7197}
7198
7199
7200#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7201 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
7202 do {
7203
7204#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7205 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
7206 do {
7207
7208#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7209 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
7210 do {
7211
7212#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7213 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
7214 do {
7215
7216/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
7217 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
7218 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
7219 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7220DECL_INLINE_THROW(uint32_t)
7221iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7222 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
7223{
7224 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7225
7226 /* We have to load both RCX and EFLAGS before we can start branching,
7227 otherwise we'll end up in the else-block with an inconsistent
7228 register allocator state.
7229 Doing EFLAGS first as it's more likely to be loaded, right? */
7230 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7231 kIemNativeGstRegUse_ReadOnly);
7232 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7233 kIemNativeGstRegUse_ReadOnly);
7234
7235 /** @todo we could reduce this to a single branch instruction by spending a
7236 * temporary register and some setnz stuff. Not sure if loops are
7237 * worth it. */
7238 /* Check RCX/ECX. */
7239 if (f64Bit)
7240 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7241 else
7242 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7243
7244 /* Check the EFlags bit. */
7245 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7246 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7247 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7248 !fCheckIfSet /*fJmpIfSet*/);
7249
7250 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7251 iemNativeRegFreeTmp(pReNative, idxEflReg);
7252
7253 iemNativeCondStartIfBlock(pReNative, off);
7254 return off;
7255}
7256
7257
7258
7259/*********************************************************************************************************************************
7260* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7261*********************************************************************************************************************************/
7262/** Number of hidden arguments for CIMPL calls.
7263 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
7264#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7265# define IEM_CIMPL_HIDDEN_ARGS 3
7266#else
7267# define IEM_CIMPL_HIDDEN_ARGS 2
7268#endif
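/* Note: the extra hidden argument on Windows/AMD64 presumably stems from
   VBOXSTRICTRC being returned via a hidden buffer pointer under the MS x64
   calling convention when strict status codes are enabled. */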
7269
7270#define IEM_MC_NOREF(a_Name) \
7271 RT_NOREF_PV(a_Name)
7272
7273#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
7274 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
7275
7276#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
7277 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
7278
7279#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
7280 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
7281
7282#define IEM_MC_LOCAL(a_Type, a_Name) \
7283 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
7284
7285#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
7286 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
7287
7288
7289/**
7290 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
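 *
 * For CIMPL calls these are presumably the pVCpu and cbInstr parameters (plus
 * the strict status buffer on Windows/AMD64, see IEM_CIMPL_HIDDEN_ARGS), while
 * AIMPL_WITH_FXSTATE calls presumably take the FXSTATE pointer as their single
 * hidden argument.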
7291 */
7292DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
7293{
7294 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
7295 return IEM_CIMPL_HIDDEN_ARGS;
7296 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
7297 return 1;
7298 return 0;
7299}
7300
7301
7302/**
7303 * Internal work that allocates a variable with kind set to
7304 * kIemNativeVarKind_Invalid and no current stack allocation.
7305 *
7306 * The kind will either be set by the caller or later when the variable is first
7307 * assigned a value.
7308 */
7309static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7310{
7311 Assert(cbType > 0 && cbType <= 64);
7312 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7313 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7314 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7315 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7316 pReNative->Core.aVars[idxVar].cbVar = cbType;
7317 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7318 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7319 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7320 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7321 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7322 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7323 pReNative->Core.aVars[idxVar].u.uValue = 0;
7324 return idxVar;
7325}
7326
7327
7328/**
7329 * Internal work that allocates an argument variable w/o setting enmKind.
7330 */
7331static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7332{
7333 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7334 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7335 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7336
7337 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7338 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
7339 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7340 return idxVar;
7341}
7342
7343
7344/**
7345 * Gets the stack slot for a stack variable, allocating one if necessary.
7346 *
7347 * Calling this function implies that the stack slot will contain a valid
7348 * variable value. The caller deals with any register currently assigned to the
7349 * variable, typically by spilling it into the stack slot.
7350 *
7351 * @returns The stack slot number.
7352 * @param pReNative The recompiler state.
7353 * @param idxVar The variable.
7354 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7355 */
7356DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7357{
7358 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7359 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
7360
7361 /* Already got a slot? */
7362 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7363 if (idxStackSlot != UINT8_MAX)
7364 {
7365 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7366 return idxStackSlot;
7367 }
7368
7369 /*
7370 * A single slot is easy to allocate.
7371 * Allocate them from the top end, closest to BP, to reduce the displacement.
7372 */
7373 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
7374 {
7375 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7376 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7377 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7378 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
7379 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
7380 return (uint8_t)iSlot;
7381 }
7382
7383 /*
7384 * We need more than one stack slot.
7385 *
7386 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7387 */
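 /* Worked example of the masks above: a 32 byte variable needs cSlots = 4,
    so fBitAllocMask = 0xf, and fBitAlignMask = 3 forces the first slot to be
    4-slot aligned. */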
7388 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7389 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
7390 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
7391 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
7392 uint32_t bmStack = ~pReNative->Core.bmStack;
7393 while (bmStack != UINT32_MAX)
7394 {
7395/** @todo allocate from the top to reduce BP displacement. */
7396 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7397 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7398 if (!(iSlot & fBitAlignMask))
7399 {
7400 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7401 {
7402 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7403 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
7404 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
7405 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
7406 return (uint8_t)iSlot;
7407 }
7408 }
7409 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7410 }
7411 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7412}
7413
7414
7415/**
7416 * Changes the variable to a stack variable.
7417 *
7418 * Currently this is only possible to do the first time the variable is used;
7419 * switching later can be implemented but hasn't been done.
7420 *
7421 * @param pReNative The recompiler state.
7422 * @param idxVar The variable.
7423 * @throws VERR_IEM_VAR_IPE_2
7424 */
7425static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7426{
7427 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7428 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
7429 {
7430 /* We could in theory transition from immediate to stack as well, but it
7431 would involve the caller doing work storing the value on the stack. So,
7432 till that's required we only allow transition from invalid. */
7433 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7434 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7435 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7436 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
7437
7438 /* Note! We don't allocate a stack slot here, that's only done when a
7439 slot is actually needed to hold a variable value. */
7440 }
7441}
7442
7443
7444/**
7445 * Sets the variable to a constant (immediate) value.
7446 *
7447 * This does not require stack storage as we know the value and can always
7448 * reload it, unless of course it's referenced.
7449 *
7450 * @param pReNative The recompiler state.
7451 * @param idxVar The variable.
7452 * @param uValue The immediate value.
7453 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7454 */
7455static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7456{
7457 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7458 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
7459 {
7460 /* Only simple transitions for now. */
7461 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7462 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7463 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
7464 }
7465 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7466
7467 pReNative->Core.aVars[idxVar].u.uValue = uValue;
7468 AssertMsg( pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
7469 || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
7470 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
7471}
7472
7473
7474/**
7475 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7476 *
7477 * This does not require stack storage as we know the value and can always
7478 * reload it. Loading is postponed till needed.
7479 *
7480 * @param pReNative The recompiler state.
7481 * @param idxVar The variable.
7482 * @param idxOtherVar The variable to take the (stack) address of.
7483 *
7484 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7485 */
7486static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7487{
7488 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7489 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7490
7491 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7492 {
7493 /* Only simple transitions for now. */
7494 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7495 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7496 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7497 }
7498 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7499
7500 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
7501
7502 /* Update the other variable, ensure it's a stack variable. */
7503 /** @todo handle variables with const values... that'll go boom now. */
7504 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7505 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
7506}
7507
7508
7509/**
7510 * Sets the variable to a reference (pointer) to a guest register reference.
7511 *
7512 * This does not require stack storage as we know the value and can always
7513 * reload it. Loading is postponed till needed.
7514 *
7515 * @param pReNative The recompiler state.
7516 * @param idxVar The variable.
7517 * @param enmRegClass The class of guest registers to reference.
7518 * @param idxReg The register within @a enmRegClass to reference.
7519 *
7520 * @throws VERR_IEM_VAR_IPE_2
7521 */
7522static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7523 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7524{
7525 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7526
7527 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
7528 {
7529 /* Only simple transitions for now. */
7530 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7531 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7532 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
7533 }
7534 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7535
7536 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
7537 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
7538}
7539
7540
7541DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7542{
7543 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7544}
7545
7546
7547DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7548{
7549 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7550
7551 /* Since we're using a generic uint64_t value type, we must truncate it if
7552 the variable is smaller, otherwise we may end up with a too large value when
7553 scaling up an imm8 w/ sign-extension.
7554
7555 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7556 in the bios, bx=1) when running on arm, because clang expects 16-bit
7557 register parameters to have bits 16 and up set to zero. Instead of
7558 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7559 CF value in the result. */
7560 switch (cbType)
7561 {
7562 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7563 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7564 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7565 }
7566 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7567 return idxVar;
7568}
7569
7570
7571DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7572{
7573 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7574 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7575 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7576 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7577
7578 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7579 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
7580 return idxArgVar;
7581}
7582
7583
7584DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7585{
7586 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7587 /* Don't set to stack now, leave that to the first use as for instance
7588 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7589 return idxVar;
7590}
7591
7592
7593DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7594{
7595 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7596
7597 /* Since we're using a generic uint64_t value type, we must truncate it if
7598 the variable is smaller, otherwise we may end up with a too large value when
7599 scaling up an imm8 w/ sign-extension. */
7600 switch (cbType)
7601 {
7602 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7603 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7604 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7605 }
7606 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7607 return idxVar;
7608}
7609
7610
7611/**
7612 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7613 * fixed till we call iemNativeVarRegisterRelease.
7614 *
7615 * @returns The host register number.
7616 * @param pReNative The recompiler state.
7617 * @param idxVar The variable.
7618 * @param poff Pointer to the instruction buffer offset.
7619 * In case a register needs to be freed up or the value
7620 * loaded off the stack.
7621 * @param fInitialized Set if the variable must already have been initialized.
7622 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7623 * the case.
7624 * @param idxRegPref Preferred register number or UINT8_MAX.
7625 */
7626DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7627 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7628{
7629 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7630 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
7631 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7632
7633 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7634 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7635 {
7636 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
7637 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7638 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7639 return idxReg;
7640 }
7641
7642 /*
7643 * If the kind of variable has not yet been set, default to 'stack'.
7644 */
7645 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
7646 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7647 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
7648 iemNativeVarSetKindToStack(pReNative, idxVar);
7649
7650 /*
7651 * We have to allocate a register for the variable, even if it's a stack one,
7652 * as we don't know if modifications are being made to it before it's
7653 * finalized (todo: analyze and insert hints about that?).
7654 *
7655 * If we can, we try to get the correct register for argument variables. This
7656 * assumes that most argument variables are fetched as close as possible
7657 * to the actual call, so that there aren't any interfering hidden calls
7658 * (memory accesses, etc.) in between.
7659 *
7660 * If we cannot, or it's a local (non-argument) variable, we make sure no
7661 * argument registers that will be used by this MC block are allocated here,
7662 * and we always prefer non-volatile registers to avoid having to spill stuff
7663 * for internal calls.
7664 */
7665 /** @todo Detect too early argument value fetches and warn about hidden
7666 * calls causing less optimal code to be generated in the python script. */
7667
7668 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7669 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7670 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7671 {
7672 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7673 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7674 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7675 }
7676 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7677 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7678 {
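 /* Candidate set: registers that are currently free, aren't shadowing any
    guest register, aren't fixed/reserved, and won't be needed as call
    argument registers by this MC block. */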
7679 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7680 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7681 & ~pReNative->Core.bmHstRegsWithGstShadow
7682 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7683 & fNotArgsMask;
7684 if (fRegs)
7685 {
7686 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7687 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7688 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7689 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7690 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7691 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7692 }
7693 else
7694 {
7695 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7696 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7697 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7698 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7699 }
7700 }
7701 else
7702 {
7703 idxReg = idxRegPref;
7704 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7705 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
7706 }
7707 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7708 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7709
7710 /*
7711 * Load it off the stack if we've got a stack slot.
7712 */
7713 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7714 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7715 {
7716 Assert(fInitialized);
7717 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7718 switch (pReNative->Core.aVars[idxVar].cbVar)
7719 {
7720 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7721 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7722 case 3: AssertFailed(); RT_FALL_THRU();
7723 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7724 default: AssertFailed(); RT_FALL_THRU();
7725 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7726 }
7727 }
7728 else
7729 {
7730 Assert(idxStackSlot == UINT8_MAX);
7731 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7732 }
7733 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7734 return idxReg;
7735}
7736
7737
7738/**
7739 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7740 * guest register.
7741 *
7742 * This function makes sure there is a register for it and sets it to be the
7743 * current shadow copy of @a enmGstReg.
7744 *
7745 * @returns The host register number.
7746 * @param pReNative The recompiler state.
7747 * @param idxVar The variable.
7748 * @param enmGstReg The guest register this variable will be written to
7749 * after this call.
7750 * @param poff Pointer to the instruction buffer offset.
7751 * In case a register needs to be freed up or if the
7752 * variable content needs to be loaded off the stack.
7753 *
7754 * @note We DO NOT expect @a idxVar to be an argument variable,
7755 * because this function is only used in the commit stage of an
7756 * instruction.
7757 */
7758DECL_HIDDEN_THROW(uint8_t)
7759iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7760{
7761 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7762 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7763 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
7764 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
7765 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
7766 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
7767 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7768 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7769
7770 /*
7771 * This shouldn't ever be used for arguments, unless it's in a weird else
7772 * branch that doesn't do any calling and even then it's questionable.
7773 *
7774 * However, in case someone writes crazy wrong MC code and does register
7775 * updates before making calls, just use the regular register allocator to
7776 * ensure we get a register suitable for the intended argument number.
7777 */
7778 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7779
7780 /*
7781 * If there is already a register for the variable, we transfer/set the
7782 * guest shadow copy assignment to it.
7783 */
7784 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7785 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7786 {
7787 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7788 {
7789 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7790 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7791 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7792 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7793 }
7794 else
7795 {
7796 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7797 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7798 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7799 }
7800 /** @todo figure this one out. We need some way of making sure the register isn't
7801 * modified after this point, just in case we start writing crappy MC code. */
7802 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
7803 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7804 return idxReg;
7805 }
7806 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7807
7808 /*
7809 * Because this is supposed to be the commit stage, we just tag along with the
7810 * temporary register allocator and upgrade the register to a variable register.
7811 */
7812 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7813 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7814 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7815 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7816 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7817 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7818
7819 /*
7820 * Now we need to load the register value.
7821 */
7822 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
7823 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
7824 else
7825 {
7826 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7827 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7828 switch (pReNative->Core.aVars[idxVar].cbVar)
7829 {
7830 case sizeof(uint64_t):
7831 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7832 break;
7833 case sizeof(uint32_t):
7834 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7835 break;
7836 case sizeof(uint16_t):
7837 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7838 break;
7839 case sizeof(uint8_t):
7840 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7841 break;
7842 default:
7843 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7844 }
7845 }
7846
7847 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7848 return idxReg;
7849}
7850
7851
7852/**
7853 * Sets the host register for @a idxVarRc to @a idxReg.
7854 *
7855 * The register must not be allocated. Any guest register shadowing will be
7856 * implicitly dropped by this call.
7857 *
7858 * The variable must not have any register associated with it (causes
7859 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
7860 * implied.
7861 *
7862 * @returns idxReg
7863 * @param pReNative The recompiler state.
7864 * @param idxVar The variable.
7865 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
7866 * @param off For recording in debug info.
7867 *
7868 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
7869 */
7870DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
7871{
7872 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7873 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7874 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
7875 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
7876 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
7877
7878 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
7879 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7880
7881 iemNativeVarSetKindToStack(pReNative, idxVar);
7882 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7883
7884 return idxReg;
7885}
7886
7887
7888/**
7889 * A convenient helper function.
7890 */
7891DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7892 uint8_t idxReg, uint32_t *poff)
7893{
7894 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
7895 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7896 return idxReg;
7897}
7898
7899
7900/**
7901 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7902 *
7903 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7904 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7905 * requirement of flushing anything in volatile host registers when making a
7906 * call.
7907 *
7908 * @returns New @a off value.
7909 * @param pReNative The recompiler state.
7910 * @param off The code buffer position.
7911 * @param fHstRegsNotToSave Set of registers not to save & restore.
7912 */
7913DECL_HIDDEN_THROW(uint32_t)
7914iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7915{
7916 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7917 if (fHstRegs)
7918 {
7919 do
7920 {
7921 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7922 fHstRegs &= ~RT_BIT_32(idxHstReg);
7923
7924 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7925 {
7926 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7927 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7928 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7929 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7930 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7931 switch (pReNative->Core.aVars[idxVar].enmKind)
7932 {
7933 case kIemNativeVarKind_Stack:
7934 {
7935 /* Temporarily spill the variable register. */
7936 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7937 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7938 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7939 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7940 continue;
7941 }
7942
7943 case kIemNativeVarKind_Immediate:
7944 case kIemNativeVarKind_VarRef:
7945 case kIemNativeVarKind_GstRegRef:
7946 /* It is weird to have any of these loaded at this point. */
7947 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7948 continue;
7949
7950 case kIemNativeVarKind_End:
7951 case kIemNativeVarKind_Invalid:
7952 break;
7953 }
7954 AssertFailed();
7955 }
7956 else
7957 {
7958 /*
7959 * Allocate a temporary stack slot and spill the register to it.
7960 */
7961 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7962 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7963 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7964 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7965 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7966 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7967 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7968 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7969 }
7970 } while (fHstRegs);
7971 }
7972 return off;
7973}
7974
7975
7976/**
7977 * Emit code to restore volatile registers after a call to a helper.
7978 *
7979 * @returns New @a off value.
7980 * @param pReNative The recompiler state.
7981 * @param off The code buffer position.
7982 * @param fHstRegsNotToSave Set of registers not to save & restore.
7983 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7984 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7985 */
7986DECL_HIDDEN_THROW(uint32_t)
7987iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7988{
7989 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7990 if (fHstRegs)
7991 {
7992 do
7993 {
7994 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7995 fHstRegs &= ~RT_BIT_32(idxHstReg);
7996
7997 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7998 {
7999 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8000 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
8001 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
8002 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
8003 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8004 switch (pReNative->Core.aVars[idxVar].enmKind)
8005 {
8006 case kIemNativeVarKind_Stack:
8007 {
8008 /* Unspill the variable register. */
8009 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8010 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%d/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8011 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8012 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8013 continue;
8014 }
8015
8016 case kIemNativeVarKind_Immediate:
8017 case kIemNativeVarKind_VarRef:
8018 case kIemNativeVarKind_GstRegRef:
8019 /* It is weird to have any of these loaded at this point. */
8020 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8021 continue;
8022
8023 case kIemNativeVarKind_End:
8024 case kIemNativeVarKind_Invalid:
8025 break;
8026 }
8027 AssertFailed();
8028 }
8029 else
8030 {
8031 /*
8032 * Restore from temporary stack slot.
8033 */
8034 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8035 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8036 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8037 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8038
8039 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8040 }
8041 } while (fHstRegs);
8042 }
8043 return off;
8044}
8045
8046
8047/**
8048 * Worker that frees the stack slots for variable @a idxVar if any are allocated.
8049 *
8050 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8051 */
8052DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8053{
8054 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8055 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8056 {
8057 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8058 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8059 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8060 Assert(cSlots > 0);
8061 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8062 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
8063 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8064 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8065 }
8066 else
8067 Assert(idxStackSlot == UINT8_MAX);
8068}
8069
8070
8071/**
8072 * Worker that frees a single variable.
8073 *
8074 * ASSUMES that @a idxVar is valid.
8075 */
8076DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8077{
8078 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8079 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8080 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8081
8082 /* Free the host register first if any assigned. */
8083 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8084 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8085 {
8086 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
8087 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8088 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8089 }
8090
8091 /* Free argument mapping. */
8092 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8093 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8094 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8095
8096 /* Free the stack slots. */
8097 iemNativeVarFreeStackSlots(pReNative, idxVar);
8098
8099 /* Free the actual variable. */
8100 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8101 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8102}
8103
8104
8105/**
8106 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8107 */
8108DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8109{
8110 while (bmVars != 0)
8111 {
8112 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8113 bmVars &= ~RT_BIT_32(idxVar);
8114
8115#if 1 /** @todo optimize by simplifying this later... */
8116 iemNativeVarFreeOneWorker(pReNative, idxVar);
8117#else
8118 /* Only need to free the host register, the rest is done as bulk updates below. */
8119 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8120 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8121 {
8122 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
8123 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8124 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8125 }
8126#endif
8127 }
8128#if 0 /** @todo optimize by simplifying this later... */
8129 pReNative->Core.bmVars = 0;
8130 pReNative->Core.bmStack = 0;
8131 pReNative->Core.u64ArgVars = UINT64_MAX;
8132#endif
8133}
8134
8135
8136/**
8137 * This is called by IEM_MC_END() to clean up all variables.
8138 */
8139DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
8140{
8141 uint32_t const bmVars = pReNative->Core.bmVars;
8142 if (bmVars != 0)
8143 iemNativeVarFreeAllSlow(pReNative, bmVars);
8144 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8145 Assert(pReNative->Core.bmStack == 0);
8146}
8147
8148
8149#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
8150
8151/**
8152 * This is called by IEM_MC_FREE_LOCAL.
8153 */
8154DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8155{
8156 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8157 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
8158 iemNativeVarFreeOneWorker(pReNative, idxVar);
8159}
8160
8161
8162#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
8163
8164/**
8165 * This is called by IEM_MC_FREE_ARG.
8166 */
8167DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8168{
8169 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8170 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
8171 iemNativeVarFreeOneWorker(pReNative, idxVar);
8172}
8173
8174
8175#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
8176
8177/**
8178 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
8179 */
8180DECL_INLINE_THROW(uint32_t)
8181iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
8182{
8183 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
8184 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
8185 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8186 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
8187 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
8188
8189 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
8190 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
8191 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
8192 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8193
8194 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
8195
8196 /*
8197 * Special case for immediates.
8198 */
8199 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
8200 {
8201 switch (pReNative->Core.aVars[idxVarDst].cbVar)
8202 {
8203 case sizeof(uint16_t):
8204 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
8205 break;
8206 case sizeof(uint32_t):
8207 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
8208 break;
8209 default: AssertFailed(); break;
8210 }
8211 }
8212 else
8213 {
8214 /*
8215 * The generic solution for now.
8216 */
8217 /** @todo optimize this by having the python script make sure the source
8218 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
8219 * statement. Then we could just transfer the register assignments. */
8220 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
8221 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
8222 switch (pReNative->Core.aVars[idxVarDst].cbVar)
8223 {
8224 case sizeof(uint16_t):
8225 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
8226 break;
8227 case sizeof(uint32_t):
8228 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
8229 break;
8230 default: AssertFailed(); break;
8231 }
8232 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
8233 iemNativeVarRegisterRelease(pReNative, idxVarDst);
8234 }
8235 return off;
8236}
8237
8238
8239
8240/*********************************************************************************************************************************
8241* Emitters for IEM_MC_CALL_CIMPL_XXX *
8242*********************************************************************************************************************************/
8243
8244/**
8245 * Emits code to load a reference to the given guest register into @a idxGprDst.
8246 */
8247DECL_INLINE_THROW(uint32_t)
8248iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8249 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8250{
8251 /*
8252 * Get the offset relative to the CPUMCTX structure.
8253 */
8254 uint32_t offCpumCtx;
8255 switch (enmClass)
8256 {
8257 case kIemNativeGstRegRef_Gpr:
8258 Assert(idxRegInClass < 16);
8259 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8260 break;
8261
8262 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8263 Assert(idxRegInClass < 4);
8264 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8265 break;
8266
8267 case kIemNativeGstRegRef_EFlags:
8268 Assert(idxRegInClass == 0);
8269 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8270 break;
8271
8272 case kIemNativeGstRegRef_MxCsr:
8273 Assert(idxRegInClass == 0);
8274 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8275 break;
8276
8277 case kIemNativeGstRegRef_FpuReg:
8278 Assert(idxRegInClass < 8);
8279 AssertFailed(); /** @todo what kind of indexing? */
8280 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8281 break;
8282
8283 case kIemNativeGstRegRef_MReg:
8284 Assert(idxRegInClass < 8);
8285 AssertFailed(); /** @todo what kind of indexing? */
8286 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8287 break;
8288
8289 case kIemNativeGstRegRef_XReg:
8290 Assert(idxRegInClass < 16);
8291 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8292 break;
8293
8294 default:
8295 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8296 }
8297
8298 /*
8299 * Load the value into the destination register.
8300 */
8301#ifdef RT_ARCH_AMD64
8302 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8303
8304#elif defined(RT_ARCH_ARM64)
8305 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8306 Assert(offCpumCtx < 4096);
8307 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8308
8309#else
8310# error "Port me!"
8311#endif
8312
8313 return off;
8314}
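/*
 * Rough sketch of what the emitter above produces (register names are
 * illustrative only): on AMD64 it is a single
 *      lea rDst, [pVCpu + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + offCpumCtx]
 * off the fixed pVCpu register, and on ARM64 a single
 *      add xDst, xPCpumCtx, #offCpumCtx
 * off the fixed CPUMCTX pointer register, so the resulting reference is
 * always a host address pointing into the guest context.
 */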
8315
8316
8317/**
8318 * Common code for CIMPL and AIMPL calls.
8319 *
8320 * These are calls that use argument variables and such. They should not be
8321 * confused with internal calls required to implement an MC operation,
8322 * like a TLB load and similar.
8323 *
8324 * Upon return all that is left to do is to load any hidden arguments and
8325 * perform the call. All argument variables are freed.
8326 *
8327 * @returns New code buffer offset; throws VBox status code on error.
8328 * @param pReNative The native recompile state.
8329 * @param off The code buffer offset.
8330 * @param   cArgs           The total number of arguments (includes hidden
8331 * count).
8332 * @param cHiddenArgs The number of hidden arguments. The hidden
8333 * arguments must not have any variable declared for
8334 * them, whereas all the regular arguments must
8335 * (tstIEMCheckMc ensures this).
8336 */
8337DECL_HIDDEN_THROW(uint32_t)
8338iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8339{
8340#ifdef VBOX_STRICT
8341 /*
8342 * Assert sanity.
8343 */
8344 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8345 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8346 for (unsigned i = 0; i < cHiddenArgs; i++)
8347 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8348 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8349 {
8350 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8351 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8352 }
8353 iemNativeRegAssertSanity(pReNative);
8354#endif
8355
8356 /*
8357 * Before we do anything else, go over variables that are referenced and
8358 * make sure they are not in a register.
8359 */
8360 uint32_t bmVars = pReNative->Core.bmVars;
8361 if (bmVars)
8362 {
8363 do
8364 {
8365 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8366 bmVars &= ~RT_BIT_32(idxVar);
8367
8368 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8369 {
8370 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8371 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8372 {
8373 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8374 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8375 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8376 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8377 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8378
8379 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8380 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8381 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8382 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8383 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8384 }
8385 }
8386 } while (bmVars != 0);
8387#if 0 //def VBOX_STRICT
8388 iemNativeRegAssertSanity(pReNative);
8389#endif
8390 }
8391
8392 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8393
8394 /*
8395 * First, go over the host registers that will be used for arguments and make
8396 * sure they either hold the desired argument or are free.
8397 */
8398 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8399 {
8400 for (uint32_t i = 0; i < cRegArgs; i++)
8401 {
8402 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8403 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8404 {
8405 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8406 {
8407 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8408 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8409 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
8410 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8411 if (uArgNo == i)
8412                    { /* perfect */ }
8413 /* The variable allocator logic should make sure this is impossible,
8414 except for when the return register is used as a parameter (ARM,
8415 but not x86). */
8416#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8417 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8418 {
8419# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8420# error "Implement this"
8421# endif
8422 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8423 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8424 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8425 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8426 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8427 }
8428#endif
8429 else
8430 {
8431 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8432
8433 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
8434 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8435 else
8436 {
8437 /* just free it, can be reloaded if used again */
8438 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8439 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8440 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8441 }
8442 }
8443 }
8444 else
8445 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8446 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8447 }
8448 }
8449#if 0 //def VBOX_STRICT
8450 iemNativeRegAssertSanity(pReNative);
8451#endif
8452 }
8453
8454 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8455
8456#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8457 /*
8458 * If there are any stack arguments, make sure they are in their place as well.
8459 *
8460     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8461     * the caller) will be loading it later and it must be free (see the first loop).
8462 */
8463 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8464 {
8465 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8466 {
8467 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8468 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8469 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8470 {
8471 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8472 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
8473 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
8474 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8475 }
8476 else
8477 {
8478 /* Use ARG0 as temp for stuff we need registers for. */
8479 switch (pReNative->Core.aVars[idxVar].enmKind)
8480 {
8481 case kIemNativeVarKind_Stack:
8482 {
8483 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8484 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8485 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8486 iemNativeStackCalcBpDisp(idxStackSlot));
8487 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8488 continue;
8489 }
8490
8491 case kIemNativeVarKind_Immediate:
8492 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
8493 continue;
8494
8495 case kIemNativeVarKind_VarRef:
8496 {
8497 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8498 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8499 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8500 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8501 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8502 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8503 {
8504 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8505 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8506 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8507 }
8508 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8509 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8510 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8511 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8512 continue;
8513 }
8514
8515 case kIemNativeVarKind_GstRegRef:
8516 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8517 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8518 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8519 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8520 continue;
8521
8522 case kIemNativeVarKind_Invalid:
8523 case kIemNativeVarKind_End:
8524 break;
8525 }
8526 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8527 }
8528 }
8529# if 0 //def VBOX_STRICT
8530 iemNativeRegAssertSanity(pReNative);
8531# endif
8532 }
8533#else
8534 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8535#endif
8536
8537 /*
8538 * Make sure the argument variables are loaded into their respective registers.
8539 *
8540 * We can optimize this by ASSUMING that any register allocations are for
8541     * registers that have already been loaded and are ready. The previous step
8542 * saw to that.
8543 */
8544 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8545 {
8546 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8547 {
8548 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8549 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8550 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
8551 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8552 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8553 else
8554 {
8555 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8556 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8557 {
8558 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
8559 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
8560 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
8561 | RT_BIT_32(idxArgReg);
8562 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
8563 }
8564 else
8565 {
8566 /* Use ARG0 as temp for stuff we need registers for. */
8567 switch (pReNative->Core.aVars[idxVar].enmKind)
8568 {
8569 case kIemNativeVarKind_Stack:
8570 {
8571 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8572 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8573 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8574 continue;
8575 }
8576
8577 case kIemNativeVarKind_Immediate:
8578 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
8579 continue;
8580
8581 case kIemNativeVarKind_VarRef:
8582 {
8583 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8584 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8585 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8586 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8587 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8588 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8589 {
8590 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8591 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8592 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8593 }
8594 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8595 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8596 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8597 continue;
8598 }
8599
8600 case kIemNativeVarKind_GstRegRef:
8601 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8602 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8603 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8604 continue;
8605
8606 case kIemNativeVarKind_Invalid:
8607 case kIemNativeVarKind_End:
8608 break;
8609 }
8610 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8611 }
8612 }
8613 }
8614#if 0 //def VBOX_STRICT
8615 iemNativeRegAssertSanity(pReNative);
8616#endif
8617 }
8618#ifdef VBOX_STRICT
8619 else
8620 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8621 {
8622 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8623 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8624 }
8625#endif
8626
8627 /*
8628 * Free all argument variables (simplified).
8629 * Their lifetime always expires with the call they are for.
8630 */
8631 /** @todo Make the python script check that arguments aren't used after
8632 * IEM_MC_CALL_XXXX. */
8633     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8634      *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8635 * an argument value. There is also some FPU stuff. */
8636 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8637 {
8638 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8639 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8640
8641 /* no need to free registers: */
8642 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8643 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8644 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8645 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8646 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8647 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8648
8649 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8650 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8651 iemNativeVarFreeStackSlots(pReNative, idxVar);
8652 }
8653 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8654
8655 /*
8656 * Flush volatile registers as we make the call.
8657 */
8658 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8659
8660 return off;
8661}
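/*
 * On return from iemNativeEmitCallCommon every declared argument variable sits
 * in its final IEMNATIVE_CALL_ARGx_GREG (or stack slot when
 * IEMNATIVE_FP_OFF_STACK_ARG0 is in play), the volatile registers have been
 * flushed, and the argument variables themselves have been retired.  As a
 * hedged illustration only, assuming the usual SysV AMD64 mapping, that means
 * the arguments end up in rdi, rsi, rdx, rcx, r8 and r9 in that order.
 */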
8662
8663
8664/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
8665DECL_HIDDEN_THROW(uint32_t)
8666iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
8667 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
8668
8669{
8670 /*
8671 * Do all the call setup and cleanup.
8672 */
8673 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
8674
8675 /*
8676 * Load the two or three hidden arguments.
8677 */
8678#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8679 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
8680 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8681 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
8682#else
8683 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8684 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
8685#endif
8686
8687 /*
8688 * Make the call and check the return code.
8689 *
8690 * Shadow PC copies are always flushed here, other stuff depends on flags.
8691     * Segment and general purpose registers are explicitly flushed via the
8692 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
8693 * macros.
8694 */
8695 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
8696#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8697 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
8698#endif
8699 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
8700 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
8701 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
8702 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
8703
8704 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
8705}
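/*
 * Sketch of the resulting call shape (parameter names illustrative): in the
 * common configuration the generated code is equivalent to
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, ...);
 * whereas the Windows/AMD64 strict-rc build passes a hidden &rcStrict as the
 * first argument and reloads the status from the shadow area afterwards,
 * exactly as the two preprocessor branches above emit it.
 */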
8706
8707
8708#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
8709 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
8710
8711/** Emits code for IEM_MC_CALL_CIMPL_1. */
8712DECL_INLINE_THROW(uint32_t)
8713iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8714 uintptr_t pfnCImpl, uint8_t idxArg0)
8715{
8716 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8717 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
8718}
8719
8720
8721#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
8722 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
8723
8724/** Emits code for IEM_MC_CALL_CIMPL_2. */
8725DECL_INLINE_THROW(uint32_t)
8726iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8727 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
8728{
8729 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8730 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8731 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
8732}
8733
8734
8735#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
8736 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8737 (uintptr_t)a_pfnCImpl, a0, a1, a2)
8738
8739/** Emits code for IEM_MC_CALL_CIMPL_3. */
8740DECL_INLINE_THROW(uint32_t)
8741iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8742 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8743{
8744 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8745 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8746 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8747 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
8748}
8749
8750
8751#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
8752 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8753 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
8754
8755/** Emits code for IEM_MC_CALL_CIMPL_4. */
8756DECL_INLINE_THROW(uint32_t)
8757iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8758 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8759{
8760 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8761 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8762 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8763 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8764 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
8765}
8766
8767
8768#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
8769 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8770 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
8771
8772/** Emits code for IEM_MC_CALL_CIMPL_5. */
8773DECL_INLINE_THROW(uint32_t)
8774iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8775 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
8776{
8777 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8778 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8779 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8780 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8781 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
8782 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
8783}
8784
8785
8786/** Recompiler debugging: Flush guest register shadow copies. */
8787#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
8788
8789
8790
8791/*********************************************************************************************************************************
8792* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
8793*********************************************************************************************************************************/
8794
8795/**
8796 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
8797 */
8798DECL_INLINE_THROW(uint32_t)
8799iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8800 uintptr_t pfnAImpl, uint8_t cArgs)
8801{
8802 if (idxVarRc != UINT8_MAX)
8803 {
8804 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
8805 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
8806 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
8807 }
8808
8809 /*
8810 * Do all the call setup and cleanup.
8811 */
8812 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
8813
8814 /*
8815 * Make the call and update the return code variable if we've got one.
8816 */
8817 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8818 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
8819 {
8820        pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
8821 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
8822 }
8823
8824 return off;
8825}
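/*
 * Sketch (names illustrative): an AIMPL call thus boils down to
 *      [a_rc =] pfnAImpl(a0, a1, ...);
 * with the return value, when one is requested, simply left in
 * IEMNATIVE_CALL_RET_GREG and the result variable bound to that register via
 * iemNativeVarRegisterSet.
 */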
8826
8827
8828
8829#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
8830 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
8831
8832#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
8833 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
8834
8835/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
8836DECL_INLINE_THROW(uint32_t)
8837iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
8838{
8839 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
8840}
8841
8842
8843#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
8844 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
8845
8846#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
8847 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
8848
8849/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
8850DECL_INLINE_THROW(uint32_t)
8851iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
8852{
8853 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8854 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
8855}
8856
8857
8858#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
8859 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
8860
8861#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
8862 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
8863
8864/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
8865DECL_INLINE_THROW(uint32_t)
8866iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8867 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8868{
8869 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8870 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8871 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
8872}
8873
8874
8875#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
8876 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
8877
8878#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
8879 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
8880
8881/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
8882DECL_INLINE_THROW(uint32_t)
8883iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8884 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8885{
8886 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8887 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8888 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8889 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
8890}
8891
8892
8893#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
8894 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8895
8896#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
8897 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8898
8899/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
8900DECL_INLINE_THROW(uint32_t)
8901iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8902 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8903{
8904 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8905 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8906 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8907 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
8908 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
8909}
8910
8911
8912
8913/*********************************************************************************************************************************
8914* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
8915*********************************************************************************************************************************/
8916
8917#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
8918 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
8919
8920#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8921 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
8922
8923#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8924 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
8925
8926#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8927 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
8928
8929
8930/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
8931 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
8932DECL_INLINE_THROW(uint32_t)
8933iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
8934{
8935 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8936 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
8937 Assert(iGRegEx < 20);
8938
8939 /* Same discussion as in iemNativeEmitFetchGregU16 */
8940 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8941 kIemNativeGstRegUse_ReadOnly);
8942
8943 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8944 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8945
8946 /* The value is zero-extended to the full 64-bit host register width. */
8947 if (iGRegEx < 16)
8948 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8949 else
8950 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8951
8952 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8953 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8954 return off;
8955}
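/*
 * Note on the iGRegEx encoding used by the U8 fetch and store emitters: values
 * 0..15 select the regular low byte registers (AL, CL, ..., SPL, ..., R15B)
 * while 16..19 select the legacy high byte registers AH, CH, DH and BH, which
 * is why the >= 16 path above pulls the value from bits 15:8 of the full
 * guest register.
 */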
8956
8957
8958#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8959 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
8960
8961#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8962 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
8963
8964#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8965 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
8966
8967/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
8968DECL_INLINE_THROW(uint32_t)
8969iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
8970{
8971 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8972 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8973 Assert(iGRegEx < 20);
8974
8975 /* Same discussion as in iemNativeEmitFetchGregU16 */
8976 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8977 kIemNativeGstRegUse_ReadOnly);
8978
8979 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8980 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8981
8982 if (iGRegEx < 16)
8983 {
8984 switch (cbSignExtended)
8985 {
8986 case sizeof(uint16_t):
8987 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8988 break;
8989 case sizeof(uint32_t):
8990 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8991 break;
8992 case sizeof(uint64_t):
8993 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8994 break;
8995 default: AssertFailed(); break;
8996 }
8997 }
8998 else
8999 {
9000 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9001 switch (cbSignExtended)
9002 {
9003 case sizeof(uint16_t):
9004 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9005 break;
9006 case sizeof(uint32_t):
9007 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9008 break;
9009 case sizeof(uint64_t):
9010 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9011 break;
9012 default: AssertFailed(); break;
9013 }
9014 }
9015
9016 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9017 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9018 return off;
9019}
9020
9021
9022
9023#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
9024 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
9025
9026#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
9027 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9028
9029#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
9030 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9031
9032/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
9033DECL_INLINE_THROW(uint32_t)
9034iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9035{
9036 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9037 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
9038 Assert(iGReg < 16);
9039
9040 /*
9041 * We can either just load the low 16-bit of the GPR into a host register
9042 * for the variable, or we can do so via a shadow copy host register. The
9043 * latter will avoid having to reload it if it's being stored later, but
9044 * will waste a host register if it isn't touched again. Since we don't
9045     * know what's going to happen, we choose the latter for now.
9046 */
9047 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9048 kIemNativeGstRegUse_ReadOnly);
9049
9050 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9051 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9052 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9053 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9054
9055 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9056 return off;
9057}
9058
9059
9060#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
9061 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9062
9063#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
9064 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9065
9066/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
9067DECL_INLINE_THROW(uint32_t)
9068iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
9069{
9070 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9071 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
9072 Assert(iGReg < 16);
9073
9074 /*
9075 * We can either just load the low 16-bit of the GPR into a host register
9076 * for the variable, or we can do so via a shadow copy host register. The
9077 * latter will avoid having to reload it if it's being stored later, but
9078 * will waste a host register if it isn't touched again. Since we don't
9079     * know what's going to happen, we choose the latter for now.
9080 */
9081 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9082 kIemNativeGstRegUse_ReadOnly);
9083
9084 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9085 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9086 if (cbSignExtended == sizeof(uint32_t))
9087 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9088 else
9089 {
9090 Assert(cbSignExtended == sizeof(uint64_t));
9091 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9092 }
9093 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9094
9095 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9096 return off;
9097}
9098
9099
9100#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
9101 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
9102
9103#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
9104 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
9105
9106/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
9107DECL_INLINE_THROW(uint32_t)
9108iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9109{
9110 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9111 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
9112 Assert(iGReg < 16);
9113
9114 /*
9115     * We can either just load the low 32-bit of the GPR into a host register
9116     * for the variable, or we can do so via a shadow copy host register. The
9117     * latter will avoid having to reload it if it's being stored later, but
9118     * will waste a host register if it isn't touched again. Since we don't
9119     * know what's going to happen, we choose the latter for now.
9120 */
9121 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9122 kIemNativeGstRegUse_ReadOnly);
9123
9124 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9125 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9126 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9127 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9128
9129 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9130 return off;
9131}
9132
9133
9134#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
9135 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
9136
9137/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
9138DECL_INLINE_THROW(uint32_t)
9139iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9140{
9141 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9142 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
9143 Assert(iGReg < 16);
9144
9145 /*
9146 * We can either just load the low 32-bit of the GPR into a host register
9147 * for the variable, or we can do so via a shadow copy host register. The
9148 * latter will avoid having to reload it if it's being stored later, but
9149 * will waste a host register if it isn't touched again. Since we don't
9150     * know what's going to happen, we choose the latter for now.
9151 */
9152 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9153 kIemNativeGstRegUse_ReadOnly);
9154
9155 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9156 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9157 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9158 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9159
9160 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9161 return off;
9162}
9163
9164
9165#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
9166 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9167
9168#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
9169 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9170
9171/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
9172 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
9173DECL_INLINE_THROW(uint32_t)
9174iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9175{
9176 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9177 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
9178 Assert(iGReg < 16);
9179
9180 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9181 kIemNativeGstRegUse_ReadOnly);
9182
9183 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9184 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9185 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
9186 /** @todo name the register a shadow one already? */
9187 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9188
9189 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9190 return off;
9191}
9192
9193
9194
9195/*********************************************************************************************************************************
9196* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
9197*********************************************************************************************************************************/
9198
9199#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
9200 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
9201
9202/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
9203DECL_INLINE_THROW(uint32_t)
9204iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
9205{
9206 Assert(iGRegEx < 20);
9207 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9208 kIemNativeGstRegUse_ForUpdate);
9209#ifdef RT_ARCH_AMD64
9210 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9211
9212 /* To the lowest byte of the register: mov r8, imm8 */
9213 if (iGRegEx < 16)
9214 {
9215 if (idxGstTmpReg >= 8)
9216 pbCodeBuf[off++] = X86_OP_REX_B;
9217 else if (idxGstTmpReg >= 4)
9218 pbCodeBuf[off++] = X86_OP_REX;
9219 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9220 pbCodeBuf[off++] = u8Value;
9221 }
9222 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
9223 else if (idxGstTmpReg < 4)
9224 {
9225 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
9226 pbCodeBuf[off++] = u8Value;
9227 }
9228 else
9229 {
9230 /* ror reg64, 8 */
9231 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9232 pbCodeBuf[off++] = 0xc1;
9233 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9234 pbCodeBuf[off++] = 8;
9235
9236 /* mov reg8, imm8 */
9237 if (idxGstTmpReg >= 8)
9238 pbCodeBuf[off++] = X86_OP_REX_B;
9239 else if (idxGstTmpReg >= 4)
9240 pbCodeBuf[off++] = X86_OP_REX;
9241 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9242 pbCodeBuf[off++] = u8Value;
9243
9244 /* rol reg64, 8 */
9245 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9246 pbCodeBuf[off++] = 0xc1;
9247 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9248 pbCodeBuf[off++] = 8;
9249 }
9250
9251#elif defined(RT_ARCH_ARM64)
9252 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
9253 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9254 if (iGRegEx < 16)
9255 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
9256 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
9257 else
9258 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
9259 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
9260 iemNativeRegFreeTmp(pReNative, idxImmReg);
9261
9262#else
9263# error "Port me!"
9264#endif
9265
9266 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9267
9268 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9269
9270 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9271 return off;
9272}
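/*
 * The AMD64 high-byte path above is easiest to read as a concrete sequence
 * (illustrative, assuming the guest register shadow lives in r8 and AH is the
 * target):
 *      ror r8, 8        ; bring bits 15:8 down into the low byte
 *      mov r8b, imm8    ; patch the low byte
 *      rol r8, 8        ; rotate the register back into place
 * The rotate trick is only needed because the shadow register has no
 * addressable high-byte form of its own.
 */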
9273
9274
9275#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
9276 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
9277
9278/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
9279DECL_INLINE_THROW(uint32_t)
9280iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
9281{
9282 Assert(iGRegEx < 20);
9283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9284
9285 /*
9286     * If it's a constant value (unlikely) we treat this as an
9287 * IEM_MC_STORE_GREG_U8_CONST statement.
9288 */
9289 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9290 { /* likely */ }
9291 else
9292 {
9293 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9294 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9295 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9296 }
9297
9298 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9299 kIemNativeGstRegUse_ForUpdate);
9300 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9301
9302#ifdef RT_ARCH_AMD64
9303 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
9304 if (iGRegEx < 16)
9305 {
9306 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
9307 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9308 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9309 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9310 pbCodeBuf[off++] = X86_OP_REX;
9311 pbCodeBuf[off++] = 0x8a;
9312 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9313 }
9314 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
9315 else if (idxGstTmpReg < 4 && idxVarReg < 4)
9316 {
9317 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
9318 pbCodeBuf[off++] = 0x8a;
9319 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
9320 }
9321 else
9322 {
9323 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
9324
9325 /* ror reg64, 8 */
9326 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9327 pbCodeBuf[off++] = 0xc1;
9328 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9329 pbCodeBuf[off++] = 8;
9330
9331 /* mov reg8, reg8(r/m) */
9332 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9333 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9334 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9335 pbCodeBuf[off++] = X86_OP_REX;
9336 pbCodeBuf[off++] = 0x8a;
9337 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9338
9339 /* rol reg64, 8 */
9340 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9341 pbCodeBuf[off++] = 0xc1;
9342 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9343 pbCodeBuf[off++] = 8;
9344 }
9345
9346#elif defined(RT_ARCH_ARM64)
9347 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
9348 or
9349 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
9350 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9351 if (iGRegEx < 16)
9352 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
9353 else
9354 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
9355
9356#else
9357# error "Port me!"
9358#endif
9359 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9360
9361 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9362
9363 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9364 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9365 return off;
9366}
9367
9368
9369
9370#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
9371 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
9372
9373/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
9374DECL_INLINE_THROW(uint32_t)
9375iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
9376{
9377 Assert(iGReg < 16);
9378 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9379 kIemNativeGstRegUse_ForUpdate);
9380#ifdef RT_ARCH_AMD64
9381 /* mov reg16, imm16 */
9382 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9383 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9384 if (idxGstTmpReg >= 8)
9385 pbCodeBuf[off++] = X86_OP_REX_B;
9386 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
9387 pbCodeBuf[off++] = RT_BYTE1(uValue);
9388 pbCodeBuf[off++] = RT_BYTE2(uValue);
9389
9390#elif defined(RT_ARCH_ARM64)
9391 /* movk xdst, #uValue, lsl #0 */
9392 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9393 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
9394
9395#else
9396# error "Port me!"
9397#endif
9398
9399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9400
9401 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9402 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9403 return off;
9404}
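/*
 * Design note: both encodings above only touch bits 15:0 of the shadow
 * register, as the 16-bit mov with operand size prefix on AMD64 and MOVK with
 * LSL #0 on ARM64 both leave bits 63:16 alone.  That is why the guest register
 * is allocated ForUpdate (read-modify-write) and then written back as a full
 * 64-bit value.
 */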
9405
9406
9407#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
9408 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
9409
9410/** Emits code for IEM_MC_STORE_GREG_U16. */
9411DECL_INLINE_THROW(uint32_t)
9412iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9413{
9414 Assert(iGReg < 16);
9415 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9416
9417 /*
9418     * If it's a constant value (unlikely) we treat this as an
9419 * IEM_MC_STORE_GREG_U16_CONST statement.
9420 */
9421 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9422 { /* likely */ }
9423 else
9424 {
9425 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9426 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9427 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9428 }
9429
9430 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9431 kIemNativeGstRegUse_ForUpdate);
9432
9433#ifdef RT_ARCH_AMD64
9434 /* mov reg16, reg16 or [mem16] */
9435 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9436 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9437 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9438 {
9439 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
9440 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
9441 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
9442 pbCodeBuf[off++] = 0x8b;
9443 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
9444 }
9445 else
9446 {
9447 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
9448 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9449 if (idxGstTmpReg >= 8)
9450 pbCodeBuf[off++] = X86_OP_REX_R;
9451 pbCodeBuf[off++] = 0x8b;
9452 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9453 }
9454
9455#elif defined(RT_ARCH_ARM64)
9456 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
9457 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9458 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9459 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
9460 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9461
9462#else
9463# error "Port me!"
9464#endif
9465
9466 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9467
9468 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9469 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9470 return off;
9471}
9472
9473
9474#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
9475 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
9476
9477/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
9478DECL_INLINE_THROW(uint32_t)
9479iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
9480{
9481 Assert(iGReg < 16);
9482 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9483 kIemNativeGstRegUse_ForFullWrite);
9484 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9485 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9486 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9487 return off;
9488}
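/*
 * Note: a 32-bit guest GPR write zero-extends to 64 bits architecturally, so
 * the constant is loaded zero-extended into the shadow register (allocated
 * ForFullWrite, no read-back needed) and stored as a full 64-bit value.  The
 * variable based IEM_MC_STORE_GREG_U32 path below asserts the same invariant
 * via iemNativeEmitTop32BitsClearCheck in strict builds.
 */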
9489
9490
9491#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
9492 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
9493
9494/** Emits code for IEM_MC_STORE_GREG_U32. */
9495DECL_INLINE_THROW(uint32_t)
9496iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9497{
9498 Assert(iGReg < 16);
9499 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9500
9501 /*
9502     * If it's a constant value (unlikely) we treat this as an
9503 * IEM_MC_STORE_GREG_U32_CONST statement.
9504 */
9505 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9506 { /* likely */ }
9507 else
9508 {
9509 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9510 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9511 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9512 }
9513
9514 /*
9515     * For the rest we allocate a guest register for the variable and write
9516 * it to the CPUMCTX structure.
9517 */
9518 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9519 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9520#ifdef VBOX_STRICT
9521 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
9522#endif
9523 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9524 return off;
9525}
9526
9527
9528#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
9529 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
9530
9531/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
9532DECL_INLINE_THROW(uint32_t)
9533iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
9534{
9535 Assert(iGReg < 16);
9536 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9537 kIemNativeGstRegUse_ForFullWrite);
9538 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9539 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9540 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9541 return off;
9542}
9543
9544
9545#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
9546 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
9547
9548/** Emits code for IEM_MC_STORE_GREG_U64. */
9549DECL_INLINE_THROW(uint32_t)
9550iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9551{
9552 Assert(iGReg < 16);
9553 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9554
9555 /*
9556 * If it's a constant value (unlikely) we treat this as an
9557 * IEM_MC_STORE_GREG_U64_CONST statement.
9558 */
9559 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9560 { /* likely */ }
9561 else
9562 {
9563 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9564 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9565 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
9566 }
9567
9568 /*
9569 * For the rest we allocate a guest register for the variable and write
9570 * it to the CPUMCTX structure.
9571 */
9572 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9573 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9574 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9575 return off;
9576}
9577
9578
9579#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
9580 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
9581
9582/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
9583DECL_INLINE_THROW(uint32_t)
9584iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
9585{
9586 Assert(iGReg < 16);
9587 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9588 kIemNativeGstRegUse_ForUpdate);
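    /* A 32-bit register-to-register move zero-extends on both AMD64 (mov r32, r32) and
       ARM64 (mov wN, wM), which gives exactly the "clear bits 63:32" semantics needed here. */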
9589 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
9590 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9591 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9592 return off;
9593}
9594
9595
9596/*********************************************************************************************************************************
9597* General purpose register manipulation (add, sub). *
9598*********************************************************************************************************************************/
9599
9600#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
9601 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
9602
9603/** Emits code for IEM_MC_ADD_GREG_U16. */
9604DECL_INLINE_THROW(uint32_t)
9605iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
9606{
9607 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9608 kIemNativeGstRegUse_ForUpdate);
9609
9610#ifdef RT_ARCH_AMD64
9611 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
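    /* With the 0x66 operand-size prefix this is either 'inc r16' (FF /0) or
       'add r16, imm16' (81 /0 iw); the immediate is little endian and its high
       byte is always zero here since uAddend is a uint8_t. */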
9612 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9613 if (idxGstTmpReg >= 8)
9614 pbCodeBuf[off++] = X86_OP_REX_B;
9615 if (uAddend == 1)
9616 {
9617 pbCodeBuf[off++] = 0xff; /* inc */
9618 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9619 }
9620 else
9621 {
9622 pbCodeBuf[off++] = 0x81;
9623 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9624 pbCodeBuf[off++] = uAddend;
9625 pbCodeBuf[off++] = 0;
9626 }
9627
9628#else
9629 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9630 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9631
9632 /* add tmp, gstgrp, uAddend ; only the low 16 bits may change, so do the add in a temporary. */
9633 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
9634
9635 /* bfi gstreg, tmp, 0, 16 - insert bits 15:0 of the temporary into the guest register. */
9636 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9637
9638 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9639#endif
9640
9641 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9642
9643 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9644
9645 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9646 return off;
9647}
9648
9649
9650#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
9651 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9652
9653#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
9654 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9655
9656/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
9657DECL_INLINE_THROW(uint32_t)
9658iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
9659{
9660 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9661 kIemNativeGstRegUse_ForUpdate);
9662
9663#ifdef RT_ARCH_AMD64
9664 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
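    /* Pick the shortest form: 'inc' for +1, 'add r/m, imm8' (83 /0 ib) while the addend
       fits a sign-extended byte, otherwise 'add r/m, imm32' (81 /0 id) with the upper
       immediate bytes zero since uAddend is a uint8_t. */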
9665 if (f64Bit)
9666 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9667 else if (idxGstTmpReg >= 8)
9668 pbCodeBuf[off++] = X86_OP_REX_B;
9669 if (uAddend == 1)
9670 {
9671 pbCodeBuf[off++] = 0xff; /* inc */
9672 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9673 }
9674 else if (uAddend < 128)
9675 {
9676 pbCodeBuf[off++] = 0x83; /* add */
9677 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9678 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9679 }
9680 else
9681 {
9682 pbCodeBuf[off++] = 0x81; /* add */
9683 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9684 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9685 pbCodeBuf[off++] = 0;
9686 pbCodeBuf[off++] = 0;
9687 pbCodeBuf[off++] = 0;
9688 }
9689
9690#else
9691 /* add gstgrp, gstgrp, uAddend */
9692 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9693 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
9694
9695#endif
9696
9697 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9698
9699 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9700
9701 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9702 return off;
9703}
9704
9705
9706
9707#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
9708 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
9709
9710/** Emits code for IEM_MC_SUB_GREG_U16. */
9711DECL_INLINE_THROW(uint32_t)
9712iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
9713{
9714 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9715 kIemNativeGstRegUse_ForUpdate);
9716
9717#ifdef RT_ARCH_AMD64
9718 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9719 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9720 if (idxGstTmpReg >= 8)
9721 pbCodeBuf[off++] = X86_OP_REX_B;
9722 if (uSubtrahend == 1)
9723 {
9724 pbCodeBuf[off++] = 0xff; /* dec */
9725 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9726 }
9727 else
9728 {
9729 pbCodeBuf[off++] = 0x81;
9730 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9731 pbCodeBuf[off++] = uSubtrahend;
9732 pbCodeBuf[off++] = 0;
9733 }
9734
9735#else
9736 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9737 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9738
9739 /* sub tmp, gstgrp, uSubtrahend */
9740 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
9741
9742 /* bfi gstreg, tmp, 0, 16 - insert bits 15:0 of the temporary into the guest register. */
9743 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9744
9745 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9746#endif
9747
9748 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9749
9750 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9751
9752 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9753 return off;
9754}
9755
9756
9757#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
9758 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9759
9760#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
9761 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9762
9763/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
9764DECL_INLINE_THROW(uint32_t)
9765iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
9766{
9767 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9768 kIemNativeGstRegUse_ForUpdate);
9769
9770#ifdef RT_ARCH_AMD64
9771 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9772 if (f64Bit)
9773 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9774 else if (idxGstTmpReg >= 8)
9775 pbCodeBuf[off++] = X86_OP_REX_B;
9776 if (uSubtrahend == 1)
9777 {
9778 pbCodeBuf[off++] = 0xff; /* dec */
9779 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9780 }
9781 else if (uSubtrahend < 128)
9782 {
9783 pbCodeBuf[off++] = 0x83; /* sub */
9784 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9785 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9786 }
9787 else
9788 {
9789 pbCodeBuf[off++] = 0x81; /* sub */
9790 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9791 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9792 pbCodeBuf[off++] = 0;
9793 pbCodeBuf[off++] = 0;
9794 pbCodeBuf[off++] = 0;
9795 }
9796
9797#else
9798 /* sub gstgrp, gstgrp, uSubtrahend */
9799 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9800 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
9801
9802#endif
9803
9804 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9805
9806 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9807
9808 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9809 return off;
9810}
9811
9812
9813/*********************************************************************************************************************************
9814* Local variable manipulation (add, sub, and, or). *
9815*********************************************************************************************************************************/
9816
9817#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
9818 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
9819
9820#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
9821 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
9822
9823#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
9824 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
9825
9826#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
9827 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
9828
9829/** Emits code for AND'ing a local and a constant value. */
9830DECL_INLINE_THROW(uint32_t)
9831iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
9832{
9833 Assert(pReNative->Core.aVars[idxVar].cbVar == cbMask);
9834#ifdef VBOX_STRICT
9835 switch (cbMask)
9836 {
9837 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
9838 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
9839 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
9840 case sizeof(uint64_t): break;
9841 default: AssertFailedBreak();
9842 }
9843#endif
9844
9845 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
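    /* The 32-bit form is used for 8, 16 and 32-bit locals; it also clears the upper half of
       the host register, which is harmless as only cbMask bytes of the variable are defined. */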
9846 if (cbMask <= sizeof(uint32_t))
9847 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
9848 else
9849 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
9850 iemNativeVarRegisterRelease(pReNative, idxVar);
9851 return off;
9852}
9853
9854
9855#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
9856 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
9857
9858#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
9859 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
9860
9861#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
9862 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
9863
9864#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
9865 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
9866
9867/** Emits code for OR'ing a local and a constant value. */
9868DECL_INLINE_THROW(uint32_t)
9869iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
9870{
9871 Assert(pReNative->Core.aVars[idxVar].cbVar == cbMask);
9872#ifdef VBOX_STRICT
9873 switch (cbMask)
9874 {
9875 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
9876 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
9877 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
9878 case sizeof(uint64_t): break;
9879 default: AssertFailedBreak();
9880 }
9881#endif
9882
9883 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
9884 if (cbMask <= sizeof(uint32_t))
9885 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
9886 else
9887 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
9888 iemNativeVarRegisterRelease(pReNative, idxVar);
9889 return off;
9890}
9891
9892
9893
9894
9895/*********************************************************************************************************************************
9896* EFLAGS *
9897*********************************************************************************************************************************/
9898
9899#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9900# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
9901#else
9902# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
9903 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
9904
9905DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
9906{
9907 if (fEflOutput)
9908 {
9909 PVMCPUCC const pVCpu = pReNative->pVCpu;
9910# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9911 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
9912 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
9913 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
9914# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
9915 if (fEflOutput & (a_fEfl)) \
9916 { \
9917 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
9918 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
9919 else \
9920 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
9921 } else do { } while (0)
9922# else
9923 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
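    /* Clobbered: the flag is written here and never read, nor needed by a potential
       exception/call or other access, i.e. calculating it is skippable.  Delayable: it is
       written and only a potential exception/call may need it, so the update could be
       postponed.  Everything else counts as required. */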
9924 IEMLIVENESSBIT const LivenessClobbered =
9925 {
9926 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
9927 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
9928 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
9929 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
9930 };
9931 IEMLIVENESSBIT const LivenessDelayable =
9932 {
9933 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
9934 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
9935 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
9936 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
9937 };
9938# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
9939 if (fEflOutput & (a_fEfl)) \
9940 { \
9941 if (LivenessClobbered.a_fLivenessMember) \
9942 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
9943 else if (LivenessDelayable.a_fLivenessMember) \
9944 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
9945 else \
9946 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
9947 } else do { } while (0)
9948# endif
9949 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
9950 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
9951 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
9952 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
9953 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
9954 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
9955 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
9956# undef CHECK_FLAG_AND_UPDATE_STATS
9957 }
9958 RT_NOREF(fEflInput);
9959}
9960#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
9961
9962#undef IEM_MC_FETCH_EFLAGS /* should not be used */
9963#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
9964 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
9965
9966/** Handles IEM_MC_FETCH_EFLAGS_EX. */
9967DECL_INLINE_THROW(uint32_t)
9968iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
9969 uint32_t fEflInput, uint32_t fEflOutput)
9970{
9971 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
9972 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
9973 RT_NOREF(fEflInput, fEflOutput);
9974
9975#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9976# ifdef VBOX_STRICT
9977 if ( pReNative->idxCurCall != 0
9978 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
9979 {
9980 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
9981 uint32_t const fBoth = fEflInput | fEflOutput;
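    /* For each flag the MC touches, the liveness info of the previous call must agree:
       output-only flags should be in a clobber-expected state, input-only flags in an
       input-expected state, and input+output flags in a modify-expected state. */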
9982# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
9983 AssertMsg( !(fBoth & (a_fElfConst)) \
9984 || (!(fEflInput & (a_fElfConst)) \
9985 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
9986 : !(fEflOutput & (a_fElfConst)) \
9987 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
9988 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
9989 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
9990 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
9991 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
9992 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
9993 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
9994 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
9995 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
9996 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
9997# undef ASSERT_ONE_EFL
9998 }
9999# endif
10000#endif
10001
10002 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
10003 * the existing shadow copy. */
10004 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
10005 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10006 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
10007 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10008 return off;
10009}
10010
10011
10012
10013/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
10014 * start using it with custom native code emission (inlining assembly
10015 * instruction helpers). */
10016#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
10017#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10018 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10019 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
10020
10021/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
10022DECL_INLINE_THROW(uint32_t)
10023iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
10024{
10025 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
10026 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
10027 RT_NOREF(fEflOutput);
10028
10029 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
10030
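    /* Strict builds sanity check the value being committed: break (0x2001) if the reserved
       always-one bit is clear, and break (0x2002) if any reserved always-zero hardware bit
       is set. */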
10031#ifdef VBOX_STRICT
10032 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
10033 uint32_t offFixup = off;
10034 off = iemNativeEmitJnzToFixed(pReNative, off, off);
10035 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
10036 iemNativeFixupFixedJump(pReNative, offFixup, off);
10037
10038 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
10039 offFixup = off;
10040 off = iemNativeEmitJzToFixed(pReNative, off, off);
10041 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
10042 iemNativeFixupFixedJump(pReNative, offFixup, off);
10043
10044 /** @todo validate that only bits in the fEflOutput mask changed. */
10045#endif
10046
10047 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10048 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
10049 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10050 return off;
10051}
10052
10053
10054
10055/*********************************************************************************************************************************
10056* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
10057*********************************************************************************************************************************/
10058
10059#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
10060 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
10061
10062#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
10063 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
10064
10065#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
10066 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
10067
10068
10069/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
10070 * IEM_MC_FETCH_SREG_ZX_U64. */
10071DECL_INLINE_THROW(uint32_t)
10072iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
10073{
10074 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10075 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
10076 Assert(iSReg < X86_SREG_COUNT);
10077
10078 /*
10079 * For now, we will not create a shadow copy of a selector. The rationale
10080 * is that since we do not recompile the popping and loading of segment
10081 * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
10082 * pushing and moving to registers, there is only a small chance that the
10083 * shadow copy will be accessed again before the register is reloaded. One
10084 * scenario would be nested calls in 16-bit code, but I doubt it's worth
10085 * the extra register pressure atm.
10086 *
10087 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
10088 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
10089 * store scenario covered at present (r160730).
10090 */
10091 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10092 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10093 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
10094 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10095 return off;
10096}
10097
10098
10099
10100/*********************************************************************************************************************************
10101* Register references. *
10102*********************************************************************************************************************************/
10103
10104#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
10105 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
10106
10107#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
10108 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
10109
10110/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
10111DECL_INLINE_THROW(uint32_t)
10112iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
10113{
10114 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10115 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
10116 Assert(iGRegEx < 20);
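    /* iGRegEx values 0..15 are the regular byte registers, while 16..19 encode the
       high-byte registers AH, CH, DH and BH (only reachable without a REX prefix). */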
10117
10118 if (iGRegEx < 16)
10119 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10120 else
10121 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
10122
10123 /* If we've delayed writing back the register value, flush it now. */
10124 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10125
10126 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10127 if (!fConst)
10128 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
10129
10130 return off;
10131}
10132
10133#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
10134 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
10135
10136#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
10137 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
10138
10139#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
10140 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
10141
10142#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
10143 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
10144
10145#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
10146 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
10147
10148#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
10149 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
10150
10151#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
10152 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
10153
10154#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
10155 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
10156
10157#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
10158 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
10159
10160#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
10161 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
10162
10163/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
10164DECL_INLINE_THROW(uint32_t)
10165iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
10166{
10167 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10168 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
10169 Assert(iGReg < 16);
10170
10171 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
10172
10173 /* If we've delayed writing back the register value, flush it now. */
10174 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
10175
10176 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10177 if (!fConst)
10178 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
10179
10180 return off;
10181}
10182
10183
10184#undef IEM_MC_REF_EFLAGS /* should not be used. */
10185#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
10186 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10187 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
10188
10189/** Handles IEM_MC_REF_EFLAGS. */
10190DECL_INLINE_THROW(uint32_t)
10191iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
10192{
10193 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10194 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
10195
10196 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
10197
10198 /* If we've delayed writing back the register value, flush it now. */
10199 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
10200
10201 /* If there is a shadow copy of guest EFLAGS, flush it now. */
10202 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
10203
10204 return off;
10205}
10206
10207
10208/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
10209 * different code from the threaded recompiler, maybe it would be helpful. For now
10210 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
10211#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
10212
10213
10214
10215/*********************************************************************************************************************************
10216* Effective Address Calculation *
10217*********************************************************************************************************************************/
10218#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
10219 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
10220
10221/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
10222 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
10223DECL_INLINE_THROW(uint32_t)
10224iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10225 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
10226{
10227 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10228
10229 /*
10230 * Handle the disp16 form with no registers first.
10231 *
10232 * Convert to an immediate value, as that'll delay the register allocation
10233 * and assignment till the memory access / call / whatever and we can use
10234 * a more appropriate register (or none at all).
10235 */
10236 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
10237 {
10238 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
10239 return off;
10240 }
10241
10242 /* Determine the displacement. */
10243 uint16_t u16EffAddr;
10244 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10245 {
10246 case 0: u16EffAddr = 0; break;
10247 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
10248 case 2: u16EffAddr = u16Disp; break;
10249 default: AssertFailedStmt(u16EffAddr = 0);
10250 }
10251
10252 /* Determine the registers involved. */
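    /* Classic 16-bit addressing: rm=0 BX+SI, 1 BX+DI, 2 BP+SI, 3 BP+DI, 4 SI, 5 DI,
       6 BP (the mod=0 disp16-only case was handled above), 7 BX. */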
10253 uint8_t idxGstRegBase;
10254 uint8_t idxGstRegIndex;
10255 switch (bRm & X86_MODRM_RM_MASK)
10256 {
10257 case 0:
10258 idxGstRegBase = X86_GREG_xBX;
10259 idxGstRegIndex = X86_GREG_xSI;
10260 break;
10261 case 1:
10262 idxGstRegBase = X86_GREG_xBX;
10263 idxGstRegIndex = X86_GREG_xDI;
10264 break;
10265 case 2:
10266 idxGstRegBase = X86_GREG_xBP;
10267 idxGstRegIndex = X86_GREG_xSI;
10268 break;
10269 case 3:
10270 idxGstRegBase = X86_GREG_xBP;
10271 idxGstRegIndex = X86_GREG_xDI;
10272 break;
10273 case 4:
10274 idxGstRegBase = X86_GREG_xSI;
10275 idxGstRegIndex = UINT8_MAX;
10276 break;
10277 case 5:
10278 idxGstRegBase = X86_GREG_xDI;
10279 idxGstRegIndex = UINT8_MAX;
10280 break;
10281 case 6:
10282 idxGstRegBase = X86_GREG_xBP;
10283 idxGstRegIndex = UINT8_MAX;
10284 break;
10285#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
10286 default:
10287#endif
10288 case 7:
10289 idxGstRegBase = X86_GREG_xBX;
10290 idxGstRegIndex = UINT8_MAX;
10291 break;
10292 }
10293
10294 /*
10295 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
10296 */
10297 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10298 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10299 kIemNativeGstRegUse_ReadOnly);
10300 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
10301 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10302 kIemNativeGstRegUse_ReadOnly)
10303 : UINT8_MAX;
10304#ifdef RT_ARCH_AMD64
10305 if (idxRegIndex == UINT8_MAX)
10306 {
10307 if (u16EffAddr == 0)
10308 {
10309 /* movzx ret, base */
10310 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
10311 }
10312 else
10313 {
10314 /* lea ret32, [base64 + disp32] */
10315 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10316 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10317 if (idxRegRet >= 8 || idxRegBase >= 8)
10318 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
10319 pbCodeBuf[off++] = 0x8d;
10320 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10321 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
10322 else
10323 {
10324 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
10325 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10326 }
10327 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
10328 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
10329 pbCodeBuf[off++] = 0;
10330 pbCodeBuf[off++] = 0;
10331 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10332
10333 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
10334 }
10335 }
10336 else
10337 {
10338 /* lea ret32, [index64 + base64 (+ disp32)] */
10339 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10340 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10341 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10342 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10343 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10344 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10345 pbCodeBuf[off++] = 0x8d;
10346 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
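        /* mod=0 with SIB.base=101b means 'disp32, no base', so a base register ending in
           BP/R13 must take the disp32 form even when the displacement is zero. */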
10347 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10348 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
10349 if (bMod == X86_MOD_MEM4)
10350 {
10351 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
10352 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
10353 pbCodeBuf[off++] = 0;
10354 pbCodeBuf[off++] = 0;
10355 }
10356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10357 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
10358 }
10359
10360#elif defined(RT_ARCH_ARM64)
10361 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
10362 if (u16EffAddr == 0)
10363 {
10364 if (idxRegIndex == UINT8_MAX)
10365 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
10366 else
10367 {
10368 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
10369 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10370 }
10371 }
10372 else
10373 {
10374 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
10375 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
10376 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
10377 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10378 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
10379 else
10380 {
10381 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
10382 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10383 }
10384 if (idxRegIndex != UINT8_MAX)
10385 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
10386 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10387 }
10388
10389#else
10390# error "port me"
10391#endif
10392
10393 if (idxRegIndex != UINT8_MAX)
10394 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10395 iemNativeRegFreeTmp(pReNative, idxRegBase);
10396 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10397 return off;
10398}
10399
10400
10401#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
10402 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
10403
10404/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
10405 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
10406DECL_INLINE_THROW(uint32_t)
10407iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10408 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
10409{
10410 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10411
10412 /*
10413 * Handle the disp32 form with no registers first.
10414 *
10415 * Convert to an immediate value, as that'll delay the register allocation
10416 * and assignment till the memory access / call / whatever and we can use
10417 * a more appropriate register (or none at all).
10418 */
10419 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10420 {
10421 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
10422 return off;
10423 }
10424
10425 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
10426 uint32_t u32EffAddr = 0;
10427 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10428 {
10429 case 0: break;
10430 case 1: u32EffAddr = (int8_t)u32Disp; break;
10431 case 2: u32EffAddr = u32Disp; break;
10432 default: AssertFailed();
10433 }
10434
10435 /* Get the register (or SIB) value. */
10436 uint8_t idxGstRegBase = UINT8_MAX;
10437 uint8_t idxGstRegIndex = UINT8_MAX;
10438 uint8_t cShiftIndex = 0;
10439 switch (bRm & X86_MODRM_RM_MASK)
10440 {
10441 case 0: idxGstRegBase = X86_GREG_xAX; break;
10442 case 1: idxGstRegBase = X86_GREG_xCX; break;
10443 case 2: idxGstRegBase = X86_GREG_xDX; break;
10444 case 3: idxGstRegBase = X86_GREG_xBX; break;
10445 case 4: /* SIB */
10446 {
10447 /* index w/ scaling. */
10448 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10449 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10450 {
10451 case 0: idxGstRegIndex = X86_GREG_xAX; break;
10452 case 1: idxGstRegIndex = X86_GREG_xCX; break;
10453 case 2: idxGstRegIndex = X86_GREG_xDX; break;
10454 case 3: idxGstRegIndex = X86_GREG_xBX; break;
10455 case 4: cShiftIndex = 0; /*no index*/ break;
10456 case 5: idxGstRegIndex = X86_GREG_xBP; break;
10457 case 6: idxGstRegIndex = X86_GREG_xSI; break;
10458 case 7: idxGstRegIndex = X86_GREG_xDI; break;
10459 }
10460
10461 /* base */
10462 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
10463 {
10464 case 0: idxGstRegBase = X86_GREG_xAX; break;
10465 case 1: idxGstRegBase = X86_GREG_xCX; break;
10466 case 2: idxGstRegBase = X86_GREG_xDX; break;
10467 case 3: idxGstRegBase = X86_GREG_xBX; break;
10468 case 4:
10469 idxGstRegBase = X86_GREG_xSP;
10470 u32EffAddr += uSibAndRspOffset >> 8;
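                /* The upper bits of uSibAndRspOffset hold the fixed xSP bias that
                   pop [xSP] needs; see the 64-bit variant below for details. */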
10471 break;
10472 case 5:
10473 if ((bRm & X86_MODRM_MOD_MASK) != 0)
10474 idxGstRegBase = X86_GREG_xBP;
10475 else
10476 {
10477 Assert(u32EffAddr == 0);
10478 u32EffAddr = u32Disp;
10479 }
10480 break;
10481 case 6: idxGstRegBase = X86_GREG_xSI; break;
10482 case 7: idxGstRegBase = X86_GREG_xDI; break;
10483 }
10484 break;
10485 }
10486 case 5: idxGstRegBase = X86_GREG_xBP; break;
10487 case 6: idxGstRegBase = X86_GREG_xSI; break;
10488 case 7: idxGstRegBase = X86_GREG_xDI; break;
10489 }
10490
10491 /*
10492 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
10493 * the start of the function.
10494 */
10495 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
10496 {
10497 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
10498 return off;
10499 }
10500
10501 /*
10502 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10503 */
10504 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10505 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
10506 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10507 kIemNativeGstRegUse_ReadOnly);
10508 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
10509 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10510 kIemNativeGstRegUse_ReadOnly);
10511
10512 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
10513 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
10514 {
10515 idxRegBase = idxRegIndex;
10516 idxRegIndex = UINT8_MAX;
10517 }
10518
10519#ifdef RT_ARCH_AMD64
10520 if (idxRegIndex == UINT8_MAX)
10521 {
10522 if (u32EffAddr == 0)
10523 {
10524 /* mov ret, base */
10525 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10526 }
10527 else
10528 {
10529 /* lea ret32, [base64 + disp32] */
10530 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10531 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10532 if (idxRegRet >= 8 || idxRegBase >= 8)
10533 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
10534 pbCodeBuf[off++] = 0x8d;
10535 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10536 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10537 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
10538 else
10539 {
10540 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10541 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10542 }
10543 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10544 if (bMod == X86_MOD_MEM4)
10545 {
10546 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10547 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10548 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10549 }
10550 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10551 }
10552 }
10553 else
10554 {
10555 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10556 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10557 if (idxRegBase == UINT8_MAX)
10558 {
10559 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
10560 if (idxRegRet >= 8 || idxRegIndex >= 8)
10561 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10562 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10563 pbCodeBuf[off++] = 0x8d;
10564 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
10565 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
10566 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10567 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10568 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10569 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10570 }
10571 else
10572 {
10573 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
10574 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10575 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10576 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10577 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10578 pbCodeBuf[off++] = 0x8d;
10579 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
10580 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10581 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10582 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
10583 if (bMod != X86_MOD_MEM0)
10584 {
10585 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10586 if (bMod == X86_MOD_MEM4)
10587 {
10588 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10589 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10590 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10591 }
10592 }
10593 }
10594 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10595 }
10596
10597#elif defined(RT_ARCH_ARM64)
10598 if (u32EffAddr == 0)
10599 {
10600 if (idxRegIndex == UINT8_MAX)
10601 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10602 else if (idxRegBase == UINT8_MAX)
10603 {
10604 if (cShiftIndex == 0)
10605 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
10606 else
10607 {
10608 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10609 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
10610 }
10611 }
10612 else
10613 {
10614 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10615 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
10616 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
10617 }
10618 }
10619 else
10620 {
10621 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
10622 {
10623 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10624 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
10625 }
10626 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
10627 {
10628 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10629 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10630 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
10631 }
10632 else
10633 {
10634 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
10635 if (idxRegBase != UINT8_MAX)
10636 {
10637 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10638 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10639 }
10640 }
10641 if (idxRegIndex != UINT8_MAX)
10642 {
10643 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10644 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
10645 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
10646 }
10647 }
10648
10649#else
10650# error "port me"
10651#endif
10652
10653 if (idxRegIndex != UINT8_MAX)
10654 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10655 if (idxRegBase != UINT8_MAX)
10656 iemNativeRegFreeTmp(pReNative, idxRegBase);
10657 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10658 return off;
10659}
10660
10661
10662#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10663 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10664 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10665
10666#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10667 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10668 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10669
10670#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10671 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10672 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
10673
10674/**
10675 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
10676 *
10677 * @returns New off.
10678 * @param pReNative The native recompiler state.
10679 * @param off The current instruction buffer offset.
10680 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
10681 * bit 4 to REX.X. The two bits are part of the
10682 * REG sub-field, which isn't needed in this
10683 * function.
10684 * @param uSibAndRspOffset Two parts:
10685 * - The first 8 bits make up the SIB byte.
10686 * - The next 8 bits are the fixed RSP/ESP offset
10687 * in case of a pop [xSP].
10688 * @param u32Disp The displacement byte/word/dword, if any.
10689 * @param cbInstr The size of the fully decoded instruction. Used
10690 * for RIP relative addressing.
10691 * @param idxVarRet The result variable number.
10692 * @param f64Bit Whether to use a 64-bit or 32-bit address size
10693 * when calculating the address.
10694 *
10695 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
10696 */
10697DECL_INLINE_THROW(uint32_t)
10698iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
10699 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
10700{
10701 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10702
10703 /*
10704 * Special case the rip + disp32 form first.
10705 */
10706 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10707 {
10708 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10709 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
10710 kIemNativeGstRegUse_ReadOnly);
10711#ifdef RT_ARCH_AMD64
10712 if (f64Bit)
10713 {
10714 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
10715 if ((int32_t)offFinalDisp == offFinalDisp)
10716 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
10717 else
10718 {
10719 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
10720 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
10721 }
10722 }
10723 else
10724 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
10725
10726#elif defined(RT_ARCH_ARM64)
10727 if (f64Bit)
10728 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10729 (int64_t)(int32_t)u32Disp + cbInstr);
10730 else
10731 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10732 (int32_t)u32Disp + cbInstr);
10733
10734#else
10735# error "Port me!"
10736#endif
10737 iemNativeRegFreeTmp(pReNative, idxRegPc);
10738 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10739 return off;
10740 }
10741
10742 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
10743 int64_t i64EffAddr = 0;
10744 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10745 {
10746 case 0: break;
10747 case 1: i64EffAddr = (int8_t)u32Disp; break;
10748 case 2: i64EffAddr = (int32_t)u32Disp; break;
10749 default: AssertFailed();
10750 }
10751
10752 /* Get the register (or SIB) value. */
10753 uint8_t idxGstRegBase = UINT8_MAX;
10754 uint8_t idxGstRegIndex = UINT8_MAX;
10755 uint8_t cShiftIndex = 0;
10756 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
10757 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
10758 else /* SIB: */
10759 {
10760 /* index w/ scaling. */
10761 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10762 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10763 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
10764 if (idxGstRegIndex == 4)
10765 {
10766 /* no index */
10767 cShiftIndex = 0;
10768 idxGstRegIndex = UINT8_MAX;
10769 }
10770
10771 /* base */
10772 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
10773 if (idxGstRegBase == 4)
10774 {
10775 /* pop [rsp] hack */
10776 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
10777 }
10778 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
10779 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
10780 {
10781 /* mod=0 and base=5 -> disp32, no base reg. */
10782 Assert(i64EffAddr == 0);
10783 i64EffAddr = (int32_t)u32Disp;
10784 idxGstRegBase = UINT8_MAX;
10785 }
10786 }
10787
10788 /*
10789 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
10790 * the start of the function.
10791 */
10792 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
10793 {
10794 if (f64Bit)
10795 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
10796 else
10797 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
10798 return off;
10799 }
10800
10801 /*
10802 * Now emit code that calculates:
10803 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10804 * or if !f64Bit:
10805 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10806 */
10807 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10808 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
10809 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10810 kIemNativeGstRegUse_ReadOnly);
10811 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
10812 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10813 kIemNativeGstRegUse_ReadOnly);
10814
10815 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
10816 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
10817 {
10818 idxRegBase = idxRegIndex;
10819 idxRegIndex = UINT8_MAX;
10820 }
10821
10822#ifdef RT_ARCH_AMD64
10823 uint8_t bFinalAdj;
10824 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
10825 bFinalAdj = 0; /* likely */
10826 else
10827 {
10828 /* pop [rsp] with a problematic disp32 value. Split out the
10829 RSP offset and add it separately afterwards (bFinalAdj). */
10830 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
10831 Assert(idxGstRegBase == X86_GREG_xSP);
10832 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
10833 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
10834 Assert(bFinalAdj != 0);
10835 i64EffAddr -= bFinalAdj;
10836 Assert((int32_t)i64EffAddr == i64EffAddr);
10837 }
10838 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
10839//pReNative->pInstrBuf[off++] = 0xcc;
10840
10841 if (idxRegIndex == UINT8_MAX)
10842 {
10843 if (u32EffAddr == 0)
10844 {
10845 /* mov ret, base */
10846 if (f64Bit)
10847 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
10848 else
10849 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10850 }
10851 else
10852 {
10853 /* lea ret, [base + disp32] */
10854 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10855 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10856 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
10857 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10858 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10859 | (f64Bit ? X86_OP_REX_W : 0);
10860 pbCodeBuf[off++] = 0x8d;
10861 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10862 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10863 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
10864 else
10865 {
10866 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10867 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10868 }
10869 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10870 if (bMod == X86_MOD_MEM4)
10871 {
10872 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10873 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10874 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10875 }
10876 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10877 }
10878 }
10879 else
10880 {
10881 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10882 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10883 if (idxRegBase == UINT8_MAX)
10884 {
10885 /* lea ret, [(index64 << cShiftIndex) + disp32] */
10886 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
10887 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10888 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
10889 | (f64Bit ? X86_OP_REX_W : 0);
10890 pbCodeBuf[off++] = 0x8d;
10891 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
10892 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
10893 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10894 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10895 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10896 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10897 }
10898 else
10899 {
10900 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
10901 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10902 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10903 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10904 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
10905 | (f64Bit ? X86_OP_REX_W : 0);
10906 pbCodeBuf[off++] = 0x8d;
10907 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
10908 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10909 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10910 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
10911 if (bMod != X86_MOD_MEM0)
10912 {
10913 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10914 if (bMod == X86_MOD_MEM4)
10915 {
10916 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10917 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10918 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10919 }
10920 }
10921 }
10922 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10923 }
10924
10925 if (!bFinalAdj)
10926 { /* likely */ }
10927 else
10928 {
10929 Assert(f64Bit);
10930 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
10931 }
10932
10933#elif defined(RT_ARCH_ARM64)
10934 if (i64EffAddr == 0)
10935 {
10936 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10937 if (idxRegIndex == UINT8_MAX)
10938 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
10939 else if (idxRegBase != UINT8_MAX)
10940 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
10941 f64Bit, false /*fSetFlags*/, cShiftIndex);
10942 else
10943 {
10944 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
10945 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
10946 }
10947 }
10948 else
10949 {
10950 if (f64Bit)
10951 { /* likely */ }
10952 else
10953 i64EffAddr = (int32_t)i64EffAddr;
10954
10955 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
10956 {
10957 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10958 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
10959 }
10960 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
10961 {
10962 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10963 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
10964 }
10965 else
10966 {
10967 if (f64Bit)
10968 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
10969 else
10970 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
10971 if (idxRegBase != UINT8_MAX)
10972 {
10973 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10974 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
10975 }
10976 }
10977 if (idxRegIndex != UINT8_MAX)
10978 {
10979 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10980 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
10981 f64Bit, false /*fSetFlags*/, cShiftIndex);
10982 }
10983 }
10984
10985#else
10986# error "port me"
10987#endif
10988
10989 if (idxRegIndex != UINT8_MAX)
10990 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10991 if (idxRegBase != UINT8_MAX)
10992 iemNativeRegFreeTmp(pReNative, idxRegBase);
10993 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10994 return off;
10995}
10996
10997
10998/*********************************************************************************************************************************
10999* TLB Lookup. *
11000*********************************************************************************************************************************/
11001
11002/**
11003 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
11004 */
11005DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
11006{
11007 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
11008 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
11009 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
11010 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
11011
11012 /* Do the lookup manually. */
11013 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
11014 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
11015 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
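    /* The tag folds the page number of GCPtrFlat together with the current TLB revision,
       so entries surviving from before the last TLB flush fail the uTag check below. */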
11016 if (RT_LIKELY(pTlbe->uTag == uTag))
11017 {
11018 /*
11019 * Check TLB page table level access flags.
11020 */
11021 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
11022 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
11023 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
11024 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
11025 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
11026 | IEMTLBE_F_PG_UNASSIGNED
11027 | IEMTLBE_F_PT_NO_ACCESSED
11028 | fNoWriteNoDirty | fNoUser);
11029 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
11030 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
11031 {
11032 /*
11033 * Return the address.
11034 */
11035 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
11036 if ((uintptr_t)pbAddr == uResult)
11037 return;
11038 RT_NOREF(cbMem);
11039 AssertFailed();
11040 }
11041 else
11042 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
11043 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
11044 }
11045 else
11046 AssertFailed();
11047 RT_BREAKPOINT();
11048}
11049
11050/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
11051
11052
11053/*********************************************************************************************************************************
11054* Memory fetches and stores common *
11055*********************************************************************************************************************************/
11056
11057typedef enum IEMNATIVEMITMEMOP
11058{
11059 kIemNativeEmitMemOp_Store = 0,
11060 kIemNativeEmitMemOp_Fetch,
11061 kIemNativeEmitMemOp_Fetch_Zx_U16,
11062 kIemNativeEmitMemOp_Fetch_Zx_U32,
11063 kIemNativeEmitMemOp_Fetch_Zx_U64,
11064 kIemNativeEmitMemOp_Fetch_Sx_U16,
11065 kIemNativeEmitMemOp_Fetch_Sx_U32,
11066 kIemNativeEmitMemOp_Fetch_Sx_U64
11067} IEMNATIVEMITMEMOP;
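/* How the op/size pairs map onto helpers: plain stores/fetches and the zero-extending
   fetches use the helper matching cbMem, while the sign-extending ones use a widening
   helper, e.g. kIemNativeEmitMemOp_Fetch_Sx_U64 with cbMem == 2 goes through
   iemNativeHlpMemFetchDataU16_Sx_U64 (see the VBOX_STRICT tables in the function below). */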
11068
11069/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
11070 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
11071 * (with iSegReg = UINT8_MAX). */
11072DECL_INLINE_THROW(uint32_t)
11073iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
11074 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
11075 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
11076{
11077 /*
11078 * Assert sanity.
11079 */
11080 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11081 Assert( enmOp != kIemNativeEmitMemOp_Store
11082 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
11083 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
11084 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
11085 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
11086 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
11087 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11088 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
11089 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
11090 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
11091#ifdef VBOX_STRICT
11092 if (iSegReg == UINT8_MAX)
11093 {
11094 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11095 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11096 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11097 switch (cbMem)
11098 {
11099 case 1:
11100 Assert( pfnFunction
11101 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
11102 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11103 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11104 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11105 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11106 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
11107 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
11108 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
11109 : UINT64_C(0xc000b000a0009000) ));
11110 break;
11111 case 2:
11112 Assert( pfnFunction
11113 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
11114 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11115 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11116 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11117 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
11118 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
11119 : UINT64_C(0xc000b000a0009000) ));
11120 break;
11121 case 4:
11122 Assert( pfnFunction
11123 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
11124 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11125 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11126 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
11127 : UINT64_C(0xc000b000a0009000) ));
11128 break;
11129 case 8:
11130 Assert( pfnFunction
11131 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
11132 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
11133 : UINT64_C(0xc000b000a0009000) ));
11134 break;
11135 }
11136 }
11137 else
11138 {
11139 Assert(iSegReg < 6);
11140 switch (cbMem)
11141 {
11142 case 1:
11143 Assert( pfnFunction
11144 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
11145 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
11146 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11147 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11148 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11149 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
11150 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
11151 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
11152 : UINT64_C(0xc000b000a0009000) ));
11153 break;
11154 case 2:
11155 Assert( pfnFunction
11156 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
11157 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
11158 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11159 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11160 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
11161 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
11162 : UINT64_C(0xc000b000a0009000) ));
11163 break;
11164 case 4:
11165 Assert( pfnFunction
11166 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
11167 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
11168 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
11169 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
11170 : UINT64_C(0xc000b000a0009000) ));
11171 break;
11172 case 8:
11173 Assert( pfnFunction
11174 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
11175 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
11176 : UINT64_C(0xc000b000a0009000) ));
11177 break;
11178 }
11179 }
11180#endif
11181
11182#ifdef VBOX_STRICT
11183 /*
11184 * Check that the fExec flags we've got make sense.
11185 */
11186 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11187#endif
11188
11189 /*
11190 * To keep things simple we have to commit any pending writes first as we
11191 * may end up making calls.
11192 */
11193 /** @todo we could postpone this till we make the call and reload the
11194 * registers after returning from the call. Not sure if that's sensible or
11195 * not, though. */
11196 off = iemNativeRegFlushPendingWrites(pReNative, off);
11197
11198#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11199 /*
11200 * Move/spill/flush stuff out of call-volatile registers.
11201 * This is the easy way out. We could contain this to the tlb-miss branch
11202 * by saving and restoring active stuff here.
11203 */
11204 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
11205#endif
11206
11207 /*
11208 * Define labels and allocate the result register (trying for the return
11209 * register if we can).
11210 */
11211 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11212 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
11213 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
11214 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
11215 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
11216 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
11217 uint8_t const idxRegValueStore = !TlbState.fSkip
11218 && enmOp == kIemNativeEmitMemOp_Store
11219 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
11220 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
11221 : UINT8_MAX;
11222 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11223 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11224 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11225 : UINT32_MAX;
11226
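    /*
     * The code emitted from here on is laid out roughly as:
     *          jmp     TlbLookup
     *      TlbMiss:
     *          <call pfnFunction>
     *          jmp     TlbDone
     *      TlbLookup:
     *          <inline TLB probe, branching to TlbMiss on a miss>
     *          <inline store/fetch on a hit>
     *      TlbDone:
     * When TlbState.fSkip is set, only the TlbMiss part is emitted and execution simply
     * falls through it.
     */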
11227 /*
11228 * Jump to the TLB lookup code.
11229 */
11230 if (!TlbState.fSkip)
11231 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11232
11233 /*
11234 * TlbMiss:
11235 *
11236 * Call helper to do the fetching.
11237 * We flush all guest register shadow copies here.
11238 */
11239 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
11240
11241#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11242 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11243#else
11244 RT_NOREF(idxInstr);
11245#endif
11246
11247#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11248 /* Save variables in volatile registers. */
11249 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11250 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
11251 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
11252 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11253#endif
11254
11255 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
11256 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
11257 if (enmOp == kIemNativeEmitMemOp_Store)
11258 {
11259 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
11260        off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*offAddend*/,
11261#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11262 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11263#else
11264 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
11265 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
11266#endif
11267 }
11268
11269 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
11270    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*offAddend*/,
11271#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11272 fVolGregMask);
11273#else
11274 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
11275#endif
11276
11277 if (iSegReg != UINT8_MAX)
11278 {
11279 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
11280 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
11281 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
11282 }
11283
11284 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11285 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11286
11287 /* Done setting up parameters, make the call. */
11288 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11289
11290 /*
11291 * Put the result in the right register if this is a fetch.
11292 */
11293 if (enmOp != kIemNativeEmitMemOp_Store)
11294 {
11295 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
11296 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
11297 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
11298 }
11299
11300#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11301 /* Restore variables and guest shadow registers to volatile registers. */
11302 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11303 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11304#endif
11305
11306#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11307 if (!TlbState.fSkip)
11308 {
11309 /* end of TlbMiss - Jump to the done label. */
11310 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11311 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11312
11313 /*
11314 * TlbLookup:
11315 */
11316 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
11317 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
11318 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
11319
11320 /*
11321 * Emit code to do the actual storing / fetching.
11322 */
11323 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
11324# ifdef VBOX_WITH_STATISTICS
11325 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
11326 enmOp == kIemNativeEmitMemOp_Store
11327                                                      ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
11328                                                      : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
11329# endif
11330 switch (enmOp)
11331 {
11332 case kIemNativeEmitMemOp_Store:
11333 if (pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate)
11334 {
11335 switch (cbMem)
11336 {
11337 case 1:
11338 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11339 break;
11340 case 2:
11341 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11342 break;
11343 case 4:
11344 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11345 break;
11346 case 8:
11347 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11348 break;
11349 default:
11350 AssertFailed();
11351 }
11352 }
11353 else
11354 {
11355 switch (cbMem)
11356 {
11357 case 1:
11358 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off,
11359 (uint8_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11360 idxRegMemResult, TlbState.idxReg1);
11361 break;
11362 case 2:
11363 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
11364 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11365 idxRegMemResult, TlbState.idxReg1);
11366 break;
11367 case 4:
11368 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
11369 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11370 idxRegMemResult, TlbState.idxReg1);
11371 break;
11372 case 8:
11373 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
11374 idxRegMemResult, TlbState.idxReg1);
11375 break;
11376 default:
11377 AssertFailed();
11378 }
11379 }
11380 break;
11381
11382 case kIemNativeEmitMemOp_Fetch:
11383 case kIemNativeEmitMemOp_Fetch_Zx_U16:
11384 case kIemNativeEmitMemOp_Fetch_Zx_U32:
11385 case kIemNativeEmitMemOp_Fetch_Zx_U64:
11386 switch (cbMem)
11387 {
11388 case 1:
11389 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11390 break;
11391 case 2:
11392 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11393 break;
11394 case 4:
11395 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11396 break;
11397 case 8:
11398 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11399 break;
11400 default:
11401 AssertFailed();
11402 }
11403 break;
11404
11405 case kIemNativeEmitMemOp_Fetch_Sx_U16:
11406 Assert(cbMem == 1);
11407 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11408 break;
11409
11410 case kIemNativeEmitMemOp_Fetch_Sx_U32:
11411 Assert(cbMem == 1 || cbMem == 2);
11412 if (cbMem == 1)
11413 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11414 else
11415 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11416 break;
11417
11418 case kIemNativeEmitMemOp_Fetch_Sx_U64:
11419 switch (cbMem)
11420 {
11421 case 1:
11422 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11423 break;
11424 case 2:
11425 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11426 break;
11427 case 4:
11428 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11429 break;
11430 default:
11431 AssertFailed();
11432 }
11433 break;
11434
11435 default:
11436 AssertFailed();
11437 }
11438
11439 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
11440
11441 /*
11442 * TlbDone:
11443 */
11444 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11445
11446 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
11447
11448# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11449 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
11450 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11451# endif
11452 }
11453#else
11454 RT_NOREF(fAlignMask, idxLabelTlbMiss);
11455#endif
11456
11457 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
11458 iemNativeVarRegisterRelease(pReNative, idxVarValue);
11459 return off;
11460}
11461
11462
11463
11464/*********************************************************************************************************************************
11465* Memory fetches (IEM_MEM_FETCH_XXX). *
11466*********************************************************************************************************************************/
11467
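/* Each IEM_MC_FETCH_MEM_* below is the native-recompiler override of the corresponding
   interpreter statement and expands to one call of the common emitter above, e.g.
   IEM_MC_FETCH_MEM_U16(u16Dst, iSeg, GCPtrEff) becomes
       off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, u16Dst, iSeg, GCPtrEff,
                                                  sizeof(uint16_t), sizeof(uint16_t) - 1,
                                                  kIemNativeEmitMemOp_Fetch,
                                                  (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr);
   where u16Dst and GCPtrEff are recompiler variable indices rather than values. */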
11468/* 8-bit segmented: */
11469#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
11470 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
11471 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11472 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11473
11474#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11475 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11476 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11477 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11478
11479#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11480 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11481 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11482 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11483
11484#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11485 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11486 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11487 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11488
11489#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11490 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11491 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11492 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11493
11494#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11495 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11496 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11497 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11498
11499#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11500 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11501 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11502 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11503
11504/* 16-bit segmented: */
11505#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11506 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11507 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11508 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11509
11510#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11511 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11512 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11513 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11514
11515#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11516 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11517 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11518 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11519
11520#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11521 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11522 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11523 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11524
11525#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11526 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11527 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11528 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11529
11530#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11531 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11532 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11533 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11534
11535
11536/* 32-bit segmented: */
11537#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11538 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11539 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11540 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11541
11542#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11543 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11544 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11545 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11546
11547#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11548 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11549 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11550 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11551
11552#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11553 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11554 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11555 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11556
11557
11558/* 64-bit segmented: */
11559#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11560 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11561 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11562 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
11563
11564
11565
11566/* 8-bit flat: */
11567#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
11568 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
11569 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11570 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11571
11572#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
11573 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11574 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11575 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11576
11577#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
11578 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11579 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11580 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11581
11582#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
11583 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11584 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11585 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11586
11587#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
11588 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11589 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11590 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11591
11592#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
11593 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11594 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11595 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11596
11597#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
11598 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11599 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11600 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11601
11602
11603/* 16-bit flat: */
11604#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
11605 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11606 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11607 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11608
11609#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
11610 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11611 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11612 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11613
11614#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
11615 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11616 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11617 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11618
11619#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
11620 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11621 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11622 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11623
11624#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
11625 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11626 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11627 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11628
11629#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
11630 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11631 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11632 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11633
11634/* 32-bit flat: */
11635#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
11636 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11637 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11638 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11639
11640#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
11641 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11642 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11643 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11644
11645#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
11646 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11647 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11648 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11649
11650#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
11651 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11652 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11653 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11654
11655/* 64-bit flat: */
11656#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
11657 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11658 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11659 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
11660
11661
11662
11663/*********************************************************************************************************************************
11664* Memory stores (IEM_MEM_STORE_XXX). *
11665*********************************************************************************************************************************/
11666
11667#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
11668 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
11669 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11670 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11671
11672#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
11673 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
11674 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11675 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11676
11677#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
11678 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
11679 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11680 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11681
11682#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
11683 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
11684 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11685 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11686
11687
11688#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
11689 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
11690 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11691 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11692
11693#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
11694 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
11695 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11696 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11697
11698#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
11699 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
11700 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11701 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11702
11703#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
11704 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
11705 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11706 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11707
11708
11709#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
11710 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11711 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11712
11713#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
11714 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11715 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11716
11717#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
11718 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11719 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11720
11721#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
11722 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11723 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11724
11725
11726#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
11727 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11728 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11729
11730#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
11731 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11732 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11733
11734#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
11735 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11736 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11737
11738#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
11739 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11740 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11741
11742/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
11743 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
11744DECL_INLINE_THROW(uint32_t)
11745iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
11746 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
11747{
11748 /*
11749 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
11750 * to do the grunt work.
11751 */
11752 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
11753 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
11754 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
11755 pfnFunction, idxInstr);
11756 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
11757 return off;
11758}
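/* Example: IEM_MC_STORE_MEM_U32_CONST(X86_SREG_DS, GCPtrEff, UINT32_C(0x12345678)) thus
   creates a 4 byte constant variable holding 0x12345678, pushes it through the common
   store path above (which may emit it as an immediate store on the TLB hit path), and
   frees the temporary variable again. */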
11759
11760
11761
11762/*********************************************************************************************************************************
11763* Stack Accesses. *
11764*********************************************************************************************************************************/
11765/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
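/* Example: IEM_MC_FLAT64_PUSH_U64 passes RT_MAKE_U32_FROM_U8(64, 64, 0, 0), which
   iemNativeEmitStackPush decodes as cbMem = RT_BYTE1(...) / 8 = 8, cBitsFlat =
   RT_BYTE2(...) = 64 and fIsSegReg = false; the SReg variants set the third byte. */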
11766#define IEM_MC_PUSH_U16(a_u16Value) \
11767 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
11768 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
11769#define IEM_MC_PUSH_U32(a_u32Value) \
11770 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
11771 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
11772#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
11773 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
11774 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
11775#define IEM_MC_PUSH_U64(a_u64Value) \
11776 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
11777 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
11778
11779#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
11780 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
11781 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11782#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
11783 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
11784 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
11785#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
11786 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
11787 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
11788
11789#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
11790 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
11791 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11792#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
11793 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
11794 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
11795
11796
11797DECL_FORCE_INLINE_THROW(uint32_t)
11798iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11799{
11800 /* Use16BitSp: */
11801#ifdef RT_ARCH_AMD64
11802 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
11803 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11804#else
11805 /* sub regeff, regrsp, #cbMem */
11806 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
11807 /* and regeff, regeff, #0xffff */
11808 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
11809 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
11810    /* bfi regrsp, regeff, #0, #16 - copies bits 15:0 of idxRegEffSp into bits 15:0 of idxRegRsp. */
11811 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
11812#endif
11813 return off;
11814}
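/* Illustration: with SP=0x0004 and cbMem=2 the above leaves the low word of idxRegRsp at
   0x0002 (the upper RSP bits are not touched) and idxRegEffSp holding 0x0002 as the
   offset to store at; the SS base is applied later by the TLB lookup / helper call. */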
11815
11816
11817DECL_FORCE_INLINE(uint32_t)
11818iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11819{
11820 /* Use32BitSp: */
11821 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
11822 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11823 return off;
11824}
11825
11826
11827/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
11828DECL_INLINE_THROW(uint32_t)
11829iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
11830 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
11831{
11832 /*
11833 * Assert sanity.
11834 */
11835 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11836#ifdef VBOX_STRICT
11837 if (RT_BYTE2(cBitsVarAndFlat) != 0)
11838 {
11839 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11840 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11841 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11842 Assert( pfnFunction
11843 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
11844 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
11845 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
11846 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
11847 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
11848 : UINT64_C(0xc000b000a0009000) ));
11849 }
11850 else
11851 Assert( pfnFunction
11852 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
11853 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
11854 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
11855 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
11856 : UINT64_C(0xc000b000a0009000) ));
11857#endif
11858
11859#ifdef VBOX_STRICT
11860 /*
11861 * Check that the fExec flags we've got make sense.
11862 */
11863 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11864#endif
11865
11866 /*
11867 * To keep things simple we have to commit any pending writes first as we
11868 * may end up making calls.
11869 */
11870 /** @todo we could postpone this till we make the call and reload the
11871 * registers after returning from the call. Not sure if that's sensible or
11872 * not, though. */
11873 off = iemNativeRegFlushPendingWrites(pReNative, off);
11874
11875 /*
11876 * First we calculate the new RSP and the effective stack pointer value.
11877 * For 64-bit mode and flat 32-bit these two are the same.
11878 * (Code structure is very similar to that of PUSH)
11879 */
11880 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
11881 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
11882 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
11883 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
11884 ? cbMem : sizeof(uint16_t);
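    /* I.e. an Intel 32-bit segment register push outside 16-bit code only writes the low
       16 bits of the stack slot (cbMemAccess = 2); in 16-bit/real mode all 4 bytes are
       written and the odd upper half is filled in by the TlbLookup store code further
       down (see the 10890XE note there). */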
11885 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
11886 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
11887 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
11888 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
11889 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
11890 if (cBitsFlat != 0)
11891 {
11892 Assert(idxRegEffSp == idxRegRsp);
11893 Assert(cBitsFlat == 32 || cBitsFlat == 64);
11894 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
11895 if (cBitsFlat == 64)
11896 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
11897 else
11898 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
11899 }
11900 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
11901 {
11902 Assert(idxRegEffSp != idxRegRsp);
11903 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
11904 kIemNativeGstRegUse_ReadOnly);
11905#ifdef RT_ARCH_AMD64
11906 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11907#else
11908 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11909#endif
11910 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
11911 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
11912 offFixupJumpToUseOtherBitSp = off;
11913 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11914 {
11915 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
11916 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11917 }
11918 else
11919 {
11920 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
11921 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11922 }
11923 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11924 }
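    /* What was just emitted is essentially:
     *      test    <SS attrib reg>, X86DESCATTR_D
     *      jcc     <other-width SP update>     ; target patched via offFixupJumpToUseOtherBitSp
     *      <SP update for the current default width>
     * with the other-width update emitted out of line after the TLB dispatch jump below. */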
11925 /* SpUpdateEnd: */
11926 uint32_t const offLabelSpUpdateEnd = off;
11927
11928 /*
11929     * Okay, now prepare for the TLB lookup and jump to its code (or to the TlbMiss
11930     * code if we're skipping the lookup).
11931 */
11932 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
11933 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
11934 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11935 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
11936 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11937 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11938 : UINT32_MAX;
11939 uint8_t const idxRegValue = !TlbState.fSkip
11940 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
11941 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
11942 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
11943 : UINT8_MAX;
11944 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11945
11946
11947 if (!TlbState.fSkip)
11948 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11949 else
11950 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
11951
11952    /*
11953     * Use16BitSp / Use32BitSp (whichever wasn't handled inline above):
11954     */
11955 if (cBitsFlat == 0)
11956 {
11957#ifdef RT_ARCH_AMD64
11958 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11959#else
11960 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11961#endif
11962 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
11963 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11964 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11965 else
11966 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11967 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
11968 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11969 }
11970
11971 /*
11972 * TlbMiss:
11973 *
11974 * Call helper to do the pushing.
11975 */
11976 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
11977
11978#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11979 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11980#else
11981 RT_NOREF(idxInstr);
11982#endif
11983
11984 /* Save variables in volatile registers. */
11985 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11986 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
11987 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
11988 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
11989 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11990
11991 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
11992 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
11993 {
11994 /* Swap them using ARG0 as temp register: */
11995 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
11996 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
11997 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
11998 }
11999 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
12000 {
12001 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
12002 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
12003 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12004
12005 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
12006 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12007 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12008 }
12009 else
12010 {
12011 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
12012 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12013
12014 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
12015 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
12016 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
12017 }
12018
12019 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12020 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12021
12022 /* Done setting up parameters, make the call. */
12023 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12024
12025 /* Restore variables and guest shadow registers to volatile registers. */
12026 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12027 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12028
12029#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12030 if (!TlbState.fSkip)
12031 {
12032 /* end of TlbMiss - Jump to the done label. */
12033 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12034 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12035
12036 /*
12037 * TlbLookup:
12038 */
12039 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
12040 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12041
12042 /*
12043 * Emit code to do the actual storing / fetching.
12044 */
12045 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
12046# ifdef VBOX_WITH_STATISTICS
12047 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12048 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12049# endif
12050 if (idxRegValue != UINT8_MAX)
12051 {
12052 switch (cbMemAccess)
12053 {
12054 case 2:
12055 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12056 break;
12057 case 4:
12058 if (!fIsIntelSeg)
12059 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12060 else
12061 {
12062                            /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
12063                               PUSH FS in real mode, so we have to try to emulate that here.
12064 We borrow the now unused idxReg1 from the TLB lookup code here. */
12065 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
12066 kIemNativeGstReg_EFlags);
12067 if (idxRegEfl != UINT8_MAX)
12068 {
12069#ifdef RT_ARCH_AMD64
12070 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
12071 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12072 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12073#else
12074 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
12075 off, TlbState.idxReg1, idxRegEfl,
12076 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12077#endif
12078 iemNativeRegFreeTmp(pReNative, idxRegEfl);
12079 }
12080 else
12081 {
12082 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
12083 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
12084 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12085 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12086 }
12087 /* ASSUMES the upper half of idxRegValue is ZERO. */
12088 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
12089 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
12090 }
12091 break;
12092 case 8:
12093 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12094 break;
12095 default:
12096 AssertFailed();
12097 }
12098 }
12099 else
12100 {
12101 switch (cbMemAccess)
12102 {
12103 case 2:
12104 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
12105 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
12106 idxRegMemResult, TlbState.idxReg1);
12107 break;
12108 case 4:
12109 Assert(!fIsSegReg);
12110 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
12111 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
12112 idxRegMemResult, TlbState.idxReg1);
12113 break;
12114 case 8:
12115 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
12116 idxRegMemResult, TlbState.idxReg1);
12117 break;
12118 default:
12119 AssertFailed();
12120 }
12121 }
12122
12123 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12124 TlbState.freeRegsAndReleaseVars(pReNative);
12125
12126 /*
12127 * TlbDone:
12128 *
12129 * Commit the new RSP value.
12130 */
12131 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12132 }
12133#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12134
12135 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
12136 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12137 if (idxRegEffSp != idxRegRsp)
12138 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12139
12140    /* The value variable is implicitly flushed. */
12141 if (idxRegValue != UINT8_MAX)
12142 iemNativeVarRegisterRelease(pReNative, idxVarValue);
12143 iemNativeVarFreeLocal(pReNative, idxVarValue);
12144
12145 return off;
12146}
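/*
 * Note: rough, non-authoritative sketch of the native code layout the push
 *       emitter above produces (labels follow the kIemNativeLabelType_* values
 *       used above; the TlbMiss helper arguments are as set up in the code):
 *
 *          SpUpdateEnd: effective SP computed, RSP updated (16/32/64-bit SP).
 *                       jmp TlbLookup   ; or jmp TlbMiss when TlbState.fSkip.
 *          TlbMiss:     save volatiles; call pfnFunction(pVCpu, EffSp, uValue);
 *                       restore volatiles + guest shadows; jmp TlbDone.
 *          TlbLookup:   inline data TLB probe; on a hit, store the value directly.
 *          TlbDone:     commit the updated RSP to cpum.GstCtx.rsp.
 */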
12147
12148
12149
12150/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
12151#define IEM_MC_POP_GREG_U16(a_iGReg) \
12152 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12153 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
12154#define IEM_MC_POP_GREG_U32(a_iGReg) \
12155 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12156 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
12157#define IEM_MC_POP_GREG_U64(a_iGReg) \
12158 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12159 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
12160
12161#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
12162 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12163 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12164#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
12165 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12166 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
12167
12168#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
12169 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12170 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12171#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
12172 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12173 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
12174
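/*
 * Note: illustrative decoding of the cBitsVarAndFlat packing used by the POP
 *       macros above; the emitter below extracts the fields exactly like this
 *       (the concrete values shown are just one example):
 */
#if 0
    uint32_t const cBitsVarAndFlat = RT_MAKE_U32_FROM_U8(16, 64, 0, 0);  /* IEM_MC_FLAT64_POP_GREG_U16 */
    uint8_t  const cbMem           = RT_BYTE1(cBitsVarAndFlat) / 8;      /* = 2: popping a word */
    uint8_t  const cBitsFlat       = RT_BYTE2(cBitsVarAndFlat);          /* = 64: flat 64-bit stack, RSP used as-is */
#endif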
12175
12176DECL_FORCE_INLINE_THROW(uint32_t)
12177iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
12178 uint8_t idxRegTmp)
12179{
12180 /* Use16BitSp: */
12181#ifdef RT_ARCH_AMD64
12182 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12183 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12184 RT_NOREF(idxRegTmp);
12185#else
12186 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
12187 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
12188 /* add tmp, regrsp, #cbMem */
12189 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
12190 /* and tmp, tmp, #0xffff */
12191 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12192 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
12193    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
12194 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
12195#endif
12196 return off;
12197}
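/*
 * Note: worked example (made-up values) of the 16-bit SP handling above: for a
 *       4 byte pop with RSP=0x000012345678fffe the code yields
 *       EffSp=0x000000000000fffe and RSP=0x0000123456780002, i.e. only SP
 *       (bits 15:0) is incremented and allowed to wrap while RSP bits 63:16
 *       stay untouched.
 */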
12198
12199
12200DECL_FORCE_INLINE(uint32_t)
12201iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12202{
12203 /* Use32BitSp: */
12204 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12205 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12206 return off;
12207}
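/*
 * Note: no masking is needed in the 32-bit variant: 32-bit register writes
 *       zero-extend to 64 bits on both AMD64 and ARM64 hosts, so the Gpr32
 *       load and add above leave idxRegEffSp/idxRegRsp with a zero upper half.
 */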
12208
12209
12210/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
12211DECL_INLINE_THROW(uint32_t)
12212iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
12213 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12214{
12215 /*
12216 * Assert sanity.
12217 */
12218 Assert(idxGReg < 16);
12219#ifdef VBOX_STRICT
12220 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12221 {
12222 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12223 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12224 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12225 Assert( pfnFunction
12226 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12227 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
12228 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12229 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
12230 : UINT64_C(0xc000b000a0009000) ));
12231 }
12232 else
12233 Assert( pfnFunction
12234 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
12235 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
12236 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
12237 : UINT64_C(0xc000b000a0009000) ));
12238#endif
12239
12240#ifdef VBOX_STRICT
12241 /*
12242 * Check that the fExec flags we've got make sense.
12243 */
12244 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12245#endif
12246
12247 /*
12248 * To keep things simple we have to commit any pending writes first as we
12249 * may end up making calls.
12250 */
12251 off = iemNativeRegFlushPendingWrites(pReNative, off);
12252
12253 /*
12254 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
12255 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
12256 * directly as the effective stack pointer.
12257 * (Code structure is very similar to that of PUSH)
12258 */
12259 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12260 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12261 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12262 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12263 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12264 /** @todo can do a better job picking the register here. For cbMem >= 4 this
12265 * will be the resulting register value. */
12266 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
12267
12268 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12269 if (cBitsFlat != 0)
12270 {
12271 Assert(idxRegEffSp == idxRegRsp);
12272 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12273 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12274 }
12275 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12276 {
12277 Assert(idxRegEffSp != idxRegRsp);
12278 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12279 kIemNativeGstRegUse_ReadOnly);
12280#ifdef RT_ARCH_AMD64
12281 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12282#else
12283 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12284#endif
12285 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12286 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12287 offFixupJumpToUseOtherBitSp = off;
12288 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12289 {
12290/** @todo can skip idxRegRsp updating when popping ESP. */
12291 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12292 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12293 }
12294 else
12295 {
12296 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12297 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12298 }
12299 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12300 }
12301 /* SpUpdateEnd: */
12302 uint32_t const offLabelSpUpdateEnd = off;
12303
12304 /*
12305     * Okay, now prepare for the TLB lookup and jump to its code (or straight
12306     * to the TlbMiss code if we're skipping the lookup).
12307 */
12308 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12309 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
12310 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12311 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12312 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12313 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12314 : UINT32_MAX;
12315
12316 if (!TlbState.fSkip)
12317 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12318 else
12319 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12320
12321 /*
12322     * Use16BitSp / Use32BitSp (the SP size branch not taken above):
12323 */
12324 if (cBitsFlat == 0)
12325 {
12326#ifdef RT_ARCH_AMD64
12327 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12328#else
12329 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12330#endif
12331 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12332 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12333 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12334 else
12335 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12336 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12337 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12338 }
12339
12340 /*
12341 * TlbMiss:
12342 *
12343     * Call helper to do the popping.
12344 */
12345 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12346
12347#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12348 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12349#else
12350 RT_NOREF(idxInstr);
12351#endif
12352
12353 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12354 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12355 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
12356 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12357
12358
12359 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
12360 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12361 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12362
12363 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12364 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12365
12366 /* Done setting up parameters, make the call. */
12367 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12368
12369 /* Move the return register content to idxRegMemResult. */
12370 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12371 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12372
12373 /* Restore variables and guest shadow registers to volatile registers. */
12374 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12375 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12376
12377#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12378 if (!TlbState.fSkip)
12379 {
12380 /* end of TlbMiss - Jump to the done label. */
12381 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12382 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12383
12384 /*
12385 * TlbLookup:
12386 */
12387 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
12388 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12389
12390 /*
12391     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
12392 */
12393 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12394# ifdef VBOX_WITH_STATISTICS
12395 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12396 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12397# endif
12398 switch (cbMem)
12399 {
12400 case 2:
12401 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12402 break;
12403 case 4:
12404 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12405 break;
12406 case 8:
12407 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12408 break;
12409 default:
12410 AssertFailed();
12411 }
12412
12413 TlbState.freeRegsAndReleaseVars(pReNative);
12414
12415 /*
12416 * TlbDone:
12417 *
12418     * Set the new RSP value (FLAT accesses need to calculate it first) and
12419 * commit the popped register value.
12420 */
12421 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12422 }
12423#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12424
12425 if (idxGReg != X86_GREG_xSP)
12426 {
12427 /* Set the register. */
12428 if (cbMem >= sizeof(uint32_t))
12429 {
12430#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
12431 AssertMsg( pReNative->idxCurCall == 0
12432 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
12433 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
12434#endif
12435 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
12436 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
12437 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12438 }
12439 else
12440 {
12441 Assert(cbMem == sizeof(uint16_t));
12442 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
12443 kIemNativeGstRegUse_ForUpdate);
12444 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
12445 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12446 iemNativeRegFreeTmp(pReNative, idxRegDst);
12447 }
12448
12449 /* Complete RSP calculation for FLAT mode. */
12450 if (idxRegEffSp == idxRegRsp)
12451 {
12452 if (cBitsFlat == 64)
12453 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12454 else
12455 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12456 }
12457 }
12458 else
12459 {
12460        /* We're popping RSP, ESP or SP. Only the 16-bit SP case requires a bit of extra work, of course. */
12461 if (cbMem == sizeof(uint64_t))
12462 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
12463 else if (cbMem == sizeof(uint32_t))
12464 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
12465 else
12466 {
12467 if (idxRegEffSp == idxRegRsp)
12468 {
12469 if (cBitsFlat == 64)
12470 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12471 else
12472 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12473 }
12474 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
12475 }
12476 }
12477 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
12478
12479 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12480 if (idxRegEffSp != idxRegRsp)
12481 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12482 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12483
12484 return off;
12485}
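/*
 * Note: illustrative only - roughly how the IEM_MC_POP_GREG_* macros above are
 *       used from an MC block; the IEM_MC_BEGIN arguments are placeholders and
 *       not copied from the real instruction implementations:
 */
#if 0
    IEM_MC_BEGIN(0, 0, 0, 0);
    IEM_MC_POP_GREG_U64(X86_GREG_xAX);      /* e.g. 'pop rax' in 64-bit code */
    IEM_MC_ADVANCE_RIP_AND_FINISH();
    IEM_MC_END();
#endif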
12486
12487
12488
12489/*********************************************************************************************************************************
12490* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
12491*********************************************************************************************************************************/
12492
12493#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12494 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12495 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
12496 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
12497
12498#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12499 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12500 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
12501 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
12502
12503#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12504 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12505 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
12506 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
12507
12508#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12509 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12510 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
12511 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
12512
12513
12514#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12515 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12516 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12517 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
12518
12519#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12520 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12521 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12522 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
12523
12524#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12525 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12526 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12527 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12528
12529#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12530 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12531 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12532 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
12533
12534#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12535 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
12536 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12537 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12538
12539
12540#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12541 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12542 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12543 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
12544
12545#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12546 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12547 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12548 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
12549
12550#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12551 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12552 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12553 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12554
12555#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12556 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12557 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12558 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
12559
12560#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12561 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
12562 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12563 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12564
12565
12566#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12567 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12568 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12569 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
12570
12571#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12572 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12573 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12574 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
12575#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12576 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12577 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12578 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12579
12580#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12581 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12582 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12583 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
12584
12585#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12586 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
12587 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12588 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12589
12590
12591#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12592 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12593 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12594 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
12595
12596#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12597 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12598 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12599 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
12600
12601
12602#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12603 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12604 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12605 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
12606
12607#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12608 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12609 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12610 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
12611
12612#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12613 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12614 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12615 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
12616
12617#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12618 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12619 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12620 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
12621
12622
12623
12624#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12625 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12626 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
12627 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
12628
12629#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12630 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12631 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
12632 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
12633
12634#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12635 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12636 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
12637 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
12638
12639#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12640 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12641 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
12642 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
12643
12644
12645#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12646 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12647 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12648 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
12649
12650#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12651 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12652 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12653 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
12654
12655#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12656 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12657 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12658 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12659
12660#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12661 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12662 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12663 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
12664
12665#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
12666 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
12667 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12668 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12669
12670
12671#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12672 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12673 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12674 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
12675
12676#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12677 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12678 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12679 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
12680
12681#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12682 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12683 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12684 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12685
12686#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12687 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12688 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12689 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
12690
12691#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
12692 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
12693 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12694 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12695
12696
12697#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12698 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12699 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12700 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
12701
12702#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12703 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12704 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12705 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
12706
12707#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12708 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12709 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12710 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12711
12712#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12713 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12714 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12715 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
12716
12717#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
12718 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
12719 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12720 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12721
12722
12723#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
12724 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12725 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12726 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
12727
12728#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
12729 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12730 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12731 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
12732
12733
12734#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12735 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12736 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12737 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
12738
12739#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12740 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12741 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12742 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
12743
12744#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12745 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12746 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12747 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
12748
12749#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12750 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12751 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12752 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
12753
12754
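/*
 * Note: two conventions in the map macros above worth spelling out: the FLAT
 *       variants pass UINT8_MAX as iSegReg (no segment applied), and fAlignMask
 *       is the natural alignment mask of the access, i.e. sizeof(type) - 1
 *       (0 for bytes, 1 for words, 3 for dwords, 7 for qwords, 15 for dqwords);
 *       the TLB lookup code presumably routes accesses failing that mask to the
 *       TlbMiss/helper path.
 */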
12755DECL_INLINE_THROW(uint32_t)
12756iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
12757 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
12758 uintptr_t pfnFunction, uint8_t idxInstr)
12759{
12760 /*
12761 * Assert sanity.
12762 */
12763 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
12764 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
12765 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
12766 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12767
12768 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
12769 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
12770 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
12771 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12772
12773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12774 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
12775 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
12776 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12777
12778 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12779
12780 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12781
12782#ifdef VBOX_STRICT
12783# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
12784 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
12785 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
12786 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
12787 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
12788# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
12789 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
12790 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
12791 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
12792
12793 if (iSegReg == UINT8_MAX)
12794 {
12795 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12796 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12797 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12798 switch (cbMem)
12799 {
12800 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
12801 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
12802 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
12803 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
12804 case 10:
12805 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
12806 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
12807 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
12808 break;
12809 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
12810# if 0
12811 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
12812 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
12813# endif
12814 default: AssertFailed(); break;
12815 }
12816 }
12817 else
12818 {
12819 Assert(iSegReg < 6);
12820 switch (cbMem)
12821 {
12822 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
12823 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
12824 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
12825 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
12826 case 10:
12827 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
12828 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
12829 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
12830 break;
12831 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
12832# if 0
12833 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
12834 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
12835# endif
12836 default: AssertFailed(); break;
12837 }
12838 }
12839# undef IEM_MAP_HLP_FN
12840# undef IEM_MAP_HLP_FN_NO_AT
12841#endif
12842
12843#ifdef VBOX_STRICT
12844 /*
12845 * Check that the fExec flags we've got make sense.
12846 */
12847 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12848#endif
12849
12850 /*
12851 * To keep things simple we have to commit any pending writes first as we
12852 * may end up making calls.
12853 */
12854 off = iemNativeRegFlushPendingWrites(pReNative, off);
12855
12856#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12857 /*
12858 * Move/spill/flush stuff out of call-volatile registers.
12859 * This is the easy way out. We could contain this to the tlb-miss branch
12860 * by saving and restoring active stuff here.
12861 */
12862 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
12863 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
12864#endif
12865
12866    /* The bUnmapInfo variable will get a register in the tlb-hit code path,
12867       while the tlb-miss codepath will temporarily put it on the stack.
12868       Set the type to stack here so we don't need to do it twice below. */
12869 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
12870 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
12871 /** @todo use a tmp register from TlbState, since they'll be free after tlb
12872 * lookup is done. */
12873
12874 /*
12875 * Define labels and allocate the result register (trying for the return
12876 * register if we can).
12877 */
12878 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12879 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
12880 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
12881 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
12882 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
12883 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12884 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12885 : UINT32_MAX;
12886//off=iemNativeEmitBrk(pReNative, off, 0);
12887 /*
12888 * Jump to the TLB lookup code.
12889 */
12890 if (!TlbState.fSkip)
12891 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12892
12893 /*
12894 * TlbMiss:
12895 *
12896 * Call helper to do the fetching.
12897 * We flush all guest register shadow copies here.
12898 */
12899 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
12900
12901#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12902 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12903#else
12904 RT_NOREF(idxInstr);
12905#endif
12906
12907#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12908 /* Save variables in volatile registers. */
12909 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
12910 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12911#endif
12912
12913 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
12914 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
12915#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12916 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
12917#else
12918 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12919#endif
12920
12921 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
12922 if (iSegReg != UINT8_MAX)
12923 {
12924 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
12925 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
12926 }
12927
12928 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
12929 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
12930 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
12931
12932 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12933 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12934
12935 /* Done setting up parameters, make the call. */
12936 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12937
12938 /*
12939 * Put the output in the right registers.
12940 */
12941 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
12942 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12943 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12944
12945#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12946 /* Restore variables and guest shadow registers to volatile registers. */
12947 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12948 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12949#endif
12950
12951 Assert(pReNative->Core.aVars[idxVarUnmapInfo].idxReg == idxRegUnmapInfo);
12952 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
12953
12954#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12955 if (!TlbState.fSkip)
12956 {
12957        /* end of TlbMiss - Jump to the done label. */
12958 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12959 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12960
12961 /*
12962 * TlbLookup:
12963 */
12964 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
12965 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12966# ifdef VBOX_WITH_STATISTICS
12967 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
12968 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
12969# endif
12970
12971 /* [idxVarUnmapInfo] = 0; */
12972 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
12973
12974 /*
12975 * TlbDone:
12976 */
12977 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12978
12979 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
12980
12981# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12982 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
12983 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12984# endif
12985 }
12986#else
12987 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
12988#endif
12989
12990 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
12991 iemNativeVarRegisterRelease(pReNative, idxVarMem);
12992
12993 return off;
12994}
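/*
 * Note: the TlbMiss argument setup above implies the following rough shape for
 *       the map helpers; this is inferred here for readability only, the
 *       iemNativeHlpMemMapData* declarations are authoritative:
 */
#if 0
void *iemNativeHlpMemMapDataU16Rw(PVMCPUCC pVCpu,        /* IEMNATIVE_CALL_ARG0_GREG                          */
                                  uint8_t *pbUnmapInfo,  /* IEMNATIVE_CALL_ARG1_GREG, bUnmapInfo stack slot   */
                                  RTGCPTR  GCPtrMem,     /* IEMNATIVE_CALL_ARG2_GREG                          */
                                  uint8_t  iSegReg);     /* IEMNATIVE_CALL_ARG3_GREG, skipped by FLAT helpers */
/* The mapped host address comes back in IEMNATIVE_CALL_RET_GREG. */
#endif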
12995
12996
12997#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
12998 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
12999 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
13000
13001#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
13002 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
13003 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
13004
13005#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
13006 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
13007 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
13008
13009#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
13010 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
13011 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
13012
13013DECL_INLINE_THROW(uint32_t)
13014iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
13015 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
13016{
13017 /*
13018 * Assert sanity.
13019 */
13020 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
13021 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
13022 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
13023 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
13024#ifdef VBOX_STRICT
13025 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
13026 {
13027 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
13028 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
13029 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
13030 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
13031 case IEM_ACCESS_TYPE_WRITE:
13032 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
13033 case IEM_ACCESS_TYPE_READ:
13034 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
13035 default: AssertFailed();
13036 }
13037#else
13038 RT_NOREF(fAccess);
13039#endif
13040
13041 /*
13042 * To keep things simple we have to commit any pending writes first as we
13043 * may end up making calls (there shouldn't be any at this point, so this
13044 * is just for consistency).
13045 */
13046 /** @todo we could postpone this till we make the call and reload the
13047 * registers after returning from the call. Not sure if that's sensible or
13048 * not, though. */
13049 off = iemNativeRegFlushPendingWrites(pReNative, off);
13050
13051 /*
13052 * Move/spill/flush stuff out of call-volatile registers.
13053 *
13054 * We exclude any register holding the bUnmapInfo variable, as we'll be
13055 * checking it after returning from the call and will free it afterwards.
13056 */
13057 /** @todo save+restore active registers and maybe guest shadows in miss
13058 * scenario. */
13059 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
13060
13061 /*
13062 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
13063 * to call the unmap helper function.
13064 *
13065     * The likelihood of it being zero is higher than for the TLB hit when doing
13066     * the mapping, as a TLB miss for a well aligned and unproblematic memory
13067 * access should also end up with a mapping that won't need special unmapping.
13068 */
13069 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
13070 * should speed up things for the pure interpreter as well when TLBs
13071 * are enabled. */
13072#ifdef RT_ARCH_AMD64
13073 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
13074 {
13075 /* test byte [rbp - xxx], 0ffh */
13076 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
13077 pbCodeBuf[off++] = 0xf6;
13078 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
13079 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
13080 pbCodeBuf[off++] = 0xff;
13081 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13082 }
13083 else
13084#endif
13085 {
13086 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
13087 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
13088 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
13089 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13090 }
13091 uint32_t const offJmpFixup = off;
13092 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
13093
13094 /*
13095 * Call the unmap helper function.
13096 */
13097#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
13098 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13099#else
13100 RT_NOREF(idxInstr);
13101#endif
13102
13103 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
13104 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
13105 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13106
13107 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13108 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13109
13110 /* Done setting up parameters, make the call. */
13111 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13112
13113    /* The bUnmapInfo variable is implicitly freed by these MCs. */
13114 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
13115
13116 /*
13117 * Done, just fixup the jump for the non-call case.
13118 */
13119 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
13120
13121 return off;
13122}
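/*
 * Note: illustrative only - how an MC block typically pairs the map macros with
 *       the commit-and-unmap macros above; pu16Dst, bUnmapInfo, iEffSeg and
 *       GCPtrEffDst are placeholder MC variable names, not copied from the real
 *       instruction implementations:
 */
#if 0
    IEM_MC_MEM_MAP_U16_RW(pu16Dst, bUnmapInfo, iEffSeg, GCPtrEffDst); /* maps and fills bUnmapInfo */
    /* ... read-modify-write *pu16Dst here ... */
    IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);                       /* commits the write and unmaps */
#endif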
13123
13124
13125
13126/*********************************************************************************************************************************
13127* State and Exceptions *
13128*********************************************************************************************************************************/
13129
13130#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13131#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13132
13133#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13134#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13135#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13136
13137#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13138#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13139#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13140
13141
13142DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
13143{
13144 /** @todo this needs a lot more work later. */
13145 RT_NOREF(pReNative, fForChange);
13146 return off;
13147}
13148
13149
13150/*********************************************************************************************************************************
13151* The native code generator functions for each MC block. *
13152*********************************************************************************************************************************/
13153
13154
13155/*
13156 * Include g_apfnIemNativeRecompileFunctions and associated functions.
13157 *
13158 * This should probably live in its own file later, but let's see what the
13159 * compile times turn out to be first.
13160 */
13161#include "IEMNativeFunctions.cpp.h"
13162
13163
13164
13165/*********************************************************************************************************************************
13166* Recompiler Core. *
13167*********************************************************************************************************************************/
13168
13169
13170/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
13171static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
13172{
13173 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
13174 pDis->cbCachedInstr += cbMaxRead;
13175 RT_NOREF(cbMinRead);
13176 return VERR_NO_DATA;
13177}
13178
13179
13180DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
13181{
13182 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
13183 {
13184#define ENTRY(a_Member) { RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member }
13185 ENTRY(fLocalForcedActions),
13186 ENTRY(iem.s.rcPassUp),
13187 ENTRY(iem.s.fExec),
13188 ENTRY(iem.s.pbInstrBuf),
13189 ENTRY(iem.s.uInstrBufPc),
13190 ENTRY(iem.s.GCPhysInstrBuf),
13191 ENTRY(iem.s.cbInstrBufTotal),
13192 ENTRY(iem.s.idxTbCurInstr),
13193#ifdef VBOX_WITH_STATISTICS
13194 ENTRY(iem.s.StatNativeTlbHitsForFetch),
13195 ENTRY(iem.s.StatNativeTlbHitsForStore),
13196 ENTRY(iem.s.StatNativeTlbHitsForStack),
13197 ENTRY(iem.s.StatNativeTlbHitsForMapped),
13198 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
13199 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
13200 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
13201 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
13202#endif
13203 ENTRY(iem.s.DataTlb.aEntries),
13204 ENTRY(iem.s.DataTlb.uTlbRevision),
13205 ENTRY(iem.s.DataTlb.uTlbPhysRev),
13206 ENTRY(iem.s.DataTlb.cTlbHits),
13207 ENTRY(iem.s.CodeTlb.aEntries),
13208 ENTRY(iem.s.CodeTlb.uTlbRevision),
13209 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
13210 ENTRY(iem.s.CodeTlb.cTlbHits),
13211 ENTRY(pVMR3),
13212 ENTRY(cpum.GstCtx.rax),
13213 ENTRY(cpum.GstCtx.ah),
13214 ENTRY(cpum.GstCtx.rcx),
13215 ENTRY(cpum.GstCtx.ch),
13216 ENTRY(cpum.GstCtx.rdx),
13217 ENTRY(cpum.GstCtx.dh),
13218 ENTRY(cpum.GstCtx.rbx),
13219 ENTRY(cpum.GstCtx.bh),
13220 ENTRY(cpum.GstCtx.rsp),
13221 ENTRY(cpum.GstCtx.rbp),
13222 ENTRY(cpum.GstCtx.rsi),
13223 ENTRY(cpum.GstCtx.rdi),
13224 ENTRY(cpum.GstCtx.r8),
13225 ENTRY(cpum.GstCtx.r9),
13226 ENTRY(cpum.GstCtx.r10),
13227 ENTRY(cpum.GstCtx.r11),
13228 ENTRY(cpum.GstCtx.r12),
13229 ENTRY(cpum.GstCtx.r13),
13230 ENTRY(cpum.GstCtx.r14),
13231 ENTRY(cpum.GstCtx.r15),
13232 ENTRY(cpum.GstCtx.es.Sel),
13233 ENTRY(cpum.GstCtx.es.u64Base),
13234 ENTRY(cpum.GstCtx.es.u32Limit),
13235 ENTRY(cpum.GstCtx.es.Attr),
13236 ENTRY(cpum.GstCtx.cs.Sel),
13237 ENTRY(cpum.GstCtx.cs.u64Base),
13238 ENTRY(cpum.GstCtx.cs.u32Limit),
13239 ENTRY(cpum.GstCtx.cs.Attr),
13240 ENTRY(cpum.GstCtx.ss.Sel),
13241 ENTRY(cpum.GstCtx.ss.u64Base),
13242 ENTRY(cpum.GstCtx.ss.u32Limit),
13243 ENTRY(cpum.GstCtx.ss.Attr),
13244 ENTRY(cpum.GstCtx.ds.Sel),
13245 ENTRY(cpum.GstCtx.ds.u64Base),
13246 ENTRY(cpum.GstCtx.ds.u32Limit),
13247 ENTRY(cpum.GstCtx.ds.Attr),
13248 ENTRY(cpum.GstCtx.fs.Sel),
13249 ENTRY(cpum.GstCtx.fs.u64Base),
13250 ENTRY(cpum.GstCtx.fs.u32Limit),
13251 ENTRY(cpum.GstCtx.fs.Attr),
13252 ENTRY(cpum.GstCtx.gs.Sel),
13253 ENTRY(cpum.GstCtx.gs.u64Base),
13254 ENTRY(cpum.GstCtx.gs.u32Limit),
13255 ENTRY(cpum.GstCtx.gs.Attr),
13256 ENTRY(cpum.GstCtx.rip),
13257 ENTRY(cpum.GstCtx.eflags),
13258 ENTRY(cpum.GstCtx.uRipInhibitInt),
13259#undef ENTRY
13260 };
13261#ifdef VBOX_STRICT
13262 static bool s_fOrderChecked = false;
13263 if (!s_fOrderChecked)
13264 {
13265 s_fOrderChecked = true;
13266 uint32_t offPrev = s_aMembers[0].off;
13267 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
13268 {
13269 Assert(s_aMembers[i].off > offPrev);
13270 offPrev = s_aMembers[i].off;
13271 }
13272 }
13273#endif
13274
13275 /*
13276 * Binary lookup.
13277 */
13278 unsigned iStart = 0;
13279 unsigned iEnd = RT_ELEMENTS(s_aMembers);
13280 for (;;)
13281 {
13282 unsigned const iCur = iStart + (iEnd - iStart) / 2;
13283 uint32_t const offCur = s_aMembers[iCur].off;
13284 if (off < offCur)
13285 {
13286 if (iCur != iStart)
13287 iEnd = iCur;
13288 else
13289 break;
13290 }
13291 else if (off > offCur)
13292 {
13293 if (iCur + 1 < iEnd)
13294 iStart = iCur + 1;
13295 else
13296 break;
13297 }
13298 else
13299 return s_aMembers[iCur].pszName;
13300 }
13301#ifdef VBOX_WITH_STATISTICS
13302 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
13303 return "iem.s.acThreadedFuncStats[iFn]";
13304#endif
13305 return NULL;
13306}
13307
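/* Editor's note: a quick usage sketch for iemNativeDbgVCpuOffsetToName().  The lookup
   only matches the exact member offsets listed in s_aMembers (plus the
   acThreadedFuncStats range when statistics are enabled) and returns NULL for
   everything else:

   @code
        const char *psz1 = iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));
        // psz1 == "cpum.GstCtx.rip"
        const char *psz2 = iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip) + 1);
        // psz2 == NULL - offsets into the middle of a member are not resolved.
   @endcode
*/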
13308
13309/**
13310 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) into a string.
13311 * @returns pszBuf.
13312 * @param fFlags The flags.
13313 * @param pszBuf The output buffer.
13314 * @param cbBuf The output buffer size. At least 32 bytes.
13315 */
13316DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
13317{
13318 Assert(cbBuf >= 32);
13319 static RTSTRTUPLE const s_aModes[] =
13320 {
13321 /* [00] = */ { RT_STR_TUPLE("16BIT") },
13322 /* [01] = */ { RT_STR_TUPLE("32BIT") },
13323 /* [02] = */ { RT_STR_TUPLE("!2!") },
13324 /* [03] = */ { RT_STR_TUPLE("!3!") },
13325 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
13326 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
13327 /* [06] = */ { RT_STR_TUPLE("!6!") },
13328 /* [07] = */ { RT_STR_TUPLE("!7!") },
13329 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
13330 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
13331 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
13332 /* [0b] = */ { RT_STR_TUPLE("!b!") },
13333 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
13334 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
13335 /* [0e] = */ { RT_STR_TUPLE("!e!") },
13336 /* [0f] = */ { RT_STR_TUPLE("!f!") },
13337 /* [10] = */ { RT_STR_TUPLE("!10!") },
13338 /* [11] = */ { RT_STR_TUPLE("!11!") },
13339 /* [12] = */ { RT_STR_TUPLE("!12!") },
13340 /* [13] = */ { RT_STR_TUPLE("!13!") },
13341 /* [14] = */ { RT_STR_TUPLE("!14!") },
13342 /* [15] = */ { RT_STR_TUPLE("!15!") },
13343 /* [16] = */ { RT_STR_TUPLE("!16!") },
13344 /* [17] = */ { RT_STR_TUPLE("!17!") },
13345 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
13346 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
13347 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
13348 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
13349 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
13350 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
13351 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
13352 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
13353 };
13354 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
13355 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
13356 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
13357
13358 pszBuf[off++] = ' ';
13359 pszBuf[off++] = 'C';
13360 pszBuf[off++] = 'P';
13361 pszBuf[off++] = 'L';
13362 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
13363 Assert(off < 32);
13364
13365 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
13366
13367 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
13368 {
13369 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
13370 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
13371 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
13372 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
13373 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
13374 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
13375 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
13376 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
13377 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
13378 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
13379 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
13380 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
13381 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
13382 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
13383 };
13384 if (fFlags)
13385 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
13386 if (s_aFlags[i].fFlag & fFlags)
13387 {
13388 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
13389 pszBuf[off++] = ' ';
13390 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
13391 off += s_aFlags[i].cchName;
13392 fFlags &= ~s_aFlags[i].fFlag;
13393 if (!fFlags)
13394 break;
13395 }
13396 pszBuf[off] = '\0';
13397
13398 return pszBuf;
13399}
13400
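/* Editor's note: an output sketch for iemTbFlagsToString() - the mode name comes
   first, then the CPL digit, then one word per remaining set flag.  For a 64-bit,
   CPL 0 native TB the result would be e.g. "64BIT CPL0 TYPE_NATIVE":

   @code
        char szTmp[64];
        Log(("TB flags: %s\n", iemTbFlagsToString(pTb->fFlags, szTmp, sizeof(szTmp))));
   @endcode
*/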
13401
13402DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
13403{
13404 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
13405#if defined(RT_ARCH_AMD64)
13406 static const char * const a_apszMarkers[] =
13407 {
13408 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
13409 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
13410 };
13411#endif
13412
13413 char szDisBuf[512];
13414 DISSTATE Dis;
13415 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
13416 uint32_t const cNative = pTb->Native.cInstructions;
13417 uint32_t offNative = 0;
13418#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13419 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
13420#endif
13421 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
13422 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
13423 : DISCPUMODE_64BIT;
13424#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13425 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
13426#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13427 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
13428#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13429# error "Port me"
13430#else
13431 csh hDisasm = ~(size_t)0;
13432# if defined(RT_ARCH_AMD64)
13433 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
13434# elif defined(RT_ARCH_ARM64)
13435 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
13436# else
13437# error "Port me"
13438# endif
13439 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
13440#endif
13441
13442 /*
13443 * Print TB info.
13444 */
13445 pHlp->pfnPrintf(pHlp,
13446 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
13447 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
13448 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
13449 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
13450#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13451 if (pDbgInfo && pDbgInfo->cEntries > 1)
13452 {
13453 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
13454
13455 /*
13456 * This disassembly is driven by the debug info which follows the native
13457 * code and indicates where the next guest instruction starts, where the
13458 * labels are, and other such things.
13459 */
13460 uint32_t idxThreadedCall = 0;
13461 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
13462 uint8_t idxRange = UINT8_MAX;
13463 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
13464 uint32_t offRange = 0;
13465 uint32_t offOpcodes = 0;
13466 uint32_t const cbOpcodes = pTb->cbOpcodes;
13467 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
13468 uint32_t const cDbgEntries = pDbgInfo->cEntries;
13469 uint32_t iDbgEntry = 1;
13470 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
13471
13472 while (offNative < cNative)
13473 {
13474 /* If we're at or have passed the point where the next chunk of debug
13475 info starts, process it. */
13476 if (offDbgNativeNext <= offNative)
13477 {
13478 offDbgNativeNext = UINT32_MAX;
13479 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
13480 {
13481 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
13482 {
13483 case kIemTbDbgEntryType_GuestInstruction:
13484 {
13485 /* Did the exec flag change? */
13486 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
13487 {
13488 pHlp->pfnPrintf(pHlp,
13489 " fExec change %#08x -> %#08x %s\n",
13490 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
13491 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
13492 szDisBuf, sizeof(szDisBuf)));
13493 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
13494 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
13495 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
13496 : DISCPUMODE_64BIT;
13497 }
13498
13499 /* New opcode range? We need to fend off a spurious debug info entry here for cases
13500 where the compilation was aborted before the opcode was recorded and the actual
13501 instruction was translated to a threaded call. This may happen when we run out
13502 of ranges, or when some complicated interrupts/FFs are found to be pending or
13503 similar. So, we just deal with it here rather than in the compiler code as it
13504 is a lot simpler to do here. */
13505 if ( idxRange == UINT8_MAX
13506 || idxRange >= cRanges
13507 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
13508 {
13509 idxRange += 1;
13510 if (idxRange < cRanges)
13511 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
13512 else
13513 continue;
13514 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
13515 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
13516 + (pTb->aRanges[idxRange].idxPhysPage == 0
13517 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
13518 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
13519 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
13520 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
13521 pTb->aRanges[idxRange].idxPhysPage);
13522 GCPhysPc += offRange;
13523 }
13524
13525 /* Disassemble the instruction. */
13526 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
13527 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
13528 uint32_t cbInstr = 1;
13529 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
13530 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
13531 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13532 if (RT_SUCCESS(rc))
13533 {
13534 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13535 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13536 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13537 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13538
13539 static unsigned const s_offMarker = 55;
13540 static char const s_szMarker[] = " ; <--- guest";
13541 if (cch < s_offMarker)
13542 {
13543 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
13544 cch = s_offMarker;
13545 }
13546 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
13547 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
13548
13549 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
13550 }
13551 else
13552 {
13553 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
13554 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
13555 cbInstr = 1;
13556 }
13557 GCPhysPc += cbInstr;
13558 offOpcodes += cbInstr;
13559 offRange += cbInstr;
13560 continue;
13561 }
13562
13563 case kIemTbDbgEntryType_ThreadedCall:
13564 pHlp->pfnPrintf(pHlp,
13565 " Call #%u to %s (%u args) - %s\n",
13566 idxThreadedCall,
13567 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
13568 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
13569 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
13570 idxThreadedCall++;
13571 continue;
13572
13573 case kIemTbDbgEntryType_GuestRegShadowing:
13574 {
13575 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
13576 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
13577 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
13578 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
13579 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
13580 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
13581 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
13582 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
13583 else
13584 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
13585 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
13586 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
13587 continue;
13588 }
13589
13590 case kIemTbDbgEntryType_Label:
13591 {
13592 const char *pszName = "what_the_fudge";
13593 const char *pszComment = "";
13594 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
13595 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
13596 {
13597 case kIemNativeLabelType_Return:
13598 pszName = "Return";
13599 break;
13600 case kIemNativeLabelType_ReturnBreak:
13601 pszName = "ReturnBreak";
13602 break;
13603 case kIemNativeLabelType_ReturnWithFlags:
13604 pszName = "ReturnWithFlags";
13605 break;
13606 case kIemNativeLabelType_NonZeroRetOrPassUp:
13607 pszName = "NonZeroRetOrPassUp";
13608 break;
13609 case kIemNativeLabelType_RaiseGp0:
13610 pszName = "RaiseGp0";
13611 break;
13612 case kIemNativeLabelType_ObsoleteTb:
13613 pszName = "ObsoleteTb";
13614 break;
13615 case kIemNativeLabelType_NeedCsLimChecking:
13616 pszName = "NeedCsLimChecking";
13617 break;
13618 case kIemNativeLabelType_CheckBranchMiss:
13619 pszName = "CheckBranchMiss";
13620 break;
13621 case kIemNativeLabelType_If:
13622 pszName = "If";
13623 fNumbered = true;
13624 break;
13625 case kIemNativeLabelType_Else:
13626 pszName = "Else";
13627 fNumbered = true;
13628 pszComment = " ; regs state restored pre-if-block";
13629 break;
13630 case kIemNativeLabelType_Endif:
13631 pszName = "Endif";
13632 fNumbered = true;
13633 break;
13634 case kIemNativeLabelType_CheckIrq:
13635 pszName = "CheckIrq_CheckVM";
13636 fNumbered = true;
13637 break;
13638 case kIemNativeLabelType_TlbLookup:
13639 pszName = "TlbLookup";
13640 fNumbered = true;
13641 break;
13642 case kIemNativeLabelType_TlbMiss:
13643 pszName = "TlbMiss";
13644 fNumbered = true;
13645 break;
13646 case kIemNativeLabelType_TlbDone:
13647 pszName = "TlbDone";
13648 fNumbered = true;
13649 break;
13650 case kIemNativeLabelType_Invalid:
13651 case kIemNativeLabelType_End:
13652 break;
13653 }
13654 if (fNumbered)
13655 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
13656 else
13657 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
13658 continue;
13659 }
13660
13661 case kIemTbDbgEntryType_NativeOffset:
13662 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
13663 Assert(offDbgNativeNext > offNative);
13664 break;
13665
13666 default:
13667 AssertFailed();
13668 }
13669 iDbgEntry++;
13670 break;
13671 }
13672 }
13673
13674 /*
13675 * Disassemble the next native instruction.
13676 */
13677 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
13678# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13679 uint32_t cbInstr = sizeof(paNative[0]);
13680 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
13681 if (RT_SUCCESS(rc))
13682 {
13683# if defined(RT_ARCH_AMD64)
13684 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
13685 {
13686 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
13687 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
13688 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
13689 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
13690 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
13691 uInfo & 0x8000 ? "recompiled" : "todo");
13692 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
13693 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
13694 else
13695 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
13696 }
13697 else
13698# endif
13699 {
13700 const char *pszAnnotation = NULL;
13701# ifdef RT_ARCH_AMD64
13702 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13703 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13704 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13705 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13706 PCDISOPPARAM pMemOp;
13707 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
13708 pMemOp = &Dis.Param1;
13709 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
13710 pMemOp = &Dis.Param2;
13711 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
13712 pMemOp = &Dis.Param3;
13713 else
13714 pMemOp = NULL;
13715 if ( pMemOp
13716 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
13717 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
13718 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
13719 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
13720
13721#elif defined(RT_ARCH_ARM64)
13722 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
13723 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13724 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13725# else
13726# error "Port me"
13727# endif
13728 if (pszAnnotation)
13729 {
13730 static unsigned const s_offAnnotation = 55;
13731 size_t const cchAnnotation = strlen(pszAnnotation);
13732 size_t cchDis = strlen(szDisBuf);
13733 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
13734 {
13735 if (cchDis < s_offAnnotation)
13736 {
13737 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
13738 cchDis = s_offAnnotation;
13739 }
13740 szDisBuf[cchDis++] = ' ';
13741 szDisBuf[cchDis++] = ';';
13742 szDisBuf[cchDis++] = ' ';
13743 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
13744 }
13745 }
13746 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
13747 }
13748 }
13749 else
13750 {
13751# if defined(RT_ARCH_AMD64)
13752 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
13753 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
13754# elif defined(RT_ARCH_ARM64)
13755 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
13756# else
13757# error "Port me"
13758# endif
13759 cbInstr = sizeof(paNative[0]);
13760 }
13761 offNative += cbInstr / sizeof(paNative[0]);
13762
13763# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13764 cs_insn *pInstr;
13765 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
13766 (uintptr_t)pNativeCur, 1, &pInstr);
13767 if (cInstrs > 0)
13768 {
13769 Assert(cInstrs == 1);
13770# if defined(RT_ARCH_AMD64)
13771 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
13772 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
13773# else
13774 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
13775 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
13776# endif
13777 offNative += pInstr->size / sizeof(*pNativeCur);
13778 cs_free(pInstr, cInstrs);
13779 }
13780 else
13781 {
13782# if defined(RT_ARCH_AMD64)
13783 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
13784 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
13785# else
13786 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
13787# endif
13788 offNative++;
13789 }
13790# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13791 }
13792 }
13793 else
13794#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
13795 {
13796 /*
13797 * No debug info, just disassemble the x86 code and then the native code.
13798 *
13799 * First the guest code:
13800 */
13801 for (unsigned i = 0; i < pTb->cRanges; i++)
13802 {
13803 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
13804 + (pTb->aRanges[i].idxPhysPage == 0
13805 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
13806 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
13807 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
13808 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
13809 unsigned off = pTb->aRanges[i].offOpcodes;
13810 /** @todo this ain't working when crossing pages! */
13811 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
13812 while (off < cbOpcodes)
13813 {
13814 uint32_t cbInstr = 1;
13815 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
13816 &pTb->pabOpcodes[off], cbOpcodes - off,
13817 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13818 if (RT_SUCCESS(rc))
13819 {
13820 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13821 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13822 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13823 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13824 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
13825 GCPhysPc += cbInstr;
13826 off += cbInstr;
13827 }
13828 else
13829 {
13830 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
13831 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
13832 break;
13833 }
13834 }
13835 }
13836
13837 /*
13838 * Then the native code:
13839 */
13840 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
13841 while (offNative < cNative)
13842 {
13843 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
13844# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13845 uint32_t cbInstr = sizeof(paNative[0]);
13846 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
13847 if (RT_SUCCESS(rc))
13848 {
13849# if defined(RT_ARCH_AMD64)
13850 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
13851 {
13852 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
13853 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
13854 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
13855 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
13856 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
13857 uInfo & 0x8000 ? "recompiled" : "todo");
13858 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
13859 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
13860 else
13861 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
13862 }
13863 else
13864# endif
13865 {
13866# ifdef RT_ARCH_AMD64
13867 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13868 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13869 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13870 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13871# elif defined(RT_ARCH_ARM64)
13872 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
13873 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13874 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13875# else
13876# error "Port me"
13877# endif
13878 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
13879 }
13880 }
13881 else
13882 {
13883# if defined(RT_ARCH_AMD64)
13884 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
13885 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
13886# else
13887 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
13888# endif
13889 cbInstr = sizeof(paNative[0]);
13890 }
13891 offNative += cbInstr / sizeof(paNative[0]);
13892
13893# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13894 cs_insn *pInstr;
13895 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
13896 (uintptr_t)pNativeCur, 1, &pInstr);
13897 if (cInstrs > 0)
13898 {
13899 Assert(cInstrs == 1);
13900# if defined(RT_ARCH_AMD64)
13901 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
13902 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
13903# else
13904 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
13905 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
13906# endif
13907 offNative += pInstr->size / sizeof(*pNativeCur);
13908 cs_free(pInstr, cInstrs);
13909 }
13910 else
13911 {
13912# if defined(RT_ARCH_AMD64)
13913 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
13914 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
13915# else
13916 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
13917# endif
13918 offNative++;
13919 }
13920# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13921 }
13922 }
13923
13924#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13925 /* Cleanup. */
13926 cs_close(&hDisasm);
13927#endif
13928}
13929
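/* Editor's note: besides the LogIs3Enabled() path in iemNativeRecompile() below, the
   disassembler above can also be pointed at the release log, e.g. (sketch):

   @code
        iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());
   @endcode

   which matches the commented-out line near the end of iemNativeRecompile(). */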
13930
13931/**
13932 * Recompiles the given threaded TB into a native one.
13933 *
13934 * In case of failure the translation block will be returned as-is.
13935 *
13936 * @returns pTb.
13937 * @param pVCpu The cross context virtual CPU structure of the calling
13938 * thread.
13939 * @param pTb The threaded translation block to recompile to native.
13940 */
13941DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
13942{
13943 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
13944
13945 /*
13946 * The first time thru, we allocate the recompiler state; the other times
13947 * we just need to reset it before using it again.
13948 */
13949 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
13950 if (RT_LIKELY(pReNative))
13951 iemNativeReInit(pReNative, pTb);
13952 else
13953 {
13954 pReNative = iemNativeInit(pVCpu, pTb);
13955 AssertReturn(pReNative, pTb);
13956 }
13957
13958#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
13959 /*
13960 * First do liveness analysis. This is done backwards.
13961 */
13962 {
13963 uint32_t idxCall = pTb->Thrd.cCalls;
13964 if (idxCall <= pReNative->cLivenessEntriesAlloc)
13965 { /* likely */ }
13966 else
13967 {
13968 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
13969 while (idxCall > cAlloc)
13970 cAlloc *= 2;
13971 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
13972 AssertReturn(pvNew, pTb);
13973 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
13974 pReNative->cLivenessEntriesAlloc = cAlloc;
13975 }
13976 AssertReturn(idxCall > 0, pTb);
13977 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
13978
13979 /* The initial (final) entry. */
13980 idxCall--;
13981 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
13982
13983 /* Loop backwards thru the calls and fill in the other entries. */
13984 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
13985 while (idxCall > 0)
13986 {
13987 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
13988 if (pfnLiveness)
13989 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
13990 else
13991 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
13992 pCallEntry--;
13993 idxCall--;
13994 }
13995
13996# ifdef VBOX_WITH_STATISTICS
13997 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
13998 to 'clobbered' rather than 'input'. */
13999 /** @todo */
14000# endif
14001 }
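 /* Editor's sketch of the backwards dataflow above, assuming three calls A, B and C:
    paLivenessEntries[2] is seeded as all-unused, the liveness function for C then
    derives entry[1] (what C still needs on entry) from it, and the liveness function
    for B derives entry[0] (what B and C still need) from entry[1].  In general,
    entry[i - 1] summarises the requirements of calls i..cCalls-1. */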
14002#endif
14003
14004 /*
14005 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
14006 * for aborting if an error happens.
14007 */
14008 uint32_t cCallsLeft = pTb->Thrd.cCalls;
14009#ifdef LOG_ENABLED
14010 uint32_t const cCallsOrg = cCallsLeft;
14011#endif
14012 uint32_t off = 0;
14013 int rc = VINF_SUCCESS;
14014 IEMNATIVE_TRY_SETJMP(pReNative, rc)
14015 {
14016 /*
14017 * Emit prolog code (fixed).
14018 */
14019 off = iemNativeEmitProlog(pReNative, off);
14020
14021 /*
14022 * Convert the calls to native code.
14023 */
14024#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14025 int32_t iGstInstr = -1;
14026#endif
14027#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
14028 uint32_t cThreadedCalls = 0;
14029 uint32_t cRecompiledCalls = 0;
14030#endif
14031#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
14032 uint32_t idxCurCall = 0;
14033#endif
14034 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
14035 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
14036 while (cCallsLeft-- > 0)
14037 {
14038 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
14039#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14040 pReNative->idxCurCall = idxCurCall;
14041#endif
14042
14043 /*
14044 * Debug info, assembly markup and statistics.
14045 */
14046#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
14047 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
14048 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
14049#endif
14050#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14051 iemNativeDbgInfoAddNativeOffset(pReNative, off);
14052 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
14053 {
14054 if (iGstInstr < (int32_t)pTb->cInstructions)
14055 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
14056 else
14057 Assert(iGstInstr == pTb->cInstructions);
14058 iGstInstr = pCallEntry->idxInstr;
14059 }
14060 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
14061#endif
14062#if defined(VBOX_STRICT)
14063 off = iemNativeEmitMarker(pReNative, off,
14064 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
14065#endif
14066#if defined(VBOX_STRICT)
14067 iemNativeRegAssertSanity(pReNative);
14068#endif
14069#ifdef VBOX_WITH_STATISTICS
14070 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
14071#endif
14072
14073 /*
14074 * Actual work.
14075 */
14076 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
14077 pfnRecom ? "(recompiled)" : "(todo)"));
14078 if (pfnRecom) /** @todo stats on this. */
14079 {
14080 off = pfnRecom(pReNative, off, pCallEntry);
14081 STAM_REL_STATS({cRecompiledCalls++;});
14082 }
14083 else
14084 {
14085 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
14086 STAM_REL_STATS({cThreadedCalls++;});
14087 }
14088 Assert(off <= pReNative->cInstrBufAlloc);
14089 Assert(pReNative->cCondDepth == 0);
14090
14091#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
14092 if (LogIs2Enabled())
14093 {
14094 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
14095# ifndef IEMLIVENESS_EXTENDED_LAYOUT
14096 static const char s_achState[] = "CUXI";
14097# else
14098 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
14099# endif
14100
14101 char szGpr[17];
14102 for (unsigned i = 0; i < 16; i++)
14103 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
14104 szGpr[16] = '\0';
14105
14106 char szSegBase[X86_SREG_COUNT + 1];
14107 char szSegLimit[X86_SREG_COUNT + 1];
14108 char szSegAttrib[X86_SREG_COUNT + 1];
14109 char szSegSel[X86_SREG_COUNT + 1];
14110 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
14111 {
14112 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
14113 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
14114 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
14115 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
14116 }
14117 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
14118 = szSegSel[X86_SREG_COUNT] = '\0';
14119
14120 char szEFlags[8];
14121 for (unsigned i = 0; i < 7; i++)
14122 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
14123 szEFlags[7] = '\0';
14124
14125 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
14126 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
14127 }
14128#endif
14129
14130 /*
14131 * Advance.
14132 */
14133 pCallEntry++;
14134#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
14135 idxCurCall++;
14136#endif
14137 }
14138
14139 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
14140 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
14141 if (!cThreadedCalls)
14142 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
14143
14144 /*
14145 * Emit the epilog code.
14146 */
14147 uint32_t idxReturnLabel;
14148 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
14149
14150 /*
14151 * Generate special jump labels.
14152 */
14153 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
14154 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
14155 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
14156 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
14157 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
14158 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
14159 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
14160 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
14161 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
14162 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
14163 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
14164 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
14165 }
14166 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
14167 {
14168 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
14169 return pTb;
14170 }
14171 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
14172 Assert(off <= pReNative->cInstrBufAlloc);
14173
14174 /*
14175 * Make sure all labels have been defined.
14176 */
14177 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
14178#ifdef VBOX_STRICT
14179 uint32_t const cLabels = pReNative->cLabels;
14180 for (uint32_t i = 0; i < cLabels; i++)
14181 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
14182#endif
14183
14184 /*
14185 * Allocate executable memory, copy over the code we've generated.
14186 */
14187 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
14188 if (pTbAllocator->pDelayedFreeHead)
14189 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
14190
14191 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
14192 AssertReturn(paFinalInstrBuf, pTb);
14193 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
14194
14195 /*
14196 * Apply fixups.
14197 */
14198 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
14199 uint32_t const cFixups = pReNative->cFixups;
14200 for (uint32_t i = 0; i < cFixups; i++)
14201 {
14202 Assert(paFixups[i].off < off);
14203 Assert(paFixups[i].idxLabel < cLabels);
14204 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
14205 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
14206 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
14207 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
14208 switch (paFixups[i].enmType)
14209 {
14210#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
14211 case kIemNativeFixupType_Rel32:
14212 Assert(paFixups[i].off + 4 <= off);
14213 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14214 continue;
14215
14216#elif defined(RT_ARCH_ARM64)
14217 case kIemNativeFixupType_RelImm26At0:
14218 {
14219 Assert(paFixups[i].off < off);
14220 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14221 Assert(offDisp >= -262144 && offDisp < 262144);
14222 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
14223 continue;
14224 }
14225
14226 case kIemNativeFixupType_RelImm19At5:
14227 {
14228 Assert(paFixups[i].off < off);
14229 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14230 Assert(offDisp >= -262144 && offDisp < 262144);
14231 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
14232 continue;
14233 }
14234
14235 case kIemNativeFixupType_RelImm14At5:
14236 {
14237 Assert(paFixups[i].off < off);
14238 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14239 Assert(offDisp >= -8192 && offDisp < 8192);
14240 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
14241 continue;
14242 }
14243
14244#endif
14245 case kIemNativeFixupType_Invalid:
14246 case kIemNativeFixupType_End:
14247 break;
14248 }
14249 AssertFailed();
14250 }
14251
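 /* Editor's note - a worked example of the fixup arithmetic above (sketch, values
    made up for illustration): an ARM64 RelImm26At0 fixup at instruction offset 0x100
    targeting a label at offset 0x120 with offAddend 0 gives
    offDisp = 0x120 - 0x100 + 0 = 0x20, and the patch keeps the top six opcode bits
    while replacing the low 26 bits:

    @code
        uint32_t uInstr = UINT32_C(0x14000000);                     // ARM64 'B +0'
        int32_t const offDisp = 0x120 - 0x100 + 0;                  // in 32-bit instruction units
        uInstr = (uInstr & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
        // uInstr == 0x14000020, i.e. a branch 0x20 instructions forward.
    @endcode

    The AMD64 Rel32 case is the same arithmetic with byte offsets, where offAddend is
    presumably used to account for the displacement being relative to the end of the
    instruction rather than to the fixup position. */
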
14252 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
14253 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
14254
14255 /*
14256 * Convert the translation block.
14257 */
14258 RTMemFree(pTb->Thrd.paCalls);
14259 pTb->Native.paInstructions = paFinalInstrBuf;
14260 pTb->Native.cInstructions = off;
14261 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
14262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14263 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
14264 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
14265#endif
14266
14267 Assert(pTbAllocator->cThreadedTbs > 0);
14268 pTbAllocator->cThreadedTbs -= 1;
14269 pTbAllocator->cNativeTbs += 1;
14270 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
14271
14272#ifdef LOG_ENABLED
14273 /*
14274 * Disassemble to the log if enabled.
14275 */
14276 if (LogIs3Enabled())
14277 {
14278 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
14279 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
14280# ifdef DEBUG_bird
14281 RTLogFlush(NULL);
14282# endif
14283 }
14284#endif
14285 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
14286
14287 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
14288 return pTb;
14289}
14290