VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103374

Last change on this file since 103374 was 103334, checked in by vboxsync, 13 months ago

VMM/IEM: Drop the IEMLIVENESS_OLD_LAYOUT code, as clang output smaller code for the new layout on arm. bugref:10372

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 623.5 KB
Line 
1/* $Id: IEMAllN8veRecompiler.cpp 103334 2024-02-13 13:45:51Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
136#endif
137#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
138static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
139static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
140#endif
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
142DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
143 IEMNATIVEGSTREG enmGstReg, uint32_t off);
144DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
145
146
147/*********************************************************************************************************************************
148* Executable Memory Allocator *
149*********************************************************************************************************************************/
150/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151 * Use an alternative chunk sub-allocator that does not store internal data
152 * in the chunk.
153 *
154 * Using the RTHeapSimple is not practical on newer darwin systems where
155 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
156 * memory. We would have to change the protection of the whole chunk for
157 * every call to RTHeapSimple, which would be rather expensive.
158 *
159 * This alternative implementation lets us restrict page protection modifications
160 * to the pages backing the executable memory we just allocated.
161 */
162#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
163/** The chunk sub-allocation unit size in bytes. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
165/** The chunk sub-allocation unit size as a shift factor. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
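/* Example: with 128 byte units a 200 byte request rounds up to two units
 * (256 bytes), since (200 + 127) >> 7 = 2. */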
167
168#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
169# ifdef IEMNATIVE_USE_GDB_JIT
170# define IEMNATIVE_USE_GDB_JIT_ET_DYN
171
172/** GDB JIT: Code entry. */
173typedef struct GDBJITCODEENTRY
174{
175 struct GDBJITCODEENTRY *pNext;
176 struct GDBJITCODEENTRY *pPrev;
177 uint8_t *pbSymFile;
178 uint64_t cbSymFile;
179} GDBJITCODEENTRY;
180
181/** GDB JIT: Actions. */
182typedef enum GDBJITACTIONS : uint32_t
183{
184 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
185} GDBJITACTIONS;
186
187/** GDB JIT: Descriptor. */
188typedef struct GDBJITDESCRIPTOR
189{
190 uint32_t uVersion;
191 GDBJITACTIONS enmAction;
192 GDBJITCODEENTRY *pRelevant;
193 GDBJITCODEENTRY *pHead;
194 /** Our addition: */
195 GDBJITCODEENTRY *pTail;
196} GDBJITDESCRIPTOR;
197
198/** GDB JIT: Our simple symbol file data. */
199typedef struct GDBJITSYMFILE
200{
201 Elf64_Ehdr EHdr;
202# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
203 Elf64_Shdr aShdrs[5];
204# else
205 Elf64_Shdr aShdrs[7];
206 Elf64_Phdr aPhdrs[2];
207# endif
208 /** The dwarf ehframe data for the chunk. */
209 uint8_t abEhFrame[512];
210 char szzStrTab[128];
211 Elf64_Sym aSymbols[3];
212# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
213 Elf64_Sym aDynSyms[2];
214 Elf64_Dyn aDyn[6];
215# endif
216} GDBJITSYMFILE;
217
218extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
219extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
220
221/** Init once for g_IemNativeGdbJitLock. */
222static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
223/** Init once for the critical section. */
224static RTCRITSECT g_IemNativeGdbJitLock;
225
226/** GDB reads the info here. */
227GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
228
229/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
230DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
231{
232 ASMNopPause();
233}
234
235/** @callback_method_impl{FNRTONCE} */
236static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
237{
238 RT_NOREF(pvUser);
239 return RTCritSectInit(&g_IemNativeGdbJitLock);
240}
241
242
243# endif /* IEMNATIVE_USE_GDB_JIT */
244
245/**
246 * Per-chunk unwind info for non-windows hosts.
247 */
248typedef struct IEMEXECMEMCHUNKEHFRAME
249{
250# ifdef IEMNATIVE_USE_LIBUNWIND
251 /** The offset of the FDA into abEhFrame. */
252 uintptr_t offFda;
253# else
254 /** 'struct object' storage area. */
255 uint8_t abObject[1024];
256# endif
257# ifdef IEMNATIVE_USE_GDB_JIT
258# if 0
259 /** The GDB JIT 'symbol file' data. */
260 GDBJITSYMFILE GdbJitSymFile;
261# endif
262 /** The GDB JIT list entry. */
263 GDBJITCODEENTRY GdbJitEntry;
264# endif
265 /** The dwarf ehframe data for the chunk. */
266 uint8_t abEhFrame[512];
267} IEMEXECMEMCHUNKEHFRAME;
268/** Pointer to per-chunk unwind info for non-windows hosts. */
269typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
270#endif
271
272
273/**
274 * A chunk of executable memory.
275 */
276typedef struct IEMEXECMEMCHUNK
277{
278#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
279 /** Number of free items in this chunk. */
280 uint32_t cFreeUnits;
281 /** Hint where to start searching for free space in the allocation bitmap. */
282 uint32_t idxFreeHint;
283#else
284 /** The heap handle. */
285 RTHEAPSIMPLE hHeap;
286#endif
287 /** Pointer to the chunk. */
288 void *pvChunk;
289#ifdef IN_RING3
290 /**
291 * Pointer to the unwind information.
292 *
293 * This is used during C++ throw and longjmp (windows and probably most other
294 * platforms). Some debuggers (windbg) make use of it as well.
295 *
296 * Windows: This is allocated from hHeap on windows because (at least for
297 * AMD64) the UNWIND_INFO structure address in the
298 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
299 *
300 * Others: Allocated from the regular heap to avoid unnecessary executable data
301 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
302 void *pvUnwindInfo;
303#elif defined(IN_RING0)
304 /** Allocation handle. */
305 RTR0MEMOBJ hMemObj;
306#endif
307} IEMEXECMEMCHUNK;
308/** Pointer to a memory chunk. */
309typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
310
311
312/**
313 * Executable memory allocator for the native recompiler.
314 */
315typedef struct IEMEXECMEMALLOCATOR
316{
317 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
318 uint32_t uMagic;
319
320 /** The chunk size. */
321 uint32_t cbChunk;
322 /** The maximum number of chunks. */
323 uint32_t cMaxChunks;
324 /** The current number of chunks. */
325 uint32_t cChunks;
326 /** Hint where to start looking for available memory. */
327 uint32_t idxChunkHint;
328 /** Statistics: Current number of allocations. */
329 uint32_t cAllocations;
330
331 /** The total amount of memory available. */
332 uint64_t cbTotal;
333 /** Total amount of free memory. */
334 uint64_t cbFree;
335 /** Total amount of memory allocated. */
336 uint64_t cbAllocated;
337
338#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
339 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
340 *
341 * Since the chunk size is a power of two and the minimum chunk size is a lot
342 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
343 * require a whole number of uint64_t elements in the allocation bitmap. So,
344 * for the sake of simplicity/laziness, they are allocated as one
345 * continuous block. */
346 uint64_t *pbmAlloc;
347 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
348 uint32_t cUnitsPerChunk;
349 /** Number of bitmap elements per chunk (for quickly locating the bitmap
350 * portion corresponding to a chunk). */
351 uint32_t cBitmapElementsPerChunk;
352#else
353 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
354 * @{ */
355 /** The size of the heap internal block header. This is used to adjust the
356 * requested memory size to make sure there is exactly enough room for a header at
357 * the end of the blocks we allocate before the next 64 byte alignment line. */
358 uint32_t cbHeapBlockHdr;
359 /** The size of the initial heap allocation required to make sure the first
360 * allocation is correctly aligned. */
361 uint32_t cbHeapAlignTweak;
362 /** The alignment tweak allocation address. */
363 void *pvAlignTweak;
364 /** @} */
365#endif
366
367#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
368 /** Pointer to the array of unwind info running parallel to aChunks (same
369 * allocation as this structure, located after the bitmaps).
370 * (For Windows, the structures must reside in 32-bit RVA distance to the
371 * actual chunk, so they are allocated off the chunk.) */
372 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
373#endif
374
375 /** The allocation chunks. */
376 RT_FLEXIBLE_ARRAY_EXTENSION
377 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
378} IEMEXECMEMALLOCATOR;
379/** Pointer to an executable memory allocator. */
380typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
381
382/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
383#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
384
385
386static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
387
388
389/**
390 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
391 * the heap statistics.
392 */
393static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
394 uint32_t cbReq, uint32_t idxChunk)
395{
396 pExecMemAllocator->cAllocations += 1;
397 pExecMemAllocator->cbAllocated += cbReq;
398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
399 pExecMemAllocator->cbFree -= cbReq;
400#else
401 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
402#endif
403 pExecMemAllocator->idxChunkHint = idxChunk;
404
405#ifdef RT_OS_DARWIN
406 /*
407 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
408 * on darwin. So, we mark the pages returned as read+write after alloc and
409 * expect the caller to call iemExecMemAllocatorReadyForUse when done
410 * writing to the allocation.
411 *
412 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
413 * for details.
414 */
415 /** @todo detect if this is necessary... it wasn't required on 10.15 or
416 * whatever older version it was. */
417 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
418 AssertRC(rc);
419#endif
420
421 return pvRet;
422}
423
424
425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
426static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
427 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
428{
429 /*
430 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
431 */
432 Assert(!(cToScan & 63));
433 Assert(!(idxFirst & 63));
434 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
435 pbmAlloc += idxFirst / 64;
436
437 /*
438 * Scan the bitmap for cReqUnits consecutive clear bits
439 */
440 /** @todo This can probably be done more efficiently for non-x86 systems. */
441 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
442 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
443 {
444 uint32_t idxAddBit = 1;
445 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
446 idxAddBit++;
447 if (idxAddBit >= cReqUnits)
448 {
449 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
450
451 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
452 pChunk->cFreeUnits -= cReqUnits;
453 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
454
455 void * const pvRet = (uint8_t *)pChunk->pvChunk
456 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
457
458 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
459 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
460 }
461
462 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
463 }
464 return NULL;
465}
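/* Illustration of the first-fit scan above: with cReqUnits = 2 and the low
 * bitmap bits reading 0011011 (bits 0, 1, 3 and 4 set), ASMBitFirstClear
 * finds bit 2, the inner loop stops at the set bit 3, and the search resumes
 * at the next clear bit, allocating bits 5 and 6. */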
466#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
467
468
469static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
470{
471#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
472 /*
473 * Figure out how much to allocate.
474 */
475 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
476 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
477 {
478 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
479 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
480 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
481 {
482 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
483 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
484 if (pvRet)
485 return pvRet;
486 }
487 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
488 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
489 cReqUnits, idxChunk);
490 }
491#else
492 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
493 if (pvRet)
494 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
495#endif
496 return NULL;
497
498}
499
500
501/**
502 * Allocates @a cbReq bytes of executable memory.
503 *
504 * @returns Pointer to the memory, NULL if out of memory or other problem
505 * encountered.
506 * @param pVCpu The cross context virtual CPU structure of the calling
507 * thread.
508 * @param cbReq How many bytes are required.
509 */
510static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
511{
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
513 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
514 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
515
516
517 for (unsigned iIteration = 0;; iIteration++)
518 {
519 /*
520 * Adjust the request size so it'll fit the allocator alignment/whatnot.
521 *
522 * For the RTHeapSimple allocator this means following the logic described
523 * in iemExecMemAllocatorGrow and attempting to allocate it from one of the
524 * existing chunks if we think we've got sufficient free memory around.
525 *
526 * While for the alternative one we just align it up to a whole unit size.
527 */
528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
529 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
530#else
531 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
532#endif
533 if (cbReq <= pExecMemAllocator->cbFree)
534 {
535 uint32_t const cChunks = pExecMemAllocator->cChunks;
536 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
537 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
544 {
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 }
549 }
550
551 /*
552 * Can we grow it with another chunk?
553 */
554 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
555 {
556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
557 AssertLogRelRCReturn(rc, NULL);
558
559 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
560 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
561 if (pvRet)
562 return pvRet;
563 AssertFailed();
564 }
565
566 /*
567 * Try prune native TBs once.
568 */
569 if (iIteration == 0)
570 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
571 else
572 {
573 /** @todo stats... */
574 return NULL;
575 }
576 }
577
578}
579
580
581/** This is a hook that we may need later for changing memory protection back
582 * to readonly+exec */
583static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
584{
585#ifdef RT_OS_DARWIN
586 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
587 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
588 AssertRC(rc); RT_NOREF(pVCpu);
589
590 /*
591 * Flush the instruction cache:
592 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
593 */
594 /* sys_dcache_flush(pv, cb); - not necessary */
595 sys_icache_invalidate(pv, cb);
596#else
597 RT_NOREF(pVCpu, pv, cb);
598#endif
599}
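/* Typical usage of this pair (illustrative): iemExecMemAllocatorAlloc returns
 * a buffer that is read+write on darwin, the recompiler emits the native
 * instructions into it, and iemExecMemAllocatorReadyForUse is then called to
 * switch the pages back to read+exec and invalidate the instruction cache. */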
600
601
602/**
603 * Frees executable memory.
604 */
605void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
606{
607 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
608 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
609 Assert(pv);
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
612#else
613 Assert(!((uintptr_t)pv & 63));
614#endif
615
616 /* Align the size as we did when allocating the block. */
617#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
618 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
619#else
620 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
621#endif
622
623 /* Free it / assert sanity. */
624#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
625 uint32_t const cChunks = pExecMemAllocator->cChunks;
626 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
627 bool fFound = false;
628 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
629 {
630 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
631 fFound = offChunk < cbChunk;
632 if (fFound)
633 {
634#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
635 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
636 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
637
638 /* Check that it's valid and free it. */
639 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
641 for (uint32_t i = 1; i < cReqUnits; i++)
642 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
643 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
644
645 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
646 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
647
648 /* Update the stats. */
649 pExecMemAllocator->cbAllocated -= cb;
650 pExecMemAllocator->cbFree += cb;
651 pExecMemAllocator->cAllocations -= 1;
652 return;
653#else
654 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
655 break;
656#endif
657 }
658 }
659# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
660 AssertFailed();
661# else
662 Assert(fFound);
663# endif
664#endif
665
666#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
667 /* Update stats while cb is freshly calculated. */
668 pExecMemAllocator->cbAllocated -= cb;
669 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
670 pExecMemAllocator->cAllocations -= 1;
671
672 /* Free it. */
673 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
674#endif
675}
676
677
678
679#ifdef IN_RING3
680# ifdef RT_OS_WINDOWS
681
682/**
683 * Initializes the unwind info structures for windows hosts.
684 */
685static int
686iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
687 void *pvChunk, uint32_t idxChunk)
688{
689 RT_NOREF(pVCpu);
690
691 /*
692 * The AMD64 unwind opcodes.
693 *
694 * This is a program that starts with RSP after a RET instruction that
695 * ends up in recompiled code, and the operations we describe here will
696 * restore all non-volatile registers and bring RSP back to where our
697 * RET address is. This means it's reverse order from what happens in
698 * the prologue.
699 *
700 * Note! Using a frame register approach here both because we have one
701 * and mainly because the UWOP_ALLOC_LARGE argument values
702 * would be a pain to write initializers for. On the positive
703 * side, we're impervious to changes in the stack variable
704 * area and can deal with dynamic stack allocations if necessary.
705 */
706 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
707 {
708 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
709 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
710 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
711 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
712 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
713 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
714 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
715 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
716 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
717 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
718 };
719 union
720 {
721 IMAGE_UNWIND_INFO Info;
722 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
723 } s_UnwindInfo =
724 {
725 {
726 /* .Version = */ 1,
727 /* .Flags = */ 0,
728 /* .SizeOfProlog = */ 16, /* whatever */
729 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
730 /* .FrameRegister = */ X86_GREG_xBP,
731 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
732 }
733 };
734 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
735 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
736
737 /*
738 * Calc how much space we need and allocate it off the exec heap.
739 */
740 unsigned const cFunctionEntries = 1;
741 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
742 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
743# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
744 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
745 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
746 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
747# else
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
749 - pExecMemAllocator->cbHeapBlockHdr;
750 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
751 32 /*cbAlignment*/);
752# endif
753 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
754 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
755
756 /*
757 * Initialize the structures.
758 */
759 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
760
761 paFunctions[0].BeginAddress = 0;
762 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
763 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
764
765 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
766 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
767
768 /*
769 * Register it.
770 */
771 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
772 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
773
774 return VINF_SUCCESS;
775}
776
777
778# else /* !RT_OS_WINDOWS */
779
780/**
781 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
782 */
783DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
784{
785 if (iValue >= 64)
786 {
787 Assert(iValue < 0x2000);
788 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
789 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
790 }
791 else if (iValue >= 0)
792 *Ptr.pb++ = (uint8_t)iValue;
793 else if (iValue > -64)
794 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
795 else
796 {
797 Assert(iValue > -0x2000);
798 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
799 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
800 }
801 return Ptr;
802}
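/* Example: the code alignment factor 4 used below for arm64 encodes as the
 * single byte 0x04, and the data alignment factor -8 as the single byte 0x78. */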
803
804
805/**
806 * Emits an ULEB128 encoded value (up to 64-bit wide).
807 */
808DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
809{
810 while (uValue >= 0x80)
811 {
812 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
813 uValue >>= 7;
814 }
815 *Ptr.pb++ = (uint8_t)uValue;
816 return Ptr;
817}
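/* Example: 0x7f still fits in a single byte (0x7f), while 0x80 takes two
 * bytes: 0x80 0x01. */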
818
819
820/**
821 * Emits a CFA rule as register @a uReg + offset @a off.
822 */
823DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
824{
825 *Ptr.pb++ = DW_CFA_def_cfa;
826 Ptr = iemDwarfPutUleb128(Ptr, uReg);
827 Ptr = iemDwarfPutUleb128(Ptr, off);
828 return Ptr;
829}
830
831
832/**
833 * Emits a register (@a uReg) save location:
834 * CFA + @a off * data_alignment_factor
835 */
836DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
837{
838 if (uReg < 0x40)
839 *Ptr.pb++ = DW_CFA_offset | uReg;
840 else
841 {
842 *Ptr.pb++ = DW_CFA_offset_extended;
843 Ptr = iemDwarfPutUleb128(Ptr, uReg);
844 }
845 Ptr = iemDwarfPutUleb128(Ptr, off);
846 return Ptr;
847}
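/* Example: with the data alignment factor of -8 emitted in the CIE below, an
 * off value of 2 records that the register was saved at CFA + 2 * -8, i.e.
 * 16 bytes below the canonical frame address. */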
848
849
850# if 0 /* unused */
851/**
852 * Emits a register (@a uReg) save location, using signed offset:
853 * CFA + @a offSigned * data_alignment_factor
854 */
855DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
856{
857 *Ptr.pb++ = DW_CFA_offset_extended_sf;
858 Ptr = iemDwarfPutUleb128(Ptr, uReg);
859 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
860 return Ptr;
861}
862# endif
863
864
865/**
866 * Initializes the unwind info section for non-windows hosts.
867 */
868static int
869iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
870 void *pvChunk, uint32_t idxChunk)
871{
872 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
873 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
874
875 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
876
877 /*
878 * Generate the CIE first.
879 */
880# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
881 uint8_t const iDwarfVer = 3;
882# else
883 uint8_t const iDwarfVer = 4;
884# endif
885 RTPTRUNION const PtrCie = Ptr;
886 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
887 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
888 *Ptr.pb++ = iDwarfVer; /* DWARF version */
889 *Ptr.pb++ = 0; /* Augmentation. */
890 if (iDwarfVer >= 4)
891 {
892 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
893 *Ptr.pb++ = 0; /* Segment selector size. */
894 }
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
897# else
898 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
899# endif
900 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
901# ifdef RT_ARCH_AMD64
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
903# elif defined(RT_ARCH_ARM64)
904 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
905# else
906# error "port me"
907# endif
908 /* Initial instructions: */
909# ifdef RT_ARCH_AMD64
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
918# elif defined(RT_ARCH_ARM64)
919# if 1
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
921# else
922 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
923# endif
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
936 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
937 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
938# else
939# error "port me"
940# endif
941 while ((Ptr.u - PtrCie.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the CIE size. */
944 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
945
946 /*
947 * Generate an FDE for the whole chunk area.
948 */
949# ifdef IEMNATIVE_USE_LIBUNWIND
950 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
951# endif
952 RTPTRUNION const PtrFde = Ptr;
953 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
954 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
955 Ptr.pu32++;
956 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
957 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
958# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
959 *Ptr.pb++ = DW_CFA_nop;
960# endif
961 while ((Ptr.u - PtrFde.u) & 3)
962 *Ptr.pb++ = DW_CFA_nop;
963 /* Finalize the FDE size. */
964 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
965
966 /* Terminator entry. */
967 *Ptr.pu32++ = 0;
968 *Ptr.pu32++ = 0; /* just to be sure... */
969 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
970
971 /*
972 * Register it.
973 */
974# ifdef IEMNATIVE_USE_LIBUNWIND
975 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
976# else
977 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
978 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
979# endif
980
981# ifdef IEMNATIVE_USE_GDB_JIT
982 /*
983 * Now for telling GDB about this (experimental).
984 *
985 * This seems to work best with ET_DYN.
986 */
987 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
988# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
989 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
990 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
991# else
992 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
993 - pExecMemAllocator->cbHeapBlockHdr;
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
995# endif
996 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
997 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
998
999 RT_ZERO(*pSymFile);
1000
1001 /*
1002 * The ELF header:
1003 */
1004 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1005 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1006 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1007 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1008 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1009 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1010 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1011 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1012# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1013 pSymFile->EHdr.e_type = ET_DYN;
1014# else
1015 pSymFile->EHdr.e_type = ET_REL;
1016# endif
1017# ifdef RT_ARCH_AMD64
1018 pSymFile->EHdr.e_machine = EM_AMD64;
1019# elif defined(RT_ARCH_ARM64)
1020 pSymFile->EHdr.e_machine = EM_AARCH64;
1021# else
1022# error "port me"
1023# endif
1024 pSymFile->EHdr.e_version = 1; /*?*/
1025 pSymFile->EHdr.e_entry = 0;
1026# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1027 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1028# else
1029 pSymFile->EHdr.e_phoff = 0;
1030# endif
1031 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1032 pSymFile->EHdr.e_flags = 0;
1033 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1035 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1036 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1037# else
1038 pSymFile->EHdr.e_phentsize = 0;
1039 pSymFile->EHdr.e_phnum = 0;
1040# endif
1041 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1042 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1043 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1044
1045 uint32_t offStrTab = 0;
1046#define APPEND_STR(a_szStr) do { \
1047 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1048 offStrTab += sizeof(a_szStr); \
1049 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1050 } while (0)
1051#define APPEND_STR_FMT(a_szStr, ...) do { \
1052 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1053 offStrTab++; \
1054 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1055 } while (0)
1056
1057 /*
1058 * Section headers.
1059 */
1060 /* Section header #0: NULL */
1061 unsigned i = 0;
1062 APPEND_STR("");
1063 RT_ZERO(pSymFile->aShdrs[i]);
1064 i++;
1065
1066 /* Section header: .eh_frame */
1067 pSymFile->aShdrs[i].sh_name = offStrTab;
1068 APPEND_STR(".eh_frame");
1069 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1070 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1071# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1072 pSymFile->aShdrs[i].sh_offset
1073 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1074# else
1075 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1076 pSymFile->aShdrs[i].sh_offset = 0;
1077# endif
1078
1079 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1080 pSymFile->aShdrs[i].sh_link = 0;
1081 pSymFile->aShdrs[i].sh_info = 0;
1082 pSymFile->aShdrs[i].sh_addralign = 1;
1083 pSymFile->aShdrs[i].sh_entsize = 0;
1084 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1085 i++;
1086
1087 /* Section header: .shstrtab */
1088 unsigned const iShStrTab = i;
1089 pSymFile->EHdr.e_shstrndx = iShStrTab;
1090 pSymFile->aShdrs[i].sh_name = offStrTab;
1091 APPEND_STR(".shstrtab");
1092 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1093 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1095 pSymFile->aShdrs[i].sh_offset
1096 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1097# else
1098 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1099 pSymFile->aShdrs[i].sh_offset = 0;
1100# endif
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1102 pSymFile->aShdrs[i].sh_link = 0;
1103 pSymFile->aShdrs[i].sh_info = 0;
1104 pSymFile->aShdrs[i].sh_addralign = 1;
1105 pSymFile->aShdrs[i].sh_entsize = 0;
1106 i++;
1107
1108 /* Section header: .symbols */
1109 pSymFile->aShdrs[i].sh_name = offStrTab;
1110 APPEND_STR(".symtab");
1111 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1112 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1113 pSymFile->aShdrs[i].sh_offset
1114 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1115 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_link = iShStrTab;
1117 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1118 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1119 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1120 i++;
1121
1122# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1123 /* Section header: .dynsym */
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".dynsym");
1126 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1128 pSymFile->aShdrs[i].sh_offset
1129 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1130 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_link = iShStrTab;
1132 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1133 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1134 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1135 i++;
1136# endif
1137
1138# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1139 /* Section header: .dynamic */
1140 pSymFile->aShdrs[i].sh_name = offStrTab;
1141 APPEND_STR(".dynamic");
1142 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1143 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1144 pSymFile->aShdrs[i].sh_offset
1145 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1146 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1147 pSymFile->aShdrs[i].sh_link = iShStrTab;
1148 pSymFile->aShdrs[i].sh_info = 0;
1149 pSymFile->aShdrs[i].sh_addralign = 1;
1150 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1151 i++;
1152# endif
1153
1154 /* Section header: .text */
1155 unsigned const iShText = i;
1156 pSymFile->aShdrs[i].sh_name = offStrTab;
1157 APPEND_STR(".text");
1158 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1159 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1161 pSymFile->aShdrs[i].sh_offset
1162 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1163# else
1164 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1165 pSymFile->aShdrs[i].sh_offset = 0;
1166# endif
1167 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1168 pSymFile->aShdrs[i].sh_link = 0;
1169 pSymFile->aShdrs[i].sh_info = 0;
1170 pSymFile->aShdrs[i].sh_addralign = 1;
1171 pSymFile->aShdrs[i].sh_entsize = 0;
1172 i++;
1173
1174 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1175
1176# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1177 /*
1178 * The program headers:
1179 */
1180 /* Everything in a single LOAD segment: */
1181 i = 0;
1182 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1183 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1184 pSymFile->aPhdrs[i].p_offset
1185 = pSymFile->aPhdrs[i].p_vaddr
1186 = pSymFile->aPhdrs[i].p_paddr = 0;
1187 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1188 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1189 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1190 i++;
1191 /* The .dynamic segment. */
1192 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1193 pSymFile->aPhdrs[i].p_flags = PF_R;
1194 pSymFile->aPhdrs[i].p_offset
1195 = pSymFile->aPhdrs[i].p_vaddr
1196 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1197 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1198 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1199 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1200 i++;
1201
1202 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1203
1204 /*
1205 * The dynamic section:
1206 */
1207 i = 0;
1208 pSymFile->aDyn[i].d_tag = DT_SONAME;
1209 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1210 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1219 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1220 i++;
1221 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1222 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1223 i++;
1224 pSymFile->aDyn[i].d_tag = DT_NULL;
1225 i++;
1226 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1227# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1228
1229 /*
1230 * Symbol tables:
1231 */
1232 /** @todo gdb doesn't seem to really like this ... */
1233 i = 0;
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1241 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1242# endif
1243 i++;
1244
1245 pSymFile->aSymbols[i].st_name = 0;
1246 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1247 pSymFile->aSymbols[i].st_value = 0;
1248 pSymFile->aSymbols[i].st_size = 0;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251 i++;
1252
1253 pSymFile->aSymbols[i].st_name = offStrTab;
1254 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1255# if 0
1256 pSymFile->aSymbols[i].st_shndx = iShText;
1257 pSymFile->aSymbols[i].st_value = 0;
1258# else
1259 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1260 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1261# endif
1262 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1263 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1264 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1265# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1266 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1267 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1268# endif
1269 i++;
1270
1271 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1272 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1273
1274 /*
1275 * The GDB JIT entry and informing GDB.
1276 */
1277 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1278# if 1
1279 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1280# else
1281 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1282# endif
1283
1284 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1285 RTCritSectEnter(&g_IemNativeGdbJitLock);
1286 pEhFrame->GdbJitEntry.pNext = NULL;
1287 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1288 if (__jit_debug_descriptor.pTail)
1289 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1290 else
1291 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1292 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1293 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1294
1295 /* Notify GDB: */
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1297 __jit_debug_register_code();
1298 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1299 RTCritSectLeave(&g_IemNativeGdbJitLock);
1300
1301# else /* !IEMNATIVE_USE_GDB_JIT */
1302 RT_NOREF(pVCpu);
1303# endif /* !IEMNATIVE_USE_GDB_JIT */
1304
1305 return VINF_SUCCESS;
1306}
1307
1308# endif /* !RT_OS_WINDOWS */
1309#endif /* IN_RING3 */
1310
1311
1312/**
1313 * Adds another chunk to the executable memory allocator.
1314 *
1315 * This is used by the init code for the initial allocation and later by the
1316 * regular allocator function when it's out of memory.
1317 */
1318static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1319{
1320 /* Check that we've room for growth. */
1321 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1322 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1323
1324 /* Allocate a chunk. */
1325#ifdef RT_OS_DARWIN
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1327#else
1328 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1329#endif
1330 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1331
1332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1333 int rc = VINF_SUCCESS;
1334#else
1335 /* Initialize the heap for the chunk. */
1336 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1337 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1338 AssertRC(rc);
1339 if (RT_SUCCESS(rc))
1340 {
1341 /*
1342 * We want the memory to be aligned on 64 byte, so the first time thru
1343 * here we do some exploratory allocations to see how we can achieve this.
1344 * On subsequent runs we only make an initial adjustment allocation, if
1345 * necessary.
1346 *
1347 * Since we own the heap implementation, we know that the internal block
1348 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1349 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1350 * to the size, align up by 64 bytes, and subtract 32 bytes.
1351 *
1352 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1353 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1354 * allocation to force subsequent allocations to return 64 byte aligned
1355 * user areas.
1356 */
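/* Worked example: a 256 byte request becomes RT_ALIGN_32(256 + 32, 64) - 32
 * = 288 bytes, so the 32 byte header of the following block ends exactly on
 * the next 64 byte boundary and its user area stays 64 byte aligned. */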
1357 if (!pExecMemAllocator->cbHeapBlockHdr)
1358 {
1359 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1360 pExecMemAllocator->cbHeapAlignTweak = 64;
1361 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1362 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1364
1365 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1372 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1373 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1374 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1375 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1376
1377 RTHeapSimpleFree(hHeap, pvTest2);
1378 RTHeapSimpleFree(hHeap, pvTest1);
1379 }
1380 else
1381 {
1382 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1383 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1384 }
1385 if (RT_SUCCESS(rc))
1386#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1387 {
1388 /*
1389 * Add the chunk.
1390 *
1391 * This must be done before the unwind init so windows can allocate
1392 * memory from the chunk when using the alternative sub-allocator.
1393 */
1394 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1395#ifdef IN_RING3
1396 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1397#endif
1398#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1399 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1400#else
1401 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1402 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1403 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1404 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1405#endif
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1411 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1412 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1413#else
1414 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1415 pExecMemAllocator->cbTotal += cbFree;
1416 pExecMemAllocator->cbFree += cbFree;
1417#endif
1418
1419#ifdef IN_RING3
1420 /*
1421 * Initialize the unwind information (this cannot really fail atm).
1422 * (This sets pvUnwindInfo.)
1423 */
1424 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1425 if (RT_SUCCESS(rc))
1426#endif
1427 {
1428 return VINF_SUCCESS;
1429 }
1430
1431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 /* Just in case the impossible happens, undo the above: */
1433 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1434 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1435 pExecMemAllocator->cChunks = idxChunk;
1436 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1437 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1438 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1439 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1440#endif
1441 }
1442#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1443 }
1444#endif
1445 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1446 RT_NOREF(pVCpu);
1447 return rc;
1448}
1449
1450
1451/**
1452 * Initializes the executable memory allocator for native recompilation on the
1453 * calling EMT.
1454 *
1455 * @returns VBox status code.
1456 * @param pVCpu The cross context virtual CPU structure of the calling
1457 * thread.
1458 * @param cbMax The max size of the allocator.
1459 * @param cbInitial The initial allocator size.
1460 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1461 * dependent).
1462 */
1463int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1464{
1465 /*
1466 * Validate input.
1467 */
1468 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1469 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1470 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1471 || cbChunk == 0
1472 || ( RT_IS_POWER_OF_TWO(cbChunk)
1473 && cbChunk >= _1M
1474 && cbChunk <= _256M
1475 && cbChunk <= cbMax),
1476 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1477 VERR_OUT_OF_RANGE);
1478
1479 /*
1480 * Adjust/figure out the chunk size.
1481 */
1482 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1483 {
1484 if (cbMax >= _256M)
1485 cbChunk = _64M;
1486 else
1487 {
1488 if (cbMax < _16M)
1489 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1490 else
1491 cbChunk = (uint32_t)cbMax / 4;
1492 if (!RT_IS_POWER_OF_TWO(cbChunk))
1493 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1494 }
1495 }
1496
1497 if (cbChunk > cbMax)
1498 cbMax = cbChunk;
1499 else
1500 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1501 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1502 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
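/* For instance (illustrative numbers only): with cbMax = 64 MiB and cbChunk left at the
   default, the code above picks cbChunk = 64 MiB / 4 = 16 MiB, which is already a power
   of two; cbMax stays 64 MiB after the rounding and cMaxChunks comes out as 4. */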
1503
1504 /*
1505 * Allocate and initialize the allocator instance.
1506 */
1507 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1508#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1509 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1510 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1511 cbNeeded += cbBitmap * cMaxChunks;
1512 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1513 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1514#endif
1515#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1516 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1517 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1518#endif
1519 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1520 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1521 VERR_NO_MEMORY);
1522 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1523 pExecMemAllocator->cbChunk = cbChunk;
1524 pExecMemAllocator->cMaxChunks = cMaxChunks;
1525 pExecMemAllocator->cChunks = 0;
1526 pExecMemAllocator->idxChunkHint = 0;
1527 pExecMemAllocator->cAllocations = 0;
1528 pExecMemAllocator->cbTotal = 0;
1529 pExecMemAllocator->cbFree = 0;
1530 pExecMemAllocator->cbAllocated = 0;
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1533 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1534 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1535 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1536#endif
1537#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1538 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1539#endif
1540 for (uint32_t i = 0; i < cMaxChunks; i++)
1541 {
1542#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1543 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1544 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1545#else
1546 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1547#endif
1548 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1549#ifdef IN_RING0
1550 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1551#else
1552 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1553#endif
1554 }
1555 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1556
1557 /*
1558 * Do the initial allocations.
1559 */
1560 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1561 {
1562 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1563 AssertLogRelRCReturn(rc, rc);
1564 }
1565
1566 pExecMemAllocator->idxChunkHint = 0;
1567
1568 return VINF_SUCCESS;
1569}
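/*
 * Illustrative call sketch; not compiled, and the sizes below are made up - real callers
 * take cbMax/cbInitial/cbChunk from the VM configuration:
 *
 *      int rc = iemExecMemAllocatorInit(pVCpu, 64 * _1M, 16 * _1M, 0);
 *      AssertRCReturn(rc, rc);
 *
 * Passing 0 (or UINT32_MAX) for cbChunk lets the function derive the chunk size from
 * cbMax as described above.
 */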
1570
1571
1572/*********************************************************************************************************************************
1573* Native Recompilation *
1574*********************************************************************************************************************************/
1575
1576
1577/**
1578 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1581{
1582 pVCpu->iem.s.cInstructions += idxInstr;
1583 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1584}
1585
1586
1587/**
1588 * Used by TB code when it wants to raise a \#GP(0).
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1591{
1592 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1593#ifndef _MSC_VER
1594 return VINF_IEM_RAISED_XCPT; /* not reached */
1595#endif
1596}
1597
1598
1599/**
1600 * Used by TB code when detecting opcode changes.
1601 * @see iemThreadeFuncWorkerObsoleteTb
1602 */
1603IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1604{
1605 /* We set fSafeToFree to false because we're being called in the context
1606 of a TB callback function, which for native TBs means we cannot release
1607 the executable memory until we've returned our way back to iemTbExec, as
1608 that return path goes via the native code generated for the TB. */
1609 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1610 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1611 return VINF_IEM_REEXEC_BREAK;
1612}
1613
1614
1615/**
1616 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1619{
1620 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1621 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1622 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1623 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1624 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1625 return VINF_IEM_REEXEC_BREAK;
1626}
1627
1628
1629/**
1630 * Used by TB code when we missed a PC check after a branch.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1633{
1634 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1635 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1636 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1637 pVCpu->iem.s.pbInstrBuf));
1638 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1639 return VINF_IEM_REEXEC_BREAK;
1640}
1641
1642
1643
1644/*********************************************************************************************************************************
1645* Helpers: Segmented memory fetches and stores. *
1646*********************************************************************************************************************************/
1647
1648/**
1649 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1650 */
1651IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1652{
1653#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1654 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1655#else
1656 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1657#endif
1658}
1659
1660
1661/**
1662 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1663 * to 16 bits.
1664 */
1665IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1666{
1667#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1668 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1669#else
1670 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1671#endif
1672}
1673
1674
1675/**
1676 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1677 * to 32 bits.
1678 */
1679IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1680{
1681#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1682 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1683#else
1684 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1685#endif
1686}
1687
1688/**
1689 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1690 * to 64 bits.
1691 */
1692IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1693{
1694#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1695 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1696#else
1697 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1698#endif
1699}
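/* Illustration of the cast chains above: a fetched byte 0x80 is reinterpreted as int8_t
   (-128), sign-extended to the target width and then returned as an unsigned value, i.e.
   0xFF80 for _Sx_U16, 0xFFFFFF80 for _Sx_U32 and 0xFFFFFFFFFFFFFF80 for _Sx_U64. */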
1700
1701
1702/**
1703 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1704 */
1705IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1706{
1707#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1708 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1709#else
1710 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1711#endif
1712}
1713
1714
1715/**
1716 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1717 * to 32 bits.
1718 */
1719IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1720{
1721#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1722 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1723#else
1724 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1725#endif
1726}
1727
1728
1729/**
1730 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1731 * to 64 bits.
1732 */
1733IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1734{
1735#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1736 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1737#else
1738 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1739#endif
1740}
1741
1742
1743/**
1744 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1745 */
1746IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1747{
1748#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1749 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1750#else
1751 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1752#endif
1753}
1754
1755
1756/**
1757 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1758 * to 64 bits.
1759 */
1760IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1761{
1762#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1763 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1764#else
1765 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1766#endif
1767}
1768
1769
1770/**
1771 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1772 */
1773IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1774{
1775#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1776 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1777#else
1778 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1779#endif
1780}
1781
1782
1783/**
1784 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1785 */
1786IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1787{
1788#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1789 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1790#else
1791 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1792#endif
1793}
1794
1795
1796/**
1797 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1798 */
1799IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1800{
1801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1802 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1803#else
1804 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1805#endif
1806}
1807
1808
1809/**
1810 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1815 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1816#else
1817 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1828 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1829#else
1830 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1831#endif
1832}
1833
1834
1835
1836/**
1837 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1838 */
1839IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1840{
1841#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1842 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1843#else
1844 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1845#endif
1846}
1847
1848
1849/**
1850 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1851 */
1852IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1853{
1854#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1855 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1856#else
1857 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1858#endif
1859}
1860
1861
1862/**
1863 * Used by TB code to store a 32-bit selector value onto a generic stack.
1864 *
1865 * Intel CPUs don't write a whole dword, hence this special function.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1870 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1871#else
1872 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1873#endif
1874}
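/* Put differently (illustration based on the note above): a 32-bit PUSH of a segment
   register only writes the low 16 bits of the stack slot, so this helper must not store
   the full dword the way iemNativeHlpStackStoreU32 does. */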
1875
1876
1877/**
1878 * Used by TB code to push an unsigned 64-bit value onto a generic stack.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1883 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1884#else
1885 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1886#endif
1887}
1888
1889
1890/**
1891 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1892 */
1893IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1894{
1895#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1896 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1897#else
1898 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1899#endif
1900}
1901
1902
1903/**
1904 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1905 */
1906IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1907{
1908#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1909 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1910#else
1911 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1912#endif
1913}
1914
1915
1916/**
1917 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1922 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1923#else
1924 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1925#endif
1926}
1927
1928
1929
1930/*********************************************************************************************************************************
1931* Helpers: Flat memory fetches and stores. *
1932*********************************************************************************************************************************/
1933
1934/**
1935 * Used by TB code to load unsigned 8-bit data w/ flat address.
1936 * @note Zero extending the value to 64-bit to simplify assembly.
1937 */
1938IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1939{
1940#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1941 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1942#else
1943 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1944#endif
1945}
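/* E.g. a fetched byte 0xAB comes back as 0x00000000000000AB; the zero extension lets the
   emitted native code treat every fetch result as a full 64-bit register value. */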
1946
1947
1948/**
1949 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1950 * to 16 bits.
1951 * @note Zero extending the value to 64-bit to simplify assembly.
1952 */
1953IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1954{
1955#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1956 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1957#else
1958 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1959#endif
1960}
1961
1962
1963/**
1964 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1965 * to 32 bits.
1966 * @note Zero extending the value to 64-bit to simplify assembly.
1967 */
1968IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1969{
1970#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1971 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1972#else
1973 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1974#endif
1975}
1976
1977
1978/**
1979 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1980 * to 64 bits.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1983{
1984#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1985 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1986#else
1987 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1988#endif
1989}
1990
1991
1992/**
1993 * Used by TB code to load unsigned 16-bit data w/ flat address.
1994 * @note Zero extending the value to 64-bit to simplify assembly.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1999 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2000#else
2001 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2002#endif
2003}
2004
2005
2006/**
2007 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2008 * to 32 bits.
2009 * @note Zero extending the value to 64-bit to simplify assembly.
2010 */
2011IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2012{
2013#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2014 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2015#else
2016 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2017#endif
2018}
2019
2020
2021/**
2022 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2023 * to 64 bits.
2024 * @note Zero extending the value to 64-bit to simplify assembly.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2027{
2028#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2029 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2030#else
2031 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2032#endif
2033}
2034
2035
2036/**
2037 * Used by TB code to load unsigned 32-bit data w/ flat address.
2038 * @note Zero extending the value to 64-bit to simplify assembly.
2039 */
2040IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2041{
2042#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2043 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2044#else
2045 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2046#endif
2047}
2048
2049
2050/**
2051 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2052 * to 64 bits.
2053 * @note Zero extending the value to 64-bit to simplify assembly.
2054 */
2055IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2056{
2057#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2058 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2059#else
2060 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2061#endif
2062}
2063
2064
2065/**
2066 * Used by TB code to load unsigned 64-bit data w/ flat address.
2067 */
2068IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2069{
2070#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2071 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2072#else
2073 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2074#endif
2075}
2076
2077
2078/**
2079 * Used by TB code to store unsigned 8-bit data w/ flat address.
2080 */
2081IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2082{
2083#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2084 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2085#else
2086 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2087#endif
2088}
2089
2090
2091/**
2092 * Used by TB code to store unsigned 16-bit data w/ flat address.
2093 */
2094IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2095{
2096#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2097 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2098#else
2099 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2100#endif
2101}
2102
2103
2104/**
2105 * Used by TB code to store unsigned 32-bit data w/ flat address.
2106 */
2107IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2108{
2109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2110 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2111#else
2112 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2113#endif
2114}
2115
2116
2117/**
2118 * Used by TB code to store unsigned 64-bit data w/ flat address.
2119 */
2120IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2121{
2122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2123 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2124#else
2125 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2126#endif
2127}
2128
2129
2130
2131/**
2132 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2133 */
2134IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2135{
2136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2137 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2138#else
2139 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2140#endif
2141}
2142
2143
2144/**
2145 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2146 */
2147IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2148{
2149#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2150 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2151#else
2152 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2153#endif
2154}
2155
2156
2157/**
2158 * Used by TB code to store a segment selector value onto a flat stack.
2159 *
2160 * Intel CPUs don't write a whole dword, hence this special function.
2161 */
2162IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2163{
2164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2165 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2166#else
2167 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2168#endif
2169}
2170
2171
2172/**
2173 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2174 */
2175IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2176{
2177#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2178 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2179#else
2180 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2181#endif
2182}
2183
2184
2185/**
2186 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2187 */
2188IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2189{
2190#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2191 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2192#else
2193 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2194#endif
2195}
2196
2197
2198/**
2199 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2200 */
2201IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2202{
2203#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2204 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2205#else
2206 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2207#endif
2208}
2209
2210
2211/**
2212 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2217 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2218#else
2219 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2220#endif
2221}
2222
2223
2224
2225/*********************************************************************************************************************************
2226* Helpers: Segmented memory mapping. *
2227*********************************************************************************************************************************/
2228
2229/**
2230 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2231 * segmentation.
2232 */
2233IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2234 RTGCPTR GCPtrMem, uint8_t iSegReg))
2235{
2236#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2237 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2238#else
2239 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2240#endif
2241}
2242
2243
2244/**
2245 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2246 */
2247IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2248 RTGCPTR GCPtrMem, uint8_t iSegReg))
2249{
2250#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2251 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2252#else
2253 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2254#endif
2255}
2256
2257
2258/**
2259 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2260 */
2261IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2262 RTGCPTR GCPtrMem, uint8_t iSegReg))
2263{
2264#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2265 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2266#else
2267 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2268#endif
2269}
2270
2271
2272/**
2273 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2274 */
2275IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2276 RTGCPTR GCPtrMem, uint8_t iSegReg))
2277{
2278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2279 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2280#else
2281 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2282#endif
2283}
2284
2285
2286/**
2287 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2288 * segmentation.
2289 */
2290IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2291 RTGCPTR GCPtrMem, uint8_t iSegReg))
2292{
2293#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2294 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2295#else
2296 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2297#endif
2298}
2299
2300
2301/**
2302 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2303 */
2304IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2305 RTGCPTR GCPtrMem, uint8_t iSegReg))
2306{
2307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2308 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2309#else
2310 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2311#endif
2312}
2313
2314
2315/**
2316 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2317 */
2318IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2319 RTGCPTR GCPtrMem, uint8_t iSegReg))
2320{
2321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2322 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2323#else
2324 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2325#endif
2326}
2327
2328
2329/**
2330 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2331 */
2332IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2333 RTGCPTR GCPtrMem, uint8_t iSegReg))
2334{
2335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2336 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2337#else
2338 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2339#endif
2340}
2341
2342
2343/**
2344 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2345 * segmentation.
2346 */
2347IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2348 RTGCPTR GCPtrMem, uint8_t iSegReg))
2349{
2350#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2351 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2352#else
2353 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2354#endif
2355}
2356
2357
2358/**
2359 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2360 */
2361IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2362 RTGCPTR GCPtrMem, uint8_t iSegReg))
2363{
2364#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2365 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2366#else
2367 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2368#endif
2369}
2370
2371
2372/**
2373 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2374 */
2375IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2376 RTGCPTR GCPtrMem, uint8_t iSegReg))
2377{
2378#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2379 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2380#else
2381 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2382#endif
2383}
2384
2385
2386/**
2387 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2390 RTGCPTR GCPtrMem, uint8_t iSegReg))
2391{
2392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2393 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2394#else
2395 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2396#endif
2397}
2398
2399
2400/**
2401 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2402 * segmentation.
2403 */
2404IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2405 RTGCPTR GCPtrMem, uint8_t iSegReg))
2406{
2407#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2408 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2409#else
2410 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2411#endif
2412}
2413
2414
2415/**
2416 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2417 */
2418IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2419 RTGCPTR GCPtrMem, uint8_t iSegReg))
2420{
2421#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2422 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2423#else
2424 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2425#endif
2426}
2427
2428
2429/**
2430 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2431 */
2432IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2433 RTGCPTR GCPtrMem, uint8_t iSegReg))
2434{
2435#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2436 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2437#else
2438 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2439#endif
2440}
2441
2442
2443/**
2444 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2445 */
2446IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2447 RTGCPTR GCPtrMem, uint8_t iSegReg))
2448{
2449#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2450 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2451#else
2452 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2453#endif
2454}
2455
2456
2457/**
2458 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2459 */
2460IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2461 RTGCPTR GCPtrMem, uint8_t iSegReg))
2462{
2463#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2464 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2465#else
2466 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2467#endif
2468}
2469
2470
2471/**
2472 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2473 */
2474IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2475 RTGCPTR GCPtrMem, uint8_t iSegReg))
2476{
2477#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2478 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2479#else
2480 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2481#endif
2482}
2483
2484
2485/**
2486 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2487 * segmentation.
2488 */
2489IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2490 RTGCPTR GCPtrMem, uint8_t iSegReg))
2491{
2492#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2493 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2494#else
2495 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2496#endif
2497}
2498
2499
2500/**
2501 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2502 */
2503IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2504 RTGCPTR GCPtrMem, uint8_t iSegReg))
2505{
2506#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2507 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2508#else
2509 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2510#endif
2511}
2512
2513
2514/**
2515 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2516 */
2517IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2518 RTGCPTR GCPtrMem, uint8_t iSegReg))
2519{
2520#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2521 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2522#else
2523 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2524#endif
2525}
2526
2527
2528/**
2529 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2530 */
2531IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2532 RTGCPTR GCPtrMem, uint8_t iSegReg))
2533{
2534#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2535 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2536#else
2537 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2538#endif
2539}
2540
2541
2542/*********************************************************************************************************************************
2543* Helpers: Flat memory mapping. *
2544*********************************************************************************************************************************/
2545
2546/**
2547 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2548 * address.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2551{
2552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2553 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2554#else
2555 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2556#endif
2557}
2558
2559
2560/**
2561 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2562 */
2563IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2564{
2565#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2566 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2567#else
2568 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2569#endif
2570}
2571
2572
2573/**
2574 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2575 */
2576IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2577{
2578#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2579 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2580#else
2581 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2582#endif
2583}
2584
2585
2586/**
2587 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2588 */
2589IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2590{
2591#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2592 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2593#else
2594 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2595#endif
2596}
2597
2598
2599/**
2600 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2601 * address.
2602 */
2603IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2604{
2605#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2606 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2607#else
2608 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2609#endif
2610}
2611
2612
2613/**
2614 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2615 */
2616IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2617{
2618#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2619 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2620#else
2621 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2622#endif
2623}
2624
2625
2626/**
2627 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2628 */
2629IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2630{
2631#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2632 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2633#else
2634 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2635#endif
2636}
2637
2638
2639/**
2640 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2641 */
2642IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2643{
2644#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2645 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2646#else
2647 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2648#endif
2649}
2650
2651
2652/**
2653 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2654 * address.
2655 */
2656IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2657{
2658#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2659 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2660#else
2661 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2662#endif
2663}
2664
2665
2666/**
2667 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2668 */
2669IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2670{
2671#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2672 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2673#else
2674 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2675#endif
2676}
2677
2678
2679/**
2680 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2681 */
2682IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2683{
2684#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2685 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2686#else
2687 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2688#endif
2689}
2690
2691
2692/**
2693 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2694 */
2695IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2696{
2697#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2698 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2699#else
2700 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2701#endif
2702}
2703
2704
2705/**
2706 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2707 * address.
2708 */
2709IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2710{
2711#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2712 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2713#else
2714 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2715#endif
2716}
2717
2718
2719/**
2720 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2721 */
2722IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2723{
2724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2725 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2726#else
2727 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2728#endif
2729}
2730
2731
2732/**
2733 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2734 */
2735IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2736{
2737#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2738 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2739#else
2740 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2741#endif
2742}
2743
2744
2745/**
2746 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2747 */
2748IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2749{
2750#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2751 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2752#else
2753 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2754#endif
2755}
2756
2757
2758/**
2759 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2760 */
2761IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2762{
2763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2764 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2765#else
2766 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2767#endif
2768}
2769
2770
2771/**
2772 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2773 */
2774IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2775{
2776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2777 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2778#else
2779 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2780#endif
2781}
2782
2783
2784/**
2785 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2786 * address.
2787 */
2788IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2789{
2790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2791 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2792#else
2793 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2794#endif
2795}
2796
2797
2798/**
2799 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2800 */
2801IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2802{
2803#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2804 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2805#else
2806 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2807#endif
2808}
2809
2810
2811/**
2812 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2813 */
2814IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2815{
2816#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2817 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2818#else
2819 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2820#endif
2821}
2822
2823
2824/**
2825 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2826 */
2827IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2828{
2829#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2830 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2831#else
2832 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2833#endif
2834}
2835
2836
2837/*********************************************************************************************************************************
2838* Helpers: Commit, rollback & unmap *
2839*********************************************************************************************************************************/
2840
2841/**
2842 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2843 */
2844IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2845{
2846 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2847}
2848
2849
2850/**
2851 * Used by TB code to commit and unmap a read-write memory mapping.
2852 */
2853IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2854{
2855 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2856}
2857
2858
2859/**
2860 * Used by TB code to commit and unmap a write-only memory mapping.
2861 */
2862IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2863{
2864 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2865}
2866
2867
2868/**
2869 * Used by TB code to commit and unmap a read-only memory mapping.
2870 */
2871IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2872{
2873 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2874}
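/*
 * Illustrative pairing of the map and commit-and-unmap helpers (not compiled; in practice
 * the recompiler emits native calls to these, and GCPtrMem here is just a stand-in for the
 * guest address produced by the recompiled instruction):
 *
 *      uint8_t   bUnmapInfo = 0;
 *      uint32_t *pu32Dst    = iemNativeHlpMemMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem, X86_SREG_DS);
 *      *pu32Dst |= RT_BIT_32(0);
 *      iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);
 */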
2875
2876
2877/**
2878 * Reinitializes the native recompiler state.
2879 *
2880 * Called before starting a new recompile job.
2881 */
2882static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2883{
2884 pReNative->cLabels = 0;
2885 pReNative->bmLabelTypes = 0;
2886 pReNative->cFixups = 0;
2887#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2888 pReNative->pDbgInfo->cEntries = 0;
2889#endif
2890 pReNative->pTbOrg = pTb;
2891 pReNative->cCondDepth = 0;
2892 pReNative->uCondSeqNo = 0;
2893 pReNative->uCheckIrqSeqNo = 0;
2894 pReNative->uTlbSeqNo = 0;
2895
2896 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2897#if IEMNATIVE_HST_GREG_COUNT < 32
2898 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2899#endif
2900 ;
2901 pReNative->Core.bmHstRegsWithGstShadow = 0;
2902 pReNative->Core.bmGstRegShadows = 0;
2903 pReNative->Core.bmVars = 0;
2904 pReNative->Core.bmStack = 0;
2905 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2906 pReNative->Core.u64ArgVars = UINT64_MAX;
2907
2908 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 9);
2909 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2910 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2911 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2912 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2913 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2914 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2915 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2916 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2917 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2918
2919 /* Full host register reinit: */
2920 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2921 {
2922 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2923 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2924 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2925 }
2926
2927 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2928 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2929#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2930 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2931#endif
2932#ifdef IEMNATIVE_REG_FIXED_TMP0
2933 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2934#endif
2935 );
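/* Mark all remaining fixed registers (i.e. those other than pVCpu and, when present, pCtx
   and TMP0) as generically reserved; the special ones get their exact roles assigned below. */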
2936 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2937 {
2938 fRegs &= ~RT_BIT_32(idxReg);
2939 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2940 }
2941
2942 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2943#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2944 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2945#endif
2946#ifdef IEMNATIVE_REG_FIXED_TMP0
2947 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2948#endif
2949 return pReNative;
2950}
2951
2952
2953/**
2954 * Allocates and initializes the native recompiler state.
2955 *
2956 * This is called the first time an EMT wants to recompile something.
2957 *
2958 * @returns Pointer to the new recompiler state.
2959 * @param pVCpu The cross context virtual CPU structure of the calling
2960 * thread.
2961 * @param pTb The TB that's about to be recompiled.
2962 * @thread EMT(pVCpu)
2963 */
2964static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2965{
2966 VMCPU_ASSERT_EMT(pVCpu);
2967
2968 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2969 AssertReturn(pReNative, NULL);
2970
2971 /*
2972 * Try allocate all the buffers and stuff we need.
2973 */
2974 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2975 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2976 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2977#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2978 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2979#endif
2980 if (RT_LIKELY( pReNative->pInstrBuf
2981 && pReNative->paLabels
2982 && pReNative->paFixups)
2983#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2984 && pReNative->pDbgInfo
2985#endif
2986 )
2987 {
2988 /*
2989 * Set the buffer & array sizes on success.
2990 */
2991 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2992 pReNative->cLabelsAlloc = _8K;
2993 pReNative->cFixupsAlloc = _16K;
2994#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2995 pReNative->cDbgInfoAlloc = _16K;
2996#endif
2997
2998 /* Other constant stuff: */
2999 pReNative->pVCpu = pVCpu;
3000
3001 /*
3002 * Done, just need to save it and reinit it.
3003 */
3004 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3005 return iemNativeReInit(pReNative, pTb);
3006 }
3007
3008 /*
3009 * Failed. Cleanup and return.
3010 */
3011 AssertFailed();
3012 RTMemFree(pReNative->pInstrBuf);
3013 RTMemFree(pReNative->paLabels);
3014 RTMemFree(pReNative->paFixups);
3015#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3016 RTMemFree(pReNative->pDbgInfo);
3017#endif
3018 RTMemFree(pReNative);
3019 return NULL;
3020}
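/*
 * Typical usage sketch (not compiled; pTb is whatever TB is about to be recompiled):
 *
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      if (RT_LIKELY(pReNative))
 *          pReNative = iemNativeReInit(pReNative, pTb);
 *      else
 *          pReNative = iemNativeInit(pVCpu, pTb); // first recompile on this EMT
 */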
3021
3022
3023/**
3024 * Creates a label.
3025 *
3026 * If the label does not yet have a defined position,
3027 * call iemNativeLabelDefine() later to set it.
3028 *
3029 * @returns Label ID. Throws VBox status code on failure, so no need to check
3030 * the return value.
3031 * @param pReNative The native recompile state.
3032 * @param enmType The label type.
3033 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3034 * label is not yet defined (default).
3035 * @param uData Data associated with the label. Only applicable to
3036 * certain types of labels. Default is zero.
3037 */
3038DECL_HIDDEN_THROW(uint32_t)
3039iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3040 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3041{
3042 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3043
3044 /*
3045 * Locate existing label definition.
3046 *
3047 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3048 * and uData is zero.
3049 */
3050 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3051 uint32_t const cLabels = pReNative->cLabels;
3052 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3053#ifndef VBOX_STRICT
3054 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3055 && offWhere == UINT32_MAX
3056 && uData == 0
3057#endif
3058 )
3059 {
3060#ifndef VBOX_STRICT
3061 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3062 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3063 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3064 if (idxLabel < pReNative->cLabels)
3065 return idxLabel;
3066#else
3067 for (uint32_t i = 0; i < cLabels; i++)
3068 if ( paLabels[i].enmType == enmType
3069 && paLabels[i].uData == uData)
3070 {
3071 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3072 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3073 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3074 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3075 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3076 return i;
3077 }
3078 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3079 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3080#endif
3081 }
3082
3083 /*
3084 * Make sure we've got room for another label.
3085 */
3086 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3087 { /* likely */ }
3088 else
3089 {
3090 uint32_t cNew = pReNative->cLabelsAlloc;
3091 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3092 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3093 cNew *= 2;
3094 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3095 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3096 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3097 pReNative->paLabels = paLabels;
3098 pReNative->cLabelsAlloc = cNew;
3099 }
3100
3101 /*
3102 * Define a new label.
3103 */
3104 paLabels[cLabels].off = offWhere;
3105 paLabels[cLabels].enmType = enmType;
3106 paLabels[cLabels].uData = uData;
3107 pReNative->cLabels = cLabels + 1;
3108
3109 Assert((unsigned)enmType < 64);
3110 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3111
3112 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3113 {
3114 Assert(uData == 0);
3115 pReNative->aidxUniqueLabels[enmType] = cLabels;
3116 }
3117
3118 if (offWhere != UINT32_MAX)
3119 {
3120#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3121 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3122 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3123#endif
3124 }
3125 return cLabels;
3126}
3127
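#if 0 /* Illustrative usage sketch only - not compiled. */
/*
 * Example of the intended pattern: create the label up front without a
 * position and pin it with iemNativeLabelDefine() once the code it marks has
 * been emitted.  This is a hedged sketch; the label type below is just a
 * placeholder and the branch emission is elided.
 */
static uint32_t iemNativeExampleLabelUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Forward declare the label (offWhere = UINT32_MAX means "not yet defined"). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return /* placeholder type */,
                                                   UINT32_MAX, 0);

    /* ... emit code branching to idxLabel, recording fixups via iemNativeAddFixup() ... */

    /* The label position is known now, so define it at the current offset. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
    return off;
}
#endif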
3128
3129/**
3130 * Defines the location of an existing label.
3131 *
3132 * @param pReNative The native recompile state.
3133 * @param idxLabel The label to define.
3134 * @param offWhere The position.
3135 */
3136DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3137{
3138 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3139 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3140 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3141 pLabel->off = offWhere;
3142#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3143 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3144 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3145#endif
3146}
3147
3148
3149/**
3150 * Looks up a label.
3151 *
3152 * @returns Label ID if found, UINT32_MAX if not.
3153 */
3154static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3155 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3156{
3157 Assert((unsigned)enmType < 64);
3158 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3159 {
3160 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3161 return pReNative->aidxUniqueLabels[enmType];
3162
3163 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3164 uint32_t const cLabels = pReNative->cLabels;
3165 for (uint32_t i = 0; i < cLabels; i++)
3166 if ( paLabels[i].enmType == enmType
3167 && paLabels[i].uData == uData
3168 && ( paLabels[i].off == offWhere
3169 || offWhere == UINT32_MAX
3170 || paLabels[i].off == UINT32_MAX))
3171 return i;
3172 }
3173 return UINT32_MAX;
3174}
3175
3176
3177/**
3178 * Adds a fixup.
3179 *
3180 * @throws VBox status code (int) on failure.
3181 * @param pReNative The native recompile state.
3182 * @param offWhere The instruction offset of the fixup location.
3183 * @param idxLabel The target label ID for the fixup.
3184 * @param enmType The fixup type.
3185 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3186 */
3187DECL_HIDDEN_THROW(void)
3188iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3189 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3190{
3191 Assert(idxLabel <= UINT16_MAX);
3192 Assert((unsigned)enmType <= UINT8_MAX);
3193
3194 /*
3195 * Make sure we've got room.
3196 */
3197 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3198 uint32_t const cFixups = pReNative->cFixups;
3199 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3200 { /* likely */ }
3201 else
3202 {
3203 uint32_t cNew = pReNative->cFixupsAlloc;
3204 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3205 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3206 cNew *= 2;
3207 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3208 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3209 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3210 pReNative->paFixups = paFixups;
3211 pReNative->cFixupsAlloc = cNew;
3212 }
3213
3214 /*
3215 * Add the fixup.
3216 */
3217 paFixups[cFixups].off = offWhere;
3218 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3219 paFixups[cFixups].enmType = enmType;
3220 paFixups[cFixups].offAddend = offAddend;
3221 pReNative->cFixups = cFixups + 1;
3222}
3223
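#if 0 /* Illustrative usage sketch only - not compiled. */
/*
 * Example of how a fixup pairs with a forward label on AMD64: the jump is
 * emitted with a dummy rel32 displacement and the fixup records where the
 * final displacement must be patched once the label gets defined.  Hedged
 * sketch: it mirrors the pattern of the jump emitters used elsewhere in this
 * file, and kIemNativeFixupType_Rel32 is assumed to be the matching fixup
 * type for a 4-byte relative displacement.
 */
static uint32_t iemNativeExampleJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
{
# ifdef RT_ARCH_AMD64
    PIEMNATIVEINSTR const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    pbCodeBuf[off++] = 0xe9;                    /* jmp rel32 */
    iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
    pbCodeBuf[off++] = 0xfe;                    /* placeholder bytes, patched when the label is defined */
    pbCodeBuf[off++] = 0xfe;
    pbCodeBuf[off++] = 0xfe;
    pbCodeBuf[off++] = 0xfe;
# endif
    return off;
}
#endif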
3224
3225/**
3226 * Slow code path for iemNativeInstrBufEnsure.
3227 */
3228DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3229{
3230 /* Double the buffer size till we meet the request. */
3231 uint32_t cNew = pReNative->cInstrBufAlloc;
3232 AssertReturn(cNew > 0, NULL);
3233 do
3234 cNew *= 2;
3235 while (cNew < off + cInstrReq);
3236
3237 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3238#ifdef RT_ARCH_ARM64
3239 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3240#else
3241 uint32_t const cbMaxInstrBuf = _2M;
3242#endif
3243 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3244
3245 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3246 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3247
3248#ifdef VBOX_STRICT
3249 pReNative->offInstrBufChecked = off + cInstrReq;
3250#endif
3251 pReNative->cInstrBufAlloc = cNew;
3252 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3253}
3254
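#if 0 /* Illustrative usage sketch only - not compiled. */
/*
 * Example of the fast path the slow path above backs: emitters call
 * iemNativeInstrBufEnsure() for the number of instruction units they are
 * about to write and then store directly into the returned buffer.  This is
 * a hedged sketch of the common emit pattern; IEMNATIVEINSTR is a byte on
 * AMD64 and a 32-bit word on ARM64.
 */
static uint32_t iemNativeExampleEmitNop(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
# ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0x90;                     /* nop */
# elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = UINT32_C(0xd503201f);     /* nop */
# endif
    return off;
}
#endif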
3255#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3256
3257/**
3258 * Grows the static debug info array used during recompilation.
3259 *
3260 * @returns Pointer to the new debug info block; throws VBox status code on
3261 * failure, so no need to check the return value.
3262 */
3263DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3264{
3265 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3266 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3267 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3268 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3269 pReNative->pDbgInfo = pDbgInfo;
3270 pReNative->cDbgInfoAlloc = cNew;
3271 return pDbgInfo;
3272}
3273
3274
3275/**
3276 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3277 */
3278DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3279{
3280 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3281 { /* likely */ }
3282 else
3283 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3284 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3285}
3286
3287
3288/**
3289 * Debug Info: Adds a native offset record, if necessary.
3290 */
3291static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3292{
3293 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3294
3295 /*
3296 * Search backwards to see if we've got a similar record already.
3297 */
3298 uint32_t idx = pDbgInfo->cEntries;
3299 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3300 while (idx-- > idxStop)
3301 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3302 {
3303 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3304 return;
3305 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3306 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3307 break;
3308 }
3309
3310 /*
3311 * Add it.
3312 */
3313 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3314 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3315 pEntry->NativeOffset.offNative = off;
3316}
3317
3318
3319/**
3320 * Debug Info: Record info about a label.
3321 */
3322static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3323{
3324 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3325 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3326 pEntry->Label.uUnused = 0;
3327 pEntry->Label.enmLabel = (uint8_t)enmType;
3328 pEntry->Label.uData = uData;
3329}
3330
3331
3332/**
3333 * Debug Info: Record info about a threaded call.
3334 */
3335static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3336{
3337 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3338 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3339 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3340 pEntry->ThreadedCall.uUnused = 0;
3341 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3342}
3343
3344
3345/**
3346 * Debug Info: Record info about a new guest instruction.
3347 */
3348static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3349{
3350 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3351 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3352 pEntry->GuestInstruction.uUnused = 0;
3353 pEntry->GuestInstruction.fExec = fExec;
3354}
3355
3356
3357/**
3358 * Debug Info: Record info about guest register shadowing.
3359 */
3360static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3361 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3362{
3363 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3364 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3365 pEntry->GuestRegShadowing.uUnused = 0;
3366 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3367 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3368 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3369}
3370
3371#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3372
3373
3374/*********************************************************************************************************************************
3375* Register Allocator *
3376*********************************************************************************************************************************/
3377
3378/**
3379 * Register parameter indexes (indexed by argument number).
3380 */
3381DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3382{
3383 IEMNATIVE_CALL_ARG0_GREG,
3384 IEMNATIVE_CALL_ARG1_GREG,
3385 IEMNATIVE_CALL_ARG2_GREG,
3386 IEMNATIVE_CALL_ARG3_GREG,
3387#if defined(IEMNATIVE_CALL_ARG4_GREG)
3388 IEMNATIVE_CALL_ARG4_GREG,
3389# if defined(IEMNATIVE_CALL_ARG5_GREG)
3390 IEMNATIVE_CALL_ARG5_GREG,
3391# if defined(IEMNATIVE_CALL_ARG6_GREG)
3392 IEMNATIVE_CALL_ARG6_GREG,
3393# if defined(IEMNATIVE_CALL_ARG7_GREG)
3394 IEMNATIVE_CALL_ARG7_GREG,
3395# endif
3396# endif
3397# endif
3398#endif
3399};
3400
3401/**
3402 * Call register masks indexed by argument count.
3403 */
3404DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3405{
3406 0,
3407 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3408 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3409 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3410 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3411 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3412#if defined(IEMNATIVE_CALL_ARG4_GREG)
3413 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3414 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3415# if defined(IEMNATIVE_CALL_ARG5_GREG)
3416 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3417 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3418# if defined(IEMNATIVE_CALL_ARG6_GREG)
3419 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3420 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3421 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3422# if defined(IEMNATIVE_CALL_ARG7_GREG)
3423 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3424 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3425 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3426# endif
3427# endif
3428# endif
3429#endif
3430};
3431
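#if 0 /* Illustrative usage sketch only - not compiled. */
/*
 * Example of how the two tables above are meant to be used together: look up
 * the host register that carries a given argument and the mask of all
 * argument registers a call with cArgs arguments will occupy.  Hedged sketch
 * for illustration only.
 */
static void iemNativeExampleCallRegTables(uint8_t cArgs)
{
    Assert(cArgs < RT_ELEMENTS(g_afIemNativeCallRegs));
    uint8_t const  idxRegArg0 = g_aidxIemNativeCallRegs[0];     /* host GPR holding argument #0 */
    uint32_t const fArgRegs   = g_afIemNativeCallRegs[cArgs];   /* all argument GPRs used by a cArgs call */
    RT_NOREF(idxRegArg0, fArgRegs);
}
#endif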
3432#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3433/**
3434 * BP offset of the stack argument slots.
3435 *
3436 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3437 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3438 */
3439DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3440{
3441 IEMNATIVE_FP_OFF_STACK_ARG0,
3442# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3443 IEMNATIVE_FP_OFF_STACK_ARG1,
3444# endif
3445# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3446 IEMNATIVE_FP_OFF_STACK_ARG2,
3447# endif
3448# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3449 IEMNATIVE_FP_OFF_STACK_ARG3,
3450# endif
3451};
3452AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3453#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3454
3455/**
3456 * Info about shadowed guest register values.
3457 * @see IEMNATIVEGSTREG
3458 */
3459static struct
3460{
3461 /** Offset in VMCPU. */
3462 uint32_t off;
3463 /** The field size. */
3464 uint8_t cb;
3465 /** Name (for logging). */
3466 const char *pszName;
3467} const g_aGstShadowInfo[] =
3468{
3469#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3470 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3471 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3472 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3473 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3474 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3475 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3476 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3477 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3478 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3479 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3480 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3481 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3482 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3483 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3484 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3485 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3486 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3487 /* [kIemNativeGstReg_LivenessPadding17] = */ { UINT32_MAX / 4, 0, "pad17", },
3488 /* [kIemNativeGstReg_LivenessPadding18] = */ { UINT32_MAX / 4, 0, "pad18", },
3489 /* [kIemNativeGstReg_LivenessPadding19] = */ { UINT32_MAX / 4, 0, "pad19", },
3490 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3491 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3492 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3493 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3494 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3495 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3496 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3497 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3498 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3499 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3500 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3501 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3502 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3503 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3504 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3505 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3506 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3507 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3508 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3509 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3510 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3511 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3512 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3513 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3514 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3515#undef CPUMCTX_OFF_AND_SIZE
3516};
3517AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3518
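#if 0 /* Illustrative usage sketch only - not compiled. */
/*
 * Example of what the table above drives: the offset/size pair describes
 * where each shadowed guest register lives inside VMCPU, so loads and write
 * backs can be emitted generically.  Hedged sketch; the 64-bit VCpu load
 * helper named below is assumed to exist in the emitter header, and the real
 * code uses iemNativeEmitLoadGprWithGstShadowReg() for this further down.
 */
static uint32_t iemNativeExampleLoadGstField(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                             uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
{
    Assert(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t)); /* sketch: only the 8-byte fields */
    return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
}
#endif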
3519
3520/** Host CPU general purpose register names. */
3521DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3522{
3523#ifdef RT_ARCH_AMD64
3524 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3525#elif defined(RT_ARCH_ARM64)
3526 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3527 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3528#else
3529# error "port me"
3530#endif
3531};
3532
3533
3534DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3535 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3536{
3537 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3538
3539 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3540 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3541 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3542 return (uint8_t)idxReg;
3543}
3544
3545
3546#if 0 /* unused */
3547/**
3548 * Tries to locate a suitable register in the given register mask.
3549 *
3550 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3551 * failed.
3552 *
3553 * @returns Host register number on success, returns UINT8_MAX on failure.
3554 */
3555static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3556{
3557 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3558 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3559 if (fRegs)
3560 {
3561 /** @todo pick better here: */
3562 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3563
3564 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3565 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3566 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3567 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3568
3569 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3570 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3571 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3572 return idxReg;
3573 }
3574 return UINT8_MAX;
3575}
3576#endif /* unused */
3577
3578
3579/**
3580 * Locate a register, possibly freeing one up.
3581 *
3582 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3583 * failed.
3584 *
3585 * @returns Host register number on success. Returns UINT8_MAX if no registers
3586 * are found; the caller is supposed to deal with this and raise an
3587 * allocation type specific status code (if desired).
3588 *
3589 * @throws VBox status code if we run into trouble spilling a variable or
3590 * recording debug info. Does NOT throw anything if we're out of
3591 * registers, though.
3592 */
3593static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3594 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3595{
3596 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3597 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3598 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3599
3600 /*
3601 * Try a free register that's shadowing a guest register.
3602 */
3603 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3604 if (fRegs)
3605 {
3606 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3607
3608#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3609 /*
3610 * When we have liveness information, we use it to kick out all shadowed
3611 * guest registers that will not be needed any more in this TB. If we're
3612 * lucky, this may prevent us from ending up here again.
3613 *
3614 * Note! We must consider the previous entry here so we don't free
3615 * anything that the current threaded function requires (current
3616 * entry is produced by the next threaded function).
3617 */
3618 uint32_t const idxCurCall = pReNative->idxCurCall;
3619 if (idxCurCall > 0)
3620 {
3621 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3622
3623 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3624 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3625# if 0
3626 IEMLIVENESSBIT Tmp = { pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64 }; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3627 Tmp.fEflOther &= Tmp.fEflCf; /** @todo optimize this (pair of 3 (status), pair of 4 (in other), pair of 2, pair of 1). */
3628 Tmp.fEflOther &= Tmp.fEflPf;
3629 Tmp.fEflOther &= Tmp.fEflAf;
3630 Tmp.fEflOther &= Tmp.fEflZf;
3631 Tmp.fEflOther &= Tmp.fEflSf;
3632 Tmp.fEflOther &= Tmp.fEflOf;
3633 Tmp.fEflCf = 0; /* not necessary, but better safe. */
3634 Tmp.fEflPf = 0;
3635 Tmp.fEflAf = 0;
3636 Tmp.fEflZf = 0;
3637 Tmp.fEflSf = 0;
3638 Tmp.fEflOf = 0;
3639 uint64_t fToFreeMask = Tmp.bm64;
3640# else
3641 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3642 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3643 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3644 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3645 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3646 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3647# endif
3648
3649 /* If it matches any shadowed registers. */
3650 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3651 {
3652 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3653 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3654 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3655
3656 /* See if we've got any unshadowed registers we can return now. */
3657 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3658 if (fUnshadowedRegs)
3659 {
3660 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3661 return (fPreferVolatile
3662 ? ASMBitFirstSetU32(fUnshadowedRegs)
3663 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3664 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3665 - 1;
3666 }
3667 }
3668 }
3669#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3670
3671 unsigned const idxReg = (fPreferVolatile
3672 ? ASMBitFirstSetU32(fRegs)
3673 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3674 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3675 - 1;
3676
3677 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3678 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3679 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3680 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3681
3682 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3683 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3684 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3685 return idxReg;
3686 }
3687
3688 /*
3689 * Try to free up a variable that's in a register.
3690 *
3691 * We do two rounds here: first we evacuate variables that don't need to be
3692 * saved on the stack, then in the second round we move things to the stack.
3693 */
3694 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3695 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3696 {
3697 uint32_t fVars = pReNative->Core.bmVars;
3698 while (fVars)
3699 {
3700 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3701 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3702 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3703 && (RT_BIT_32(idxReg) & fRegMask)
3704 && ( iLoop == 0
3705 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3706 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3707 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3708 {
3709 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3710 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3711 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3712 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3713 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3714 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3715
3716 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3717 {
3718 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3719 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3720 }
3721
3722 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3723 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3724
3725 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3726 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3727 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3728 return idxReg;
3729 }
3730 fVars &= ~RT_BIT_32(idxVar);
3731 }
3732 }
3733
3734 return UINT8_MAX;
3735}
3736
3737
3738/**
3739 * Reassigns a variable to a different register specified by the caller.
3740 *
3741 * @returns The new code buffer position.
3742 * @param pReNative The native recompile state.
3743 * @param off The current code buffer position.
3744 * @param idxVar The variable index.
3745 * @param idxRegOld The old host register number.
3746 * @param idxRegNew The new host register number.
3747 * @param pszCaller The caller for logging.
3748 */
3749static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3750 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3751{
3752 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3753 RT_NOREF(pszCaller);
3754
3755 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3756
3757 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3758 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3759 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3760 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3761
3762 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3763 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3764 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3765 if (fGstRegShadows)
3766 {
3767 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3768 | RT_BIT_32(idxRegNew);
3769 while (fGstRegShadows)
3770 {
3771 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3772 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3773
3774 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3775 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3776 }
3777 }
3778
3779 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3780 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3781 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3782 return off;
3783}
3784
3785
3786/**
3787 * Moves a variable to a different register or spills it onto the stack.
3788 *
3789 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3790 * kinds can easily be recreated if needed later.
3791 *
3792 * @returns The new code buffer position.
3793 * @param pReNative The native recompile state.
3794 * @param off The current code buffer position.
3795 * @param idxVar The variable index.
3796 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3797 * call-volatile registers.
3798 */
3799static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3800 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3801{
3802 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3803 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3804 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
3805
3806 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3807 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3808 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3809 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3810 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3811 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3812 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3813 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3814 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3815
3816
3817 /** @todo Add statistics on this.*/
3818 /** @todo Implement basic variable liveness analysis (python) so variables
3819 * can be freed immediately once no longer used. Without that we risk
3820 * trashing registers and stack space on dead variables.
3821 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3822
3823 /*
3824 * First try move it to a different register, as that's cheaper.
3825 */
3826 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3827 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3828 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3829 if (fRegs)
3830 {
3831 /* Avoid using shadow registers, if possible. */
3832 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3833 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3834 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3835 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3836 }
3837
3838 /*
3839 * Otherwise we must spill the register onto the stack.
3840 */
3841 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3842 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3843 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3844 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3845
3846 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3847 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3848 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3849 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3850 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3851 return off;
3852}
3853
3854
3855/**
3856 * Allocates a temporary host general purpose register.
3857 *
3858 * This may emit code to save register content onto the stack in order to free
3859 * up a register.
3860 *
3861 * @returns The host register number; throws VBox status code on failure,
3862 * so no need to check the return value.
3863 * @param pReNative The native recompile state.
3864 * @param poff Pointer to the variable with the code buffer position.
3865 * This will be updated if we need to move a variable from
3866 * register to stack in order to satisfy the request.
3867 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3868 * registers (@c true, default) or the other way around
3869 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3870 */
3871DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3872{
3873 /*
3874 * Try find a completely unused register, preferably a call-volatile one.
3875 */
3876 uint8_t idxReg;
3877 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3878 & ~pReNative->Core.bmHstRegsWithGstShadow
3879 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3880 if (fRegs)
3881 {
3882 if (fPreferVolatile)
3883 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3884 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3885 else
3886 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3887 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3888 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3889 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3890 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3891 }
3892 else
3893 {
3894 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3895 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3896 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3897 }
3898 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3899}
3900
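#if 0 /* Illustrative usage sketch only - not compiled. */
/*
 * Example of the temporary register lifecycle: allocate, emit code using the
 * register, release it again.  Hedged sketch; iemNativeRegFreeTmp() is the
 * release helper assumed to pair with this allocator (not part of this
 * excerpt), and the immediate load merely stands in for real work.
 */
static uint32_t iemNativeExampleTmpRegUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp, UINT64_C(0x1234));
    /* ... more code using idxRegTmp ... */
    iemNativeRegFreeTmp(pReNative, idxRegTmp);  /* assumed release helper */
    return off;
}
#endif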
3901
3902/**
3903 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
3904 * registers.
3905 *
3906 * @returns The host register number; throws VBox status code on failure,
3907 * so no need to check the return value.
3908 * @param pReNative The native recompile state.
3909 * @param poff Pointer to the variable with the code buffer position.
3910 * This will be updated if we need to move a variable from
3911 * register to stack in order to satisfy the request.
3912 * @param fRegMask Mask of acceptable registers.
3913 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3914 * registers (@c true, default) or the other way around
3915 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3916 */
3917DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3918 bool fPreferVolatile /*= true*/)
3919{
3920 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3921 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3922
3923 /*
3924 * Try find a completely unused register, preferably a call-volatile one.
3925 */
3926 uint8_t idxReg;
3927 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3928 & ~pReNative->Core.bmHstRegsWithGstShadow
3929 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3930 & fRegMask;
3931 if (fRegs)
3932 {
3933 if (fPreferVolatile)
3934 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3935 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3936 else
3937 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3938 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3939 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3940 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3941 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3942 }
3943 else
3944 {
3945 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3946 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3947 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3948 }
3949 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3950}
3951
3952
3953/**
3954 * Allocates a temporary register for loading an immediate value into.
3955 *
3956 * This will emit code to load the immediate, unless there happens to be an
3957 * unused register with the value already loaded.
3958 *
3959 * The caller will not modify the returned register, it must be considered
3960 * read-only. Free using iemNativeRegFreeTmpImm.
3961 *
3962 * @returns The host register number; throws VBox status code on failure, so no
3963 * need to check the return value.
3964 * @param pReNative The native recompile state.
3965 * @param poff Pointer to the variable with the code buffer position.
3966 * @param uImm The immediate value that the register must hold upon
3967 * return.
3968 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3969 * registers (@c true, default) or the other way around
3970 * (@c false).
3971 *
3972 * @note Reusing immediate values has not been implemented yet.
3973 */
3974DECL_HIDDEN_THROW(uint8_t)
3975iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3976{
3977 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3978 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3979 return idxReg;
3980}
3981
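#if 0 /* Illustrative usage sketch only - not compiled. */
/*
 * Example use of the immediate variant: the register comes back read-only
 * with uImm loaded and must be released with iemNativeRegFreeTmpImm() as the
 * documentation above states.  Hedged sketch; the code consuming the value is
 * elided.
 */
static uint32_t iemNativeExampleTmpImmUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
    /* ... emit code that reads (but never writes) idxRegImm ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);
    return off;
}
#endif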
3982
3983/**
3984 * Helper for iemNativeLivenessGetStateByGstReg.
3985 *
3986 * @returns IEMLIVENESS_STATE_XXX
3987 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
3988 * ORed together.
3989 */
3990DECL_FORCE_INLINE(uint32_t)
3991iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
3992{
3993 /* INPUT trumps anything else. */
3994 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
3995 return IEMLIVENESS_STATE_INPUT;
3996
3997 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
3998 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
3999 {
4000 /* If not all sub-fields are clobbered they must be considered INPUT. */
4001 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4002 return IEMLIVENESS_STATE_INPUT;
4003 return IEMLIVENESS_STATE_CLOBBERED;
4004 }
4005
4006 /* XCPT_OR_CALL trumps UNUSED. */
4007 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4008 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4009
4010 return IEMLIVENESS_STATE_UNUSED;
4011}
4012
4013#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4014
4015DECL_FORCE_INLINE(uint32_t)
4016iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4017{
4018 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4019 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4020}
4021
4022
4023DECL_FORCE_INLINE(uint32_t)
4024iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4025{
4026 uint32_t uRet = ((pLivenessEntry->Bit0.bm64 >> (unsigned)enmGstReg) & 1)
4027 | (((pLivenessEntry->Bit1.bm64 >> (unsigned)enmGstReg) << 1) & 2);
4028 if (enmGstReg == kIemNativeGstReg_EFlags)
4029 {
4030 /* Merge the eflags states to one. */
4031 uRet = RT_BIT_32(uRet);
4032 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4033 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4034 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4035 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4036 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4037 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4038 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4039 }
4040 return uRet;
4041}
4042
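# if 0 /* Illustrative usage sketch only - not compiled. */
/*
 * Example of querying the liveness state for the call currently being
 * recompiled, mirroring the pattern of the asserts in
 * iemNativeRegAllocTmpForGuestReg() further down.  Hedged sketch: the entry
 * at idxCurCall - 1 is used because the current entry describes what the
 * next threaded function needs.
 */
static bool iemNativeExampleIsGstRegInput(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
{
    Assert(pReNative->idxCurCall > 0);
    PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
    return iemNativeLivenessGetStateByGstReg(pLivenessEntry, enmGstReg) == IEMLIVENESS_STATE_INPUT;
}
# endif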
4043
4044# ifdef VBOX_STRICT
4045/** For assertions only: the caller checks that idxCurCall isn't zero. */
4046DECL_FORCE_INLINE(uint32_t)
4047iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4048{
4049 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4050}
4051# endif /* VBOX_STRICT */
4052
4053#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4054
4055/**
4056 * Marks host register @a idxHstReg as containing a shadow copy of guest
4057 * register @a enmGstReg.
4058 *
4059 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4060 * host register before calling.
4061 */
4062DECL_FORCE_INLINE(void)
4063iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4064{
4065 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4066 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4067 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4068
4069 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4070 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4071 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4072 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4073#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4074 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4075 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4076#else
4077 RT_NOREF(off);
4078#endif
4079}
4080
4081
4082/**
4083 * Clear any guest register shadow claims from @a idxHstReg.
4084 *
4085 * The register does not need to be shadowing any guest registers.
4086 */
4087DECL_FORCE_INLINE(void)
4088iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4089{
4090 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4091 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4092 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4093 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4094 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4095
4096#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4097 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4098 if (fGstRegs)
4099 {
4100 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4101 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4102 while (fGstRegs)
4103 {
4104 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4105 fGstRegs &= ~RT_BIT_64(iGstReg);
4106 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4107 }
4108 }
4109#else
4110 RT_NOREF(off);
4111#endif
4112
4113 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4114 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4115 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4116}
4117
4118
4119/**
4120 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4121 * and global overview flags.
4122 */
4123DECL_FORCE_INLINE(void)
4124iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4125{
4126 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4127 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4128 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4129 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4130 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4131 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4132 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4133
4134#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4135 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4136 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4137#else
4138 RT_NOREF(off);
4139#endif
4140
4141 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4142 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4143 if (!fGstRegShadowsNew)
4144 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4145 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4146}
4147
4148
4149#if 0 /* unused */
4150/**
4151 * Clear any guest register shadow claim for @a enmGstReg.
4152 */
4153DECL_FORCE_INLINE(void)
4154iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4155{
4156 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4157 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4158 {
4159 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4160 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4161 }
4162}
4163#endif
4164
4165
4166/**
4167 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4168 * as the new shadow of it.
4169 *
4170 * Unlike the other guest reg shadow helpers, this does the logging for you.
4171 * However, the liveness state is not asserted here; the caller must do
4172 * that.
4173 */
4174DECL_FORCE_INLINE(void)
4175iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4176 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4177{
4178 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4179 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4180 {
4181 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4182 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4183 if (idxHstRegOld == idxHstRegNew)
4184 return;
4185 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4186 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4187 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4188 }
4189 else
4190 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4191 g_aGstShadowInfo[enmGstReg].pszName));
4192 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4193}
4194
4195
4196/**
4197 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4198 * to @a idxRegTo.
4199 */
4200DECL_FORCE_INLINE(void)
4201iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4202 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4203{
4204 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4205 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4206 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4207 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4208 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4209 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4210 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4211 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4212 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4213
4214 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4215 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4216 if (!fGstRegShadowsFrom)
4217 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4218 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4219 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4220 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4221#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4222 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4223 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4224#else
4225 RT_NOREF(off);
4226#endif
4227}
4228
4229
4230/**
4231 * Allocates a temporary host general purpose register for keeping a guest
4232 * register value.
4233 *
4234 * Since we may already have a register holding the guest register value,
4235 * code will be emitted to do the loading if that's not the case. Code may also
4236 * be emitted if we have to free up a register to satisfy the request.
4237 *
4238 * @returns The host register number; throws VBox status code on failure, so no
4239 * need to check the return value.
4240 * @param pReNative The native recompile state.
4241 * @param poff Pointer to the variable with the code buffer
4242 * position. This will be updated if we need to move a
4243 * variable from register to stack in order to satisfy
4244 * the request.
4245 * @param enmGstReg The guest register that is to be updated.
4246 * @param enmIntendedUse How the caller will be using the host register.
4247 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4248 * register is okay (default). The ASSUMPTION here is
4249 * that the caller has already flushed all volatile
4250 * registers, so this is only applied if we allocate a
4251 * new register.
4252 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4253 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4254 */
4255DECL_HIDDEN_THROW(uint8_t)
4256iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4257 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4258 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4259{
4260 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4261#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4262 AssertMsg( fSkipLivenessAssert
4263 || pReNative->idxCurCall == 0
4264 || enmGstReg == kIemNativeGstReg_Pc
4265 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4266 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4267 : IEMLIVENESS_STATE_IS_ACCESS_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4268 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4269#endif
4270 RT_NOREF(fSkipLivenessAssert);
4271#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4272 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4273#endif
4274 uint32_t const fRegMask = !fNoVolatileRegs
4275 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4276 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4277
4278 /*
4279 * First check if the guest register value is already in a host register.
4280 */
4281 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4282 {
4283 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4284 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4285 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4286 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4287
4288 /* It's not supposed to be allocated... */
4289 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4290 {
4291 /*
4292 * If the register will trash the guest shadow copy, try find a
4293 * completely unused register we can use instead. If that fails,
4294 * we need to disassociate the host reg from the guest reg.
4295 */
4296 /** @todo would be nice to know if preserving the register is in any way helpful. */
4297 /* If the purpose is calculations, try duplicate the register value as
4298 we'll be clobbering the shadow. */
4299 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4300 && ( ~pReNative->Core.bmHstRegs
4301 & ~pReNative->Core.bmHstRegsWithGstShadow
4302 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4303 {
4304 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4305
4306 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4307
4308 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4309 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4310 g_apszIemNativeHstRegNames[idxRegNew]));
4311 idxReg = idxRegNew;
4312 }
4313 /* If the current register matches the restrictions, go ahead and allocate
4314 it for the caller. */
4315 else if (fRegMask & RT_BIT_32(idxReg))
4316 {
4317 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4318 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4319 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4320 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4321 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4322 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4323 else
4324 {
4325 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4326 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4327 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4328 }
4329 }
4330 /* Otherwise, allocate a register that satisfies the caller and transfer
4331 the shadowing if compatible with the intended use. (This basically
4332 means the call wants a non-volatile register (RSP push/pop scenario).) */
4333 else
4334 {
4335 Assert(fNoVolatileRegs);
4336 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4337 !fNoVolatileRegs
4338 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4339 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4340 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4341 {
4342 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4343 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4344 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4345 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4346 }
4347 else
4348 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4349 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4350 g_apszIemNativeHstRegNames[idxRegNew]));
4351 idxReg = idxRegNew;
4352 }
4353 }
4354 else
4355 {
4356 /*
4357 * Oops. Shadowed guest register already allocated!
4358 *
4359 * Allocate a new register, copy the value and, if updating, the
4360 * guest shadow copy assignment to the new register.
4361 */
4362 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4363 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4364 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4365 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4366
4367 /** @todo share register for readonly access. */
4368 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4369 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4370
4371 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4372 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4373
4374 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4375 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4376 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4377 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4378 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4379 else
4380 {
4381 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4382 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4383 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4384 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4385 }
4386 idxReg = idxRegNew;
4387 }
4388 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4389
4390#ifdef VBOX_STRICT
4391 /* Strict builds: Check that the value is correct. */
4392 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4393#endif
4394
4395 return idxReg;
4396 }
4397
4398 /*
4399 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4400 */
4401 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4402
4403 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4404 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4405
4406 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4407 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4408 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4409 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4410
4411 return idxRegNew;
4412}
4413
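/*
 * Illustrative usage sketch (a minimal example, not part of the recompiler):
 * the typical calling pattern for the allocator above, mirroring how the
 * emitters later in this file use it; the guest register chosen here (RSP)
 * is only an example.
 *
 *     uint8_t const idxRspReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                               (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xSP),
 *                                                               kIemNativeGstRegUse_ForUpdate);
 *     ... emit code that reads and updates idxRspReg ...
 *     iemNativeRegFreeTmp(pReNative, idxRspReg);
 */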
4414
4415/**
4416 * Allocates a temporary host general purpose register that already holds the
4417 * given guest register value.
4418 *
4419 * The use case for this function is places where the shadowing state cannot be
4420 * modified due to branching and such. This will fail if we don't have a
4421 * current shadow copy handy or if it's incompatible. The only code that will
4422 * be emitted here is value checking code in strict builds.
4423 *
4424 * The intended use can only be readonly!
4425 *
4426 * @returns The host register number, UINT8_MAX if not present.
4427 * @param pReNative The native recompile state.
4428 * @param poff Pointer to the instruction buffer offset.
4429 * Will be updated in strict builds if a register is
4430 * found.
4431 * @param enmGstReg The guest register that is to be accessed (read only).
4432 * @note In strict builds, this may throw instruction buffer growth failures.
4433 * Non-strict builds will not throw anything.
4434 * @sa iemNativeRegAllocTmpForGuestReg
4435 */
4436DECL_HIDDEN_THROW(uint8_t)
4437iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4438{
4439 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4440#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4441 AssertMsg( pReNative->idxCurCall == 0
4442 || IEMLIVENESS_STATE_IS_ACCESS_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4443 || enmGstReg == kIemNativeGstReg_Pc,
4444 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4445#endif
4446
4447 /*
4448 * First check if the guest register value is already in a host register.
4449 */
4450 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4451 {
4452 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4453 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4454 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4455 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4456
4457 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4458 {
4459 /*
4460 * We only do readonly use here, so easy compared to the other
4461 * variant of this code.
4462 */
4463 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4464 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4465 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4466 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4467 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4468
4469#ifdef VBOX_STRICT
4470 /* Strict builds: Check that the value is correct. */
4471 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4472#else
4473 RT_NOREF(poff);
4474#endif
4475 return idxReg;
4476 }
4477 }
4478
4479 return UINT8_MAX;
4480}
4481
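/*
 * Illustrative sketch (not part of the recompiler): since the function above
 * must not touch the shadowing state, callers test for UINT8_MAX and fall
 * back to a slower path when no compatible shadow copy is at hand; the guest
 * register used here is only an example.
 *
 *     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *     if (idxPcReg != UINT8_MAX)
 *     {
 *         ... emit code that only reads idxPcReg ...
 *         iemNativeRegFreeTmp(pReNative, idxPcReg);
 *     }
 *     else
 *         ... emit code that does not rely on a shadow copy ...
 */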
4482
4483DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
4484
4485
4486/**
4487 * Allocates argument registers for a function call.
4488 *
4489 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4490 * need to check the return value.
4491 * @param pReNative The native recompile state.
4492 * @param off The current code buffer offset.
4493 * @param cArgs The number of arguments the function call takes.
4494 */
4495DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4496{
4497 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4498 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4499 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4500 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4501
4502 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4503 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4504 else if (cArgs == 0)
4505 return off;
4506
4507 /*
4508 * Did we get lucky and are all registers free and not shadowing anything?
4509 */
4510 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4511 for (uint32_t i = 0; i < cArgs; i++)
4512 {
4513 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4514 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4515 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4516 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4517 }
4518 /*
4519 * Okay, not lucky so we have to free up the registers.
4520 */
4521 else
4522 for (uint32_t i = 0; i < cArgs; i++)
4523 {
4524 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4525 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4526 {
4527 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4528 {
4529 case kIemNativeWhat_Var:
4530 {
4531 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4532 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
4533 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4534 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4535 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4536
4537 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4538 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4539 else
4540 {
4541 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4542 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4543 }
4544 break;
4545 }
4546
4547 case kIemNativeWhat_Tmp:
4548 case kIemNativeWhat_Arg:
4549 case kIemNativeWhat_rc:
4550 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4551 default:
4552 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4553 }
4554
4555 }
4556 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4557 {
4558 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4559 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4560 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4561 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4562 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4563 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4564 }
4565 else
4566 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4567 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4568 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4569 }
4570 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4571 return off;
4572}
4573
4574
4575DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4576
4577
4578#if 0
4579/**
4580 * Frees a register assignment of any type.
4581 *
4582 * @param pReNative The native recompile state.
4583 * @param idxHstReg The register to free.
4584 *
4585 * @note Does not update variables.
4586 */
4587DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4588{
4589 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4590 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4591 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4592 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4593 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4594 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4595 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4596 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4597 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4598 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4599 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4600 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4601 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4602 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4603
4604 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4605 /* no flushing, right:
4606 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4607 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4608 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4609 */
4610}
4611#endif
4612
4613
4614/**
4615 * Frees a temporary register.
4616 *
4617 * Any shadow copies of guest registers assigned to the host register will not
4618 * be flushed by this operation.
4619 */
4620DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4621{
4622 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4623 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4624 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4625 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4626 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4627}
4628
4629
4630/**
4631 * Frees a temporary immediate register.
4632 *
4633 * It is assumed that the caller has not modified the register, so it still holds
4634 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4635 */
4636DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4637{
4638 iemNativeRegFreeTmp(pReNative, idxHstReg);
4639}
4640
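/*
 * Illustrative sketch of the temporary register life cycle used by the
 * emitters below (the allocation helper is declared earlier in this file):
 *
 *     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *     ... emit code that uses idxTmpReg as scratch ...
 *     iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */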
4641
4642/**
4643 * Frees a register assigned to a variable.
4644 *
4645 * The register will be disassociated from the variable.
4646 */
4647DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4648{
4649 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4650 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4651 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4652 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4653 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4654
4655 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4656 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4657 if (!fFlushShadows)
4658 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
4659 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4660 else
4661 {
4662 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4663 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4664 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4665 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4666 uint64_t fGstRegShadows = fGstRegShadowsOld;
4667 while (fGstRegShadows)
4668 {
4669 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4670 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4671
4672 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4673 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4674 }
4675 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
4676 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4677 }
4678}
4679
4680
4681/**
4682 * Called right before emitting a call instruction to move anything important
4683 * out of call-volatile registers, free and flush the call-volatile registers,
4684 * optionally freeing argument variables.
4685 *
4686 * @returns New code buffer offset, UINT32_MAX on failure.
4687 * @param pReNative The native recompile state.
4688 * @param off The code buffer offset.
4689 * @param cArgs The number of arguments the function call takes.
4690 * It is presumed that the host register part of these has
4691 * been allocated as such already and won't need moving,
4692 * just freeing.
4693 * @param fKeepVars Mask of variables that should keep their register
4694 * assignments. Caller must take care to handle these.
4695 */
4696DECL_HIDDEN_THROW(uint32_t)
4697iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4698{
4699 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4700
4701 /* fKeepVars will reduce this mask. */
4702 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4703
4704 /*
4705 * Move anything important out of volatile registers.
4706 */
4707 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4708 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4709 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4710#ifdef IEMNATIVE_REG_FIXED_TMP0
4711 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4712#endif
4713 & ~g_afIemNativeCallRegs[cArgs];
4714
4715 fRegsToMove &= pReNative->Core.bmHstRegs;
4716 if (!fRegsToMove)
4717 { /* likely */ }
4718 else
4719 {
4720 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4721 while (fRegsToMove != 0)
4722 {
4723 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4724 fRegsToMove &= ~RT_BIT_32(idxReg);
4725
4726 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4727 {
4728 case kIemNativeWhat_Var:
4729 {
4730 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4731 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
4732 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4733 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4734 if (!(RT_BIT_32(idxVar) & fKeepVars))
4735 {
4736 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
4737 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
4738 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4739 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4740 else
4741 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4742 }
4743 else
4744 fRegsToFree &= ~RT_BIT_32(idxReg);
4745 continue;
4746 }
4747
4748 case kIemNativeWhat_Arg:
4749 AssertMsgFailed(("What?!?: %u\n", idxReg));
4750 continue;
4751
4752 case kIemNativeWhat_rc:
4753 case kIemNativeWhat_Tmp:
4754 AssertMsgFailed(("Missing free: %u\n", idxReg));
4755 continue;
4756
4757 case kIemNativeWhat_FixedTmp:
4758 case kIemNativeWhat_pVCpuFixed:
4759 case kIemNativeWhat_pCtxFixed:
4760 case kIemNativeWhat_FixedReserved:
4761 case kIemNativeWhat_Invalid:
4762 case kIemNativeWhat_End:
4763 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4764 }
4765 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4766 }
4767 }
4768
4769 /*
4770 * Do the actual freeing.
4771 */
4772 if (pReNative->Core.bmHstRegs & fRegsToFree)
4773 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4774 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4775 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4776
4777 /* If there are guest register shadows in any call-volatile register, we
4778 have to clear the corresponding guest register masks for each register. */
4779 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4780 if (fHstRegsWithGstShadow)
4781 {
4782 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4783 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4784 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4785 do
4786 {
4787 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4788 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4789
4790 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4791 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4792 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4793 } while (fHstRegsWithGstShadow != 0);
4794 }
4795
4796 return off;
4797}
4798
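/*
 * Illustrative sketch of the call sequence this function belongs to; this is
 * the same shape used by iemNativeEmitCImplCall and iemNativeEmitThreadedCall
 * further down (argument loading abbreviated, pfnHelper is a stand-in name):
 *
 *     iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
 *     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
 *     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *     ... load the remaining call arguments ...
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *     off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
 */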
4799
4800/**
4801 * Flushes a set of guest register shadow copies.
4802 *
4803 * This is usually done after calling a threaded function or a C-implementation
4804 * of an instruction.
4805 *
4806 * @param pReNative The native recompile state.
4807 * @param fGstRegs Set of guest registers to flush.
4808 */
4809DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4810{
4811 /*
4812 * Reduce the mask by what's currently shadowed
4813 */
4814 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4815 fGstRegs &= bmGstRegShadowsOld;
4816 if (fGstRegs)
4817 {
4818 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4819 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4820 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4821 if (bmGstRegShadowsNew)
4822 {
4823 /*
4824 * Partial.
4825 */
4826 do
4827 {
4828 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4829 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4830 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4831 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4832 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4833
4834 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4835 fGstRegs &= ~fInThisHstReg;
4836 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4837 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4838 if (!fGstRegShadowsNew)
4839 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4840 } while (fGstRegs != 0);
4841 }
4842 else
4843 {
4844 /*
4845 * Clear all.
4846 */
4847 do
4848 {
4849 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4850 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4851 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4852 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4853 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4854
4855 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4856 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4857 } while (fGstRegs != 0);
4858 pReNative->Core.bmHstRegsWithGstShadow = 0;
4859 }
4860 }
4861}
4862
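/*
 * Illustrative example: the fGstRegs mask is built from kIemNativeGstReg_XXX
 * bits; e.g. dropping the PC and EFLAGS shadows before a C-implementation
 * helper call (mirroring iemNativeEmitCImplCall below) looks like this:
 *
 *     iemNativeRegFlushGuestShadows(pReNative,
 *                                   RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
 */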
4863
4864/**
4865 * Flushes guest register shadow copies held by a set of host registers.
4866 *
4867 * This is used with the TLB lookup code for ensuring that we don't carry on
4868 * with any guest shadows in volatile registers, as these will get corrupted by
4869 * a TLB miss.
4870 *
4871 * @param pReNative The native recompile state.
4872 * @param fHstRegs Set of host registers to flush guest shadows for.
4873 */
4874DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4875{
4876 /*
4877 * Reduce the mask by what's currently shadowed.
4878 */
4879 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4880 fHstRegs &= bmHstRegsWithGstShadowOld;
4881 if (fHstRegs)
4882 {
4883 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4884 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4885 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4886 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4887 if (bmHstRegsWithGstShadowNew)
4888 {
4889 /*
4890 * Partial (likely).
4891 */
4892 uint64_t fGstShadows = 0;
4893 do
4894 {
4895 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4896 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4897 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4898 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4899
4900 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4901 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4902 fHstRegs &= ~RT_BIT_32(idxHstReg);
4903 } while (fHstRegs != 0);
4904 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4905 }
4906 else
4907 {
4908 /*
4909 * Clear all.
4910 */
4911 do
4912 {
4913 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4914 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4915 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4916 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4917
4918 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4919 fHstRegs &= ~RT_BIT_32(idxHstReg);
4920 } while (fHstRegs != 0);
4921 pReNative->Core.bmGstRegShadows = 0;
4922 }
4923 }
4924}
4925
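/*
 * Illustrative call (a sketch of how the TLB lookup code mentioned above can
 * use this helper to drop all shadows living in call-volatile registers):
 *
 *     iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */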
4926
4927/**
4928 * Restores guest shadow copies in volatile registers.
4929 *
4930 * This is used after calling a helper function (think TLB miss) to restore the
4931 * register state of volatile registers.
4932 *
4933 * @param pReNative The native recompile state.
4934 * @param off The code buffer offset.
4935 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4936 * be active (allocated) w/o asserting. Hack.
4937 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4938 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4939 */
4940DECL_HIDDEN_THROW(uint32_t)
4941iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4942{
4943 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4944 if (fHstRegs)
4945 {
4946 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4947 do
4948 {
4949 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4950
4951 /* It's not fatal if a register is active holding a variable while also
4952 shadowing a guest register, ASSUMING all pending guest register
4953 writes were flushed prior to the helper call. However, we'll be
4954 emitting duplicate restores, so it wastes code space. */
4955 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4956 RT_NOREF(fHstRegsActiveShadows);
4957
4958 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4959 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4960 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4961 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4962
4963 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4964 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4965
4966 fHstRegs &= ~RT_BIT_32(idxHstReg);
4967 } while (fHstRegs != 0);
4968 }
4969 return off;
4970}
4971
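/*
 * Illustrative sketch: after a helper call has clobbered the call-volatile
 * registers the shadow bookkeeping is still intact, only the values are
 * stale, so the restore helper reloads them (pfnHelper is a stand-in name,
 * 0 is passed as fHstRegsActiveShadows):
 *
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 */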
4972
4973/**
4974 * Flushes delayed write of a specific guest register.
4975 *
4976 * This must be called prior to calling CImpl functions and any helpers that use
4977 * the guest state (like raising exceptions) and such.
4978 *
4979 * This optimization has not yet been implemented. The first target would be
4980 * RIP updates, since these are the most common ones.
4981 */
4982DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4983 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
4984{
4985 RT_NOREF(pReNative, enmClass, idxReg);
4986 return off;
4987}
4988
4989
4990/**
4991 * Flushes any delayed guest register writes.
4992 *
4993 * This must be called prior to calling CImpl functions and any helpers that use
4994 * the guest state (like raising exceptions) and such.
4995 *
4996 * This optimization has not yet been implemented. The first target would be
4997 * RIP updates, since these are the most common ones.
4998 */
4999DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5000{
5001 RT_NOREF(pReNative, off);
5002 return off;
5003}
5004
5005
5006#ifdef VBOX_STRICT
5007/**
5008 * Does internal register allocator sanity checks.
5009 * Performs internal register allocator sanity checks.
5010static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5011{
5012 /*
5013 * Iterate host registers building a guest shadowing set.
5014 */
5015 uint64_t bmGstRegShadows = 0;
5016 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5017 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5018 while (bmHstRegsWithGstShadow)
5019 {
5020 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5021 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5022 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5023
5024 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5025 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5026 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5027 bmGstRegShadows |= fThisGstRegShadows;
5028 while (fThisGstRegShadows)
5029 {
5030 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5031 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5032 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5033 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5034 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5035 }
5036 }
5037 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5038 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5039 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5040
5041 /*
5042 * Now the other way around, checking the guest to host index array.
5043 */
5044 bmHstRegsWithGstShadow = 0;
5045 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5046 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5047 while (bmGstRegShadows)
5048 {
5049 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5050 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5051 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5052
5053 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5054 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5055 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5056 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5057 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5058 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5059 }
5060 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5061 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5062 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5063}
5064#endif
5065
5066
5067/*********************************************************************************************************************************
5068* Code Emitters (larger snippets) *
5069*********************************************************************************************************************************/
5070
5071/**
5072 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5073 * extending to 64-bit width.
5074 *
5075 * @returns New code buffer offset on success, UINT32_MAX on failure.
5076 * @param pReNative The native recompile state.
5077 * @param off The current code buffer position.
5078 * @param idxHstReg The host register to load the guest register value into.
5079 * @param enmGstReg The guest register to load.
5080 *
5081 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5082 * that is something the caller needs to do if applicable.
5083 */
5084DECL_HIDDEN_THROW(uint32_t)
5085iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5086{
5087 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
5088 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5089
5090 switch (g_aGstShadowInfo[enmGstReg].cb)
5091 {
5092 case sizeof(uint64_t):
5093 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5094 case sizeof(uint32_t):
5095 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5096 case sizeof(uint16_t):
5097 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5098#if 0 /* not present in the table. */
5099 case sizeof(uint8_t):
5100 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5101#endif
5102 default:
5103 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5104 }
5105}
5106
5107
5108#ifdef VBOX_STRICT
5109/**
5110 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5111 *
5112 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5113 * Trashes EFLAGS on AMD64.
5114 */
5115static uint32_t
5116iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5117{
5118# ifdef RT_ARCH_AMD64
5119 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5120
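 /* Note: rotating the 64-bit register by 32 lets the original upper half be
    tested with a plain 32-bit TEST against 0xffffffff (no 64-bit immediate or
    scratch register needed); the second rotation below restores the value. */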
5121 /* rol reg64, 32 */
5122 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5123 pbCodeBuf[off++] = 0xc1;
5124 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5125 pbCodeBuf[off++] = 32;
5126
5127 /* test reg32, ffffffffh */
5128 if (idxReg >= 8)
5129 pbCodeBuf[off++] = X86_OP_REX_B;
5130 pbCodeBuf[off++] = 0xf7;
5131 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5132 pbCodeBuf[off++] = 0xff;
5133 pbCodeBuf[off++] = 0xff;
5134 pbCodeBuf[off++] = 0xff;
5135 pbCodeBuf[off++] = 0xff;
5136
5137 /* je/jz +1 */
5138 pbCodeBuf[off++] = 0x74;
5139 pbCodeBuf[off++] = 0x01;
5140
5141 /* int3 */
5142 pbCodeBuf[off++] = 0xcc;
5143
5144 /* rol reg64, 32 */
5145 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5146 pbCodeBuf[off++] = 0xc1;
5147 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5148 pbCodeBuf[off++] = 32;
5149
5150# elif defined(RT_ARCH_ARM64)
5151 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5152 /* lsr tmp0, reg64, #32 */
5153 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5154 /* cbz tmp0, +1 */
5155 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5156 /* brk #0x1100 */
5157 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5158
5159# else
5160# error "Port me!"
5161# endif
5162 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5163 return off;
5164}
5165#endif /* VBOX_STRICT */
5166
5167
5168#ifdef VBOX_STRICT
5169/**
5170 * Emits code that checks that the content of register @a idxReg is the same
5171 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5172 * instruction if that's not the case.
5173 *
5174 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5175 * Trashes EFLAGS on AMD64.
5176 */
5177static uint32_t
5178iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5179{
5180# ifdef RT_ARCH_AMD64
5181 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5182
5183 /* cmp reg, [mem] */
5184 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5185 {
5186 if (idxReg >= 8)
5187 pbCodeBuf[off++] = X86_OP_REX_R;
5188 pbCodeBuf[off++] = 0x38;
5189 }
5190 else
5191 {
5192 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5193 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5194 else
5195 {
5196 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5197 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5198 else
5199 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5200 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5201 if (idxReg >= 8)
5202 pbCodeBuf[off++] = X86_OP_REX_R;
5203 }
5204 pbCodeBuf[off++] = 0x39;
5205 }
5206 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5207
5208 /* je/jz +1 */
5209 pbCodeBuf[off++] = 0x74;
5210 pbCodeBuf[off++] = 0x01;
5211
5212 /* int3 */
5213 pbCodeBuf[off++] = 0xcc;
5214
5215 /* For values smaller than the register size, we must check that the rest
5216 of the register is all zeros. */
5217 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5218 {
5219 /* test reg64, imm32 */
5220 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5221 pbCodeBuf[off++] = 0xf7;
5222 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5223 pbCodeBuf[off++] = 0;
5224 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5225 pbCodeBuf[off++] = 0xff;
5226 pbCodeBuf[off++] = 0xff;
5227
5228 /* je/jz +1 */
5229 pbCodeBuf[off++] = 0x74;
5230 pbCodeBuf[off++] = 0x01;
5231
5232 /* int3 */
5233 pbCodeBuf[off++] = 0xcc;
5234 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5235 }
5236 else
5237 {
5238 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5239 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5240 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5241 }
5242
5243# elif defined(RT_ARCH_ARM64)
5244 /* mov TMP0, [gstreg] */
5245 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5246
5247 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5248 /* sub tmp0, tmp0, idxReg */
5249 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5250 /* cbz tmp0, +1 */
5251 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5252 /* brk #0x1000+enmGstReg */
5253 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5254 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5255
5256# else
5257# error "Port me!"
5258# endif
5259 return off;
5260}
5261#endif /* VBOX_STRICT */
5262
5263
5264#ifdef VBOX_STRICT
5265/**
5266 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
5267 * important bits.
5268 *
5269 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5270 * Trashes EFLAGS on AMD64.
5271 */
5272static uint32_t
5273iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5274{
5275 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5276 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5277 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5278 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5279
5280# ifdef RT_ARCH_AMD64
5281 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5282
5283 /* je/jz +1 */
5284 pbCodeBuf[off++] = 0x74;
5285 pbCodeBuf[off++] = 0x01;
5286
5287 /* int3 */
5288 pbCodeBuf[off++] = 0xcc;
5289
5290# elif defined(RT_ARCH_ARM64)
5291 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5292
5293 /* b.eq +1 */
5294 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5295 /* brk #0x2000 */
5296 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5297
5298# else
5299# error "Port me!"
5300# endif
5301 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5302
5303 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5304 return off;
5305}
5306#endif /* VBOX_STRICT */
5307
5308
5309/**
5310 * Emits code for checking the return code of a call and rcPassUp, returning
5311 * from the code if either is non-zero.
5312 */
5313DECL_HIDDEN_THROW(uint32_t)
5314iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5315{
5316#ifdef RT_ARCH_AMD64
5317 /*
5318 * AMD64: eax = call status code.
5319 */
5320
5321 /* edx = rcPassUp */
5322 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5323# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5324 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5325# endif
5326
5327 /* edx = eax | rcPassUp */
5328 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5329 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5330 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5331 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5332
5333 /* Jump to non-zero status return path. */
5334 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5335
5336 /* done. */
5337
5338#elif defined(RT_ARCH_ARM64)
5339 /*
5340 * ARM64: w0 = call status code.
5341 */
5342# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5343 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5344# endif
5345 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5346
5347 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5348
5349 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5350
5351 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5352 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5353 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5354
5355#else
5356# error "port me"
5357#endif
5358 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5359 RT_NOREF_PV(idxInstr);
5360 return off;
5361}
5362
5363
5364/**
5365 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5366 * raising a \#GP(0) if it isn't.
5367 *
5368 * @returns New code buffer offset, UINT32_MAX on failure.
5369 * @param pReNative The native recompile state.
5370 * @param off The code buffer offset.
5371 * @param idxAddrReg The host register with the address to check.
5372 * @param idxInstr The current instruction.
5373 */
5374DECL_HIDDEN_THROW(uint32_t)
5375iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5376{
5377 /*
5378 * Make sure we don't have any outstanding guest register writes as we may
5379 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
5380 */
5381 off = iemNativeRegFlushPendingWrites(pReNative, off);
5382
5383#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5384 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5385#else
5386 RT_NOREF(idxInstr);
5387#endif
5388
5389#ifdef RT_ARCH_AMD64
5390 /*
5391 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5392 * return raisexcpt();
5393 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5394 */
5395 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5396
5397 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5398 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5399 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5400 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5401 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5402
5403 iemNativeRegFreeTmp(pReNative, iTmpReg);
5404
5405#elif defined(RT_ARCH_ARM64)
5406 /*
5407 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5408 * return raisexcpt();
5409 * ----
5410 * mov x1, 0x800000000000
5411 * add x1, x0, x1
5412 * cmp xzr, x1, lsr 48
5413 * b.ne .Lraisexcpt
5414 */
5415 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5416
5417 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5418 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5419 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5420 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5421
5422 iemNativeRegFreeTmp(pReNative, iTmpReg);
5423
5424#else
5425# error "Port me"
5426#endif
5427 return off;
5428}
5429
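/*
 * Worked example for the canonical check above (arithmetic only, no extra
 * code is emitted): an address is canonical when bits 63:47 all equal bit 47,
 * so adding 2^47 (0x0000800000000000) maps both canonical ranges into
 * [0, 2^48 - 1] while non-canonical addresses end up above that:
 *
 *     0x00007fffffffffff + 0x0000800000000000 = 0x0000ffffffffffff  ->  >> 48 == 0, okay
 *     0xffff800000000000 + 0x0000800000000000 = 0x0000000000000000  ->  >> 48 == 0, okay (wraps mod 2^64)
 *     0x0000800000000000 + 0x0000800000000000 = 0x0001000000000000  ->  >> 48 == 1, raise #GP(0)
 *
 * The AMD64 variant applies the same idea to the high dword only (add 0x8000,
 * shift right by 16), which avoids loading a 64-bit immediate.
 */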
5430
5431/**
5432 * Emits code to check if the content of @a idxAddrReg is within the limit of
5433 * idxSegReg, raising a \#GP(0) if it isn't.
5434 *
5435 * @returns New code buffer offset; throws VBox status code on error.
5436 * @param pReNative The native recompile state.
5437 * @param off The code buffer offset.
5438 * @param idxAddrReg The host register (32-bit) with the address to
5439 * check.
5440 * @param idxSegReg The segment register (X86_SREG_XXX) to check
5441 * against.
5442 * @param idxInstr The current instruction.
5443 */
5444DECL_HIDDEN_THROW(uint32_t)
5445iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5446 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
5447{
5448 /*
5449 * Make sure we don't have any outstanding guest register writes as we may
5450 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
5451 */
5452 off = iemNativeRegFlushPendingWrites(pReNative, off);
5453
5454#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5455 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5456#else
5457 RT_NOREF(idxInstr);
5458#endif
5459
5460 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
5461
5462 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5463 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
5464 kIemNativeGstRegUse_ForUpdate);
5465
5466 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
5467 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5468
5469 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
5470 return off;
5471}
5472
5473
5474/**
5475 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
5476 *
5477 * @returns The flush mask.
5478 * @param fCImpl The IEM_CIMPL_F_XXX flags.
5479 * @param fGstShwFlush The starting flush mask.
5480 */
5481DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
5482{
5483 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
5484 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
5485 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
5486 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
5487 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
5488 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
5489 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
5490 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
5491 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
5492 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
5493 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
5494 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
5495 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5496 return fGstShwFlush;
5497}
5498
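/*
 * Illustrative example of the mask construction above: for a far branch the
 * CS selector/base/limit shadows get added to whatever the caller passed in,
 * so a caller that also wants the PC shadow gone ends up with:
 *
 *     uint64_t const fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_FAR,
 *                                                                             RT_BIT_64(kIemNativeGstReg_Pc));
 *     // == RT_BIT_64(kIemNativeGstReg_Pc)
 *     //  | RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_CS)
 *     //  | RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_CS)
 *     //  | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS)
 */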
5499
5500/**
5501 * Emits a call to a CImpl function or something similar.
5502 */
5503DECL_HIDDEN_THROW(uint32_t)
5504iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5505 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5506{
5507 /*
5508 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5509 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5510 */
5511 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5512 fGstShwFlush
5513 | RT_BIT_64(kIemNativeGstReg_Pc)
5514 | RT_BIT_64(kIemNativeGstReg_EFlags));
5515 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5516
5517 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5518
5519 /*
5520 * Load the parameters.
5521 */
5522#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
5523 /* Special case: the hidden VBOXSTRICTRC pointer takes the first argument register. */
5524 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5525 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5526 if (cAddParams > 0)
5527 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
5528 if (cAddParams > 1)
5529 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
5530 if (cAddParams > 2)
5531 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
5532 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5533
5534#else
5535 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5536 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5537 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5538 if (cAddParams > 0)
5539 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
5540 if (cAddParams > 1)
5541 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
5542 if (cAddParams > 2)
5543# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
5544 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
5545# else
5546 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
5547# endif
5548#endif
5549
5550 /*
5551 * Make the call.
5552 */
5553 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
5554
5555#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5556 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5557#endif
5558
5559 /*
5560 * Check the status code.
5561 */
5562 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5563}
5564
5565
5566/**
5567 * Emits a call to a threaded worker function.
5568 */
5569DECL_HIDDEN_THROW(uint32_t)
5570iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5571{
5572 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
5573 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5574
5575#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5576 /* The threaded function may throw / long jmp, so set current instruction
5577 number if we're counting. */
5578 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5579#endif
5580
5581 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
5582
5583#ifdef RT_ARCH_AMD64
5584 /* Load the parameters and emit the call. */
5585# ifdef RT_OS_WINDOWS
5586# ifndef VBOXSTRICTRC_STRICT_ENABLED
5587 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5588 if (cParams > 0)
5589 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
5590 if (cParams > 1)
5591 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
5592 if (cParams > 2)
5593 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
5594# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
5595 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
5596 if (cParams > 0)
5597 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
5598 if (cParams > 1)
5599 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
5600 if (cParams > 2)
5601 {
5602 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
5603 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
5604 }
5605 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5606# endif /* VBOXSTRICTRC_STRICT_ENABLED */
5607# else
5608 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5609 if (cParams > 0)
5610 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
5611 if (cParams > 1)
5612 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
5613 if (cParams > 2)
5614 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
5615# endif
5616
5617 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5618
5619# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5620 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5621# endif
5622
5623#elif defined(RT_ARCH_ARM64)
5624 /*
5625 * ARM64:
5626 */
5627 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5628 if (cParams > 0)
5629 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
5630 if (cParams > 1)
5631 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
5632 if (cParams > 2)
5633 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
5634
5635 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5636
5637#else
5638# error "port me"
5639#endif
5640
5641 /*
5642 * Check the status code.
5643 */
5644 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
5645
5646 return off;
5647}
5648
5649
5650/**
5651 * Emits the code at the CheckBranchMiss label.
5652 */
5653static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5654{
5655 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
5656 if (idxLabel != UINT32_MAX)
5657 {
5658 iemNativeLabelDefine(pReNative, idxLabel, off);
5659
5660 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5661 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5662 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5663
5664 /* jump back to the return sequence. */
5665 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5666 }
5667 return off;
5668}
5669
5670
5671/**
5672 * Emits the code at the NeedCsLimChecking label.
5673 */
5674static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5675{
5676 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5677 if (idxLabel != UINT32_MAX)
5678 {
5679 iemNativeLabelDefine(pReNative, idxLabel, off);
5680
5681 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5682 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5683 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5684
5685 /* jump back to the return sequence. */
5686 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5687 }
5688 return off;
5689}
5690
5691
5692/**
5693 * Emits the code at the ObsoleteTb label.
5694 */
5695static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5696{
5697 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5698 if (idxLabel != UINT32_MAX)
5699 {
5700 iemNativeLabelDefine(pReNative, idxLabel, off);
5701
5702 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5703 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5704 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5705
5706 /* jump back to the return sequence. */
5707 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5708 }
5709 return off;
5710}
5711
5712
5713/**
5714 * Emits the code at the RaiseGP0 label.
5715 */
5716static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5717{
5718 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5719 if (idxLabel != UINT32_MAX)
5720 {
5721 iemNativeLabelDefine(pReNative, idxLabel, off);
5722
5723 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5724 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5725 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5726
5727 /* jump back to the return sequence. */
5728 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5729 }
5730 return off;
5731}
5732
5733
5734/**
5735 * Emits the code at the ReturnWithFlags label (returns
5736 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
5737 */
5738static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5739{
5740 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
5741 if (idxLabel != UINT32_MAX)
5742 {
5743 iemNativeLabelDefine(pReNative, idxLabel, off);
5744
5745 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
5746
5747 /* jump back to the return sequence. */
5748 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5749 }
5750 return off;
5751}
5752
5753
5754/**
5755 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
5756 */
5757static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5758{
5759 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
5760 if (idxLabel != UINT32_MAX)
5761 {
5762 iemNativeLabelDefine(pReNative, idxLabel, off);
5763
5764 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
5765
5766 /* jump back to the return sequence. */
5767 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5768 }
5769 return off;
5770}
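
/*
 * Hedged sketch (not compiled) of the tail-code pattern shared by the label
 * emitters above: each one is only generated when something in the TB body
 * actually referenced the label (iemNativeLabelFind() != UINT32_MAX), and the
 * emitted code boils down to
 *
 *      SomeLabel:
 *          rc = iemNativeHlpXxx(pVCpu);   // or: rc = VINF_XXX for the plain status labels
 *          goto ReturnLabel;              // common register-restore + ret sequence
 *
 * so an unused exit path costs zero bytes of native code.
 */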
5771
5772
5773/**
5774 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
5775 */
5776static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5777{
5778 /*
5779 * Generate the rc + rcPassUp fiddling code if needed.
5780 */
5781 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5782 if (idxLabel != UINT32_MAX)
5783 {
5784 iemNativeLabelDefine(pReNative, idxLabel, off);
5785
5786 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
5787#ifdef RT_ARCH_AMD64
5788# ifdef RT_OS_WINDOWS
5789# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5790 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
5791# endif
5792 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5793 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
5794# else
5795 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5796 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
5797# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5798 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
5799# endif
5800# endif
5801# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5802 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
5803# endif
5804
5805#else
5806 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
5807 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5808 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
5809#endif
5810
5811 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
5812 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5813 }
5814 return off;
5815}
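
/*
 * Hedged sketch of what the NonZeroRetOrPassUp tail above amounts to in C,
 * given the helper prototype named in the comment:
 *
 *      rcFinal = iemNativeHlpExecStatusCodeFiddling(pVCpu, rcStrict, idxInstr);
 *      goto ReturnLabel;
 *
 * The per-ABI register shuffling exists only because the incoming status code
 * already sits in the return register (eax/x0) and has to be moved into the
 * second argument register without clobbering the pVCpu pointer.
 */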
5816
5817
5818/**
5819 * Emits a standard epilog.
5820 */
5821static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
5822{
5823 *pidxReturnLabel = UINT32_MAX;
5824
5825 /*
5826 * Successful return, so clear the return register (eax, w0).
5827 */
5828 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
5829
5830 /*
5831 * Define label for common return point.
5832 */
5833 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
5834 *pidxReturnLabel = idxReturn;
5835
5836 /*
5837 * Restore registers and return.
5838 */
5839#ifdef RT_ARCH_AMD64
5840 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5841
5842    /* Reposition rsp at the r15 restore point. */
5843 pbCodeBuf[off++] = X86_OP_REX_W;
5844 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
5845 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
5846 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
5847
5848 /* Pop non-volatile registers and return */
5849 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
5850 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
5851 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
5852 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
5853 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
5854 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
5855 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5856 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5857# ifdef RT_OS_WINDOWS
5858 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5859 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5860# endif
5861 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5862 pbCodeBuf[off++] = 0xc9; /* leave */
5863 pbCodeBuf[off++] = 0xc3; /* ret */
5864 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5865
5866#elif RT_ARCH_ARM64
5867 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5868
5869    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
5870 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5871 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5872 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5873 IEMNATIVE_FRAME_VAR_SIZE / 8);
5874 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5875 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5876 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5877 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5878 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5879 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5880 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5881 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5882 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5883 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5884 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5885 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5886
5887 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5888 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5889 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5890 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5891
5892 /* retab / ret */
5893# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5894 if (1)
5895 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
5896 else
5897# endif
5898 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
5899
5900#else
5901# error "port me"
5902#endif
5903 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5904
5905 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
5906}
5907
5908
5909/**
5910 * Emits a standard prolog.
5911 */
5912static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5913{
5914#ifdef RT_ARCH_AMD64
5915 /*
5916 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
5917 * reserving 64 bytes for stack variables plus 4 non-register argument
5918     * slots. Fixed register assignment: xBX = pVCpu.
5919 *
5920 * Since we always do the same register spilling, we can use the same
5921 * unwind description for all the code.
5922 */
5923 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5924 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
5925 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
5926 pbCodeBuf[off++] = 0x8b;
5927 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
5928 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
5929 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
5930# ifdef RT_OS_WINDOWS
5931 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
5932 pbCodeBuf[off++] = 0x8b;
5933 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
5934 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
5935 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
5936# else
5937 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
5938 pbCodeBuf[off++] = 0x8b;
5939 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
5940# endif
5941 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
5942 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
5943 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
5944 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
5945 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
5946 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
5947 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
5948 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
5949
5950 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
5951 X86_GREG_xSP,
5952 IEMNATIVE_FRAME_ALIGN_SIZE
5953 + IEMNATIVE_FRAME_VAR_SIZE
5954 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
5955 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
5956 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
5957 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
5958 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
5959
5960#elif RT_ARCH_ARM64
5961 /*
5962 * We set up a stack frame exactly like on x86, only we have to push the
5963     * return address ourselves here. We save all non-volatile registers.
5964 */
5965 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5966
5967 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have been
5968      * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
5969      * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
5970      * in any way conditional, so just emit this instruction now and hope for the best... */
5971 /* pacibsp */
5972 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
5973# endif
5974
5975 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
5976 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
5977 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5978 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5979 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
5980 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
5981 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5982 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5983 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5984 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5985 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5986 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5988 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5989 /* Save the BP and LR (ret address) registers at the top of the frame. */
5990 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5991 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5992 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5993 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
5994 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
5995 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
5996
5997 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
5998 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
5999
6000 /* mov r28, r0 */
6001 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6002 /* mov r27, r1 */
6003 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6004
6005#else
6006# error "port me"
6007#endif
6008 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6009 return off;
6010}
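
/*
 * Hedged sketch (function name and exact signature are illustrative only, not
 * part of the build) of what a translation block function framed by the
 * prolog/epilog pair above looks like at the C level:
 *
 *      DECLCALLBACK(int) iemNativeTbExample(PVMCPUCC pVCpu, PCPUMCTX pCpumCtx)
 *      {
 *          //  prolog:  push/stp all non-volatile GPRs, set up the xBP/BP frame,
 *          //           reserve the variable + argument area, and park pVCpu
 *          //           (and pCpumCtx on arm64) in the fixed registers.
 *          int rc = VINF_SUCCESS;
 *          //  ... recompiled instruction code, possibly jumping to one of the
 *          //      tail labels, which overrides rc and joins the return path ...
 *          //  epilog:  restore the saved registers and return rc.
 *          return rc;
 *      }
 */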
6011
6012
6013
6014
6015/*********************************************************************************************************************************
6016* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
6017*********************************************************************************************************************************/
6018
6019#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
6020 { \
6021 Assert(pReNative->Core.bmVars == 0); \
6022 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
6023 Assert(pReNative->Core.bmStack == 0); \
6024 pReNative->fMc = (a_fMcFlags); \
6025 pReNative->fCImpl = (a_fCImplFlags); \
6026 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
6027
6028/** We have to get to the end in recompilation mode, as otherwise we won't
6029 * generate code for all the IEM_MC_IF_XXX branches. */
6030#define IEM_MC_END() \
6031 iemNativeVarFreeAll(pReNative); \
6032 } return off
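
/*
 * Hedged usage sketch: in the generated per-instruction emitters these two
 * macros bracket the whole MC block, roughly like this (the MC statements in
 * the middle are illustrative only):
 *
 *      IEM_MC_BEGIN(0, 1, IEM_MC_F_MIN_386, 0);
 *          IEM_MC_LOCAL(uint16_t, u16Value);
 *          ...
 *      IEM_MC_END();
 *
 * IEM_MC_BEGIN opens a C block and seeds fMc/fCImpl/cArgs in the recompiler
 * state, while IEM_MC_END frees all variables and returns the current native
 * code buffer offset, so control always reaches it even with IEM_MC_IF_XXX code.
 */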
6033
6034
6035
6036/*********************************************************************************************************************************
6037* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
6038*********************************************************************************************************************************/
6039
6040#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
6041 pReNative->fMc = 0; \
6042 pReNative->fCImpl = (a_fFlags); \
6043 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
6044
6045
6046#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6047 pReNative->fMc = 0; \
6048 pReNative->fCImpl = (a_fFlags); \
6049 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
6050
6051DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6052 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6053 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
6054{
6055 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
6056}
6057
6058
6059#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6060 pReNative->fMc = 0; \
6061 pReNative->fCImpl = (a_fFlags); \
6062 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6063 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
6064
6065DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6066 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6067 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
6068{
6069 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
6070}
6071
6072
6073#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6074 pReNative->fMc = 0; \
6075 pReNative->fCImpl = (a_fFlags); \
6076 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6077 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
6078
6079DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6080 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6081 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
6082 uint64_t uArg2)
6083{
6084 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
6085}
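
/*
 * Hedged sketch of what the deferral macros above amount to: the recompiler
 * does not inline the instruction, it just emits a native call to the C
 * implementation, conceptually
 *
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, a2);   // unused args are zero
 *      // non-zero / rcPassUp handling then goes through the NonZeroRetOrPassUp path
 *
 * with iemNativeEmitCImplCall doing the guest shadow flushing (a_fGstShwFlush)
 * and the argument marshalling.
 */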
6086
6087
6088
6089/*********************************************************************************************************************************
6090* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
6091*********************************************************************************************************************************/
6092
6093/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
6094 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
6095DECL_INLINE_THROW(uint32_t)
6096iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6097{
6098 /*
6099     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
6100     * return with a special status code and make the execution loop deal with
6101     * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
6102     * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
6103     * could continue w/o interruption, it probably will drop into the
6104     * debugger, so it's not worth the effort of trying to service it here and
6105     * we just lump it in with the handling of the others.
6106     *
6107     * To simplify the code and the register state management even more (wrt
6108     * the immediate in the AND operation), we always update the flags and skip
6109     * the conditional jump associated with the extra check.
6110 */
6111 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
6112 <= UINT32_MAX);
6113#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6114 AssertMsg( pReNative->idxCurCall == 0
6115 || IEMLIVENESS_STATE_IS_ACCESS_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], kIemNativeGstReg_EFlags/*_Other*/)),
6116 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], kIemNativeGstReg_EFlags/*_Other*/)));
6117#endif
6118
6119 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6120 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
6121 true /*fSkipLivenessAssert*/);
6122 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
6123 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
6124 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
6125 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
6126 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6127
6128 /* Free but don't flush the EFLAGS register. */
6129 iemNativeRegFreeTmp(pReNative, idxEflReg);
6130
6131 return off;
6132}
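
/*
 * Hedged C equivalent of the code emitted above (compare the interpreter's
 * iemRegFinishClearingRF):
 *
 *      uint32_t fEfl = pVCpu->cpum.GstCtx.eflags.u;
 *      if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;        // via the ReturnWithFlags label
 *      pVCpu->cpum.GstCtx.eflags.u = fEfl & ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 */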
6133
6134
6135 /** Handles a_rcNormal: a no-op for VINF_SUCCESS, while VINF_IEM_REEXEC_BREAK
6136  *  gets a jump to the ReturnBreak label emitted. */
6136template<int const a_rcNormal>
6137DECL_FORCE_INLINE(uint32_t)
6138iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6139{
6140 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
6141 if (a_rcNormal != VINF_SUCCESS)
6142 {
6143#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6144 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6145#else
6146 RT_NOREF_PV(idxInstr);
6147#endif
6148 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
6149 }
6150 return off;
6151}
6152
6153
6154#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
6155 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6156 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6157
6158#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6159 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6160 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6161 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6162
6163/** Same as iemRegAddToRip64AndFinishingNoFlags. */
6164DECL_INLINE_THROW(uint32_t)
6165iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6166{
6167 /* Allocate a temporary PC register. */
6168 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6169
6170 /* Perform the addition and store the result. */
6171 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
6172 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6173
6174 /* Free but don't flush the PC register. */
6175 iemNativeRegFreeTmp(pReNative, idxPcReg);
6176
6177 return off;
6178}
6179
6180
6181#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
6182 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6183 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6184
6185#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6186 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6187 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6188 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6189
6190/** Same as iemRegAddToEip32AndFinishingNoFlags. */
6191DECL_INLINE_THROW(uint32_t)
6192iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6193{
6194 /* Allocate a temporary PC register. */
6195 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6196
6197 /* Perform the addition and store the result. */
6198 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6199 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6200
6201 /* Free but don't flush the PC register. */
6202 iemNativeRegFreeTmp(pReNative, idxPcReg);
6203
6204 return off;
6205}
6206
6207
6208#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
6209 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6210 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6211
6212#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6213 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6214 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6215 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6216
6217/** Same as iemRegAddToIp16AndFinishingNoFlags. */
6218DECL_INLINE_THROW(uint32_t)
6219iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6220{
6221 /* Allocate a temporary PC register. */
6222 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6223
6224 /* Perform the addition and store the result. */
6225 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6226 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6227 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6228
6229 /* Free but don't flush the PC register. */
6230 iemNativeRegFreeTmp(pReNative, idxPcReg);
6231
6232 return off;
6233}
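
/*
 * Hedged C equivalents of the three PC advancing emitters above, using 'rip'
 * as shorthand for pVCpu->cpum.GstCtx.rip (cf. the corresponding
 * iemRegAddTo*AndFinishingNoFlags interpreter helpers):
 *
 *      rip = rip + cbInstr;                  // PC64
 *      rip = (uint32_t)(rip + cbInstr);      // PC32 (upper half cleared by the 32-bit add)
 *      rip = (uint16_t)(rip + cbInstr);      // PC16, hence the Clear16Up step
 *
 * No canonical or limit checking is done here; that is the job of the
 * relative and indirect jump emitters further down.
 */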
6234
6235
6236
6237/*********************************************************************************************************************************
6238* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
6239*********************************************************************************************************************************/
6240
6241#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6242 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6243 (a_enmEffOpSize), pCallEntry->idxInstr); \
6244 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6245
6246#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6247 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6248 (a_enmEffOpSize), pCallEntry->idxInstr); \
6249 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6250 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6251
6252#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
6253 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6254 IEMMODE_16BIT, pCallEntry->idxInstr); \
6255 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6256
6257#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6258 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6259 IEMMODE_16BIT, pCallEntry->idxInstr); \
6260 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6261 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6262
6263#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
6264 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6265 IEMMODE_64BIT, pCallEntry->idxInstr); \
6266 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6267
6268#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6269 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6270 IEMMODE_64BIT, pCallEntry->idxInstr); \
6271 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6272 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6273
6274/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
6275 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
6276 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
6277DECL_INLINE_THROW(uint32_t)
6278iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6279 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6280{
6281 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
6282
6283 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6284 off = iemNativeRegFlushPendingWrites(pReNative, off);
6285
6286 /* Allocate a temporary PC register. */
6287 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6288
6289 /* Perform the addition. */
6290 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
6291
6292 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
6293 {
6294 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6295 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6296 }
6297 else
6298 {
6299 /* Just truncate the result to 16-bit IP. */
6300 Assert(enmEffOpSize == IEMMODE_16BIT);
6301 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6302 }
6303 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6304
6305 /* Free but don't flush the PC register. */
6306 iemNativeRegFreeTmp(pReNative, idxPcReg);
6307
6308 return off;
6309}
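
/*
 * Hedged C sketch of the logic emitted above (cf. the
 * iemRegRip64RelativeJumpS8/S16/S32AndFinishNoFlags interpreter helpers):
 *
 *      uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
 *      if (enmEffOpSize == IEMMODE_64BIT)
 *      {
 *          if (!IEM_IS_CANONICAL(uNewRip))
 *              return iemRaiseGeneralProtectionFault0(pVCpu);   // RaiseGp0 label -> exit TB
 *      }
 *      else
 *          uNewRip &= UINT16_MAX;                               // 16-bit operand size
 *      pVCpu->cpum.GstCtx.rip = uNewRip;
 */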
6310
6311
6312#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6313 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6314 (a_enmEffOpSize), pCallEntry->idxInstr); \
6315 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6316
6317#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6318 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6319 (a_enmEffOpSize), pCallEntry->idxInstr); \
6320 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6321 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6322
6323#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
6324 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6325 IEMMODE_16BIT, pCallEntry->idxInstr); \
6326 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6327
6328#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6329 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6330 IEMMODE_16BIT, pCallEntry->idxInstr); \
6331 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6332 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6333
6334#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
6335 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6336 IEMMODE_32BIT, pCallEntry->idxInstr); \
6337 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6338
6339#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6340 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6341 IEMMODE_32BIT, pCallEntry->idxInstr); \
6342 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6343 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6344
6345/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
6346 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
6347 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
6348DECL_INLINE_THROW(uint32_t)
6349iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6350 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6351{
6352 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
6353
6354 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6355 off = iemNativeRegFlushPendingWrites(pReNative, off);
6356
6357 /* Allocate a temporary PC register. */
6358 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6359
6360 /* Perform the addition. */
6361 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6362
6363 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
6364 if (enmEffOpSize == IEMMODE_16BIT)
6365 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6366
6367 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
6368/** @todo we can skip this in 32-bit FLAT mode. */
6369 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6370
6371 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6372
6373 /* Free but don't flush the PC register. */
6374 iemNativeRegFreeTmp(pReNative, idxPcReg);
6375
6376 return off;
6377}
6378
6379
6380#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
6381 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6382 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6383
6384#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
6385 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6386 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6387 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6388
6389#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
6390 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6391 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6392
6393#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6394 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6395 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6396 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6397
6398#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
6399 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6400 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6401
6402#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6403 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6404 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6405 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6406
6407/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
6408DECL_INLINE_THROW(uint32_t)
6409iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6410 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
6411{
6412 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6413 off = iemNativeRegFlushPendingWrites(pReNative, off);
6414
6415 /* Allocate a temporary PC register. */
6416 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6417
6418 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
6419 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6420 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6421 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6422 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6423
6424 /* Free but don't flush the PC register. */
6425 iemNativeRegFreeTmp(pReNative, idxPcReg);
6426
6427 return off;
6428}
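
/*
 * Hedged C sketch shared by the 32-bit and 16-bit relative jump emitters above:
 *
 *      uint32_t uNewEip = (uint32_t)pVCpu->cpum.GstCtx.rip + cbInstr + offDisp;
 *      if (enmEffOpSize == IEMMODE_16BIT)      // always the case for the pre-386 variant
 *          uNewEip &= UINT16_MAX;
 *      if (uNewEip > pVCpu->cpum.GstCtx.cs.u32Limit)
 *          return iemRaiseGeneralProtectionFault0(pVCpu);       // RaiseGp0 label -> exit TB
 *      pVCpu->cpum.GstCtx.rip = uNewEip;
 */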
6429
6430
6431
6432/*********************************************************************************************************************************
6433 * Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
6434*********************************************************************************************************************************/
6435
6436/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
6437#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
6438 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6439
6440/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
6441#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
6442 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6443
6444/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
6445#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
6446 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6447
6448/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
6449 * clears flags. */
6450#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
6451 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
6452 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6453
6454/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
6455 * clears flags. */
6456#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
6457 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
6458 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6459
6460/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
6461 * clears flags. */
6462#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
6463 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
6464 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6465
6466#undef IEM_MC_SET_RIP_U16_AND_FINISH
6467
6468
6469/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
6470#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
6471 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6472
6473/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
6474#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
6475 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6476
6477/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
6478 * clears flags. */
6479#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
6480 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
6481 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6482
6483/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
6484 * and clears flags. */
6485#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
6486 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
6487 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6488
6489#undef IEM_MC_SET_RIP_U32_AND_FINISH
6490
6491
6492/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
6493#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
6494 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
6495
6496/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
6497 * and clears flags. */
6498#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
6499 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
6500 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6501
6502#undef IEM_MC_SET_RIP_U64_AND_FINISH
6503
6504
6505/** Same as iemRegRipJumpU16AndFinishNoFlags,
6506 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
6507DECL_INLINE_THROW(uint32_t)
6508iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
6509 uint8_t idxInstr, uint8_t cbVar)
6510{
6511 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
6512 Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
6513
6514 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6515 off = iemNativeRegFlushPendingWrites(pReNative, off);
6516
6517 /* Get a register with the new PC loaded from idxVarPc.
6518        Note! This ASSUMES that the high bits of the GPR are zeroed. */
6519 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
6520
6521 /* Check limit (may #GP(0) + exit TB). */
6522 if (!f64Bit)
6523/** @todo we can skip this test in FLAT 32-bit mode. */
6524 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6525 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6526 else if (cbVar > sizeof(uint32_t))
6527 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6528
6529 /* Store the result. */
6530 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6531
6532 iemNativeVarRegisterRelease(pReNative, idxVarPc);
6533     /** @todo implicitly free the variable? */
6534
6535 return off;
6536}
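
/*
 * Hedged C sketch of the indirect jump above (cf. the
 * iemRegRipJumpU16/U32/U64AndFinishNoFlags interpreter helpers), where
 * uVarPcValue stands for the value of the idxVarPc variable (illustrative name):
 *
 *      uint64_t uNewPc = uVarPcValue;          // already zero-extended to 64 bits
 *      if (!f64Bit)
 *      {
 *          if (uNewPc > pVCpu->cpum.GstCtx.cs.u32Limit)
 *              return iemRaiseGeneralProtectionFault0(pVCpu);
 *      }
 *      else if (cbVar > sizeof(uint32_t) && !IEM_IS_CANONICAL(uNewPc))
 *          return iemRaiseGeneralProtectionFault0(pVCpu);
 *      pVCpu->cpum.GstCtx.rip = uNewPc;
 */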
6537
6538
6539
6540/*********************************************************************************************************************************
6541* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
6542*********************************************************************************************************************************/
6543
6544/**
6545 * Pushes an IEM_MC_IF_XXX onto the condition stack.
6546 *
6547  * @returns Pointer to the condition stack entry; longjmps with
6548  *          VERR_IEM_COND_TOO_DEEPLY_NESTED on failure (too many nestings).
6549 */
6550DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
6551{
6552 uint32_t const idxStack = pReNative->cCondDepth;
6553 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
6554
6555 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
6556 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
6557
6558 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
6559 pEntry->fInElse = false;
6560 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
6561 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
6562
6563 return pEntry;
6564}
6565
6566
6567/**
6568 * Start of the if-block, snapshotting the register and variable state.
6569 */
6570DECL_INLINE_THROW(void)
6571iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
6572{
6573 Assert(offIfBlock != UINT32_MAX);
6574 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6575 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6576 Assert(!pEntry->fInElse);
6577
6578     /* Define the start of the IF block if requested or for disassembly purposes. */
6579 if (idxLabelIf != UINT32_MAX)
6580 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
6581#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6582 else
6583 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
6584#else
6585 RT_NOREF(offIfBlock);
6586#endif
6587
6588 /* Copy the initial state so we can restore it in the 'else' block. */
6589 pEntry->InitialState = pReNative->Core;
6590}
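
/*
 * Hedged sketch of the native control flow produced by the condition stack for
 *
 *      IEM_MC_IF_XXX(...) { if-body } IEM_MC_ELSE() { else-body } IEM_MC_ENDIF();
 *
 * using the labels created in iemNativeCondPushIf:
 *
 *          test / conditional jump  ->  Else_N     ; emitted by the IEM_MC_IF_XXX emitter
 *          ... if-body ...
 *          jmp                      ->  Endif_N    ; iemNativeEmitElse
 *      Else_N:
 *          ... else-body ...
 *      Endif_N:                                    ; iemNativeEmitEndIf reconciles the register
 *                                                  ; and variable state of the two paths here
 */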
6591
6592
6593#define IEM_MC_ELSE() } while (0); \
6594 off = iemNativeEmitElse(pReNative, off); \
6595 do {
6596
6597/** Emits code related to IEM_MC_ELSE. */
6598DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6599{
6600 /* Check sanity and get the conditional stack entry. */
6601 Assert(off != UINT32_MAX);
6602 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6603 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6604 Assert(!pEntry->fInElse);
6605
6606 /* Jump to the endif */
6607 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
6608
6609 /* Define the else label and enter the else part of the condition. */
6610 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6611 pEntry->fInElse = true;
6612
6613 /* Snapshot the core state so we can do a merge at the endif and restore
6614 the snapshot we took at the start of the if-block. */
6615 pEntry->IfFinalState = pReNative->Core;
6616 pReNative->Core = pEntry->InitialState;
6617
6618 return off;
6619}
6620
6621
6622#define IEM_MC_ENDIF() } while (0); \
6623 off = iemNativeEmitEndIf(pReNative, off)
6624
6625/** Emits code related to IEM_MC_ENDIF. */
6626DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6627{
6628 /* Check sanity and get the conditional stack entry. */
6629 Assert(off != UINT32_MAX);
6630 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6631 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6632
6633 /*
6634     * Now we have to find common ground with the core state at the end of the
6635     * if-block (or the initial state when there is no else-block). Use the smallest
6636     * common denominator and just drop anything that isn't the same in both states.
6637 */
6638 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
6639 * which is why we're doing this at the end of the else-block.
6640     *        But we'd need more info about the future for that to be worth the effort. */
6641 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
6642 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
6643 {
6644 /* shadow guest stuff first. */
6645 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
6646 if (fGstRegs)
6647 {
6648 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
6649 do
6650 {
6651 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
6652 fGstRegs &= ~RT_BIT_64(idxGstReg);
6653
6654 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6655 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
6656 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
6657 {
6658 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
6659 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
6660 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
6661 }
6662 } while (fGstRegs);
6663 }
6664 else
6665 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
6666
6667 /* Check variables next. For now we must require them to be identical
6668 or stuff we can recreate. */
6669 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
6670 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
6671 if (fVars)
6672 {
6673 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
6674 do
6675 {
6676 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
6677 fVars &= ~RT_BIT_32(idxVar);
6678
6679 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
6680 {
6681 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
6682 continue;
6683 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6684 {
6685 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6686 if (idxHstReg != UINT8_MAX)
6687 {
6688 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6689 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6690 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
6691 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6692 }
6693 continue;
6694 }
6695 }
6696 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
6697 continue;
6698
6699 /* Irreconcilable, so drop it. */
6700 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6701 if (idxHstReg != UINT8_MAX)
6702 {
6703 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6704 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6705 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
6706 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6707 }
6708 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
6709 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6710 } while (fVars);
6711 }
6712
6713 /* Finally, check that the host register allocations matches. */
6714 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
6715 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
6716 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
6717 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
6718 }
6719
6720 /*
6721 * Define the endif label and maybe the else one if we're still in the 'if' part.
6722 */
6723 if (!pEntry->fInElse)
6724 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6725 else
6726 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
6727 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
6728
6729    /* Pop the conditional stack. */
6730 pReNative->cCondDepth -= 1;
6731
6732 return off;
6733}
6734
6735
6736#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
6737 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
6738 do {
6739
6740/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
6741DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6742{
6743 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6744
6745 /* Get the eflags. */
6746 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6747 kIemNativeGstRegUse_ReadOnly);
6748
6749 /* Test and jump. */
6750 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6751
6752 /* Free but don't flush the EFlags register. */
6753 iemNativeRegFreeTmp(pReNative, idxEflReg);
6754
6755 /* Make a copy of the core state now as we start the if-block. */
6756 iemNativeCondStartIfBlock(pReNative, off);
6757
6758 return off;
6759}
6760
6761
6762#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
6763 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
6764 do {
6765
6766/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
6767DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6768{
6769 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6770
6771 /* Get the eflags. */
6772 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6773 kIemNativeGstRegUse_ReadOnly);
6774
6775 /* Test and jump. */
6776 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6777
6778 /* Free but don't flush the EFlags register. */
6779 iemNativeRegFreeTmp(pReNative, idxEflReg);
6780
6781 /* Make a copy of the core state now as we start the if-block. */
6782 iemNativeCondStartIfBlock(pReNative, off);
6783
6784 return off;
6785}
6786
6787
6788#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
6789 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
6790 do {
6791
6792/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
6793DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6794{
6795 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6796
6797 /* Get the eflags. */
6798 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6799 kIemNativeGstRegUse_ReadOnly);
6800
6801 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6802 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6803
6804 /* Test and jump. */
6805 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6806
6807 /* Free but don't flush the EFlags register. */
6808 iemNativeRegFreeTmp(pReNative, idxEflReg);
6809
6810 /* Make a copy of the core state now as we start the if-block. */
6811 iemNativeCondStartIfBlock(pReNative, off);
6812
6813 return off;
6814}
6815
6816
6817#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
6818 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
6819 do {
6820
6821/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
6822DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6823{
6824 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6825
6826 /* Get the eflags. */
6827 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6828 kIemNativeGstRegUse_ReadOnly);
6829
6830 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6831 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6832
6833 /* Test and jump. */
6834 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6835
6836 /* Free but don't flush the EFlags register. */
6837 iemNativeRegFreeTmp(pReNative, idxEflReg);
6838
6839 /* Make a copy of the core state now as we start the if-block. */
6840 iemNativeCondStartIfBlock(pReNative, off);
6841
6842 return off;
6843}
6844
6845
6846#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
6847 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
6848 do {
6849
6850#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
6851 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
6852 do {
6853
6854/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
6855DECL_INLINE_THROW(uint32_t)
6856iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6857 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6858{
6859 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6860
6861 /* Get the eflags. */
6862 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6863 kIemNativeGstRegUse_ReadOnly);
6864
6865 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6866 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6867
6868 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6869 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6870 Assert(iBitNo1 != iBitNo2);
6871
6872#ifdef RT_ARCH_AMD64
6873 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
6874
6875 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6876 if (iBitNo1 > iBitNo2)
6877 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6878 else
6879 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6880 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6881
6882#elif defined(RT_ARCH_ARM64)
6883 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6884 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6885
6886 /* and tmpreg, eflreg, #1<<iBitNo1 */
6887 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6888
6889    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6890 if (iBitNo1 > iBitNo2)
6891 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6892 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6893 else
6894 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6895 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6896
6897 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6898
6899#else
6900# error "Port me"
6901#endif
6902
6903 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6904 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6905 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6906
6907 /* Free but don't flush the EFlags and tmp registers. */
6908 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6909 iemNativeRegFreeTmp(pReNative, idxEflReg);
6910
6911 /* Make a copy of the core state now as we start the if-block. */
6912 iemNativeCondStartIfBlock(pReNative, off);
6913
6914 return off;
6915}
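
/*
 * Worked example (hedged): for IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF),
 * i.e. the signed 'less than' style condition, the code above computes roughly
 *
 *      tmp   = efl & X86_EFL_SF;                       // isolate SF (bit 7)
 *      tmp <<= X86_EFL_OF_BIT - X86_EFL_SF_BIT;        // line it up with OF (bit 11)
 *      tmp  ^= efl;                                    // bit 11 of tmp is now SF ^ OF
 *
 * and then branches on that single bit, so SF != OF is evaluated without ever
 * materializing the individual flag values.
 */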
6916
6917
6918#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
6919 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
6920 do {
6921
6922#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
6923 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
6924 do {
6925
6926/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
6927 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
6928DECL_INLINE_THROW(uint32_t)
6929iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
6930 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6931{
6932 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6933
6934     /* We need an if-block label for the inverted variant. */
6935 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
6936 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
6937
6938 /* Get the eflags. */
6939 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6940 kIemNativeGstRegUse_ReadOnly);
6941
6942 /* Translate the flag masks to bit numbers. */
6943 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6944 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6945
6946 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6947 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6948 Assert(iBitNo1 != iBitNo);
6949
6950 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6951 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6952 Assert(iBitNo2 != iBitNo);
6953 Assert(iBitNo2 != iBitNo1);
6954
6955#ifdef RT_ARCH_AMD64
6956 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
6957#elif defined(RT_ARCH_ARM64)
6958 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6959#endif
6960
6961 /* Check for the lone bit first. */
6962 if (!fInverted)
6963 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6964 else
6965 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
6966
6967 /* Then extract and compare the other two bits. */
6968#ifdef RT_ARCH_AMD64
6969 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6970 if (iBitNo1 > iBitNo2)
6971 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6972 else
6973 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6974 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6975
6976#elif defined(RT_ARCH_ARM64)
6977 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6978
6979 /* and tmpreg, eflreg, #1<<iBitNo1 */
6980 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6981
6982    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6983 if (iBitNo1 > iBitNo2)
6984 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6985 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6986 else
6987 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6988 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6989
6990 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6991
6992#else
6993# error "Port me"
6994#endif
6995
6996 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6997 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6998 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6999
7000 /* Free but don't flush the EFlags and tmp registers. */
7001 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7002 iemNativeRegFreeTmp(pReNative, idxEflReg);
7003
7004 /* Make a copy of the core state now as we start the if-block. */
7005 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
7006
7007 return off;
7008}
7009
7010
7011#define IEM_MC_IF_CX_IS_NZ() \
7012 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
7013 do {
7014
7015/** Emits code for IEM_MC_IF_CX_IS_NZ. */
7016DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7017{
7018 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7019
7020 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7021 kIemNativeGstRegUse_ReadOnly);
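    /* CX is only 16 bits wide, so it suffices to test whether any of the low 16 bits are set. */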
7022 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
7023 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7024
7025 iemNativeCondStartIfBlock(pReNative, off);
7026 return off;
7027}
7028
7029
7030#define IEM_MC_IF_ECX_IS_NZ() \
7031 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
7032 do {
7033
7034#define IEM_MC_IF_RCX_IS_NZ() \
7035 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
7036 do {
7037
7038/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
7039DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7040{
7041 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7042
7043 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7044 kIemNativeGstRegUse_ReadOnly);
7045 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
7046 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7047
7048 iemNativeCondStartIfBlock(pReNative, off);
7049 return off;
7050}
7051
7052
7053#define IEM_MC_IF_CX_IS_NOT_ONE() \
7054 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
7055 do {
7056
7057/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
7058DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7059{
7060 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7061
7062 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7063 kIemNativeGstRegUse_ReadOnly);
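    /* Presumably amd64 can compare the 16-bit CX value against an immediate directly, whereas
       the arm64 emitter needs a scratch register for the 16-bit extract & compare, hence the
       extra temporary in the non-AMD64 path below. */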
7064#ifdef RT_ARCH_AMD64
7065 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7066#else
7067 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7068 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7069 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7070#endif
7071 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7072
7073 iemNativeCondStartIfBlock(pReNative, off);
7074 return off;
7075}
7076
7077
7078#define IEM_MC_IF_ECX_IS_NOT_ONE() \
7079 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
7080 do {
7081
7082#define IEM_MC_IF_RCX_IS_NOT_ONE() \
7083 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
7084 do {
7085
7086/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
7087DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7088{
7089 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7090
7091 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7092 kIemNativeGstRegUse_ReadOnly);
7093 if (f64Bit)
7094 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7095 else
7096 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7097 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7098
7099 iemNativeCondStartIfBlock(pReNative, off);
7100 return off;
7101}
7102
7103
7104#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7105 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
7106 do {
7107
7108#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7109 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
7110 do {
7111
7112/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
7113 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7114DECL_INLINE_THROW(uint32_t)
7115iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
7116{
7117 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7118
7119 /* We have to load both RCX and EFLAGS before we can start branching,
7120 otherwise we'll end up in the else-block with an inconsistent
7121 register allocator state.
7122 Doing EFLAGS first as it's more likely to be loaded, right? */
7123 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7124 kIemNativeGstRegUse_ReadOnly);
7125 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7126 kIemNativeGstRegUse_ReadOnly);
7127
7128 /** @todo we could reduce this to a single branch instruction by spending a
7129 * temporary register and some setnz stuff. Not sure if loops are
7130 * worth it. */
7131 /* Check CX. */
7132#ifdef RT_ARCH_AMD64
7133 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7134#else
7135 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7136 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7137 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7138#endif
7139
7140 /* Check the EFlags bit. */
7141 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7142 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7143 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7144 !fCheckIfSet /*fJmpIfSet*/);
7145
7146 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7147 iemNativeRegFreeTmp(pReNative, idxEflReg);
7148
7149 iemNativeCondStartIfBlock(pReNative, off);
7150 return off;
7151}
7152
7153
7154#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7155 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
7156 do {
7157
7158#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7159 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
7160 do {
7161
7162#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7163 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
7164 do {
7165
7166#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7167 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
7168 do {
7169
7170/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
7171 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
7172 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
7173 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7174DECL_INLINE_THROW(uint32_t)
7175iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7176 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
7177{
7178 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7179
7180 /* We have to load both RCX and EFLAGS before we can start branching,
7181 otherwise we'll end up in the else-block with an inconsistent
7182 register allocator state.
7183 Doing EFLAGS first as it's more likely to be loaded, right? */
7184 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7185 kIemNativeGstRegUse_ReadOnly);
7186 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7187 kIemNativeGstRegUse_ReadOnly);
7188
7189 /** @todo we could reduce this to a single branch instruction by spending a
7190 * temporary register and some setnz stuff. Not sure if loops are
7191 * worth it. */
7192 /* Check RCX/ECX. */
7193 if (f64Bit)
7194 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7195 else
7196 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7197
7198 /* Check the EFlags bit. */
7199 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7200 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7201 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7202 !fCheckIfSet /*fJmpIfSet*/);
7203
7204 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7205 iemNativeRegFreeTmp(pReNative, idxEflReg);
7206
7207 iemNativeCondStartIfBlock(pReNative, off);
7208 return off;
7209}
7210
7211
7212
7213/*********************************************************************************************************************************
7214* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7215*********************************************************************************************************************************/
7216/** Number of hidden arguments for CIMPL calls.
7217 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
7218#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7219# define IEM_CIMPL_HIDDEN_ARGS 3
7220#else
7221# define IEM_CIMPL_HIDDEN_ARGS 2
7222#endif
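/* (The two always-present hidden arguments are the pVCpu pointer and the instruction length;
   on Windows/AMD64 with strict status codes the VBOXSTRICTRC return value is presumably
   passed via a hidden buffer pointer, which accounts for the third slot above.) */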
7223
7224#define IEM_MC_NOREF(a_Name) \
7225 RT_NOREF_PV(a_Name)
7226
7227#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
7228 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
7229
7230#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
7231 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
7232
7233#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
7234 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
7235
7236#define IEM_MC_LOCAL(a_Type, a_Name) \
7237 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
7238
7239#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
7240 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
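/* Illustrative usage (hypothetical MC block fragment): a typical instruction body declares
   its arguments and locals like
       IEM_MC_ARG(uint16_t *, pu16Dst, 0);
       IEM_MC_ARG(uint16_t,   u16Src,  1);
       IEM_MC_LOCAL(uint16_t, u16Tmp);
   which the native recompiler maps to variable indices in pReNative->Core.aVars rather than
   to real C locals. */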
7241
7242
7243/**
7244 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
7245 */
7246DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
7247{
7248 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
7249 return IEM_CIMPL_HIDDEN_ARGS;
7250 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
7251 return 1;
7252 return 0;
7253}
7254
7255
7256/**
7257 * Internal work that allocates a variable with kind set to
7258 * kIemNativeVarKind_Invalid and no current stack allocation.
7259 *
7260 * The kind will either be set by the caller or later when the variable is first
7261 * assigned a value.
7262 */
7263static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7264{
7265 Assert(cbType > 0 && cbType <= 64);
7266 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7267 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7268 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7269 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7270 pReNative->Core.aVars[idxVar].cbVar = cbType;
7271 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7272 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7273 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7274 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7275 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7276 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7277 pReNative->Core.aVars[idxVar].u.uValue = 0;
7278 return idxVar;
7279}
7280
7281
7282/**
7283 * Internal work that allocates an argument variable w/o setting enmKind.
7284 */
7285static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7286{
7287 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7288 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7289 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7290
7291 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7292 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
7293 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7294 return idxVar;
7295}
7296
7297
7298/**
7299 * Gets the stack slot for a stack variable, allocating one if necessary.
7300 *
7301 * Calling this function implies that the stack slot will contain a valid
7302 * variable value. The caller deals with any register currently assigned to the
7303 * variable, typically by spilling it into the stack slot.
7304 *
7305 * @returns The stack slot number.
7306 * @param pReNative The recompiler state.
7307 * @param idxVar The variable.
7308 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7309 */
7310DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7311{
7312 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7313 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
7314
7315 /* Already got a slot? */
7316 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7317 if (idxStackSlot != UINT8_MAX)
7318 {
7319 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7320 return idxStackSlot;
7321 }
7322
7323 /*
7324 * A single slot is easy to allocate.
7325 * Allocate them from the top end, closest to BP, to reduce the displacement.
7326 */
7327 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
7328 {
7329 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7330 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7331 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7332 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
7333         Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
7334 return (uint8_t)iSlot;
7335 }
7336
7337 /*
7338 * We need more than one stack slot.
7339 *
7340 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7341 */
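    /* Worked example: a 32 byte variable needs cSlots = 4 eight-byte slots, giving
       fBitAllocMask = 0xf and fBitAlignMask = 3, i.e. the allocation must start at a slot
       index that is a multiple of 4 and all four corresponding slots must be free. */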
7342 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7343 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
7344 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
7345 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
7346 uint32_t bmStack = ~pReNative->Core.bmStack;
7347 while (bmStack != UINT32_MAX)
7348 {
7349/** @todo allocate from the top to reduce BP displacement. */
7350 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7351 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7352 if (!(iSlot & fBitAlignMask))
7353 {
7354 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7355 {
7356 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7357 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
7358                 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
7359 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
7360 return (uint8_t)iSlot;
7361 }
7362 }
7363 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7364 }
7365 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7366}
7367
7368
7369/**
7370 * Changes the variable to a stack variable.
7371 *
7372 * Currently this is only possible to do the first time the variable is used;
7373 * switching later could be implemented but hasn't been done.
7374 *
7375 * @param pReNative The recompiler state.
7376 * @param idxVar The variable.
7377 * @throws VERR_IEM_VAR_IPE_2
7378 */
7379static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7380{
7381 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7382 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
7383 {
7384 /* We could in theory transition from immediate to stack as well, but it
7385 would involve the caller doing work storing the value on the stack. So,
7386 till that's required we only allow transition from invalid. */
7387 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7388 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7389 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7390 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
7391
7392 /* Note! We don't allocate a stack slot here, that's only done when a
7393 slot is actually needed to hold a variable value. */
7394 }
7395}
7396
7397
7398/**
7399 * Sets the variable to a constant (immediate) value.
7400 *
7401 * This does not require stack storage as we know the value and can always
7402 * reload it, unless of course it's referenced.
7403 *
7404 * @param pReNative The recompiler state.
7405 * @param idxVar The variable.
7406 * @param uValue The immediate value.
7407 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7408 */
7409static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7410{
7411 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7412 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
7413 {
7414 /* Only simple transitions for now. */
7415 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7416 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7417 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
7418 }
7419 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7420
7421 pReNative->Core.aVars[idxVar].u.uValue = uValue;
7422 AssertMsg( pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
7423 || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
7424 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
7425}
7426
7427
7428/**
7429 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7430 *
7431 * This does not require stack storage as we know the value and can always
7432 * reload it. Loading is postponed till needed.
7433 *
7434 * @param pReNative The recompiler state.
7435 * @param idxVar The variable.
7436 * @param idxOtherVar The variable to take the (stack) address of.
7437 *
7438 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7439 */
7440static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7441{
7442 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7443 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7444
7445 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7446 {
7447 /* Only simple transitions for now. */
7448 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7449 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7450 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7451 }
7452 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7453
7454 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
7455
7456 /* Update the other variable, ensure it's a stack variable. */
7457 /** @todo handle variables with const values... that'll go boom now. */
7458 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7459 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
7460}
7461
7462
7463/**
7464 * Sets the variable to a reference (pointer) to a guest register reference.
7465 *
7466 * This does not require stack storage as we know the value and can always
7467 * reload it. Loading is postponed till needed.
7468 *
7469 * @param pReNative The recompiler state.
7470 * @param idxVar The variable.
7471 * @param   enmRegClass     The class of guest register to reference.
7472 * @param idxReg The register within @a enmRegClass to reference.
7473 *
7474 * @throws VERR_IEM_VAR_IPE_2
7475 */
7476static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7477 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7478{
7479 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7480
7481 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
7482 {
7483 /* Only simple transitions for now. */
7484 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7485 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7486 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
7487 }
7488 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7489
7490 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
7491 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
7492}
7493
7494
7495DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7496{
7497 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7498}
7499
7500
7501DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7502{
7503 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7504
7505    /* Since we're using a generic uint64_t value type, we must truncate it if
7506       the variable is smaller, otherwise we may end up with a too large value when
7507       scaling up an imm8 w/ sign-extension.
7508
7509 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7510       in the bios, bx=1) when running on arm, because clang expects 16-bit
7511 register parameters to have bits 16 and up set to zero. Instead of
7512 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7513 CF value in the result. */
7514 switch (cbType)
7515 {
7516 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7517 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7518 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7519 }
7520 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7521 return idxVar;
7522}
7523
7524
7525DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7526{
7527 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7528 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7529 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7530 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7531
7532 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7533 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
7534 return idxArgVar;
7535}
7536
7537
7538DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7539{
7540 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7541 /* Don't set to stack now, leave that to the first use as for instance
7542 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7543 return idxVar;
7544}
7545
7546
7547DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7548{
7549 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7550
7551    /* Since we're using a generic uint64_t value type, we must truncate it if
7552       the variable is smaller, otherwise we may end up with a too large value when
7553       scaling up an imm8 w/ sign-extension. */
7554 switch (cbType)
7555 {
7556 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7557 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7558 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7559 }
7560 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7561 return idxVar;
7562}
7563
7564
7565/**
7566 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7567 * fixed till we call iemNativeVarRegisterRelease.
7568 *
7569 * @returns The host register number.
7570 * @param pReNative The recompiler state.
7571 * @param idxVar The variable.
7572 * @param poff Pointer to the instruction buffer offset.
7573 * In case a register needs to be freed up or the value
7574 * loaded off the stack.
7575 * @param fInitialized Set if the variable must already have been initialized.
7576 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7577 * the case.
7578 * @param idxRegPref Preferred register number or UINT8_MAX.
7579 */
7580DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7581 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7582{
7583 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7584 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
7585 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7586
7587 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7588 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7589 {
7590 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
7591 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7592 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7593 return idxReg;
7594 }
7595
7596 /*
7597 * If the kind of variable has not yet been set, default to 'stack'.
7598 */
7599 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
7600 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7601 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
7602 iemNativeVarSetKindToStack(pReNative, idxVar);
7603
7604 /*
7605     * We have to allocate a register for the variable, even if it's a stack one,
7606     * as we don't know whether there are modifications being made to it before
7607     * it's finalized (todo: analyze and insert hints about that?).
7608     *
7609     * If we can, we try to get the correct register for argument variables. This
7610     * is assuming that most argument variables are fetched as close as possible
7611     * to the actual call, so that there aren't any interfering hidden calls
7612     * (memory accesses, etc) in between.
7613     *
7614     * If we cannot, or it's a local (non-argument) variable, we make sure no
7615     * argument registers that will be used by this MC block are allocated here,
7616     * and we always prefer non-volatile registers to avoid needing to spill
7617     * stuff for internal calls.
7618 */
7619 /** @todo Detect too early argument value fetches and warn about hidden
7620 * calls causing less optimal code to be generated in the python script. */
7621
7622 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7623 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7624 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7625 {
7626 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7627 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7628 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7629 }
7630 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7631 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7632 {
7633 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7634 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7635 & ~pReNative->Core.bmHstRegsWithGstShadow
7636 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7637 & fNotArgsMask;
7638 if (fRegs)
7639 {
7640 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7641 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7642 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7643 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7644 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7645 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7646 }
7647 else
7648 {
7649 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7650 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7651 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7652 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7653 }
7654 }
7655 else
7656 {
7657 idxReg = idxRegPref;
7658 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7659 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
7660 }
7661 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7662 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7663
7664 /*
7665 * Load it off the stack if we've got a stack slot.
7666 */
7667 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7668 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7669 {
7670 Assert(fInitialized);
7671 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7672 switch (pReNative->Core.aVars[idxVar].cbVar)
7673 {
7674 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7675 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7676 case 3: AssertFailed(); RT_FALL_THRU();
7677 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7678 default: AssertFailed(); RT_FALL_THRU();
7679 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7680 }
7681 }
7682 else
7683 {
7684 Assert(idxStackSlot == UINT8_MAX);
7685 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7686 }
7687 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7688 return idxReg;
7689}
7690
7691
7692/**
7693 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7694 * guest register.
7695 *
7696 * This function makes sure there is a register for it and sets it to be the
7697 * current shadow copy of @a enmGstReg.
7698 *
7699 * @returns The host register number.
7700 * @param pReNative The recompiler state.
7701 * @param idxVar The variable.
7702 * @param enmGstReg The guest register this variable will be written to
7703 * after this call.
7704 * @param poff Pointer to the instruction buffer offset.
7705 * In case a register needs to be freed up or if the
7706 * variable content needs to be loaded off the stack.
7707 *
7708 * @note We DO NOT expect @a idxVar to be an argument variable,
7709 *       because this function is only used in the commit stage of an
7710 *       instruction.
7711 */
7712DECL_HIDDEN_THROW(uint8_t)
7713iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7714{
7715 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7716 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7717 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
7718 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
7719 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
7720 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
7721 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7722 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7723
7724 /*
7725 * This shouldn't ever be used for arguments, unless it's in a weird else
7726 * branch that doesn't do any calling and even then it's questionable.
7727 *
7728 * However, in case someone writes crazy wrong MC code and does register
7729 * updates before making calls, just use the regular register allocator to
7730 * ensure we get a register suitable for the intended argument number.
7731 */
7732 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7733
7734 /*
7735 * If there is already a register for the variable, we transfer/set the
7736 * guest shadow copy assignment to it.
7737 */
7738 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7739 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7740 {
7741 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7742 {
7743 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7744 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7745 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7746 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7747 }
7748 else
7749 {
7750 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7751 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7752 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7753 }
7754 /** @todo figure this one out. We need some way of making sure the register isn't
7755 * modified after this point, just in case we start writing crappy MC code. */
7756 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
7757 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7758 return idxReg;
7759 }
7760 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7761
7762 /*
7763 * Because this is supposed to be the commit stage, we just tag along with the
7764 * temporary register allocator and upgrade it to a variable register.
7765 */
7766 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7767 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7768 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7769 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7770 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7771 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7772
7773 /*
7774 * Now we need to load the register value.
7775 */
7776 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
7777 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
7778 else
7779 {
7780 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7781 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7782 switch (pReNative->Core.aVars[idxVar].cbVar)
7783 {
7784 case sizeof(uint64_t):
7785 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7786 break;
7787 case sizeof(uint32_t):
7788 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7789 break;
7790 case sizeof(uint16_t):
7791 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7792 break;
7793 case sizeof(uint8_t):
7794 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7795 break;
7796 default:
7797 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7798 }
7799 }
7800
7801 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7802 return idxReg;
7803}
7804
7805
7806/**
7807 * Sets the host register for @a idxVarRc to @a idxReg.
7808 *
7809 * The register must not be allocated. Any guest register shadowing will be
7810 * implicitly dropped by this call.
7811 *
7812 * The variable must not have any register associated with it (causes
7813 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
7814 * implied.
7815 *
7816 * @returns idxReg
7817 * @param pReNative The recompiler state.
7818 * @param idxVar The variable.
7819 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
7820 * @param off For recording in debug info.
7821 *
7822 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
7823 */
7824DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
7825{
7826 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7827 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7828 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
7829 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
7830 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
7831
7832 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
7833 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7834
7835 iemNativeVarSetKindToStack(pReNative, idxVar);
7836 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7837
7838 return idxReg;
7839}
7840
7841
7842/**
7843 * Convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
7844 */
7845DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7846 uint8_t idxReg, uint32_t *poff)
7847{
7848 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
7849 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7850 return idxReg;
7851}
7852
7853
7854/**
7855 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7856 *
7857 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7858 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7859 * requirement of flushing anything in volatile host registers when making a
7860 * call.
7861 *
7862 * @returns New @a off value.
7863 * @param pReNative The recompiler state.
7864 * @param off The code buffer position.
7865 * @param fHstRegsNotToSave Set of registers not to save & restore.
7866 */
7867DECL_HIDDEN_THROW(uint32_t)
7868iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7869{
7870 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7871 if (fHstRegs)
7872 {
7873 do
7874 {
7875 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7876 fHstRegs &= ~RT_BIT_32(idxHstReg);
7877
7878 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7879 {
7880 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7881 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7882 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7883 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7884 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7885 switch (pReNative->Core.aVars[idxVar].enmKind)
7886 {
7887 case kIemNativeVarKind_Stack:
7888 {
7889 /* Temporarily spill the variable register. */
7890 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7891 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7892 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7893 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7894 continue;
7895 }
7896
7897 case kIemNativeVarKind_Immediate:
7898 case kIemNativeVarKind_VarRef:
7899 case kIemNativeVarKind_GstRegRef:
7900 /* It is weird to have any of these loaded at this point. */
7901 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7902 continue;
7903
7904 case kIemNativeVarKind_End:
7905 case kIemNativeVarKind_Invalid:
7906 break;
7907 }
7908 AssertFailed();
7909 }
7910 else
7911 {
7912 /*
7913 * Allocate a temporary stack slot and spill the register to it.
7914 */
7915 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7916 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7917 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7918 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7919 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7920 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7921 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7922 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7923 }
7924 } while (fHstRegs);
7925 }
7926 return off;
7927}
7928
7929
7930/**
7931 * Emit code to restore volatile registers after to a call to a helper.
7932 *
7933 * @returns New @a off value.
7934 * @param pReNative The recompiler state.
7935 * @param off The code buffer position.
7936 * @param fHstRegsNotToSave Set of registers not to save & restore.
7937 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7938 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7939 */
7940DECL_HIDDEN_THROW(uint32_t)
7941iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7942{
7943 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7944 if (fHstRegs)
7945 {
7946 do
7947 {
7948 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7949 fHstRegs &= ~RT_BIT_32(idxHstReg);
7950
7951 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7952 {
7953 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7954 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7955 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7956 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7957 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7958 switch (pReNative->Core.aVars[idxVar].enmKind)
7959 {
7960 case kIemNativeVarKind_Stack:
7961 {
7962 /* Unspill the variable register. */
7963 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7964 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%d/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7965 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7966 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7967 continue;
7968 }
7969
7970 case kIemNativeVarKind_Immediate:
7971 case kIemNativeVarKind_VarRef:
7972 case kIemNativeVarKind_GstRegRef:
7973 /* It is weird to have any of these loaded at this point. */
7974 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7975 continue;
7976
7977 case kIemNativeVarKind_End:
7978 case kIemNativeVarKind_Invalid:
7979 break;
7980 }
7981 AssertFailed();
7982 }
7983 else
7984 {
7985 /*
7986 * Restore from temporary stack slot.
7987 */
7988 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7989 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7990 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7991 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7992
7993 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7994 }
7995 } while (fHstRegs);
7996 }
7997 return off;
7998}
7999
8000
8001/**
8002 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8003 *
8004 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8005 */
8006DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8007{
8008 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8009 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8010 {
8011 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8012 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8013 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8014 Assert(cSlots > 0);
8015 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8016 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
8017 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8018 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8019 }
8020 else
8021 Assert(idxStackSlot == UINT8_MAX);
8022}
8023
8024
8025/**
8026 * Worker that frees a single variable.
8027 *
8028 * ASSUMES that @a idxVar is valid.
8029 */
8030DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8031{
8032 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8033 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8034 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8035
8036 /* Free the host register first if any assigned. */
8037 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8038 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8039 {
8040 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
8041 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8042 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8043 }
8044
8045 /* Free argument mapping. */
8046 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8047 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8048 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8049
8050 /* Free the stack slots. */
8051 iemNativeVarFreeStackSlots(pReNative, idxVar);
8052
8053 /* Free the actual variable. */
8054 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8055 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8056}
8057
8058
8059/**
8060 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8061 */
8062DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8063{
8064 while (bmVars != 0)
8065 {
8066 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8067 bmVars &= ~RT_BIT_32(idxVar);
8068
8069#if 1 /** @todo optimize by simplifying this later... */
8070 iemNativeVarFreeOneWorker(pReNative, idxVar);
8071#else
8072 /* Only need to free the host register, the rest is done as bulk updates below. */
8073 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8074 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8075 {
8076 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
8077 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8078 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8079 }
8080#endif
8081 }
8082#if 0 /** @todo optimize by simplifying this later... */
8083 pReNative->Core.bmVars = 0;
8084 pReNative->Core.bmStack = 0;
8085 pReNative->Core.u64ArgVars = UINT64_MAX;
8086#endif
8087}
8088
8089
8090/**
8091 * This is called by IEM_MC_END() to clean up all variables.
8092 */
8093DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
8094{
8095 uint32_t const bmVars = pReNative->Core.bmVars;
8096 if (bmVars != 0)
8097 iemNativeVarFreeAllSlow(pReNative, bmVars);
8098 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8099 Assert(pReNative->Core.bmStack == 0);
8100}
8101
8102
8103#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
8104
8105/**
8106 * This is called by IEM_MC_FREE_LOCAL.
8107 */
8108DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8109{
8110 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8111 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
8112 iemNativeVarFreeOneWorker(pReNative, idxVar);
8113}
8114
8115
8116#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
8117
8118/**
8119 * This is called by IEM_MC_FREE_ARG.
8120 */
8121DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8122{
8123 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8124 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
8125 iemNativeVarFreeOneWorker(pReNative, idxVar);
8126}
8127
8128
8129#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
8130
8131/**
8132 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
8133 */
8134DECL_INLINE_THROW(uint32_t)
8135iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
8136{
8137 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
8138 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
8139 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8140 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
8141 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
8142
8143 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
8144 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
8145 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
8146 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8147
8148 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
8149
8150 /*
8151 * Special case for immediates.
8152 */
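    /* Illustrative example: assigning a 32-bit immediate 0x12345678 to a 16-bit destination
       simply yields the 16-bit constant 0x5678, so no code needs to be emitted here. */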
8153 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
8154 {
8155 switch (pReNative->Core.aVars[idxVarDst].cbVar)
8156 {
8157 case sizeof(uint16_t):
8158 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
8159 break;
8160 case sizeof(uint32_t):
8161 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
8162 break;
8163 default: AssertFailed(); break;
8164 }
8165 }
8166 else
8167 {
8168 /*
8169 * The generic solution for now.
8170 */
8171 /** @todo optimize this by having the python script make sure the source
8172 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
8173 * statement. Then we could just transfer the register assignments. */
8174 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
8175 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
8176 switch (pReNative->Core.aVars[idxVarDst].cbVar)
8177 {
8178 case sizeof(uint16_t):
8179 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
8180 break;
8181 case sizeof(uint32_t):
8182 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
8183 break;
8184 default: AssertFailed(); break;
8185 }
8186 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
8187 iemNativeVarRegisterRelease(pReNative, idxVarDst);
8188 }
8189 return off;
8190}
8191
8192
8193
8194/*********************************************************************************************************************************
8195* Emitters for IEM_MC_CALL_CIMPL_XXX *
8196*********************************************************************************************************************************/
8197
8198/**
8199 * Emits code to load a reference to the given guest register into @a idxGprDst.
8200 */
8201DECL_INLINE_THROW(uint32_t)
8202iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8203 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8204{
8205 /*
8206 * Get the offset relative to the CPUMCTX structure.
8207 */
8208 uint32_t offCpumCtx;
8209 switch (enmClass)
8210 {
8211 case kIemNativeGstRegRef_Gpr:
8212 Assert(idxRegInClass < 16);
8213 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8214 break;
8215
8216 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8217 Assert(idxRegInClass < 4);
8218 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8219 break;
8220
8221 case kIemNativeGstRegRef_EFlags:
8222 Assert(idxRegInClass == 0);
8223 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8224 break;
8225
8226 case kIemNativeGstRegRef_MxCsr:
8227 Assert(idxRegInClass == 0);
8228 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8229 break;
8230
8231 case kIemNativeGstRegRef_FpuReg:
8232 Assert(idxRegInClass < 8);
8233 AssertFailed(); /** @todo what kind of indexing? */
8234 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8235 break;
8236
8237 case kIemNativeGstRegRef_MReg:
8238 Assert(idxRegInClass < 8);
8239 AssertFailed(); /** @todo what kind of indexing? */
8240 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8241 break;
8242
8243 case kIemNativeGstRegRef_XReg:
8244 Assert(idxRegInClass < 16);
8245 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8246 break;
8247
8248 default:
8249 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8250 }
8251
8252 /*
8253 * Load the value into the destination register.
8254 */
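    /* Note that on AMD64 the address is computed relative to pVCpu, hence the additional
       RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) displacement, while arm64 adds the offset directly
       to the dedicated CPUMCTX pointer register (IEMNATIVE_REG_FIXED_PCPUMCTX). */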
8255#ifdef RT_ARCH_AMD64
8256 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8257
8258#elif defined(RT_ARCH_ARM64)
8259 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8260 Assert(offCpumCtx < 4096);
8261 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8262
8263#else
8264# error "Port me!"
8265#endif
8266
8267 return off;
8268}
8269
8270
8271/**
8272 * Common code for CIMPL and AIMPL calls.
8273 *
8274 * These are calls that use argument variables and such. They should not be
8275 * confused with internal calls required to implement an MC operation,
8276 * like a TLB load and similar.
8277 *
8278 * Upon return all that is left to do is to load any hidden arguments and
8279 * perform the call. All argument variables are freed.
8280 *
8281 * @returns New code buffer offset; throws VBox status code on error.
8282 * @param pReNative The native recompile state.
8283 * @param off The code buffer offset.
8284 * @param cArgs The total number of arguments (includes hidden
8285 * count).
8286 * @param cHiddenArgs The number of hidden arguments. The hidden
8287 * arguments must not have any variable declared for
8288 * them, whereas all the regular arguments must
8289 * (tstIEMCheckMc ensures this).
8290 */
8291DECL_HIDDEN_THROW(uint32_t)
8292iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8293{
8294#ifdef VBOX_STRICT
8295 /*
8296 * Assert sanity.
8297 */
8298 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8299 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8300 for (unsigned i = 0; i < cHiddenArgs; i++)
8301 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8302 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8303 {
8304 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8305 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8306 }
8307 iemNativeRegAssertSanity(pReNative);
8308#endif
8309
8310 /*
8311 * Before we do anything else, go over variables that are referenced and
8312 * make sure they are not in a register.
8313 */
8314 uint32_t bmVars = pReNative->Core.bmVars;
8315 if (bmVars)
8316 {
8317 do
8318 {
8319 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8320 bmVars &= ~RT_BIT_32(idxVar);
8321
8322 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8323 {
8324 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8325 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8326 {
8327 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8328 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8329 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8330 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8331 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8332
8333 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8334 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8335 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8336 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8337 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8338 }
8339 }
8340 } while (bmVars != 0);
8341#if 0 //def VBOX_STRICT
8342 iemNativeRegAssertSanity(pReNative);
8343#endif
8344 }
8345
8346 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8347
8348 /*
8349 * First, go over the host registers that will be used for arguments and make
8350 * sure they either hold the desired argument or are free.
8351 */
8352 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8353 {
8354 for (uint32_t i = 0; i < cRegArgs; i++)
8355 {
8356 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8357 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8358 {
8359 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8360 {
8361 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8362 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8363 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
8364 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8365 if (uArgNo == i)
8366 { /* perfect */ }
8367 /* The variable allocator logic should make sure this is impossible,
8368 except for when the return register is used as a parameter (ARM,
8369 but not x86). */
8370#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8371 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8372 {
8373# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8374# error "Implement this"
8375# endif
8376 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8377 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8378 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8379 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8380 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8381 }
8382#endif
8383 else
8384 {
8385 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8386
8387 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
8388 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8389 else
8390 {
8391 /* just free it, can be reloaded if used again */
8392 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8393 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8394 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8395 }
8396 }
8397 }
8398 else
8399 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8400 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8401 }
8402 }
8403#if 0 //def VBOX_STRICT
8404 iemNativeRegAssertSanity(pReNative);
8405#endif
8406 }
8407
8408 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8409
8410#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8411 /*
8412 * If there are any stack arguments, make sure they are in their place as well.
8413 *
8414 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8415 * the caller) will be loading it later and it must be free (see the first loop).
8416 */
8417 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8418 {
8419 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8420 {
8421 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8422 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8423 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8424 {
8425 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8426 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
8427 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
8428 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8429 }
8430 else
8431 {
8432 /* Use ARG0 as temp for stuff we need registers for. */
8433 switch (pReNative->Core.aVars[idxVar].enmKind)
8434 {
8435 case kIemNativeVarKind_Stack:
8436 {
8437 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8438 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8439 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8440 iemNativeStackCalcBpDisp(idxStackSlot));
8441 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8442 continue;
8443 }
8444
8445 case kIemNativeVarKind_Immediate:
8446 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
8447 continue;
8448
8449 case kIemNativeVarKind_VarRef:
8450 {
8451 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8452 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8453 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8454 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8455 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8456 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8457 {
8458 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8459 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8460 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8461 }
8462 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8463 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8464 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8465 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8466 continue;
8467 }
8468
8469 case kIemNativeVarKind_GstRegRef:
8470 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8471 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8472 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8473 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8474 continue;
8475
8476 case kIemNativeVarKind_Invalid:
8477 case kIemNativeVarKind_End:
8478 break;
8479 }
8480 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8481 }
8482 }
8483# if 0 //def VBOX_STRICT
8484 iemNativeRegAssertSanity(pReNative);
8485# endif
8486 }
8487#else
8488 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8489#endif
8490
8491 /*
8492 * Make sure the argument variables are loaded into their respective registers.
8493 *
8494 * We can optimize this by ASSUMING that any register allocations are for
8495 * registers that have already been loaded and are ready. The previous step
8496 * saw to that.
8497 */
8498 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8499 {
8500 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8501 {
8502 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8503 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8504 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
8505 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8506 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8507 else
8508 {
8509 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8510 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8511 {
8512 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
8513 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
8514 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
8515 | RT_BIT_32(idxArgReg);
8516 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
8517 }
8518 else
8519 {
8520 /* Use ARG0 as temp for stuff we need registers for. */
8521 switch (pReNative->Core.aVars[idxVar].enmKind)
8522 {
8523 case kIemNativeVarKind_Stack:
8524 {
8525 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8526 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8527 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8528 continue;
8529 }
8530
8531 case kIemNativeVarKind_Immediate:
8532 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
8533 continue;
8534
8535 case kIemNativeVarKind_VarRef:
8536 {
8537 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8538 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8539 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8540 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8541 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8542 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8543 {
8544 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8545 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8546 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8547 }
8548 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8549 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8550 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8551 continue;
8552 }
8553
8554 case kIemNativeVarKind_GstRegRef:
8555 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8556 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8557 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8558 continue;
8559
8560 case kIemNativeVarKind_Invalid:
8561 case kIemNativeVarKind_End:
8562 break;
8563 }
8564 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8565 }
8566 }
8567 }
8568#if 0 //def VBOX_STRICT
8569 iemNativeRegAssertSanity(pReNative);
8570#endif
8571 }
8572#ifdef VBOX_STRICT
8573 else
8574 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8575 {
8576 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8577 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8578 }
8579#endif
8580
8581 /*
8582 * Free all argument variables (simplified).
8583 * Their lifetime always expires with the call they are for.
8584 */
8585 /** @todo Make the python script check that arguments aren't used after
8586 * IEM_MC_CALL_XXXX. */
8587 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8588 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8589 * an argument value. There is also some FPU stuff. */
8590 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8591 {
8592 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8593 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8594
8595 /* no need to free registers: */
8596 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8597 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8598 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8599 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8600 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8601 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8602
8603 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8604 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8605 iemNativeVarFreeStackSlots(pReNative, idxVar);
8606 }
8607 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8608
8609 /*
8610 * Flush volatile registers as we make the call.
8611 */
8612 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8613
8614 return off;
8615}
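/* In short, the sequence above is:
     1. Spill any register-resident variable that is referenced by another
        variable, so the reference (a stack address) stays valid across the call.
     2. Make each argument register either hold its own argument variable or be
        free, moving or spilling conflicting variables as needed.
     3. With IEMNATIVE_FP_OFF_STACK_ARG0 defined, place arguments beyond the
        register count into their stack slots, using ARG0 as scratch.
     4. Load the remaining argument registers from their variables.
     5. Free all argument variables and flush the volatile registers.
   The caller is still responsible for loading any hidden arguments and emitting
   the actual call. */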
8616
8617
8618/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
8619DECL_HIDDEN_THROW(uint32_t)
8620iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
8621 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
8622
8623{
8624 /*
8625 * Do all the call setup and cleanup.
8626 */
8627 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
8628
8629 /*
8630 * Load the two or three hidden arguments.
8631 */
8632#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8633 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
8634 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8635 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
8636#else
8637 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8638 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
8639#endif
8640
8641 /*
8642 * Make the call and check the return code.
8643 *
8644 * Shadow PC copies are always flushed here, other stuff depends on flags.
8645 * Segment and general purpose registers are explicitly flushed via the
8646 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
8647 * macros.
8648 */
8649 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
8650#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8651 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
8652#endif
8653 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
8654 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
8655 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
8656 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
8657
8658 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
8659}
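/* Roughly speaking, the code emitted above amounts to a call of the shape
       rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, ...);
   on most hosts, while the Windows/AMD64 strict-rc build passes a hidden
   &rcStrict as the first argument and reloads the status from the shadow
   stack slot after the call. This is a sketch of the calling shape, not a
   literal prototype. */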
8660
8661
8662#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
8663 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
8664
8665/** Emits code for IEM_MC_CALL_CIMPL_1. */
8666DECL_INLINE_THROW(uint32_t)
8667iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8668 uintptr_t pfnCImpl, uint8_t idxArg0)
8669{
8670 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8671 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
8672}
8673
8674
8675#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
8676 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
8677
8678/** Emits code for IEM_MC_CALL_CIMPL_2. */
8679DECL_INLINE_THROW(uint32_t)
8680iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8681 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
8682{
8683 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8684 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8685 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
8686}
8687
8688
8689#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
8690 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8691 (uintptr_t)a_pfnCImpl, a0, a1, a2)
8692
8693/** Emits code for IEM_MC_CALL_CIMPL_3. */
8694DECL_INLINE_THROW(uint32_t)
8695iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8696 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8697{
8698 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8699 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8700 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8701 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
8702}
8703
8704
8705#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
8706 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8707 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
8708
8709/** Emits code for IEM_MC_CALL_CIMPL_4. */
8710DECL_INLINE_THROW(uint32_t)
8711iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8712 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8713{
8714 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8715 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8716 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8717 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8718 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
8719}
8720
8721
8722#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
8723 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8724 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
8725
8726/** Emits code for IEM_MC_CALL_CIMPL_5. */
8727DECL_INLINE_THROW(uint32_t)
8728iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8729 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
8730{
8731 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8732 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8733 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8734 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8735 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
8736 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
8737}
8738
8739
8740/** Recompiler debugging: Flush guest register shadow copies. */
8741#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
8742
8743
8744
8745/*********************************************************************************************************************************
8746* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
8747*********************************************************************************************************************************/
8748
8749/**
8750 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
8751 */
8752DECL_INLINE_THROW(uint32_t)
8753iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8754 uintptr_t pfnAImpl, uint8_t cArgs)
8755{
8756 if (idxVarRc != UINT8_MAX)
8757 {
8758 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
8759 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
8760 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
8761 }
8762
8763 /*
8764 * Do all the call setup and cleanup.
8765 */
8766 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
8767
8768 /*
8769 * Make the call and update the return code variable if we've got one.
8770 */
8771 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8772 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
8773 {
8774pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
8775 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
8776 }
8777
8778 return off;
8779}
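/* Sketch of the resulting AIMPL call shape (see the IEM_MC_CALL_AIMPL_N macros
   below): the argument variables end up in the host calling-convention
   registers and, when idxVarRc is given, that variable is simply bound to
   IEMNATIVE_CALL_RET_GREG after the call, i.e. roughly
       a_rc = pfnAImpl(a0, a1, ...); */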
8780
8781
8782
8783#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
8784 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
8785
8786#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
8787 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
8788
8789/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
8790DECL_INLINE_THROW(uint32_t)
8791iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
8792{
8793 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
8794}
8795
8796
8797#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
8798 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
8799
8800#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
8801 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
8802
8803/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
8804DECL_INLINE_THROW(uint32_t)
8805iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
8806{
8807 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8808 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
8809}
8810
8811
8812#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
8813 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
8814
8815#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
8816 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
8817
8818/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
8819DECL_INLINE_THROW(uint32_t)
8820iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8821 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8822{
8823 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8824 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8825 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
8826}
8827
8828
8829#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
8830 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
8831
8832#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
8833 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
8834
8835/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
8836DECL_INLINE_THROW(uint32_t)
8837iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8838 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8839{
8840 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8841 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8842 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8843 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
8844}
8845
8846
8847#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
8848 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8849
8850#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
8851 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8852
8853/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
8854DECL_INLINE_THROW(uint32_t)
8855iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8856 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8857{
8858 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8859 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8860 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8861 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
8862 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
8863}
8864
8865
8866
8867/*********************************************************************************************************************************
8868* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
8869*********************************************************************************************************************************/
8870
8871#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
8872 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
8873
8874#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8875 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
8876
8877#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8878 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
8879
8880#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8881 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
8882
8883
8884/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
8885 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
8886DECL_INLINE_THROW(uint32_t)
8887iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
8888{
8889 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8890 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
8891 Assert(iGRegEx < 20);
8892
8893 /* Same discussion as in iemNativeEmitFetchGregU16 */
8894 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8895 kIemNativeGstRegUse_ReadOnly);
8896
8897 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8898 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8899
8900 /* The value is zero-extended to the full 64-bit host register width. */
8901 if (iGRegEx < 16)
8902 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8903 else
8904 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8905
8906 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8907 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8908 return off;
8909}
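/* Note on the iGRegEx encoding (as implied by the < 20 assertion and the & 15
   masking): values 0..15 select the low byte of the corresponding GPR, while
   16..19 select the high byte (AH/CH/DH/BH) of GPRs 0..3, which is why the
   code picks between the Gpr8 and Gpr8Hi loads above. */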
8910
8911
8912#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8913 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
8914
8915#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8916 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
8917
8918#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8919 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
8920
8921/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
8922DECL_INLINE_THROW(uint32_t)
8923iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
8924{
8925 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8926 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8927 Assert(iGRegEx < 20);
8928
8929 /* Same discussion as in iemNativeEmitFetchGregU16 */
8930 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8931 kIemNativeGstRegUse_ReadOnly);
8932
8933 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8934 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8935
8936 if (iGRegEx < 16)
8937 {
8938 switch (cbSignExtended)
8939 {
8940 case sizeof(uint16_t):
8941 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8942 break;
8943 case sizeof(uint32_t):
8944 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8945 break;
8946 case sizeof(uint64_t):
8947 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8948 break;
8949 default: AssertFailed(); break;
8950 }
8951 }
8952 else
8953 {
8954 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8955 switch (cbSignExtended)
8956 {
8957 case sizeof(uint16_t):
8958 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8959 break;
8960 case sizeof(uint32_t):
8961 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8962 break;
8963 case sizeof(uint64_t):
8964 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8965 break;
8966 default: AssertFailed(); break;
8967 }
8968 }
8969
8970 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8971 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8972 return off;
8973}
8974
8975
8976
8977#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
8978 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
8979
8980#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
8981 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
8982
8983#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
8984 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
8985
8986/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
8987DECL_INLINE_THROW(uint32_t)
8988iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
8989{
8990 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8991 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
8992 Assert(iGReg < 16);
8993
8994 /*
8995 * We can either just load the low 16-bit of the GPR into a host register
8996 * for the variable, or we can do so via a shadow copy host register. The
8997 * latter will avoid having to reload it if it's being stored later, but
8998 * will waste a host register if it isn't touched again. Since we don't
8999 * know what's going to happen, we choose the latter for now.
9000 */
9001 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9002 kIemNativeGstRegUse_ReadOnly);
9003
9004 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9005 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9006 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9007 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9008
9009 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9010 return off;
9011}
9012
9013
9014#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
9015 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9016
9017#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
9018 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9019
9020/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
9021DECL_INLINE_THROW(uint32_t)
9022iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
9023{
9024 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9025 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
9026 Assert(iGReg < 16);
9027
9028 /*
9029 * We can either just load the low 16-bit of the GPR into a host register
9030 * for the variable, or we can do so via a shadow copy host register. The
9031 * latter will avoid having to reload it if it's being stored later, but
9032 * will waste a host register if it isn't touched again. Since we don't
9033 * know what's going to happen, we choose the latter for now.
9034 */
9035 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9036 kIemNativeGstRegUse_ReadOnly);
9037
9038 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9039 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9040 if (cbSignExtended == sizeof(uint32_t))
9041 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9042 else
9043 {
9044 Assert(cbSignExtended == sizeof(uint64_t));
9045 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9046 }
9047 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9048
9049 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9050 return off;
9051}
9052
9053
9054#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
9055 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
9056
9057#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
9058 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
9059
9060/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
9061DECL_INLINE_THROW(uint32_t)
9062iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9063{
9064 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9065 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
9066 Assert(iGReg < 16);
9067
9068 /*
9069 * We can either just load the low 32-bit of the GPR into a host register
9070 * for the variable, or we can do so via a shadow copy host register. The
9071 * latter will avoid having to reload it if it's being stored later, but
9072 * will waste a host register if it isn't touched again. Since we don't
9073 * know what's going to happen, we choose the latter for now.
9074 */
9075 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9076 kIemNativeGstRegUse_ReadOnly);
9077
9078 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9079 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9080 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9081 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9082
9083 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9084 return off;
9085}
9086
9087
9088#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
9089 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
9090
9091/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
9092DECL_INLINE_THROW(uint32_t)
9093iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9094{
9095 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9096 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
9097 Assert(iGReg < 16);
9098
9099 /*
9100 * We can either just load the low 32-bit of the GPR into a host register
9101 * for the variable, or we can do so via a shadow copy host register. The
9102 * latter will avoid having to reload it if it's being stored later, but
9103 * will waste a host register if it isn't touched again. Since we don't
9104 * know what's going to happen, we choose the latter for now.
9105 */
9106 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9107 kIemNativeGstRegUse_ReadOnly);
9108
9109 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9110 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9111 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9112 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9113
9114 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9115 return off;
9116}
9117
9118
9119#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
9120 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9121
9122#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
9123 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9124
9125/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
9126 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
9127DECL_INLINE_THROW(uint32_t)
9128iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9129{
9130 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9131 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
9132 Assert(iGReg < 16);
9133
9134 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9135 kIemNativeGstRegUse_ReadOnly);
9136
9137 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9138 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9139 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
9140 /** @todo name the register a shadow one already? */
9141 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9142
9143 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9144 return off;
9145}
9146
9147
9148
9149/*********************************************************************************************************************************
9150* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
9151*********************************************************************************************************************************/
9152
9153#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
9154 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
9155
9156/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
9157DECL_INLINE_THROW(uint32_t)
9158iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
9159{
9160 Assert(iGRegEx < 20);
9161 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9162 kIemNativeGstRegUse_ForUpdate);
9163#ifdef RT_ARCH_AMD64
9164 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9165
9166 /* To the lowest byte of the register: mov r8, imm8 */
9167 if (iGRegEx < 16)
9168 {
9169 if (idxGstTmpReg >= 8)
9170 pbCodeBuf[off++] = X86_OP_REX_B;
9171 else if (idxGstTmpReg >= 4)
9172 pbCodeBuf[off++] = X86_OP_REX;
9173 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9174 pbCodeBuf[off++] = u8Value;
9175 }
9176 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
9177 else if (idxGstTmpReg < 4)
9178 {
9179 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
9180 pbCodeBuf[off++] = u8Value;
9181 }
9182 else
9183 {
9184 /* ror reg64, 8 */
9185 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9186 pbCodeBuf[off++] = 0xc1;
9187 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9188 pbCodeBuf[off++] = 8;
9189
9190 /* mov reg8, imm8 */
9191 if (idxGstTmpReg >= 8)
9192 pbCodeBuf[off++] = X86_OP_REX_B;
9193 else if (idxGstTmpReg >= 4)
9194 pbCodeBuf[off++] = X86_OP_REX;
9195 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9196 pbCodeBuf[off++] = u8Value;
9197
9198 /* rol reg64, 8 */
9199 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9200 pbCodeBuf[off++] = 0xc1;
9201 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9202 pbCodeBuf[off++] = 8;
9203 }
9204
9205#elif defined(RT_ARCH_ARM64)
9206 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
9207 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9208 if (iGRegEx < 16)
9209 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
9210 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
9211 else
9212 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
9213 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
9214 iemNativeRegFreeTmp(pReNative, idxImmReg);
9215
9216#else
9217# error "Port me!"
9218#endif
9219
9220 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9221
9222 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9223
9224 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9225 return off;
9226}
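/* Design note on the AMD64 rotate trick above: AH/CH/DH/BH can only be encoded
   for registers 0..3, so when the guest shadow lives in another host register
   the code rotates the 64-bit register right by 8, writes the immediate into
   what is now the lowest byte, and rotates back, leaving all other bits of the
   guest register untouched. */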
9227
9228
9229#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
9230 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
9231
9232/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
9233DECL_INLINE_THROW(uint32_t)
9234iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
9235{
9236 Assert(iGRegEx < 20);
9237 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9238
9239 /*
9240 * If it's a constant value (unlikely) we treat this as an
9241 * IEM_MC_STORE_GREG_U8_CONST statement.
9242 */
9243 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9244 { /* likely */ }
9245 else
9246 {
9247 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9248 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9249 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9250 }
9251
9252 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9253 kIemNativeGstRegUse_ForUpdate);
9254 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9255
9256#ifdef RT_ARCH_AMD64
9257 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
9258 if (iGRegEx < 16)
9259 {
9260 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
9261 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9262 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9263 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9264 pbCodeBuf[off++] = X86_OP_REX;
9265 pbCodeBuf[off++] = 0x8a;
9266 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9267 }
9268 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
9269 else if (idxGstTmpReg < 4 && idxVarReg < 4)
9270 {
9271 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
9272 pbCodeBuf[off++] = 0x8a;
9273 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
9274 }
9275 else
9276 {
9277 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
9278
9279 /* ror reg64, 8 */
9280 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9281 pbCodeBuf[off++] = 0xc1;
9282 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9283 pbCodeBuf[off++] = 8;
9284
9285 /* mov reg8, reg8(r/m) */
9286 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9287 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9288 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9289 pbCodeBuf[off++] = X86_OP_REX;
9290 pbCodeBuf[off++] = 0x8a;
9291 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9292
9293 /* rol reg64, 8 */
9294 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9295 pbCodeBuf[off++] = 0xc1;
9296 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9297 pbCodeBuf[off++] = 8;
9298 }
9299
9300#elif defined(RT_ARCH_ARM64)
9301 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
9302 or
9303 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
9304 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9305 if (iGRegEx < 16)
9306 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
9307 else
9308 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
9309
9310#else
9311# error "Port me!"
9312#endif
9313 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9314
9315 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9316
9317 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9318 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9319 return off;
9320}
9321
9322
9323
9324#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
9325 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
9326
9327/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
9328DECL_INLINE_THROW(uint32_t)
9329iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
9330{
9331 Assert(iGReg < 16);
9332 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9333 kIemNativeGstRegUse_ForUpdate);
9334#ifdef RT_ARCH_AMD64
9335 /* mov reg16, imm16 */
9336 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9337 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9338 if (idxGstTmpReg >= 8)
9339 pbCodeBuf[off++] = X86_OP_REX_B;
9340 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
9341 pbCodeBuf[off++] = RT_BYTE1(uValue);
9342 pbCodeBuf[off++] = RT_BYTE2(uValue);
9343
9344#elif defined(RT_ARCH_ARM64)
9345 /* movk xdst, #uValue, lsl #0 */
9346 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9347 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
9348
9349#else
9350# error "Port me!"
9351#endif
9352
9353 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9354
9355 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9356 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9357 return off;
9358}
9359
9360
9361#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
9362 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
9363
9364/** Emits code for IEM_MC_STORE_GREG_U16. */
9365DECL_INLINE_THROW(uint32_t)
9366iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9367{
9368 Assert(iGReg < 16);
9369 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9370
9371 /*
9372 * If it's a constant value (unlikely) we treat this as an
9373 * IEM_MC_STORE_GREG_U16_CONST statement.
9374 */
9375 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9376 { /* likely */ }
9377 else
9378 {
9379 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9380 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9381 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9382 }
9383
9384 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9385 kIemNativeGstRegUse_ForUpdate);
9386
9387#ifdef RT_ARCH_AMD64
9388 /* mov reg16, reg16 or [mem16] */
9389 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9390 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9391 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9392 {
9393 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
9394 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
9395 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
9396 pbCodeBuf[off++] = 0x8b;
9397 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
9398 }
9399 else
9400 {
9401 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
9402 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9403 if (idxGstTmpReg >= 8)
9404 pbCodeBuf[off++] = X86_OP_REX_R;
9405 pbCodeBuf[off++] = 0x8b;
9406 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9407 }
9408
9409#elif defined(RT_ARCH_ARM64)
9410 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
9411 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9412 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9413 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
9414 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9415
9416#else
9417# error "Port me!"
9418#endif
9419
9420 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9421
9422 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9423 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9424 return off;
9425}
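/* Note: the AMD64 path above avoids acquiring a host register for the value
   variable - it copies from the variable's current register or loads the
   16-bit value straight from its stack slot - whereas the ARM64 path acquires
   the register so it can merge the low 16 bits into the guest shadow with BFI. */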
9426
9427
9428#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
9429 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
9430
9431/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
9432DECL_INLINE_THROW(uint32_t)
9433iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
9434{
9435 Assert(iGReg < 16);
9436 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9437 kIemNativeGstRegUse_ForFullWrite);
9438 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9439 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9440 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9441 return off;
9442}
9443
9444
9445#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
9446 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
9447
9448/** Emits code for IEM_MC_STORE_GREG_U32. */
9449DECL_INLINE_THROW(uint32_t)
9450iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9451{
9452 Assert(iGReg < 16);
9453 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9454
9455 /*
9456 * If it's a constant value (unlikely) we treat this as an
9457 * IEM_MC_STORE_GREG_U32_CONST statement.
9458 */
9459 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9460 { /* likely */ }
9461 else
9462 {
9463 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9464 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9465 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9466 }
9467
9468 /*
9469 * For the rest we allocate a guest register for the variable and write
9470 * it to the CPUMCTX structure.
9471 */
9472 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9473 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9474#ifdef VBOX_STRICT
9475 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
9476#endif
9477 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9478 return off;
9479}
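/* Here the variable's register is acquired directly as the guest shadow of the
   full GPR, so a single 64-bit store implements the zero-extending semantics of
   a 32-bit register write; the VBOX_STRICT check above verifies that bits 63:32
   of the value are indeed clear. */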
9480
9481
9482#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
9483 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
9484
9485/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
9486DECL_INLINE_THROW(uint32_t)
9487iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
9488{
9489 Assert(iGReg < 16);
9490 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9491 kIemNativeGstRegUse_ForFullWrite);
9492 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9493 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9494 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9495 return off;
9496}
9497
9498
9499#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
9500 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
9501
9502/** Emits code for IEM_MC_STORE_GREG_U64. */
9503DECL_INLINE_THROW(uint32_t)
9504iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9505{
9506 Assert(iGReg < 16);
9507 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9508
9509 /*
9510 * If it's a constant value (unlikely) we treat this as an
9511 * IEM_MC_STORE_GREG_U64_CONST statement.
9512 */
9513 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9514 { /* likely */ }
9515 else
9516 {
9517 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9518 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9519 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
9520 }
9521
9522 /*
9523 * For the rest we allocate a guest register for the variable and write
9524 * it to the CPUMCTX structure.
9525 */
9526 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9527 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9528 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9529 return off;
9530}
9531
9532
9533#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
9534 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
9535
9536/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
9537DECL_INLINE_THROW(uint32_t)
9538iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
9539{
9540 Assert(iGReg < 16);
9541 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9542 kIemNativeGstRegUse_ForUpdate);
9543 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
9544 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9545 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9546 return off;
9547}
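/* A 32-bit register-to-itself move is all that is needed here: writes to a
   32-bit register zero bits 63:32 on AMD64, and a 32-bit MOV clears the upper
   half on ARM64 as well, so the following store writes back the zero-extended
   value. */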
9548
9549
9550/*********************************************************************************************************************************
9551* General purpose register manipulation (add, sub). *
9552*********************************************************************************************************************************/
9553
9554#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
9555 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
9556
9557/** Emits code for IEM_MC_ADD_GREG_U16. */
9558DECL_INLINE_THROW(uint32_t)
9559iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
9560{
9561 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9562 kIemNativeGstRegUse_ForUpdate);
9563
9564#ifdef RT_ARCH_AMD64
9565 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9566 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9567 if (idxGstTmpReg >= 8)
9568 pbCodeBuf[off++] = X86_OP_REX_B;
9569 if (uAddend == 1)
9570 {
9571 pbCodeBuf[off++] = 0xff; /* inc */
9572 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9573 }
9574 else
9575 {
9576 pbCodeBuf[off++] = 0x81;
9577 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9578 pbCodeBuf[off++] = uAddend;
9579 pbCodeBuf[off++] = 0;
9580 }
9581
9582#else
9583 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9585
9586 /* add tmp, gstgrp, uAddend */
9587 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
9588
9589 /* bfi gstgrp, tmp, 0, 16 - copies bits 15:0 of the temporary into the guest register, preserving bits 63:16. */
9590 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9591
9592 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9593#endif
9594
9595 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9596
9597 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9598
9599 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9600 return off;
9601}
9602
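/*
 * Illustrative sketch, compiled out (not part of the recompiler; the helper
 * name is made up): the ARM64 add-then-BFI sequence in the emitter above,
 * expressed in plain C.  Only bits 15:0 of the guest register receive the
 * sum, bits 63:16 are preserved, matching x86 16-bit sub-register writes.
 */
#if 0 /* example only */
static uint64_t exampleAddGReg16(uint64_t uGReg, uint8_t uAddend)
{
    uint16_t const uSum = (uint16_t)(uGReg + uAddend);          /* add tmp, gstgrp, uAddend */
    return (uGReg & ~(uint64_t)UINT16_MAX) | uSum;              /* bfi gstgrp, tmp, 0, 16 */
}
#endif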
9603
9604#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
9605 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9606
9607#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
9608 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9609
9610/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
9611DECL_INLINE_THROW(uint32_t)
9612iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
9613{
9614 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9615 kIemNativeGstRegUse_ForUpdate);
9616
9617#ifdef RT_ARCH_AMD64
9618 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9619 if (f64Bit)
9620 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9621 else if (idxGstTmpReg >= 8)
9622 pbCodeBuf[off++] = X86_OP_REX_B;
9623 if (uAddend == 1)
9624 {
9625 pbCodeBuf[off++] = 0xff; /* inc */
9626 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9627 }
9628 else if (uAddend < 128)
9629 {
9630 pbCodeBuf[off++] = 0x83; /* add */
9631 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9632 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9633 }
9634 else
9635 {
9636 pbCodeBuf[off++] = 0x81; /* add */
9637 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9638 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9639 pbCodeBuf[off++] = 0;
9640 pbCodeBuf[off++] = 0;
9641 pbCodeBuf[off++] = 0;
9642 }
9643
9644#else
9645 /* add gstgrp, gstgrp, uAddend */
9646 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9647 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
9648
9649#endif
9650
9651 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9652
9653 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9654
9655 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9656 return off;
9657}
9658
9659
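/*
 * For reference (an illustration, not emitted text): with idxGstTmpReg = 0
 * (xAX) the 32-bit variant of the emitter above selects these encodings; the
 * 64-bit variant merely prepends a REX.W (0x48) prefix:
 *      uAddend == 1:    ff c0              inc  eax
 *      uAddend  < 128:  83 c0 ib           add  eax, imm8
 *      uAddend >= 128:  81 c0 id           add  eax, imm32 (zero-extended addend)
 */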
9660
9661#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
9662 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
9663
9664/** Emits code for IEM_MC_SUB_GREG_U16. */
9665DECL_INLINE_THROW(uint32_t)
9666iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
9667{
9668 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9669 kIemNativeGstRegUse_ForUpdate);
9670
9671#ifdef RT_ARCH_AMD64
9672 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9673 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9674 if (idxGstTmpReg >= 8)
9675 pbCodeBuf[off++] = X86_OP_REX_B;
9676 if (uSubtrahend == 1)
9677 {
9678 pbCodeBuf[off++] = 0xff; /* dec */
9679 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9680 }
9681 else
9682 {
9683 pbCodeBuf[off++] = 0x81;
9684 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9685 pbCodeBuf[off++] = uSubtrahend;
9686 pbCodeBuf[off++] = 0;
9687 }
9688
9689#else
9690 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9691 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9692
9693 /* sub tmp, gstgrp, uSubtrahend */
9694 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
9695
9696 /* bfi gstgrp, tmp, 0, 16 - copies bits 15:0 of the temporary into the guest register, preserving bits 63:16. */
9697 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9698
9699 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9700#endif
9701
9702 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9703
9704 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9705
9706 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9707 return off;
9708}
9709
9710
9711#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
9712 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9713
9714#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
9715 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9716
9717/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
9718DECL_INLINE_THROW(uint32_t)
9719iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
9720{
9721 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9722 kIemNativeGstRegUse_ForUpdate);
9723
9724#ifdef RT_ARCH_AMD64
9725 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9726 if (f64Bit)
9727 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9728 else if (idxGstTmpReg >= 8)
9729 pbCodeBuf[off++] = X86_OP_REX_B;
9730 if (uSubtrahend == 1)
9731 {
9732 pbCodeBuf[off++] = 0xff; /* dec */
9733 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9734 }
9735 else if (uSubtrahend < 128)
9736 {
9737 pbCodeBuf[off++] = 0x83; /* sub */
9738 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9739 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9740 }
9741 else
9742 {
9743 pbCodeBuf[off++] = 0x81; /* sub */
9744 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9745 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9746 pbCodeBuf[off++] = 0;
9747 pbCodeBuf[off++] = 0;
9748 pbCodeBuf[off++] = 0;
9749 }
9750
9751#else
9752 /* sub gstgrp, gstgrp, uSubtrahend */
9753 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9754 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
9755
9756#endif
9757
9758 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9759
9760 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9761
9762 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9763 return off;
9764}
9765
9766
9767
9768/*********************************************************************************************************************************
9769* EFLAGS *
9770*********************************************************************************************************************************/
9771
9772#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9773# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
9774#else
9775# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
9776 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
9777
9778DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
9779{
9780 if (fEflOutput)
9781 {
9782 PVMCPUCC const pVCpu = pReNative->pVCpu;
9783 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
9784 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
9785 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
9786# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
9787 if (fEflOutput & (a_fEfl)) \
9788 { \
9789 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
9790 STAM_COUNTER_INC(&pVCpu->iem.s. a_CoreStatName ## Required); \
9791 else \
9792 STAM_COUNTER_INC(&pVCpu->iem.s. a_CoreStatName ## Skippable); \
9793 } else do { } while (0)
9794 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
9795 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
9796 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
9797 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
9798 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
9799 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
9800 CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
9801# undef CHECK_FLAG_AND_UPDATE_STATS
9802 }
9803 RT_NOREF(fEflInput);
9804}
9805#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
9806
9807#undef IEM_MC_FETCH_EFLAGS /* should not be used */
9808#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
9809 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
9810
9811/** Handles IEM_MC_FETCH_EFLAGS_EX. */
9812DECL_INLINE_THROW(uint32_t)
9813iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
9814 uint32_t fEflInput, uint32_t fEflOutput)
9815{
9816 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
9817 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
9818 RT_NOREF(fEflInput, fEflOutput);
9819
9820#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9821# ifdef VBOX_STRICT
9822 if ( pReNative->idxCurCall != 0
9823 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
9824 {
9825 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
9826 uint32_t const fBoth = fEflInput | fEflOutput;
9827# define ASSERT_ONE_EFL(a_fElfConst, a_offField) \
9828 AssertMsg( !(fBoth & (a_fElfConst)) \
9829 || (!(fEflInput & (a_fElfConst)) \
9830 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, kIemNativeGstReg_EFlags + (a_offField))) \
9831 : IEMLIVENESS_STATE_IS_ACCESS_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, kIemNativeGstReg_EFlags + (a_offField))) ), \
9832 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, kIemNativeGstReg_EFlags + (a_offField))))
9833 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, 0);
9834 ASSERT_ONE_EFL(X86_EFL_CF, 1);
9835 ASSERT_ONE_EFL(X86_EFL_PF, 2);
9836 ASSERT_ONE_EFL(X86_EFL_AF, 3);
9837 ASSERT_ONE_EFL(X86_EFL_ZF, 4);
9838 ASSERT_ONE_EFL(X86_EFL_SF, 5);
9839 ASSERT_ONE_EFL(X86_EFL_OF, 6);
9840# undef ASSERT_ONE_EFL
9841 }
9842# endif
9843#endif
9844
9845 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
9846 * the existing shadow copy. */
9847 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
9848 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
9849 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
9850 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
9851 return off;
9852}
9853
9854
9855
9856/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
9857 * start using it with custom native code emission (inlining assembly
9858 * instruction helpers). */
9859#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
9860#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
9861 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
9862 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
9863
9864/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
9865DECL_INLINE_THROW(uint32_t)
9866iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
9867{
9868 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
9869 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
9870 RT_NOREF(fEflOutput);
9871
9872 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
9873
9874#ifdef VBOX_STRICT
9875 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
9876 uint32_t offFixup = off;
9877 off = iemNativeEmitJnzToFixed(pReNative, off, off);
9878 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
9879 iemNativeFixupFixedJump(pReNative, offFixup, off);
9880
9881 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
9882 offFixup = off;
9883 off = iemNativeEmitJzToFixed(pReNative, off, off);
9884 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
9885 iemNativeFixupFixedJump(pReNative, offFixup, off);
9886
9887 /** @todo validate that only bits in the fEflOutput mask changed. */
9888#endif
9889
9890 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
9891 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
9892 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
9893 return off;
9894}
9895
9896
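/*
 * Illustrative sketch, compiled out (not part of the recompiler; the helper
 * name is made up): the VBOX_STRICT check emitted by iemNativeEmitCommitEFlags
 * above boils down to this predicate on the committed EFLAGS value, using the
 * same constants as the emitter.
 */
#if 0 /* example only */
static bool exampleEflagsLooksSane(uint32_t fEfl)
{
    if (!(fEfl & X86_EFL_RA1_MASK))                             /* otherwise brk 0x2001 */
        return false;
    if (fEfl & (X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32))   /* otherwise brk 0x2002 */
        return false;
    return true;
}
#endif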
9897
9898/*********************************************************************************************************************************
9899* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
9900*********************************************************************************************************************************/
9901
9902#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
9903 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
9904
9905#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
9906 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
9907
9908#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
9909 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
9910
9911
9912/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
9913 * IEM_MC_FETCH_SREG_ZX_U64. */
9914DECL_INLINE_THROW(uint32_t)
9915iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
9916{
9917 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9918 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
9919 Assert(iSReg < X86_SREG_COUNT);
9920
9921 /*
9922 * For now, we will not create a shadow copy of a selector. The rationale
9923 * is that we do not recompile the popping and loading of segment
9924 * registers, and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
9925 * and moving to registers, so there is only a small chance that the
9926 * shadow copy will be accessed again before the register is reloaded. One
9927 * scenario would be nested calls in 16-bit code, but I doubt it's worth
9928 * the extra register pressure atm.
9929 *
9930 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
9931 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
9932 * store scenario covered at present (r160730).
9933 */
9934 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9935 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9936 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
9937 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9938 return off;
9939}
9940
9941
9942
9943/*********************************************************************************************************************************
9944* Register references. *
9945*********************************************************************************************************************************/
9946
9947#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
9948 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
9949
9950#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
9951 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
9952
9953/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
9954DECL_INLINE_THROW(uint32_t)
9955iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
9956{
9957 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9958 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9959 Assert(iGRegEx < 20);
9960
9961 if (iGRegEx < 16)
9962 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
9963 else
9964 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
9965
9966 /* If we've delayed writing back the register value, flush it now. */
9967 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
9968
9969 /* If it's not a const reference we need to flush the shadow copy of the register now. */
9970 if (!fConst)
9971 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
9972
9973 return off;
9974}
9975
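/*
 * Illustrative sketch, compiled out (not part of the recompiler; the helper
 * name is made up): how the extended 8-bit register index appears to map onto
 * the GPRs, judging from the iGRegEx < 20 assertion and the '& 15' masking in
 * the function above - 0..15 select the low byte of GPR 0..15, while 16..19
 * select the high byte (AH/CH/DH/BH) of GPR 0..3.  This mapping is an
 * assumption made for illustration.
 */
#if 0 /* example only */
static uint8_t exampleReadGReg8Ex(uint64_t const *pauGRegs, uint8_t iGRegEx)
{
    if (iGRegEx < 16)
        return (uint8_t)pauGRegs[iGRegEx];                      /* AL, CL, DL, BL, SPL, ..., R15B */
    return (uint8_t)(pauGRegs[iGRegEx & 15] >> 8);              /* AH, CH, DH, BH */
}
#endif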
9976#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
9977 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
9978
9979#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
9980 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
9981
9982#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
9983 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
9984
9985#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
9986 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
9987
9988#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
9989 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
9990
9991#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
9992 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
9993
9994#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
9995 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
9996
9997#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
9998 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
9999
10000#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
10001 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
10002
10003#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
10004 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
10005
10006/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
10007DECL_INLINE_THROW(uint32_t)
10008iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
10009{
10010 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10011 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
10012 Assert(iGReg < 16);
10013
10014 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
10015
10016 /* If we've delayed writing back the register value, flush it now. */
10017 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
10018
10019 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10020 if (!fConst)
10021 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
10022
10023 return off;
10024}
10025
10026
10027#undef IEM_MC_REF_EFLAGS /* should not be used. */
10028#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
10029 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10030 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
10031
10032/** Handles IEM_MC_REF_EFLAGS. */
10033DECL_INLINE_THROW(uint32_t)
10034iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
10035{
10036 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10037 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
10038
10039 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
10040
10041 /* If we've delayed writing back the register value, flush it now. */
10042 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
10043
10044 /* If there is a shadow copy of guest EFLAGS, flush it now. */
10045 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
10046
10047 return off;
10048}
10049
10050
10051/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
10052 * different code from the threaded recompiler, maybe it would be helpful. For now
10053 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
10054#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
10055
10056
10057
10058/*********************************************************************************************************************************
10059* Effective Address Calculation *
10060*********************************************************************************************************************************/
10061#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
10062 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
10063
10064/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
10065 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
10066DECL_INLINE_THROW(uint32_t)
10067iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10068 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
10069{
10070 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10071
10072 /*
10073 * Handle the disp16 form with no registers first.
10074 *
10075 * Convert to an immediate value, as that'll delay the register allocation
10076 * and assignment till the memory access / call / whatever and we can use
10077 * a more appropriate register (or none at all).
10078 */
10079 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
10080 {
10081 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
10082 return off;
10083 }
10084
10085 /* Determine the displacement. */
10086 uint16_t u16EffAddr;
10087 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10088 {
10089 case 0: u16EffAddr = 0; break;
10090 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
10091 case 2: u16EffAddr = u16Disp; break;
10092 default: AssertFailedStmt(u16EffAddr = 0);
10093 }
10094
10095 /* Determine the registers involved. */
10096 uint8_t idxGstRegBase;
10097 uint8_t idxGstRegIndex;
10098 switch (bRm & X86_MODRM_RM_MASK)
10099 {
10100 case 0:
10101 idxGstRegBase = X86_GREG_xBX;
10102 idxGstRegIndex = X86_GREG_xSI;
10103 break;
10104 case 1:
10105 idxGstRegBase = X86_GREG_xBX;
10106 idxGstRegIndex = X86_GREG_xDI;
10107 break;
10108 case 2:
10109 idxGstRegBase = X86_GREG_xBP;
10110 idxGstRegIndex = X86_GREG_xSI;
10111 break;
10112 case 3:
10113 idxGstRegBase = X86_GREG_xBP;
10114 idxGstRegIndex = X86_GREG_xDI;
10115 break;
10116 case 4:
10117 idxGstRegBase = X86_GREG_xSI;
10118 idxGstRegIndex = UINT8_MAX;
10119 break;
10120 case 5:
10121 idxGstRegBase = X86_GREG_xDI;
10122 idxGstRegIndex = UINT8_MAX;
10123 break;
10124 case 6:
10125 idxGstRegBase = X86_GREG_xBP;
10126 idxGstRegIndex = UINT8_MAX;
10127 break;
10128#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
10129 default:
10130#endif
10131 case 7:
10132 idxGstRegBase = X86_GREG_xBX;
10133 idxGstRegIndex = UINT8_MAX;
10134 break;
10135 }
10136
10137 /*
10138 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
10139 */
10140 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10141 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10142 kIemNativeGstRegUse_ReadOnly);
10143 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
10144 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10145 kIemNativeGstRegUse_ReadOnly)
10146 : UINT8_MAX;
10147#ifdef RT_ARCH_AMD64
10148 if (idxRegIndex == UINT8_MAX)
10149 {
10150 if (u16EffAddr == 0)
10151 {
10152 /* movzx ret, base */
10153 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
10154 }
10155 else
10156 {
10157 /* lea ret32, [base64 + disp32] */
10158 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10159 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10160 if (idxRegRet >= 8 || idxRegBase >= 8)
10161 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
10162 pbCodeBuf[off++] = 0x8d;
10163 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10164 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
10165 else
10166 {
10167 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
10168 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10169 }
10170 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
10171 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
10172 pbCodeBuf[off++] = 0;
10173 pbCodeBuf[off++] = 0;
10174 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10175
10176 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
10177 }
10178 }
10179 else
10180 {
10181 /* lea ret32, [index64 + base64 (+ disp32)] */
10182 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10183 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10184 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10185 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10186 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10187 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10188 pbCodeBuf[off++] = 0x8d;
10189 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
10190 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10191 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
10192 if (bMod == X86_MOD_MEM4)
10193 {
10194 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
10195 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
10196 pbCodeBuf[off++] = 0;
10197 pbCodeBuf[off++] = 0;
10198 }
10199 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10200 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
10201 }
10202
10203#elif defined(RT_ARCH_ARM64)
10204 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
10205 if (u16EffAddr == 0)
10206 {
10207 if (idxRegIndex == UINT8_MAX)
10208 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
10209 else
10210 {
10211 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
10212 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10213 }
10214 }
10215 else
10216 {
10217 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
10218 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
10219 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
10220 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10221 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
10222 else
10223 {
10224 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
10225 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10226 }
10227 if (idxRegIndex != UINT8_MAX)
10228 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
10229 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10230 }
10231
10232#else
10233# error "port me"
10234#endif
10235
10236 if (idxRegIndex != UINT8_MAX)
10237 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10238 iemNativeRegFreeTmp(pReNative, idxRegBase);
10239 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10240 return off;
10241}
10242
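/*
 * Illustrative sketch, compiled out (not part of the recompiler; the helper
 * name is made up): the 16-bit effective address the generated code above has
 * to produce, computed directly in C.  The r/m table is the standard one the
 * emitter's switch implements: BX+SI, BX+DI, BP+SI, BP+DI, SI, DI, BP (or
 * plain disp16 when mod=0), BX.
 */
#if 0 /* example only */
static uint16_t exampleCalcEffAddr16(uint8_t bRm, uint16_t u16Disp,
                                     uint16_t uBx, uint16_t uBp, uint16_t uSi, uint16_t uDi)
{
    static struct { int8_t iBase, iIndex; } const s_aMap[8] = /* indexes into aRegs below; -1 = no index */
    { { 0, 2 }, { 0, 3 }, { 1, 2 }, { 1, 3 }, { 2, -1 }, { 3, -1 }, { 1, -1 }, { 0, -1 } };
    uint16_t const aRegs[4] = { uBx, uBp, uSi, uDi };
    uint8_t  const iMod     = (bRm >> 6) & 3;
    uint8_t  const iRm      = bRm & 7;
    if (iMod == 0 && iRm == 6)
        return u16Disp;                                         /* mod=0, r/m=6: disp16 only */
    uint16_t uEff = iMod == 1 ? (uint16_t)(int8_t)u16Disp : iMod == 2 ? u16Disp : 0;
    uEff = (uint16_t)(uEff + aRegs[s_aMap[iRm].iBase]);
    if (s_aMap[iRm].iIndex >= 0)
        uEff = (uint16_t)(uEff + aRegs[s_aMap[iRm].iIndex]);
    return uEff;                                                /* wraps at 16 bits like real mode */
}
#endif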
10243
10244#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
10245 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
10246
10247/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
10248 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
10249DECL_INLINE_THROW(uint32_t)
10250iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10251 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
10252{
10253 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10254
10255 /*
10256 * Handle the disp32 form with no registers first.
10257 *
10258 * Convert to an immediate value, as that'll delay the register allocation
10259 * and assignment till the memory access / call / whatever and we can use
10260 * a more appropriate register (or none at all).
10261 */
10262 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10263 {
10264 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
10265 return off;
10266 }
10267
10268 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
10269 uint32_t u32EffAddr = 0;
10270 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10271 {
10272 case 0: break;
10273 case 1: u32EffAddr = (int8_t)u32Disp; break;
10274 case 2: u32EffAddr = u32Disp; break;
10275 default: AssertFailed();
10276 }
10277
10278 /* Get the register (or SIB) value. */
10279 uint8_t idxGstRegBase = UINT8_MAX;
10280 uint8_t idxGstRegIndex = UINT8_MAX;
10281 uint8_t cShiftIndex = 0;
10282 switch (bRm & X86_MODRM_RM_MASK)
10283 {
10284 case 0: idxGstRegBase = X86_GREG_xAX; break;
10285 case 1: idxGstRegBase = X86_GREG_xCX; break;
10286 case 2: idxGstRegBase = X86_GREG_xDX; break;
10287 case 3: idxGstRegBase = X86_GREG_xBX; break;
10288 case 4: /* SIB */
10289 {
10290 /* index w/ scaling. */
10291 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10292 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10293 {
10294 case 0: idxGstRegIndex = X86_GREG_xAX; break;
10295 case 1: idxGstRegIndex = X86_GREG_xCX; break;
10296 case 2: idxGstRegIndex = X86_GREG_xDX; break;
10297 case 3: idxGstRegIndex = X86_GREG_xBX; break;
10298 case 4: cShiftIndex = 0; /*no index*/ break;
10299 case 5: idxGstRegIndex = X86_GREG_xBP; break;
10300 case 6: idxGstRegIndex = X86_GREG_xSI; break;
10301 case 7: idxGstRegIndex = X86_GREG_xDI; break;
10302 }
10303
10304 /* base */
10305 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
10306 {
10307 case 0: idxGstRegBase = X86_GREG_xAX; break;
10308 case 1: idxGstRegBase = X86_GREG_xCX; break;
10309 case 2: idxGstRegBase = X86_GREG_xDX; break;
10310 case 3: idxGstRegBase = X86_GREG_xBX; break;
10311 case 4:
10312 idxGstRegBase = X86_GREG_xSP;
10313 u32EffAddr += uSibAndRspOffset >> 8;
10314 break;
10315 case 5:
10316 if ((bRm & X86_MODRM_MOD_MASK) != 0)
10317 idxGstRegBase = X86_GREG_xBP;
10318 else
10319 {
10320 Assert(u32EffAddr == 0);
10321 u32EffAddr = u32Disp;
10322 }
10323 break;
10324 case 6: idxGstRegBase = X86_GREG_xSI; break;
10325 case 7: idxGstRegBase = X86_GREG_xDI; break;
10326 }
10327 break;
10328 }
10329 case 5: idxGstRegBase = X86_GREG_xBP; break;
10330 case 6: idxGstRegBase = X86_GREG_xSI; break;
10331 case 7: idxGstRegBase = X86_GREG_xDI; break;
10332 }
10333
10334 /*
10335 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
10336 * the start of the function.
10337 */
10338 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
10339 {
10340 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
10341 return off;
10342 }
10343
10344 /*
10345 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10346 */
10347 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10348 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
10349 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10350 kIemNativeGstRegUse_ReadOnly);
10351 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
10352 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10353 kIemNativeGstRegUse_ReadOnly);
10354
10355 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
10356 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
10357 {
10358 idxRegBase = idxRegIndex;
10359 idxRegIndex = UINT8_MAX;
10360 }
10361
10362#ifdef RT_ARCH_AMD64
10363 if (idxRegIndex == UINT8_MAX)
10364 {
10365 if (u32EffAddr == 0)
10366 {
10367 /* mov ret, base */
10368 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10369 }
10370 else
10371 {
10372 /* lea ret32, [base64 + disp32] */
10373 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10374 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10375 if (idxRegRet >= 8 || idxRegBase >= 8)
10376 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
10377 pbCodeBuf[off++] = 0x8d;
10378 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10379 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10380 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
10381 else
10382 {
10383 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10384 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10385 }
10386 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10387 if (bMod == X86_MOD_MEM4)
10388 {
10389 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10390 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10391 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10392 }
10393 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10394 }
10395 }
10396 else
10397 {
10398 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10399 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10400 if (idxRegBase == UINT8_MAX)
10401 {
10402 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
10403 if (idxRegRet >= 8 || idxRegIndex >= 8)
10404 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10405 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10406 pbCodeBuf[off++] = 0x8d;
10407 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
10408 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
10409 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10410 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10411 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10412 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10413 }
10414 else
10415 {
10416 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
10417 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10418 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10419 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10420 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10421 pbCodeBuf[off++] = 0x8d;
10422 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
10423 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10424 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10425 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
10426 if (bMod != X86_MOD_MEM0)
10427 {
10428 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10429 if (bMod == X86_MOD_MEM4)
10430 {
10431 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10432 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10433 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10434 }
10435 }
10436 }
10437 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10438 }
10439
10440#elif defined(RT_ARCH_ARM64)
10441 if (u32EffAddr == 0)
10442 {
10443 if (idxRegIndex == UINT8_MAX)
10444 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10445 else if (idxRegBase == UINT8_MAX)
10446 {
10447 if (cShiftIndex == 0)
10448 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
10449 else
10450 {
10451 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10452 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
10453 }
10454 }
10455 else
10456 {
10457 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10458 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
10459 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
10460 }
10461 }
10462 else
10463 {
10464 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
10465 {
10466 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10467 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
10468 }
10469 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
10470 {
10471 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10472 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10473 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
10474 }
10475 else
10476 {
10477 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
10478 if (idxRegBase != UINT8_MAX)
10479 {
10480 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10481 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10482 }
10483 }
10484 if (idxRegIndex != UINT8_MAX)
10485 {
10486 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10487 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
10488 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
10489 }
10490 }
10491
10492#else
10493# error "port me"
10494#endif
10495
10496 if (idxRegIndex != UINT8_MAX)
10497 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10498 if (idxRegBase != UINT8_MAX)
10499 iemNativeRegFreeTmp(pReNative, idxRegBase);
10500 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10501 return off;
10502}
10503
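/*
 * Illustrative sketch, compiled out (not part of the recompiler; the helper
 * name is made up): how the packed uSibAndRspOffset argument used above and
 * documented for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64 below decomposes - bits
 * 7:0 hold the raw SIB byte (scale in bits 7:6, index in 5:3, base in 2:0)
 * and bits 15:8 the fixed RSP/ESP offset applied for a pop [xSP].
 */
#if 0 /* example only */
static void exampleDecodeSibAndRspOffset(uint32_t uSibAndRspOffset, uint8_t *pcShiftIndex,
                                         uint8_t *piIndex, uint8_t *piBase, uint8_t *pbRspOffset)
{
    uint8_t const bSib = (uint8_t)uSibAndRspOffset;
    *pcShiftIndex = (bSib >> 6) & 3;                            /* scale: shift count (1x,2x,4x,8x) */
    *piIndex      = (bSib >> 3) & 7;                            /* index register (4 = no index)    */
    *piBase       = bSib & 7;                                   /* base register                    */
    *pbRspOffset  = (uint8_t)(uSibAndRspOffset >> 8);           /* extra offset for pop [xSP]       */
}
#endif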
10504
10505#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10506 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10507 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10508
10509#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10510 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10511 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10512
10513#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10514 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10515 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
10516
10517/**
10518 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
10519 *
10520 * @returns New off.
10521 * @param pReNative The native recompile state.
10522 * @param off The current code buffer offset.
10523 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
10524 * bit 4 to REX.X. The two bits are part of the
10525 * REG sub-field, which isn't needed in this
10526 * function.
10527 * @param uSibAndRspOffset Two parts:
10528 * - The first 8 bits make up the SIB byte.
10529 * - The next 8 bits are the fixed RSP/ESP offset
10530 * in case of a pop [xSP].
10531 * @param u32Disp The displacement byte/word/dword, if any.
10532 * @param cbInstr The size of the fully decoded instruction. Used
10533 * for RIP relative addressing.
10534 * @param idxVarRet The result variable number.
10535 * @param f64Bit Whether to use a 64-bit or 32-bit address size
10536 * when calculating the address.
10537 *
10538 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
10539 */
10540DECL_INLINE_THROW(uint32_t)
10541iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
10542 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
10543{
10544 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10545
10546 /*
10547 * Special case the rip + disp32 form first.
10548 */
10549 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10550 {
10551 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10552 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
10553 kIemNativeGstRegUse_ReadOnly);
10554#ifdef RT_ARCH_AMD64
10555 if (f64Bit)
10556 {
10557 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
10558 if ((int32_t)offFinalDisp == offFinalDisp)
10559 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
10560 else
10561 {
10562 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
10563 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
10564 }
10565 }
10566 else
10567 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
10568
10569#elif defined(RT_ARCH_ARM64)
10570 if (f64Bit)
10571 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10572 (int64_t)(int32_t)u32Disp + cbInstr);
10573 else
10574 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10575 (int32_t)u32Disp + cbInstr);
10576
10577#else
10578# error "Port me!"
10579#endif
10580 iemNativeRegFreeTmp(pReNative, idxRegPc);
10581 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10582 return off;
10583 }
10584
10585 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
10586 int64_t i64EffAddr = 0;
10587 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10588 {
10589 case 0: break;
10590 case 1: i64EffAddr = (int8_t)u32Disp; break;
10591 case 2: i64EffAddr = (int32_t)u32Disp; break;
10592 default: AssertFailed();
10593 }
10594
10595 /* Get the register (or SIB) value. */
10596 uint8_t idxGstRegBase = UINT8_MAX;
10597 uint8_t idxGstRegIndex = UINT8_MAX;
10598 uint8_t cShiftIndex = 0;
10599 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
10600 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
10601 else /* SIB: */
10602 {
10603 /* index w/ scaling. */
10604 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10605 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10606 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
10607 if (idxGstRegIndex == 4)
10608 {
10609 /* no index */
10610 cShiftIndex = 0;
10611 idxGstRegIndex = UINT8_MAX;
10612 }
10613
10614 /* base */
10615 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
10616 if (idxGstRegBase == 4)
10617 {
10618 /* pop [rsp] hack */
10619 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
10620 }
10621 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
10622 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
10623 {
10624 /* mod=0 and base=5 -> disp32, no base reg. */
10625 Assert(i64EffAddr == 0);
10626 i64EffAddr = (int32_t)u32Disp;
10627 idxGstRegBase = UINT8_MAX;
10628 }
10629 }
10630
10631 /*
10632 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
10633 * the start of the function.
10634 */
10635 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
10636 {
10637 if (f64Bit)
10638 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
10639 else
10640 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
10641 return off;
10642 }
10643
10644 /*
10645 * Now emit code that calculates:
10646 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10647 * or if !f64Bit:
10648 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10649 */
10650 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10651 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
10652 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10653 kIemNativeGstRegUse_ReadOnly);
10654 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
10655 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10656 kIemNativeGstRegUse_ReadOnly);
10657
10658 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
10659 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
10660 {
10661 idxRegBase = idxRegIndex;
10662 idxRegIndex = UINT8_MAX;
10663 }
10664
10665#ifdef RT_ARCH_AMD64
10666 uint8_t bFinalAdj;
10667 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
10668 bFinalAdj = 0; /* likely */
10669 else
10670 {
10671 /* pop [rsp] with a problematic disp32 value. Split out the
10672 RSP offset and add it separately afterwards (bFinalAdj). */
10673 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
10674 Assert(idxGstRegBase == X86_GREG_xSP);
10675 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
10676 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
10677 Assert(bFinalAdj != 0);
10678 i64EffAddr -= bFinalAdj;
10679 Assert((int32_t)i64EffAddr == i64EffAddr);
10680 }
10681 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
10682//pReNative->pInstrBuf[off++] = 0xcc;
10683
10684 if (idxRegIndex == UINT8_MAX)
10685 {
10686 if (u32EffAddr == 0)
10687 {
10688 /* mov ret, base */
10689 if (f64Bit)
10690 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
10691 else
10692 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10693 }
10694 else
10695 {
10696 /* lea ret, [base + disp32] */
10697 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10698 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10699 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
10700 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10701 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10702 | (f64Bit ? X86_OP_REX_W : 0);
10703 pbCodeBuf[off++] = 0x8d;
10704 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10705 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10706 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
10707 else
10708 {
10709 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10710 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10711 }
10712 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10713 if (bMod == X86_MOD_MEM4)
10714 {
10715 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10716 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10717 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10718 }
10719 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10720 }
10721 }
10722 else
10723 {
10724 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10725 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10726 if (idxRegBase == UINT8_MAX)
10727 {
10728 /* lea ret, [(index64 << cShiftIndex) + disp32] */
10729 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
10730 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10731 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
10732 | (f64Bit ? X86_OP_REX_W : 0);
10733 pbCodeBuf[off++] = 0x8d;
10734 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
10735 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
10736 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10737 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10738 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10739 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10740 }
10741 else
10742 {
10743 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
10744 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10745 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10746 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10747 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
10748 | (f64Bit ? X86_OP_REX_W : 0);
10749 pbCodeBuf[off++] = 0x8d;
10750 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
10751 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10752 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10753 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
10754 if (bMod != X86_MOD_MEM0)
10755 {
10756 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10757 if (bMod == X86_MOD_MEM4)
10758 {
10759 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10760 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10761 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10762 }
10763 }
10764 }
10765 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10766 }
10767
10768 if (!bFinalAdj)
10769 { /* likely */ }
10770 else
10771 {
10772 Assert(f64Bit);
10773 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
10774 }
10775
10776#elif defined(RT_ARCH_ARM64)
10777 if (i64EffAddr == 0)
10778 {
10779 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10780 if (idxRegIndex == UINT8_MAX)
10781 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
10782 else if (idxRegBase != UINT8_MAX)
10783 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
10784 f64Bit, false /*fSetFlags*/, cShiftIndex);
10785 else
10786 {
10787 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
10788 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
10789 }
10790 }
10791 else
10792 {
10793 if (f64Bit)
10794 { /* likely */ }
10795 else
10796 i64EffAddr = (int32_t)i64EffAddr;
10797
10798 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
10799 {
10800 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10801 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
10802 }
10803 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
10804 {
10805 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10806 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
10807 }
10808 else
10809 {
10810 if (f64Bit)
10811 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
10812 else
10813 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
10814 if (idxRegBase != UINT8_MAX)
10815 {
10816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
10818 }
10819 }
10820 if (idxRegIndex != UINT8_MAX)
10821 {
10822 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10823 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
10824 f64Bit, false /*fSetFlags*/, cShiftIndex);
10825 }
10826 }
10827
10828#else
10829# error "port me"
10830#endif
10831
10832 if (idxRegIndex != UINT8_MAX)
10833 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10834 if (idxRegBase != UINT8_MAX)
10835 iemNativeRegFreeTmp(pReNative, idxRegBase);
10836 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10837 return off;
10838}
10839
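/*
 * Illustrative sketch, compiled out (not part of the recompiler; the helper
 * name is made up): the RIP-relative special case at the top of the function
 * above.  The disp32 is relative to the address of the *next* instruction,
 * i.e. the guest RIP of the current instruction plus its length.
 */
#if 0 /* example only */
static uint64_t exampleCalcRipRelAddr(uint64_t uGuestRip, uint8_t cbInstr, uint32_t u32Disp, bool f64Bit)
{
    uint64_t const uEff = uGuestRip + cbInstr + (int64_t)(int32_t)u32Disp;
    return f64Bit ? uEff : (uint32_t)uEff;                      /* 32-bit address size truncates */
}
#endif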
10840
10841/*********************************************************************************************************************************
10842* TLB Lookup. *
10843*********************************************************************************************************************************/
10844
10845/**
10846 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
10847 */
10848DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
10849{
10850 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
10851 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
10852 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
10853 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
10854
10855 /* Do the lookup manually. */
10856 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
10857 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
10858 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
10859 if (RT_LIKELY(pTlbe->uTag == uTag))
10860 {
10861 /*
10862 * Check TLB page table level access flags.
10863 */
10864 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
10865 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
10866 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
10867 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
10868 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
10869 | IEMTLBE_F_PG_UNASSIGNED
10870 | IEMTLBE_F_PT_NO_ACCESSED
10871 | fNoWriteNoDirty | fNoUser);
10872 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
10873 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
10874 {
10875 /*
10876 * Return the address.
10877 */
10878 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
10879 if ((uintptr_t)pbAddr == uResult)
10880 return;
10881 RT_NOREF(cbMem);
10882 AssertFailed();
10883 }
10884 else
10885 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
10886 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
10887 }
10888 else
10889 AssertFailed();
10890 RT_BREAKPOINT();
10891}
10892
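/*
 * Illustrative sketch, compiled out (an assumption for illustration, not the
 * actual emitter code; the helper name is made up): given how
 * iemNativeHlpCheckTlbLookup above unpacks uSegAndSizeAndAccess (byte 0 =
 * segment register, byte 1 = access size, bits 31:16 = access flags), the
 * caller-side packing would look something like this.
 */
#if 0 /* example only */
static uint32_t examplePackSegSizeAccess(uint8_t iSegReg, uint8_t cbMem, uint32_t fAccess)
{
    return (uint32_t)iSegReg | ((uint32_t)cbMem << 8) | (fAccess << 16);
}
#endif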
10893/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
10894
10895
10896/*********************************************************************************************************************************
10897* Memory fetches and stores common *
10898*********************************************************************************************************************************/
10899
10900typedef enum IEMNATIVEMITMEMOP
10901{
10902 kIemNativeEmitMemOp_Store = 0,
10903 kIemNativeEmitMemOp_Fetch,
10904 kIemNativeEmitMemOp_Fetch_Zx_U16,
10905 kIemNativeEmitMemOp_Fetch_Zx_U32,
10906 kIemNativeEmitMemOp_Fetch_Zx_U64,
10907 kIemNativeEmitMemOp_Fetch_Sx_U16,
10908 kIemNativeEmitMemOp_Fetch_Sx_U32,
10909 kIemNativeEmitMemOp_Fetch_Sx_U64
10910} IEMNATIVEMITMEMOP;
10911
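/*
 * Illustrative sketch, compiled out (not part of the recompiler; the helper
 * names are made up): the difference between the _Zx_ and _Sx_ members of
 * IEMNATIVEMITMEMOP above, shown for an 8-bit value widened to 32 bits.
 */
#if 0 /* example only */
static uint32_t exampleFetchU8ZxU32(uint8_t uValue) { return uValue; }                            /* zero extend */
static uint32_t exampleFetchU8SxU32(uint8_t uValue) { return (uint32_t)(int32_t)(int8_t)uValue; } /* sign extend */
#endif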
10912/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
10913 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
10914 * (with iSegReg = UINT8_MAX). */
10915DECL_INLINE_THROW(uint32_t)
10916iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
10917 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
10918 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
10919{
10920 /*
10921 * Assert sanity.
10922 */
10923 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
10924 Assert( enmOp != kIemNativeEmitMemOp_Store
10925 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
10926 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
10927 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
10928 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
10929 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
10930 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10931 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
10932 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
10933 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
10934#ifdef VBOX_STRICT
10935 if (iSegReg == UINT8_MAX)
10936 {
10937 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10938 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10939 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10940 switch (cbMem)
10941 {
10942 case 1:
10943 Assert( pfnFunction
10944 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
10945 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10946 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10947 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10948 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10949 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
10950 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
10951 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
10952 : UINT64_C(0xc000b000a0009000) ));
10953 break;
10954 case 2:
10955 Assert( pfnFunction
10956 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
10957 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10958 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10959 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10960 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
10961 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
10962 : UINT64_C(0xc000b000a0009000) ));
10963 break;
10964 case 4:
10965 Assert( pfnFunction
10966 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
10967 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
10968 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
10969 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
10970 : UINT64_C(0xc000b000a0009000) ));
10971 break;
10972 case 8:
10973 Assert( pfnFunction
10974 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
10975 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
10976 : UINT64_C(0xc000b000a0009000) ));
10977 break;
10978 }
10979 }
10980 else
10981 {
10982 Assert(iSegReg < 6);
10983 switch (cbMem)
10984 {
10985 case 1:
10986 Assert( pfnFunction
10987 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
10988 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
10989 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10990 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10991 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
10992 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
10993 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
10994 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
10995 : UINT64_C(0xc000b000a0009000) ));
10996 break;
10997 case 2:
10998 Assert( pfnFunction
10999 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
11000 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
11001 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11002 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11003 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
11004 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
11005 : UINT64_C(0xc000b000a0009000) ));
11006 break;
11007 case 4:
11008 Assert( pfnFunction
11009 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
11010 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
11011 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
11012 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
11013 : UINT64_C(0xc000b000a0009000) ));
11014 break;
11015 case 8:
11016 Assert( pfnFunction
11017 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
11018 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
11019 : UINT64_C(0xc000b000a0009000) ));
11020 break;
11021 }
11022 }
11023#endif
11024
11025#ifdef VBOX_STRICT
11026 /*
11027 * Check that the fExec flags we've got make sense.
11028 */
11029 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11030#endif
11031
11032 /*
11033 * To keep things simple we have to commit any pending writes first as we
11034 * may end up making calls.
11035 */
11036 /** @todo we could postpone this till we make the call and reload the
11037 * registers after returning from the call. Not sure if that's sensible or
11038 * not, though. */
11039 off = iemNativeRegFlushPendingWrites(pReNative, off);
11040
11041#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11042 /*
11043 * Move/spill/flush stuff out of call-volatile registers.
11044 * This is the easy way out. We could contain this to the tlb-miss branch
11045 * by saving and restoring active stuff here.
11046 */
11047 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
11048#endif
11049
11050 /*
11051 * Define labels and allocate the result register (trying for the return
11052 * register if we can).
11053 */
11054 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11055 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
11056 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
11057 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
11058 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
11059 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
11060 uint8_t const idxRegValueStore = !TlbState.fSkip
11061 && enmOp == kIemNativeEmitMemOp_Store
11062 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
11063 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
11064 : UINT8_MAX;
11065 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11066 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11067 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11068 : UINT32_MAX;
11069
11070 /*
11071 * Jump to the TLB lookup code.
11072 */
11073 if (!TlbState.fSkip)
11074 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11075
11076 /*
11077 * TlbMiss:
11078 *
11079 * Call helper to do the fetching.
11080 * We flush all guest register shadow copies here.
11081 */
11082 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
11083
11084#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11085 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11086#else
11087 RT_NOREF(idxInstr);
11088#endif
11089
11090#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11091 /* Save variables in volatile registers. */
11092 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11093 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
11094 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
11095 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11096#endif
11097
11098 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
11099 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
11100 if (enmOp == kIemNativeEmitMemOp_Store)
11101 {
11102 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
11103 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*offAddend*/,
11104#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11105 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11106#else
11107 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInVolatileRegs*/);
11108 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
11109#endif
11110 }
11111
11112 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
11113 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*offAddend*/,
11114#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11115 fVolGregMask);
11116#else
11117 fVolGregMask, true /*fSpilledVarsInVolatileRegs*/);
11118#endif
11119
11120 if (iSegReg != UINT8_MAX)
11121 {
11122 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
11123 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
11124 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
11125 }
11126
11127 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11128 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11129
11130 /* Done setting up parameters, make the call. */
11131 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11132
11133 /*
11134 * Put the result in the right register if this is a fetch.
11135 */
11136 if (enmOp != kIemNativeEmitMemOp_Store)
11137 {
11138 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
11139 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
11140 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
11141 }
11142
11143#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11144 /* Restore variables and guest shadow registers to volatile registers. */
11145 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11146 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11147#endif
11148
11149#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11150 if (!TlbState.fSkip)
11151 {
11152 /* end of TlbMiss - Jump to the done label. */
11153 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11154 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11155
11156 /*
11157 * TlbLookup:
11158 */
11159 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
11160 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
11161 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
11162
11163 /*
11164 * Emit code to do the actual storing / fetching.
11165 */
11166 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
11167# ifdef VBOX_WITH_STATISTICS
11168 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
11169 enmOp == kIemNativeEmitMemOp_Store
11170 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
11171 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
11172# endif
11173 switch (enmOp)
11174 {
11175 case kIemNativeEmitMemOp_Store:
11176 if (pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate)
11177 {
11178 switch (cbMem)
11179 {
11180 case 1:
11181 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11182 break;
11183 case 2:
11184 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11185 break;
11186 case 4:
11187 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11188 break;
11189 case 8:
11190 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11191 break;
11192 default:
11193 AssertFailed();
11194 }
11195 }
11196 else
11197 {
11198 switch (cbMem)
11199 {
11200 case 1:
11201 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off,
11202 (uint8_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11203 idxRegMemResult, TlbState.idxReg1);
11204 break;
11205 case 2:
11206 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
11207 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11208 idxRegMemResult, TlbState.idxReg1);
11209 break;
11210 case 4:
11211 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
11212 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11213 idxRegMemResult, TlbState.idxReg1);
11214 break;
11215 case 8:
11216 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
11217 idxRegMemResult, TlbState.idxReg1);
11218 break;
11219 default:
11220 AssertFailed();
11221 }
11222 }
11223 break;
11224
11225 case kIemNativeEmitMemOp_Fetch:
11226 case kIemNativeEmitMemOp_Fetch_Zx_U16:
11227 case kIemNativeEmitMemOp_Fetch_Zx_U32:
11228 case kIemNativeEmitMemOp_Fetch_Zx_U64:
11229 switch (cbMem)
11230 {
11231 case 1:
11232 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11233 break;
11234 case 2:
11235 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11236 break;
11237 case 4:
11238 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11239 break;
11240 case 8:
11241 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11242 break;
11243 default:
11244 AssertFailed();
11245 }
11246 break;
11247
11248 case kIemNativeEmitMemOp_Fetch_Sx_U16:
11249 Assert(cbMem == 1);
11250 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11251 break;
11252
11253 case kIemNativeEmitMemOp_Fetch_Sx_U32:
11254 Assert(cbMem == 1 || cbMem == 2);
11255 if (cbMem == 1)
11256 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11257 else
11258 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11259 break;
11260
11261 case kIemNativeEmitMemOp_Fetch_Sx_U64:
11262 switch (cbMem)
11263 {
11264 case 1:
11265 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11266 break;
11267 case 2:
11268 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11269 break;
11270 case 4:
11271 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11272 break;
11273 default:
11274 AssertFailed();
11275 }
11276 break;
11277
11278 default:
11279 AssertFailed();
11280 }
11281
11282 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
11283
11284 /*
11285 * TlbDone:
11286 */
11287 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11288
11289 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
11290
11291# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11292 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
11293 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11294# endif
11295 }
11296#else
11297 RT_NOREF(fAlignMask, idxLabelTlbMiss);
11298#endif
11299
11300 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
11301 iemNativeVarRegisterRelease(pReNative, idxVarValue);
11302 return off;
11303}
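/* Rough shape of the native code the function above emits when the TLB lookup
   isn't skipped (illustrative only, label names invented):

        jmp   TlbLookup_N              ; try the inline data TLB first
     TlbMiss_N:
        ; save volatiles, load pVCpu / GCPtrMem(+offDisp) / value / iSegReg into arg registers
        call  pfnFunction              ; iemNativeHlpMem[Flat]{Fetch,Store}DataUxx
        ; move the return value into the variable register, restore volatiles and shadows
        jmp   TlbDone_N
     TlbLookup_N:
        ; inline TLB probe, branching back to TlbMiss_N on miss or awkward access,
        ; followed by the actual load (mov/movzx/movsx) or store on the hit path
     TlbDone_N:
*/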
11304
11305
11306
11307/*********************************************************************************************************************************
11308* Memory fetches (IEM_MEM_FETCH_XXX). *
11309*********************************************************************************************************************************/
11310
11311/* 8-bit segmented: */
11312#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
11313 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
11314 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11315 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11316
11317#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11318 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11319 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11320 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11321
11322#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11323 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11324 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11325 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11326
11327#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11328 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11329 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11330 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11331
11332#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11333 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11334 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11335 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11336
11337#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11338 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11339 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11340 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11341
11342#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11343 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11344 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11345 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11346
11347/* 16-bit segmented: */
11348#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11349 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11350 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11351 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11352
11353#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11354 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11355 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11356 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11357
11358#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11359 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11360 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11361 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11362
11363#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11364 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11365 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11366 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11367
11368#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11369 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11370 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11371 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11372
11373#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11374 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11375 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11376 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11377
11378
11379/* 32-bit segmented: */
11380#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11381 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11382 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11383 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11384
11385#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11386 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11387 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11388 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11389
11390#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11391 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11392 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11393 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11394
11395#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11396 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11397 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11398 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11399
11400
11401/* 64-bit segmented: */
11402#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11403 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11404 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11405 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
11406
11407
11408
11409/* 8-bit flat: */
11410#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
11411 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
11412 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11413 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11414
11415#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
11416 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11417 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11418 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11419
11420#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
11421 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11422 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11423 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11424
11425#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
11426 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11427 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11428 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11429
11430#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
11431 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11432 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11433 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11434
11435#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
11436 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11437 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11438 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11439
11440#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
11441 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11442 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11443 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11444
11445
11446/* 16-bit flat: */
11447#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
11448 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11449 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11450 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11451
11452#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
11453 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11454 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11455 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11456
11457#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
11458 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11459 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11460 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11461
11462#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
11463 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11464 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11465 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11466
11467#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
11468 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11469 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11470 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11471
11472#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
11473 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11474 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11475 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11476
11477/* 32-bit flat: */
11478#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
11479 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11480 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11481 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11482
11483#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
11484 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11485 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11486 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11487
11488#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
11489 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11490 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11491 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11492
11493#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
11494 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11495 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11496 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11497
11498/* 64-bit flat: */
11499#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
11500 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11501 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11502 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
11503
11504
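/* Two informal notes on the arguments used by the fetch macros above:
   - fAlignMask is 0 for byte accesses and sizeof(type) - 1 otherwise; the inline
     TLB path uses it to leave misaligned / page-crossing accesses to the helper
     call (the exact policy lives in iemNativeEmitTlbLookup in
     IEMN8veRecompilerTlbLookup.h).
   - The _DISP variants pass a_offDisp through as offDisp, which is added to the
     GCPtrMem variable both in the inline TLB path and when loading the helper's
     address argument, i.e. roughly:

       GCPtrEff = uGCPtrMemVar + offDisp;   // effective address actually accessed
*/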
11505
11506/*********************************************************************************************************************************
11507* Memory stores (IEM_MEM_STORE_XXX). *
11508*********************************************************************************************************************************/
11509
11510#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
11511 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
11512 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11513 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11514
11515#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
11516 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
11517 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11518 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11519
11520#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
11521 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
11522 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11523 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11524
11525#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
11526 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
11527 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11528 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11529
11530
11531#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
11532 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
11533 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11534 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11535
11536#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
11537 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
11538 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11539 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11540
11541#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
11542 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
11543 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11544 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11545
11546#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
11547 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
11548 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11549 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11550
11551
11552#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
11553 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11554 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11555
11556#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
11557 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11558 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11559
11560#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
11561 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11562 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11563
11564#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
11565 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11566 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11567
11568
11569#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
11570 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11571 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11572
11573#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
11574 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11575 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11576
11577#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
11578 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11579 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11580
11581#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
11582 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11583 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11584
11585/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
11586 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
11587DECL_INLINE_THROW(uint32_t)
11588iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
11589 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
11590{
11591 /*
11592 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
11593 * to do the grunt work.
11594 */
11595 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
11596 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
11597 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
11598 pfnFunction, idxInstr);
11599 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
11600 return off;
11601}
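/* Design note (informal): because the constant is wrapped in an immediate
   variable, the common emitter above can use the iemNativeEmitStoreImmXXByGprEx
   emitters on the TLB-hit path and only materialises the value as a call
   argument on the TLB-miss path. A sketch of what IEM_MC_STORE_MEM_U16_CONST
   boils down to (idxVarTmp is a name made up for this sketch):

       uint8_t const idxVarTmp = iemNativeVarAllocConst(pReNative, sizeof(uint16_t), u16ConstValue);
       off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarTmp, iSegReg, idxVarGCPtrMem,
                                                  sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store,
                                                  (uintptr_t)iemNativeHlpMemStoreDataU16, idxInstr);
       iemNativeVarFreeLocal(pReNative, idxVarTmp);
*/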
11602
11603
11604
11605/*********************************************************************************************************************************
11606* Stack Accesses. *
11607*********************************************************************************************************************************/
11608/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
11609#define IEM_MC_PUSH_U16(a_u16Value) \
11610 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
11611 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
11612#define IEM_MC_PUSH_U32(a_u32Value) \
11613 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
11614 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
11615#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
11616 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
11617 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
11618#define IEM_MC_PUSH_U64(a_u64Value) \
11619 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
11620 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
11621
11622#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
11623 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
11624 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11625#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
11626 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
11627 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
11628#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
11629 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
11630 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
11631
11632#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
11633 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
11634 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11635#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
11636 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
11637 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
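/* Informal note on the RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) packing
   used by the stack push/pop macros: byte 0 is the operand size in bits, byte 1
   the flat mode width (0 = segmented SS-relative), byte 2 the segment-register
   flag. The emitters decode it like this (worked example for IEM_MC_FLAT32_PUSH_U32_SREG):

       uint32_t const cBitsVarAndFlat = RT_MAKE_U32_FROM_U8(32, 32, 1, 0);
       uint8_t  const cbMem     = RT_BYTE1(cBitsVarAndFlat) / 8;   // 4 bytes pushed
       uint8_t  const cBitsFlat = RT_BYTE2(cBitsVarAndFlat);       // 32, i.e. flat 32-bit stack
       bool     const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;  // true, segment register push
*/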
11638
11639
11640DECL_FORCE_INLINE_THROW(uint32_t)
11641iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11642{
11643 /* Use16BitSp: */
11644#ifdef RT_ARCH_AMD64
11645 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
11646 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11647#else
11648 /* sub regeff, regrsp, #cbMem */
11649 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
11650 /* and regeff, regeff, #0xffff */
11651 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
11652 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
11653 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
11654 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
11655#endif
11656 return off;
11657}
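/* Plain-C equivalent of the 16-bit SP update above (illustrative; uRsp and uEffSp
   stand for the values held in idxRegRsp and idxRegEffSp):

       uint16_t const uNewSp = (uint16_t)uRsp - cbMem;                   // SP wraps modulo 64K
       uEffSp = uNewSp;                                                  // address the push stores to
       uRsp   = (uRsp & UINT64_C(0xffffffffffff0000)) | uNewSp;          // bits 63:16 of RSP are preserved
*/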
11658
11659
11660DECL_FORCE_INLINE(uint32_t)
11661iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11662{
11663 /* Use32BitSp: */
11664 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
11665 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11666 return off;
11667}
11668
11669
11670/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
11671DECL_INLINE_THROW(uint32_t)
11672iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
11673 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
11674{
11675 /*
11676 * Assert sanity.
11677 */
11678 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11679#ifdef VBOX_STRICT
11680 if (RT_BYTE2(cBitsVarAndFlat) != 0)
11681 {
11682 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11683 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11684 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11685 Assert( pfnFunction
11686 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
11687 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
11688 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
11689 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
11690 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
11691 : UINT64_C(0xc000b000a0009000) ));
11692 }
11693 else
11694 Assert( pfnFunction
11695 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
11696 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
11697 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
11698 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
11699 : UINT64_C(0xc000b000a0009000) ));
11700#endif
11701
11702#ifdef VBOX_STRICT
11703 /*
11704 * Check that the fExec flags we've got make sense.
11705 */
11706 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11707#endif
11708
11709 /*
11710 * To keep things simple we have to commit any pending writes first as we
11711 * may end up making calls.
11712 */
11713 /** @todo we could postpone this till we make the call and reload the
11714 * registers after returning from the call. Not sure if that's sensible or
11715 * not, though. */
11716 off = iemNativeRegFlushPendingWrites(pReNative, off);
11717
11718 /*
11719 * First we calculate the new RSP and the effective stack pointer value.
11720 * For 64-bit mode and flat 32-bit these two are the same.
11721 * (Code structure is very similar to that of the stack POP code further down.)
11722 */
11723 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
11724 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
11725 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
11726 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
11727 ? cbMem : sizeof(uint16_t);
11728 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
11729 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
11730 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
11731 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
11732 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
11733 if (cBitsFlat != 0)
11734 {
11735 Assert(idxRegEffSp == idxRegRsp);
11736 Assert(cBitsFlat == 32 || cBitsFlat == 64);
11737 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
11738 if (cBitsFlat == 64)
11739 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
11740 else
11741 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
11742 }
11743 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
11744 {
11745 Assert(idxRegEffSp != idxRegRsp);
11746 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
11747 kIemNativeGstRegUse_ReadOnly);
11748#ifdef RT_ARCH_AMD64
11749 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11750#else
11751 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11752#endif
11753 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
11754 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
11755 offFixupJumpToUseOtherBitSp = off;
11756 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11757 {
11758 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
11759 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11760 }
11761 else
11762 {
11763 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
11764 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11765 }
11766 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11767 }
11768 /* SpUpdateEnd: */
11769 uint32_t const offLabelSpUpdateEnd = off;
11770
11771 /*
11772 * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or to
11773 * TlbMiss if we're skipping the lookup).
11774 */
11775 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
11776 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
11777 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11778 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
11779 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11780 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11781 : UINT32_MAX;
11782 uint8_t const idxRegValue = !TlbState.fSkip
11783 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
11784 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
11785 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
11786 : UINT8_MAX;
11787 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11788
11789
11790 if (!TlbState.fSkip)
11791 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11792 else
11793 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
11794
11795 /*
11796 * Use16BitSp / Use32BitSp (the SP width not handled on the straight-line path above):
11797 */
11798 if (cBitsFlat == 0)
11799 {
11800#ifdef RT_ARCH_AMD64
11801 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11802#else
11803 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11804#endif
11805 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
11806 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11807 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11808 else
11809 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11810 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
11811 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11812 }
11813
11814 /*
11815 * TlbMiss:
11816 *
11817 * Call helper to do the pushing.
11818 */
11819 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
11820
11821#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11822 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11823#else
11824 RT_NOREF(idxInstr);
11825#endif
11826
11827 /* Save variables in volatile registers. */
11828 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11829 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
11830 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
11831 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
11832 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11833
11834 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
11835 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
11836 {
11837 /* Swap them using ARG0 as temp register: */
11838 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
11839 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
11840 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
11841 }
11842 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
11843 {
11844 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
11845 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
11846 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11847
11848 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
11849 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
11850 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
11851 }
11852 else
11853 {
11854 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
11855 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
11856
11857 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
11858 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
11859 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
11860 }
11861
11862 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11863 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11864
11865 /* Done setting up parameters, make the call. */
11866 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11867
11868 /* Restore variables and guest shadow registers to volatile registers. */
11869 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11870 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11871
11872#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11873 if (!TlbState.fSkip)
11874 {
11875 /* end of TlbMiss - Jump to the done label. */
11876 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11877 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11878
11879 /*
11880 * TlbLookup:
11881 */
11882 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
11883 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
11884
11885 /*
11886 * Emit code to do the actual storing / fetching.
11887 */
11888 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
11889# ifdef VBOX_WITH_STATISTICS
11890 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
11891 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
11892# endif
11893 if (idxRegValue != UINT8_MAX)
11894 {
11895 switch (cbMemAccess)
11896 {
11897 case 2:
11898 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
11899 break;
11900 case 4:
11901 if (!fIsIntelSeg)
11902 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
11903 else
11904 {
11905 /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
11906 PUSH FS in real mode, so we have to try to emulate that here.
11907 We borrow the now unused idxReg1 from the TLB lookup code here. */
11908 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
11909 kIemNativeGstReg_EFlags);
11910 if (idxRegEfl != UINT8_MAX)
11911 {
11912#ifdef RT_ARCH_AMD64
11913 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
11914 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
11915 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
11916#else
11917 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
11918 off, TlbState.idxReg1, idxRegEfl,
11919 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
11920#endif
11921 iemNativeRegFreeTmp(pReNative, idxRegEfl);
11922 }
11923 else
11924 {
11925 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
11926 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
11927 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
11928 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
11929 }
11930 /* ASSUMES the upper half of idxRegValue is ZERO. */
11931 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
11932 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
11933 }
11934 break;
11935 case 8:
11936 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
11937 break;
11938 default:
11939 AssertFailed();
11940 }
11941 }
11942 else
11943 {
11944 switch (cbMemAccess)
11945 {
11946 case 2:
11947 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
11948 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11949 idxRegMemResult, TlbState.idxReg1);
11950 break;
11951 case 4:
11952 Assert(!fIsSegReg);
11953 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
11954 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11955 idxRegMemResult, TlbState.idxReg1);
11956 break;
11957 case 8:
11958 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
11959 idxRegMemResult, TlbState.idxReg1);
11960 break;
11961 default:
11962 AssertFailed();
11963 }
11964 }
11965
11966 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
11967 TlbState.freeRegsAndReleaseVars(pReNative);
11968
11969 /*
11970 * TlbDone:
11971 *
11972 * Commit the new RSP value.
11973 */
11974 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11975 }
11976#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
11977
11978 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
11979 iemNativeRegFreeTmp(pReNative, idxRegRsp);
11980 if (idxRegEffSp != idxRegRsp)
11981 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
11982
11983 /* The value variable is implicitly flushed. */
11984 if (idxRegValue != UINT8_MAX)
11985 iemNativeVarRegisterRelease(pReNative, idxVarValue);
11986 iemNativeVarFreeLocal(pReNative, idxVarValue);
11987
11988 return off;
11989}
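/* Rough shape of the code emitted above for the segmented (non-flat) case
   (illustrative only, labels invented):

        test  ssAttr, X86DESCATTR_D        ; does SS.D match the current CPU mode?
        jcc   UseOtherSpWidth
        ; update RSP & EffSp using the expected width (16- or 32-bit)
     SpUpdateEnd:
        jmp   TlbLookup_N                  ; (or TlbMiss_N when the lookup is skipped)
     UseOtherSpWidth:
        ; update RSP & EffSp using the other width
        jmp   SpUpdateEnd
     TlbMiss_N:
        ; save volatiles, call iemNativeHlpStack[Flat]StoreUxx, restore
        jmp   TlbDone_N
     TlbLookup_N:
        ; inline SS/flat TLB probe plus the actual store (including the Intel PUSH Sreg quirk)
     TlbDone_N:
        ; commit the new RSP to CPUMCTX and free the registers
*/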
11990
11991
11992
11993/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
11994#define IEM_MC_POP_GREG_U16(a_iGReg) \
11995 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
11996 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
11997#define IEM_MC_POP_GREG_U32(a_iGReg) \
11998 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
11999 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
12000#define IEM_MC_POP_GREG_U64(a_iGReg) \
12001 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12002 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
12003
12004#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
12005 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12006 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12007#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
12008 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12009 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
12010
12011#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
12012 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12013 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12014#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
12015 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12016 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
12017
12018
12019DECL_FORCE_INLINE_THROW(uint32_t)
12020iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
12021 uint8_t idxRegTmp)
12022{
12023 /* Use16BitSp: */
12024#ifdef RT_ARCH_AMD64
12025 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12026 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12027 RT_NOREF(idxRegTmp);
12028#else
12029 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
12030 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
12031 /* add tmp, regrsp, #cbMem */
12032 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
12033 /* and tmp, tmp, #0xffff */
12034 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12035 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
12036 /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
12037 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
12038#endif
12039 return off;
12040}
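/* Plain-C equivalent of the 16-bit SP update above (illustrative; uRsp and uEffSp
   stand for the values held in idxRegRsp and idxRegEffSp):

       uEffSp = (uint16_t)uRsp;                                          // the pop reads at the current SP
       uint16_t const uNewSp = (uint16_t)uRsp + cbMem;                   // then SP += cbMem, modulo 64K
       uRsp   = (uRsp & UINT64_C(0xffffffffffff0000)) | uNewSp;          // bits 63:16 of RSP are preserved
*/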
12041
12042
12043DECL_FORCE_INLINE(uint32_t)
12044iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12045{
12046 /* Use32BitSp: */
12047 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12048 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12049 return off;
12050}
12051
12052
12053/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
12054DECL_INLINE_THROW(uint32_t)
12055iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
12056 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12057{
12058 /*
12059 * Assert sanity.
12060 */
12061 Assert(idxGReg < 16);
12062#ifdef VBOX_STRICT
12063 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12064 {
12065 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12066 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12067 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12068 Assert( pfnFunction
12069 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12070 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
12071 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12072 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
12073 : UINT64_C(0xc000b000a0009000) ));
12074 }
12075 else
12076 Assert( pfnFunction
12077 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
12078 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
12079 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
12080 : UINT64_C(0xc000b000a0009000) ));
12081#endif
12082
12083#ifdef VBOX_STRICT
12084 /*
12085 * Check that the fExec flags we've got make sense.
12086 */
12087 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12088#endif
12089
12090 /*
12091 * To keep things simple we have to commit any pending writes first as we
12092 * may end up making calls.
12093 */
12094 off = iemNativeRegFlushPendingWrites(pReNative, off);
12095
12096 /*
12097 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
12098 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
12099 * directly as the effective stack pointer.
12100 * (Code structure is very similar to that of PUSH)
12101 */
12102 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12103 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12104 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12105 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12106 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12107 /** @todo can do a better job picking the register here. For cbMem >= 4 this
12108 * will be the resulting register value. */
12109 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
12110
12111 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12112 if (cBitsFlat != 0)
12113 {
12114 Assert(idxRegEffSp == idxRegRsp);
12115 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12116 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12117 }
12118 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12119 {
12120 Assert(idxRegEffSp != idxRegRsp);
12121 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12122 kIemNativeGstRegUse_ReadOnly);
12123#ifdef RT_ARCH_AMD64
12124 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12125#else
12126 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12127#endif
12128 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12129 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12130 offFixupJumpToUseOtherBitSp = off;
12131 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12132 {
12133/** @todo can skip idxRegRsp updating when popping ESP. */
12134 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12135 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12136 }
12137 else
12138 {
12139 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12140 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12141 }
12142 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12143 }
12144 /* SpUpdateEnd: */
12145 uint32_t const offLabelSpUpdateEnd = off;
12146
12147 /*
12148 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
12149 * we're skipping lookup).
12150 */
12151 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12152 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
12153 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12154 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12155 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12156 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12157 : UINT32_MAX;
12158
12159 if (!TlbState.fSkip)
12160 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12161 else
12162 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12163
12164 /*
12165 * Use16BitSp (or Use32BitSp, whichever was not emitted inline above):
12166 */
12167 if (cBitsFlat == 0)
12168 {
12169#ifdef RT_ARCH_AMD64
12170 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12171#else
12172 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12173#endif
12174 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12175 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12176 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12177 else
12178 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12179 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12181 }
12182
12183 /*
12184 * TlbMiss:
12185 *
12186 * Call helper to do the fetching.
12187 */
12188 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12189
12190#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12191 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12192#else
12193 RT_NOREF(idxInstr);
12194#endif
12195
12196 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12197 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12198 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
12199 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12200
12201
12202 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
12203 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12204 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12205
12206 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12207 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12208
12209 /* Done setting up parameters, make the call. */
12210 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12211
12212 /* Move the return register content to idxRegMemResult. */
12213 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12214 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12215
12216 /* Restore variables and guest shadow registers to volatile registers. */
12217 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12218 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12219
12220#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12221 if (!TlbState.fSkip)
12222 {
12223 /* end of TlbMiss - Jump to the done label. */
12224 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12225 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12226
12227 /*
12228 * TlbLookup:
12229 */
12230 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
12231 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12232
12233 /*
12234 * Emit code to load the value (dereferencing the address in idxRegMemResult and storing the result back into idxRegMemResult).
12235 */
12236 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12237# ifdef VBOX_WITH_STATISTICS
12238 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12239 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12240# endif
12241 switch (cbMem)
12242 {
12243 case 2:
12244 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12245 break;
12246 case 4:
12247 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12248 break;
12249 case 8:
12250 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12251 break;
12252 default:
12253 AssertFailed();
12254 }
12255
12256 TlbState.freeRegsAndReleaseVars(pReNative);
12257
12258 /*
12259 * TlbDone:
12260 *
12261 * Set the new RSP value (FLAT accesses need to calculate it first) and
12262 * commit the popped register value.
12263 */
12264 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12265 }
12266#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12267
12268 if (idxGReg != X86_GREG_xSP)
12269 {
12270 /* Set the register. */
12271 if (cbMem >= sizeof(uint32_t))
12272 {
12273#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
12274 AssertMsg( pReNative->idxCurCall == 0
12275 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
12276 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
12277#endif
12278 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
12279 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
12280 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12281 }
12282 else
12283 {
12284 Assert(cbMem == sizeof(uint16_t));
12285 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
12286 kIemNativeGstRegUse_ForUpdate);
12287 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
12288 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12289 iemNativeRegFreeTmp(pReNative, idxRegDst);
12290 }
12291
12292 /* Complete RSP calculation for FLAT mode. */
12293 if (idxRegEffSp == idxRegRsp)
12294 {
12295 if (cBitsFlat == 64)
12296 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12297 else
12298 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12299 }
12300 }
12301 else
12302 {
12303 /* We're popping RSP, ESP or SP. Only the 16-bit SP case needs a bit of extra work, of course. */
12304 if (cbMem == sizeof(uint64_t))
12305 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
12306 else if (cbMem == sizeof(uint32_t))
12307 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
12308 else
12309 {
12310 if (idxRegEffSp == idxRegRsp)
12311 {
12312 if (cBitsFlat == 64)
12313 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12314 else
12315 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12316 }
12317 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
12318 }
12319 }
12320 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
12321
12322 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12323 if (idxRegEffSp != idxRegRsp)
12324 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12325 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12326
12327 return off;
12328}
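/* Roughly, the code emitted by iemNativeEmitStackPopGReg above amounts to:
 *      EffSp  = flat ? RSP : (SS is a 32-bit segment ? ESP : SP);
 *                          (segmented modes advance RSP by cbMem right here)
 *      value  = read cbMem bytes from the stack at EffSp (SS-relative in segmented modes),
 *               via the inlined TLB lookup or the TlbMiss helper call;
 *      if (iGReg != xSP) { GReg[iGReg] = value (16-bit pops merge into the low word);
 *                          if (flat) RSP += cbMem; }
 *      else                RSP = value (16-bit pops advance RSP first and merge the low word);
 *      CPUMCTX.rsp = RSP;
 */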
12329
12330
12331
12332/*********************************************************************************************************************************
12333* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
12334*********************************************************************************************************************************/
12335
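/* Note on the fAlignMask arguments below: the mask is the natural alignment requirement
   expressed as size - 1 (0 for byte accesses) and is forwarded to iemNativeEmitTlbLookup
   for the alignment check on the guest address. The 80-bit R80/D80 mappings only require
   8-byte alignment (sizeof(uint64_t) - 1). */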
12336#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12337 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12338 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
12339 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
12340
12341#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12342 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12343 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
12344 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
12345
12346#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12347 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12348 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
12349 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
12350
12351#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12352 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12353 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
12354 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
12355
12356
12357#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12358 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12359 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12360 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
12361
12362#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12363 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12364 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12365 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
12366
12367#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12368 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12369 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12370 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12371
12372#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12373 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12374 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12375 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
12376
12377#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12378 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
12379 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12380 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12381
12382
12383#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12384 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12385 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12386 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
12387
12388#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12389 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12390 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12391 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
12392
12393#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12394 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12395 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12396 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12397
12398#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12399 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12400 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12401 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
12402
12403#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12404 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
12405 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12406 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12407
12408
12409#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12410 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12411 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12412 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
12413
12414#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12415 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12416 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12417 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
12418#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12419 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12420 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12421 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12422
12423#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12424 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12425 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12426 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
12427
12428#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12429 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
12430 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12431 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12432
12433
12434#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12435 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12436 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12437 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
12438
12439#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12440 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12441 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12442 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
12443
12444
12445#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12446 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12447 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12448 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
12449
12450#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12451 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12452 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12453 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
12454
12455#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12456 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12457 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12458 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
12459
12460#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12461 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12462 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12463 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
12464
12465
12466
12467#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12468 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12469 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
12470 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
12471
12472#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12473 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12474 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
12475 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
12476
12477#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12478 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12479 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
12480 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
12481
12482#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12483 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12484 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
12485 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
12486
12487
12488#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12489 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12490 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12491 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
12492
12493#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12494 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12495 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12496 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
12497
12498#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12499 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12500 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12501 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12502
12503#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12504 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12505 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12506 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
12507
12508#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
12509 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
12510 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12511 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12512
12513
12514#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12515 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12516 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12517 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
12518
12519#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12520 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12521 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12522 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
12523
12524#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12525 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12526 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12527 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12528
12529#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12530 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12531 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12532 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
12533
12534#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
12535 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
12536 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12537 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12538
12539
12540#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12541 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12542 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12543 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
12544
12545#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12546 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12547 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12548 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
12549
12550#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12551 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12552 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12553 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12554
12555#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12556 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12557 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12558 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
12559
12560#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
12561 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
12562 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12563 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12564
12565
12566#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
12567 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12568 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12569 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
12570
12571#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
12572 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12573 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12574 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
12575
12576
12577#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12578 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12579 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12580 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
12581
12582#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12583 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12584 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12585 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
12586
12587#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12588 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12589 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12590 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
12591
12592#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12593 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12594 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12595 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
12596
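/* The IEM_MC_MEM_FLAT_MAP_XXX variants pass UINT8_MAX instead of a segment register
   index. In iemNativeEmitMemMapCommon below this selects the iemNativeHlpMemFlatMapDataXxx
   helpers and, in strict builds, asserts that the TB was compiled for 64-bit or flat
   32-bit execution, so no segment handling is needed for the address. */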
12597
12598DECL_INLINE_THROW(uint32_t)
12599iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
12600 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
12601 uintptr_t pfnFunction, uint8_t idxInstr)
12602{
12603 /*
12604 * Assert sanity.
12605 */
12606 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
12607 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
12608 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
12609 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12610
12611 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
12612 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
12613 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
12614 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12615
12616 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12617 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
12618 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
12619 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12620
12621 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12622
12623 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12624
12625#ifdef VBOX_STRICT
12626# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
12627 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
12628 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
12629 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
12630 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
12631# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
12632 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
12633 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
12634 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
12635
12636 if (iSegReg == UINT8_MAX)
12637 {
12638 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12639 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12640 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12641 switch (cbMem)
12642 {
12643 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
12644 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
12645 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
12646 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
12647 case 10:
12648 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
12649 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
12650 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
12651 break;
12652 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
12653# if 0
12654 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
12655 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
12656# endif
12657 default: AssertFailed(); break;
12658 }
12659 }
12660 else
12661 {
12662 Assert(iSegReg < 6);
12663 switch (cbMem)
12664 {
12665 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
12666 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
12667 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
12668 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
12669 case 10:
12670 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
12671 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
12672 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
12673 break;
12674 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
12675# if 0
12676 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
12677 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
12678# endif
12679 default: AssertFailed(); break;
12680 }
12681 }
12682# undef IEM_MAP_HLP_FN
12683# undef IEM_MAP_HLP_FN_NO_AT
12684#endif
12685
12686#ifdef VBOX_STRICT
12687 /*
12688 * Check that the fExec flags we've got make sense.
12689 */
12690 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12691#endif
12692
12693 /*
12694 * To keep things simple we have to commit any pending writes first as we
12695 * may end up making calls.
12696 */
12697 off = iemNativeRegFlushPendingWrites(pReNative, off);
12698
12699#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12700 /*
12701 * Move/spill/flush stuff out of call-volatile registers.
12702 * This is the easy way out. We could contain this to the tlb-miss branch
12703 * by saving and restoring active stuff here.
12704 */
12705 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
12706 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
12707#endif
12708
12709 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
12710 while the tlb-miss code path will temporarily put it on the stack.
12711 Set the type to stack here so we don't need to do it twice below. */
12712 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
12713 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
12714 /** @todo use a tmp register from TlbState, since they'll be free after tlb
12715 * lookup is done. */
12716
12717 /*
12718 * Define labels and allocate the result register (trying for the return
12719 * register if we can).
12720 */
12721 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12722 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
12723 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
12724 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
12725 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
12726 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12727 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12728 : UINT32_MAX;
12729//off=iemNativeEmitBrk(pReNative, off, 0);
12730 /*
12731 * Jump to the TLB lookup code.
12732 */
12733 if (!TlbState.fSkip)
12734 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12735
12736 /*
12737 * TlbMiss:
12738 *
12739 * Call helper to do the fetching.
12740 * We flush all guest register shadow copies here.
12741 */
12742 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
12743
12744#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12745 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12746#else
12747 RT_NOREF(idxInstr);
12748#endif
12749
12750#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12751 /* Save variables in volatile registers. */
12752 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
12753 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12754#endif
12755
12756 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
12757 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
12758#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12759 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
12760#else
12761 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12762#endif
12763
12764 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
12765 if (iSegReg != UINT8_MAX)
12766 {
12767 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
12768 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
12769 }
12770
12771 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
12772 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
12773 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
12774
12775 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12776 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12777
12778 /* Done setting up parameters, make the call. */
12779 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12780
12781 /*
12782 * Put the output in the right registers.
12783 */
12784 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
12785 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12786 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12787
12788#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12789 /* Restore variables and guest shadow registers to volatile registers. */
12790 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12791 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12792#endif
12793
12794 Assert(pReNative->Core.aVars[idxVarUnmapInfo].idxReg == idxRegUnmapInfo);
12795 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
12796
12797#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12798 if (!TlbState.fSkip)
12799 {
12800 /* end of TlbMiss - Jump to the done label. */
12801 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12802 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12803
12804 /*
12805 * TlbLookup:
12806 */
12807 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
12808 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12809# ifdef VBOX_WITH_STATISTICS
12810 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
12811 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
12812# endif
12813
12814 /* [idxVarUnmapInfo] = 0; */
12815 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
12816
12817 /*
12818 * TlbDone:
12819 */
12820 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12821
12822 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
12823
12824# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12825 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
12826 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12827# endif
12828 }
12829#else
12830 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
12831#endif
12832
12833 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
12834 iemNativeVarRegisterRelease(pReNative, idxVarMem);
12835
12836 return off;
12837}
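/* A mapped access always comes as a pair: one of the IEM_MC_MEM[_FLAT]_MAP_XXX statements
   above yields the host pointer plus a bUnmapInfo cookie, and the matching
   IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statement below consumes that cookie (and implicitly
   frees the variable) once the access is done. */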
12838
12839
12840#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
12841 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
12842 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
12843
12844#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
12845 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
12846 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
12847
12848#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
12849 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
12850 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
12851
12852#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
12853 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
12854 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
12855
12856DECL_INLINE_THROW(uint32_t)
12857iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
12858 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
12859{
12860 /*
12861 * Assert sanity.
12862 */
12863 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
12864 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
12865 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
12866 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
12867#ifdef VBOX_STRICT
12868 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
12869 {
12870 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
12871 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
12872 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
12873 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
12874 case IEM_ACCESS_TYPE_WRITE:
12875 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
12876 case IEM_ACCESS_TYPE_READ:
12877 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
12878 default: AssertFailed();
12879 }
12880#else
12881 RT_NOREF(fAccess);
12882#endif
12883
12884 /*
12885 * To keep things simple we have to commit any pending writes first as we
12886 * may end up making calls (there shouldn't be any at this point, so this
12887 * is just for consistency).
12888 */
12889 /** @todo we could postpone this till we make the call and reload the
12890 * registers after returning from the call. Not sure if that's sensible or
12891 * not, though. */
12892 off = iemNativeRegFlushPendingWrites(pReNative, off);
12893
12894 /*
12895 * Move/spill/flush stuff out of call-volatile registers.
12896 *
12897 * We exclude any register holding the bUnmapInfo variable, as we'll be
12898 * checking it after returning from the call and will free it afterwards.
12899 */
12900 /** @todo save+restore active registers and maybe guest shadows in miss
12901 * scenario. */
12902 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
12903
12904 /*
12905 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
12906 * to call the unmap helper function.
12907 *
12908 * The likelihood of it being zero is higher than for the TLB hit when doing
12909 * the mapping, as a TLB miss for a well aligned and unproblematic memory
12910 * access should also end up with a mapping that won't need special unmapping.
12911 */
12912 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
12913 * should speed up things for the pure interpreter as well when TLBs
12914 * are enabled. */
12915#ifdef RT_ARCH_AMD64
12916 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
12917 {
12918 /* test byte [rbp - xxx], 0ffh */
12919 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
12920 pbCodeBuf[off++] = 0xf6;
12921 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
12922 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
12923 pbCodeBuf[off++] = 0xff;
12924 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12925 }
12926 else
12927#endif
12928 {
12929 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
12930 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
12931 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
12932 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
12933 }
12934 uint32_t const offJmpFixup = off;
12935 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
12936
12937 /*
12938 * Call the unmap helper function.
12939 */
12940#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
12941 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12942#else
12943 RT_NOREF(idxInstr);
12944#endif
12945
12946 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
12947 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
12948 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12949
12950 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12951 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12952
12953 /* Done setting up parameters, make the call. */
12954 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12955
12956 /* The bUnmapInfo variable is implicitly freed by these MCs. */
12957 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
12958
12959 /*
12960 * Done, just fixup the jump for the non-call case.
12961 */
12962 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
12963
12964 return off;
12965}
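/* Roughly, the code emitted by iemNativeEmitMemCommitAndUnmap above amounts to:
 *      if (bUnmapInfo != 0)   // the TLB-hit path of the mapping code stores zero here
 *          iemNativeHlpMemCommitAndUnmapXxx(pVCpu, bUnmapInfo);
 * so only mappings that actually need committing/unmapping pay for the helper call.
 */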
12966
12967
12968
12969/*********************************************************************************************************************************
12970* State and Exceptions *
12971*********************************************************************************************************************************/
12972
12973#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12974#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
12975
12976#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12977#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12978#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
12979
12980#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12981#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12982#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
12983
12984
12985DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
12986{
12987 /** @todo this needs a lot more work later. */
12988 RT_NOREF(pReNative, fForChange);
12989 return off;
12990}
12991
12992
12993/*********************************************************************************************************************************
12994* The native code generator functions for each MC block. *
12995*********************************************************************************************************************************/
12996
12997
12998/*
12999 * Include g_apfnIemNativeRecompileFunctions and associated functions.
13000 *
13001 * This should probably live in its own file later, but let's see what the
13002 * compile times turn out to be first.
13003 */
13004#include "IEMNativeFunctions.cpp.h"
13005
13006
13007
13008/*********************************************************************************************************************************
13009* Recompiler Core. *
13010*********************************************************************************************************************************/
13011
13012
13013/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
13014static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
13015{
13016 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
13017 pDis->cbCachedInstr += cbMaxRead;
13018 RT_NOREF(cbMinRead);
13019 return VERR_NO_DATA;
13020}
13021
13022
13023/**
13024 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
13025 * @returns pszBuf.
13026 * @param fFlags The flags.
13027 * @param pszBuf The output buffer.
13028 * @param cbBuf The output buffer size. At least 32 bytes.
13029 */
13030DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
13031{
13032 Assert(cbBuf >= 32);
13033 static RTSTRTUPLE const s_aModes[] =
13034 {
13035 /* [00] = */ { RT_STR_TUPLE("16BIT") },
13036 /* [01] = */ { RT_STR_TUPLE("32BIT") },
13037 /* [02] = */ { RT_STR_TUPLE("!2!") },
13038 /* [03] = */ { RT_STR_TUPLE("!3!") },
13039 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
13040 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
13041 /* [06] = */ { RT_STR_TUPLE("!6!") },
13042 /* [07] = */ { RT_STR_TUPLE("!7!") },
13043 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
13044 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
13045 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
13046 /* [0b] = */ { RT_STR_TUPLE("!b!") },
13047 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
13048 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
13049 /* [0e] = */ { RT_STR_TUPLE("!e!") },
13050 /* [0f] = */ { RT_STR_TUPLE("!f!") },
13051 /* [10] = */ { RT_STR_TUPLE("!10!") },
13052 /* [11] = */ { RT_STR_TUPLE("!11!") },
13053 /* [12] = */ { RT_STR_TUPLE("!12!") },
13054 /* [13] = */ { RT_STR_TUPLE("!13!") },
13055 /* [14] = */ { RT_STR_TUPLE("!14!") },
13056 /* [15] = */ { RT_STR_TUPLE("!15!") },
13057 /* [16] = */ { RT_STR_TUPLE("!16!") },
13058 /* [17] = */ { RT_STR_TUPLE("!17!") },
13059 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
13060 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
13061 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
13062 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
13063 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
13064 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
13065 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
13066 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
13067 };
13068 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
13069 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
13070 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
13071
13072 pszBuf[off++] = ' ';
13073 pszBuf[off++] = 'C';
13074 pszBuf[off++] = 'P';
13075 pszBuf[off++] = 'L';
13076 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
13077 Assert(off < 32);
13078
13079 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
13080
13081 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
13082 {
13083 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
13084 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
13085 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
13086 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
13087 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
13088 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
13089 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
13090 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
13091 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
13092 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
13093 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
13094 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
13095 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
13096 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
13097 };
13098 if (fFlags)
13099 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
13100 if (s_aFlags[i].fFlag & fFlags)
13101 {
13102 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
13103 pszBuf[off++] = ' ';
13104 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
13105 off += s_aFlags[i].cchName;
13106 fFlags &= ~s_aFlags[i].fFlag;
13107 if (!fFlags)
13108 break;
13109 }
13110 pszBuf[off] = '\0';
13111
13112 return pszBuf;
13113}
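/* Example: a TB compiled for 64-bit ring-0 code with CS limit checking enabled would be
   formatted as "64BIT CPL0 CS_LIM_CHECKS", i.e. the mode name, the CPL digit and then
   the names of any remaining set flags. */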
13114
13115
13116DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
13117{
13118 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
13119#if defined(RT_ARCH_AMD64)
13120 static const char * const a_apszMarkers[] =
13121 {
13122 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
13123 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
13124 };
13125#endif
13126
13127 char szDisBuf[512];
13128 DISSTATE Dis;
13129 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
13130 uint32_t const cNative = pTb->Native.cInstructions;
13131 uint32_t offNative = 0;
13132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13133 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
13134#endif
13135 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
13136 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
13137 : DISCPUMODE_64BIT;
13138#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13139 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
13140#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13141 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
13142#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13143# error "Port me"
13144#else
13145 csh hDisasm = ~(size_t)0;
13146# if defined(RT_ARCH_AMD64)
13147 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
13148# elif defined(RT_ARCH_ARM64)
13149 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
13150# else
13151# error "Port me"
13152# endif
13153 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
13154#endif
13155
13156 /*
13157 * Print TB info.
13158 */
13159 pHlp->pfnPrintf(pHlp,
13160 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
13161 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
13162 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
13163 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
13164#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13165 if (pDbgInfo && pDbgInfo->cEntries > 1)
13166 {
13167 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
13168
13169 /*
13170 * This disassembly is driven by the debug info which follows the native
13171 * code and indicates when it starts with the next guest instructions,
13172 * where labels are and such things.
13173 */
13174 uint32_t idxThreadedCall = 0;
13175 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
13176 uint8_t idxRange = UINT8_MAX;
13177 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
13178 uint32_t offRange = 0;
13179 uint32_t offOpcodes = 0;
13180 uint32_t const cbOpcodes = pTb->cbOpcodes;
13181 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
13182 uint32_t const cDbgEntries = pDbgInfo->cEntries;
13183 uint32_t iDbgEntry = 1;
13184 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
13185
13186 while (offNative < cNative)
13187 {
13188 /* If we're at or have passed the point where the next chunk of debug
13189 info starts, process it. */
13190 if (offDbgNativeNext <= offNative)
13191 {
13192 offDbgNativeNext = UINT32_MAX;
13193 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
13194 {
13195 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
13196 {
13197 case kIemTbDbgEntryType_GuestInstruction:
13198 {
13199 /* Did the exec flag change? */
13200 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
13201 {
13202 pHlp->pfnPrintf(pHlp,
13203 " fExec change %#08x -> %#08x %s\n",
13204 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
13205 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
13206 szDisBuf, sizeof(szDisBuf)));
13207 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
13208 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
13209 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
13210 : DISCPUMODE_64BIT;
13211 }
13212
13213                         /* New opcode range? We need to fend off a spurious debug info entry here for cases
13214 where the compilation was aborted before the opcode was recorded and the actual
13215 instruction was translated to a threaded call. This may happen when we run out
13216 of ranges, or when some complicated interrupts/FFs are found to be pending or
13217 similar. So, we just deal with it here rather than in the compiler code as it
13218 is a lot simpler to do here. */
13219 if ( idxRange == UINT8_MAX
13220 || idxRange >= cRanges
13221 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
13222 {
13223 idxRange += 1;
13224 if (idxRange < cRanges)
13225 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
13226 else
13227 continue;
13228 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
13229 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
13230 + (pTb->aRanges[idxRange].idxPhysPage == 0
13231 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
13232 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
13233 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
13234 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
13235 pTb->aRanges[idxRange].idxPhysPage);
13236 GCPhysPc += offRange;
13237 }
13238
13239 /* Disassemble the instruction. */
13240 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
13241 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
13242 uint32_t cbInstr = 1;
13243 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
13244 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
13245 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13246 if (RT_SUCCESS(rc))
13247 {
13248 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13249 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13250 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13251 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13252
13253 static unsigned const s_offMarker = 55;
13254 static char const s_szMarker[] = " ; <--- guest";
13255 if (cch < s_offMarker)
13256 {
13257 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
13258 cch = s_offMarker;
13259 }
13260 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
13261 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
13262
13263 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
13264 }
13265 else
13266 {
13267 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
13268 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
13269 cbInstr = 1;
13270 }
13271 GCPhysPc += cbInstr;
13272 offOpcodes += cbInstr;
13273 offRange += cbInstr;
13274 continue;
13275 }
13276
13277 case kIemTbDbgEntryType_ThreadedCall:
13278 pHlp->pfnPrintf(pHlp,
13279 " Call #%u to %s (%u args) - %s\n",
13280 idxThreadedCall,
13281 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
13282 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
13283 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
13284 idxThreadedCall++;
13285 continue;
13286
13287 case kIemTbDbgEntryType_GuestRegShadowing:
13288 {
13289 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
13290 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
13291 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
13292 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
13293 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
13294 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
13295 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
13296 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
13297 else
13298 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
13299 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
13300 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
13301 continue;
13302 }
13303
13304 case kIemTbDbgEntryType_Label:
13305 {
13306 const char *pszName = "what_the_fudge";
13307 const char *pszComment = "";
13308 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
13309 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
13310 {
13311 case kIemNativeLabelType_Return:
13312 pszName = "Return";
13313 break;
13314 case kIemNativeLabelType_ReturnBreak:
13315 pszName = "ReturnBreak";
13316 break;
13317 case kIemNativeLabelType_ReturnWithFlags:
13318 pszName = "ReturnWithFlags";
13319 break;
13320 case kIemNativeLabelType_NonZeroRetOrPassUp:
13321 pszName = "NonZeroRetOrPassUp";
13322 break;
13323 case kIemNativeLabelType_RaiseGp0:
13324 pszName = "RaiseGp0";
13325 break;
13326 case kIemNativeLabelType_ObsoleteTb:
13327 pszName = "ObsoleteTb";
13328 break;
13329 case kIemNativeLabelType_NeedCsLimChecking:
13330 pszName = "NeedCsLimChecking";
13331 break;
13332 case kIemNativeLabelType_CheckBranchMiss:
13333 pszName = "CheckBranchMiss";
13334 break;
13335 case kIemNativeLabelType_If:
13336 pszName = "If";
13337 fNumbered = true;
13338 break;
13339 case kIemNativeLabelType_Else:
13340 pszName = "Else";
13341 fNumbered = true;
13342 pszComment = " ; regs state restored pre-if-block";
13343 break;
13344 case kIemNativeLabelType_Endif:
13345 pszName = "Endif";
13346 fNumbered = true;
13347 break;
13348 case kIemNativeLabelType_CheckIrq:
13349 pszName = "CheckIrq_CheckVM";
13350 fNumbered = true;
13351 break;
13352 case kIemNativeLabelType_TlbLookup:
13353 pszName = "TlbLookup";
13354 fNumbered = true;
13355 break;
13356 case kIemNativeLabelType_TlbMiss:
13357 pszName = "TlbMiss";
13358 fNumbered = true;
13359 break;
13360 case kIemNativeLabelType_TlbDone:
13361 pszName = "TlbDone";
13362 fNumbered = true;
13363 break;
13364 case kIemNativeLabelType_Invalid:
13365 case kIemNativeLabelType_End:
13366 break;
13367 }
13368 if (fNumbered)
13369 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
13370 else
13371 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
13372 continue;
13373 }
13374
13375 case kIemTbDbgEntryType_NativeOffset:
13376 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
13377 Assert(offDbgNativeNext > offNative);
13378 break;
13379
13380 default:
13381 AssertFailed();
13382 }
13383 iDbgEntry++;
13384 break;
13385 }
13386 }
13387
13388 /*
13389 * Disassemble the next native instruction.
13390 */
13391 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
13392# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13393 uint32_t cbInstr = sizeof(paNative[0]);
13394 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
13395 if (RT_SUCCESS(rc))
13396 {
13397# if defined(RT_ARCH_AMD64)
13398 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
13399 {
13400 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
13401 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
13402 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
13403 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
13404 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
13405 uInfo & 0x8000 ? "recompiled" : "todo");
13406 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
13407 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
13408 else
13409 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
13410 }
13411 else
13412# endif
13413 {
13414# ifdef RT_ARCH_AMD64
13415 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13416 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13417 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13418 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13419# elif defined(RT_ARCH_ARM64)
13420 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
13421 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13422 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13423# else
13424# error "Port me"
13425# endif
13426 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
13427 }
13428 }
13429 else
13430 {
13431# if defined(RT_ARCH_AMD64)
13432 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
13433 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
13434# elif defined(RT_ARCH_ARM64)
13435 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
13436# else
13437# error "Port me"
13438# endif
13439 cbInstr = sizeof(paNative[0]);
13440 }
13441 offNative += cbInstr / sizeof(paNative[0]);
13442
13443# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13444 cs_insn *pInstr;
13445 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
13446 (uintptr_t)pNativeCur, 1, &pInstr);
13447 if (cInstrs > 0)
13448 {
13449 Assert(cInstrs == 1);
13450# if defined(RT_ARCH_AMD64)
13451 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
13452 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
13453# else
13454 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
13455 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
13456# endif
13457 offNative += pInstr->size / sizeof(*pNativeCur);
13458 cs_free(pInstr, cInstrs);
13459 }
13460 else
13461 {
13462# if defined(RT_ARCH_AMD64)
13463 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
13464                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
13465# else
13466 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
13467# endif
13468 offNative++;
13469 }
13470# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13471 }
13472 }
13473 else
13474#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
13475 {
13476 /*
13477 * No debug info, just disassemble the x86 code and then the native code.
13478 *
13479 * First the guest code:
13480 */
13481 for (unsigned i = 0; i < pTb->cRanges; i++)
13482 {
13483 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
13484 + (pTb->aRanges[i].idxPhysPage == 0
13485 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
13486 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
13487 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
13488 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
13489 unsigned off = pTb->aRanges[i].offOpcodes;
13490 /** @todo this ain't working when crossing pages! */
13491 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
13492 while (off < cbOpcodes)
13493 {
13494 uint32_t cbInstr = 1;
13495 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
13496 &pTb->pabOpcodes[off], cbOpcodes - off,
13497 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13498 if (RT_SUCCESS(rc))
13499 {
13500 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13501 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13502 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13503 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13504 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
13505 GCPhysPc += cbInstr;
13506 off += cbInstr;
13507 }
13508 else
13509 {
13510 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
13511 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
13512 break;
13513 }
13514 }
13515 }
13516
13517 /*
13518 * Then the native code:
13519 */
13520 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
13521 while (offNative < cNative)
13522 {
13523 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
13524# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13525 uint32_t cbInstr = sizeof(paNative[0]);
13526 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
13527 if (RT_SUCCESS(rc))
13528 {
13529# if defined(RT_ARCH_AMD64)
13530 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
13531 {
13532 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
13533 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
13534 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
13535 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
13536 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
13537 uInfo & 0x8000 ? "recompiled" : "todo");
13538 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
13539 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
13540 else
13541 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
13542 }
13543 else
13544# endif
13545 {
13546# ifdef RT_ARCH_AMD64
13547 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13548 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13549 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13550 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13551# elif defined(RT_ARCH_ARM64)
13552 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
13553 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13554 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13555# else
13556# error "Port me"
13557# endif
13558 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
13559 }
13560 }
13561 else
13562 {
13563# if defined(RT_ARCH_AMD64)
13564 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
13565 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
13566# else
13567 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
13568# endif
13569 cbInstr = sizeof(paNative[0]);
13570 }
13571 offNative += cbInstr / sizeof(paNative[0]);
13572
13573# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13574 cs_insn *pInstr;
13575 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
13576 (uintptr_t)pNativeCur, 1, &pInstr);
13577 if (cInstrs > 0)
13578 {
13579 Assert(cInstrs == 1);
13580# if defined(RT_ARCH_AMD64)
13581 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
13582 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
13583# else
13584 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
13585 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
13586# endif
13587 offNative += pInstr->size / sizeof(*pNativeCur);
13588 cs_free(pInstr, cInstrs);
13589 }
13590 else
13591 {
13592# if defined(RT_ARCH_AMD64)
13593 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
13594                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
13595# else
13596 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
13597# endif
13598 offNative++;
13599 }
13600# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13601 }
13602 }
13603
13604#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13605 /* Cleanup. */
13606 cs_close(&hDisasm);
13607#endif
13608}
13609
13610
13611/**
13612 * Recompiles the given threaded TB into a native one.
13613 *
13614 * In case of failure the translation block will be returned as-is.
13615 *
13616 * @returns pTb.
13617 * @param pVCpu The cross context virtual CPU structure of the calling
13618 * thread.
13619  * @param pTb The threaded translation block to recompile to native.
13620 */
13621DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
13622{
13623 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
13624
13625 /*
13626      * The first time through we allocate the recompiler state; the other times
13627      * we just need to reset it before using it again.
13628 */
13629 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
13630 if (RT_LIKELY(pReNative))
13631 iemNativeReInit(pReNative, pTb);
13632 else
13633 {
13634 pReNative = iemNativeInit(pVCpu, pTb);
13635 AssertReturn(pReNative, pTb);
13636 }
13637
13638#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
13639 /*
13640 * First do liveness analysis. This is done backwards.
13641 */
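    /* The entry for the final call is seeded with "all unused"; the loop below then walks
       the calls from last to first, each liveness function deriving the entry for the
       preceding call from the call itself and its own entry.  The recompiler functions can
       later consult these entries (via idxCurCall) to see how the remaining calls use the
       guest state. */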
13642 {
13643 uint32_t idxCall = pTb->Thrd.cCalls;
13644 if (idxCall <= pReNative->cLivenessEntriesAlloc)
13645 { /* likely */ }
13646 else
13647 {
13648 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
13649 while (idxCall > cAlloc)
13650 cAlloc *= 2;
13651 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
13652 AssertReturn(pvNew, pTb);
13653 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
13654 pReNative->cLivenessEntriesAlloc = cAlloc;
13655 }
13656 AssertReturn(idxCall > 0, pTb);
13657 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
13658
13659 /* The initial (final) entry. */
13660 idxCall--;
13661 paLivenessEntries[idxCall].Bit0.bm64 = IEMLIVENESSBIT0_ALL_UNUSED;
13662 paLivenessEntries[idxCall].Bit1.bm64 = IEMLIVENESSBIT1_ALL_UNUSED;
13663
13664 /* Loop backwards thru the calls and fill in the other entries. */
13665 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
13666 while (idxCall > 0)
13667 {
13668 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
13669 if (pfnLiveness)
13670 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
13671 else
13672 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
13673 pCallEntry--;
13674 idxCall--;
13675 }
13676
13677# ifdef VBOX_WITH_STATISTICS
13678         /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
13679            to 'clobbered' rather than 'input'. */
13680 /** @todo */
13681# endif
13682 }
13683#endif
13684
13685 /*
13686      * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
13687      * so that we can abort if an error happens.
13688 */
13689 uint32_t cCallsLeft = pTb->Thrd.cCalls;
13690#ifdef LOG_ENABLED
13691 uint32_t const cCallsOrg = cCallsLeft;
13692#endif
13693 uint32_t off = 0;
13694 int rc = VINF_SUCCESS;
13695 IEMNATIVE_TRY_SETJMP(pReNative, rc)
13696 {
13697 /*
13698 * Emit prolog code (fixed).
13699 */
13700 off = iemNativeEmitProlog(pReNative, off);
13701
13702 /*
13703 * Convert the calls to native code.
13704 */
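        /* Each threaded call entry is looked up in g_apfnIemNativeRecompileFunctions: when
           a native emitter exists it is invoked directly, otherwise iemNativeEmitThreadedCall
           emits a call to the threaded helper, so a TB may mix native and threaded pieces. */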
13705#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13706 int32_t iGstInstr = -1;
13707#endif
13708#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
13709 uint32_t cThreadedCalls = 0;
13710 uint32_t cRecompiledCalls = 0;
13711#endif
13712#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
13713 uint32_t idxCurCall = 0;
13714#endif
13715 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
13716 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
13717 while (cCallsLeft-- > 0)
13718 {
13719 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
13720#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
13721 pReNative->idxCurCall = idxCurCall;
13722#endif
13723
13724 /*
13725 * Debug info and assembly markup.
13726 */
13727#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
13728 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
13729 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
13730#endif
13731#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13732 iemNativeDbgInfoAddNativeOffset(pReNative, off);
13733 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
13734 {
13735 if (iGstInstr < (int32_t)pTb->cInstructions)
13736 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
13737 else
13738 Assert(iGstInstr == pTb->cInstructions);
13739 iGstInstr = pCallEntry->idxInstr;
13740 }
13741 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
13742#endif
13743#if defined(VBOX_STRICT)
13744 off = iemNativeEmitMarker(pReNative, off,
13745 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
13746#endif
13747#if defined(VBOX_STRICT)
13748 iemNativeRegAssertSanity(pReNative);
13749#endif
13750
13751 /*
13752 * Actual work.
13753 */
13754 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
13755 pfnRecom ? "(recompiled)" : "(todo)"));
13756 if (pfnRecom) /** @todo stats on this. */
13757 {
13758 off = pfnRecom(pReNative, off, pCallEntry);
13759 STAM_REL_STATS({cRecompiledCalls++;});
13760 }
13761 else
13762 {
13763 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
13764 STAM_REL_STATS({cThreadedCalls++;});
13765 }
13766 Assert(off <= pReNative->cInstrBufAlloc);
13767 Assert(pReNative->cCondDepth == 0);
13768
13769#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
13770 if (LogIs2Enabled())
13771 {
13772 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
13773 static const char s_achState[] = "CUXI";
13774
13775 char szGpr[17];
13776 for (unsigned i = 0; i < 16; i++)
13777 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
13778 szGpr[16] = '\0';
13779
13780 char szSegBase[X86_SREG_COUNT + 1];
13781 char szSegLimit[X86_SREG_COUNT + 1];
13782 char szSegAttrib[X86_SREG_COUNT + 1];
13783 char szSegSel[X86_SREG_COUNT + 1];
13784 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
13785 {
13786 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
13787 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
13788 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
13789 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
13790 }
13791 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
13792 = szSegSel[X86_SREG_COUNT] = '\0';
13793
13794 char szEFlags[8];
13795 for (unsigned i = 0; i < 7; i++)
13796 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
13797 szEFlags[7] = '\0';
13798
13799 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
13800 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
13801 }
13802#endif
13803
13804 /*
13805 * Advance.
13806 */
13807 pCallEntry++;
13808#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
13809 idxCurCall++;
13810#endif
13811 }
13812
13813 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
13814 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
13815 if (!cThreadedCalls)
13816 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
13817
13818 /*
13819 * Emit the epilog code.
13820 */
13821 uint32_t idxReturnLabel;
13822 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
13823
13824 /*
13825 * Generate special jump labels.
13826 */
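        /* Tail code for a special label type is only emitted if at least one jump to it was
           requested during recompilation, as recorded in the bmLabelTypes bitmap; each
           emitter is handed the common return label produced by the epilog above. */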
13827 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
13828 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
13829 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
13830 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
13831 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
13832 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
13833 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
13834 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
13835 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
13836 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
13837 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
13838 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
13839 }
13840 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
13841 {
13842 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
13843 return pTb;
13844 }
13845 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
13846 Assert(off <= pReNative->cInstrBufAlloc);
13847
13848 /*
13849      * Make sure all labels have been defined.
13850 */
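    /* The strict-build check below catches labels that never received an offset inside the
       emitted code, i.e. labels that were referenced but never defined. */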
13851 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
13852#ifdef VBOX_STRICT
13853 uint32_t const cLabels = pReNative->cLabels;
13854 for (uint32_t i = 0; i < cLabels; i++)
13855 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
13856#endif
13857
13858 /*
13859 * Allocate executable memory, copy over the code we've generated.
13860 */
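    /* Any delayed TB frees are processed first so the executable-memory allocator can
       reclaim their space; then off * sizeof(IEMNATIVEINSTR) bytes are allocated and the
       generated code is copied over from the temporary instruction buffer. */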
13861 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
13862 if (pTbAllocator->pDelayedFreeHead)
13863 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
13864
13865 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
13866 AssertReturn(paFinalInstrBuf, pTb);
13867 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
13868
13869 /*
13870 * Apply fixups.
13871 */
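    /* Each fixup records a spot in the instruction buffer that references a label and could
       not be finalized while emitting.  On AMD64/X86 a 32-bit displacement relative to the
       fixup position (plus the recorded addend) is written; on ARM64 the branch immediate is
       masked into the instruction word (imm26 at bit 0, imm19/imm14 at bit 5).
       Hypothetical example (values made up for illustration): a Rel32 fixup at byte offset
       0x100 with offAddend -4 referring to a label at offset 0x120 is patched to
       0x120 - 0x100 + (-4) = 0x1c. */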
13872 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
13873 uint32_t const cFixups = pReNative->cFixups;
13874 for (uint32_t i = 0; i < cFixups; i++)
13875 {
13876 Assert(paFixups[i].off < off);
13877 Assert(paFixups[i].idxLabel < cLabels);
13878 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
13879 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
13880 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
13881 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
13882 switch (paFixups[i].enmType)
13883 {
13884#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
13885 case kIemNativeFixupType_Rel32:
13886 Assert(paFixups[i].off + 4 <= off);
13887 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13888 continue;
13889
13890#elif defined(RT_ARCH_ARM64)
13891 case kIemNativeFixupType_RelImm26At0:
13892 {
13893 Assert(paFixups[i].off < off);
13894 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13895 Assert(offDisp >= -262144 && offDisp < 262144);
13896 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
13897 continue;
13898 }
13899
13900 case kIemNativeFixupType_RelImm19At5:
13901 {
13902 Assert(paFixups[i].off < off);
13903 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13904 Assert(offDisp >= -262144 && offDisp < 262144);
13905 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
13906 continue;
13907 }
13908
13909 case kIemNativeFixupType_RelImm14At5:
13910 {
13911 Assert(paFixups[i].off < off);
13912 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13913 Assert(offDisp >= -8192 && offDisp < 8192);
13914 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
13915 continue;
13916 }
13917
13918#endif
13919 case kIemNativeFixupType_Invalid:
13920 case kIemNativeFixupType_End:
13921 break;
13922 }
13923 AssertFailed();
13924 }
13925
13926 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
13927 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
13928
13929 /*
13930 * Convert the translation block.
13931 */
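    /* The TB is converted in place: the threaded call table is freed, the Native union
       members are pointed at the executable copy, the type flag switches from threaded to
       native, and (when enabled) the debug info is duplicated so it outlives the recompiler
       state.  The allocator's threaded/native TB counts are adjusted accordingly. */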
13932 RTMemFree(pTb->Thrd.paCalls);
13933 pTb->Native.paInstructions = paFinalInstrBuf;
13934 pTb->Native.cInstructions = off;
13935 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
13936#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13937 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so not return check. */
13938 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
13939#endif
13940
13941 Assert(pTbAllocator->cThreadedTbs > 0);
13942 pTbAllocator->cThreadedTbs -= 1;
13943 pTbAllocator->cNativeTbs += 1;
13944 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
13945
13946#ifdef LOG_ENABLED
13947 /*
13948 * Disassemble to the log if enabled.
13949 */
13950 if (LogIs3Enabled())
13951 {
13952 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
13953 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
13954# ifdef DEBUG_bird
13955 RTLogFlush(NULL);
13956# endif
13957 }
13958#endif
13959 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
13960
13961 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
13962 return pTb;
13963}
13964