source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp @ 103376

Last change on this file: revision 103376, checked in by vboxsync, 13 months ago:
VMM/IEM: Experimental alternative to throw/longjmp when executing native TBs. bugref:10370
1/* $Id: IEMAllN8veRecompiler.cpp 103376 2024-02-15 01:09:23Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
136#endif
137#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
138static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
139static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
140#endif
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
142DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
143 IEMNATIVEGSTREG enmGstReg, uint32_t off);
144DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
145
146
147/*********************************************************************************************************************************
148* Executable Memory Allocator *
149*********************************************************************************************************************************/
150/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151 * Use an alternative chunk sub-allocator that does not store internal data
152 * in the chunk.
153 *
154 * Using RTHeapSimple is not practical on newer darwin systems where
155 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
156 * memory. We would have to change the protection of the whole chunk for
157 * every call to RTHeapSimple, which would be rather expensive.
158 *
159 * This alternative implementation lets us restrict page protection modifications
160 * to the pages backing the executable memory we just allocated.
161 */
162#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
163/** The chunk sub-allocation unit size in bytes. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
165/** The chunk sub-allocation unit size as a shift factor. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
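/* Note: the unit size and shift factor above must stay in sync (128 == 1 << 7).
 * As an illustration, a 600 byte request rounds up to (600 + 128 - 1) >> 7 = 5
 * units, i.e. 640 bytes; see iemExecMemAllocatorAllocInChunk below for the
 * actual calculation. */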
167
168#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
169# ifdef IEMNATIVE_USE_GDB_JIT
170# define IEMNATIVE_USE_GDB_JIT_ET_DYN
171
172/** GDB JIT: Code entry. */
173typedef struct GDBJITCODEENTRY
174{
175 struct GDBJITCODEENTRY *pNext;
176 struct GDBJITCODEENTRY *pPrev;
177 uint8_t *pbSymFile;
178 uint64_t cbSymFile;
179} GDBJITCODEENTRY;
180
181/** GDB JIT: Actions. */
182typedef enum GDBJITACTIONS : uint32_t
183{
184 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
185} GDBJITACTIONS;
186
187/** GDB JIT: Descriptor. */
188typedef struct GDBJITDESCRIPTOR
189{
190 uint32_t uVersion;
191 GDBJITACTIONS enmAction;
192 GDBJITCODEENTRY *pRelevant;
193 GDBJITCODEENTRY *pHead;
194 /** Our addition: */
195 GDBJITCODEENTRY *pTail;
196} GDBJITDESCRIPTOR;
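/* Note: this follows the standard GDB JIT interface: the debugger looks up the
 * __jit_debug_descriptor symbol and plants a breakpoint on
 * __jit_debug_register_code(); the JIT links a new GDBJITCODEENTRY into the
 * list, sets enmAction and calls that function so the debugger can pick up the
 * in-memory ELF image.  See the registration code near the end of
 * iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk below. */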
197
198/** GDB JIT: Our simple symbol file data. */
199typedef struct GDBJITSYMFILE
200{
201 Elf64_Ehdr EHdr;
202# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
203 Elf64_Shdr aShdrs[5];
204# else
205 Elf64_Shdr aShdrs[7];
206 Elf64_Phdr aPhdrs[2];
207# endif
208 /** The dwarf ehframe data for the chunk. */
209 uint8_t abEhFrame[512];
210 char szzStrTab[128];
211 Elf64_Sym aSymbols[3];
212# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
213 Elf64_Sym aDynSyms[2];
214 Elf64_Dyn aDyn[6];
215# endif
216} GDBJITSYMFILE;
217
218extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
219extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
220
221/** Init once for g_IemNativeGdbJitLock. */
222static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
223/** Init once for the critical section. */
224static RTCRITSECT g_IemNativeGdbJitLock;
225
226/** GDB reads the info here. */
227GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
228
229/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
230DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
231{
232 ASMNopPause();
233}
234
235/** @callback_method_impl{FNRTONCE} */
236static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
237{
238 RT_NOREF(pvUser);
239 return RTCritSectInit(&g_IemNativeGdbJitLock);
240}
241
242
243# endif /* IEMNATIVE_USE_GDB_JIT */
244
245/**
246 * Per-chunk unwind info for non-windows hosts.
247 */
248typedef struct IEMEXECMEMCHUNKEHFRAME
249{
250# ifdef IEMNATIVE_USE_LIBUNWIND
251 /** The offset of the FDA into abEhFrame. */
252 uintptr_t offFda;
253# else
254 /** 'struct object' storage area. */
255 uint8_t abObject[1024];
256# endif
257# ifdef IEMNATIVE_USE_GDB_JIT
258# if 0
259 /** The GDB JIT 'symbol file' data. */
260 GDBJITSYMFILE GdbJitSymFile;
261# endif
262 /** The GDB JIT list entry. */
263 GDBJITCODEENTRY GdbJitEntry;
264# endif
265 /** The dwarf ehframe data for the chunk. */
266 uint8_t abEhFrame[512];
267} IEMEXECMEMCHUNKEHFRAME;
268/** Pointer to per-chunk unwind info for non-windows hosts. */
269typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
270#endif
271
272
273/**
274 * A chunk of executable memory.
275 */
276typedef struct IEMEXECMEMCHUNK
277{
278#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
279 /** Number of free items in this chunk. */
280 uint32_t cFreeUnits;
281 /** Hint where to start searching for free space in the allocation bitmap. */
282 uint32_t idxFreeHint;
283#else
284 /** The heap handle. */
285 RTHEAPSIMPLE hHeap;
286#endif
287 /** Pointer to the chunk. */
288 void *pvChunk;
289#ifdef IN_RING3
290 /**
291 * Pointer to the unwind information.
292 *
293 * This is used during C++ throw and longjmp (windows and probably most other
294 * platforms). Some debuggers (windbg) make use of it as well.
295 *
296 * Windows: This is allocated from hHeap on windows because (at least for
297 * AMD64) the UNWIND_INFO structure address in the
298 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
299 *
300 * Others: Allocated from the regular heap to avoid unnecessary executable data
301 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
302 void *pvUnwindInfo;
303#elif defined(IN_RING0)
304 /** Allocation handle. */
305 RTR0MEMOBJ hMemObj;
306#endif
307} IEMEXECMEMCHUNK;
308/** Pointer to a memory chunk. */
309typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
310
311
312/**
313 * Executable memory allocator for the native recompiler.
314 */
315typedef struct IEMEXECMEMALLOCATOR
316{
317 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
318 uint32_t uMagic;
319
320 /** The chunk size. */
321 uint32_t cbChunk;
322 /** The maximum number of chunks. */
323 uint32_t cMaxChunks;
324 /** The current number of chunks. */
325 uint32_t cChunks;
326 /** Hint where to start looking for available memory. */
327 uint32_t idxChunkHint;
328 /** Statistics: Current number of allocations. */
329 uint32_t cAllocations;
330
331 /** The total amount of memory available. */
332 uint64_t cbTotal;
333 /** Total amount of free memory. */
334 uint64_t cbFree;
335 /** Total amount of memory allocated. */
336 uint64_t cbAllocated;
337
338#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
339 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
340 *
341 * Since the chunk size is a power of two and the minimum chunk size is a lot
342 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
343 * require a whole number of uint64_t elements in the allocation bitmap. So,
344 * for the sake of simplicity/laziness, the bitmaps are allocated as one
345 * continuous block. */
346 uint64_t *pbmAlloc;
347 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
348 uint32_t cUnitsPerChunk;
349 /** Number of bitmap elements per chunk (for quickly locating the bitmap
350 * portion corresponding to a chunk). */
351 uint32_t cBitmapElementsPerChunk;
352#else
353 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
354 * @{ */
355 /** The size of the heap internal block header. This is used to adjust the
356 * requested memory size to make sure there is exactly enough room for a header at
357 * the end of the blocks we allocate before the next 64 byte alignment line. */
358 uint32_t cbHeapBlockHdr;
359 /** The size of the initial heap allocation required to make sure the first
360 * allocation is correctly aligned. */
361 uint32_t cbHeapAlignTweak;
362 /** The alignment tweak allocation address. */
363 void *pvAlignTweak;
364 /** @} */
365#endif
366
367#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
368 /** Pointer to the array of unwind info running parallel to aChunks (same
369 * allocation as this structure, located after the bitmaps).
370 * (For Windows, the structures must reside within 32-bit RVA distance of the
371 * actual chunk, so they are allocated off the chunk.) */
372 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
373#endif
374
375 /** The allocation chunks. */
376 RT_FLEXIBLE_ARRAY_EXTENSION
377 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
378} IEMEXECMEMALLOCATOR;
379/** Pointer to an executable memory allocator. */
380typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
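/* Note: the allocator instance, the allocation bitmaps (alternative
 * sub-allocator) and, on non-windows ring-3 hosts, the unwind info array are
 * all carved out of a single RTMemAllocZ block, roughly laid out as
 *
 *      [ IEMEXECMEMALLOCATOR + aChunks[cMaxChunks] ]
 *      [ pbmAlloc:   cBitmapElementsPerChunk * cMaxChunks uint64_t entries ]
 *      [ paEhFrames: IEMEXECMEMCHUNKEHFRAME[cMaxChunks] ]
 *
 * See the offset calculations in iemExecMemAllocatorInit below. */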
381
382/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
383#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
384
385
386static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
387
388
389/**
390 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
391 * the heap statistics.
392 */
393static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
394 uint32_t cbReq, uint32_t idxChunk)
395{
396 pExecMemAllocator->cAllocations += 1;
397 pExecMemAllocator->cbAllocated += cbReq;
398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
399 pExecMemAllocator->cbFree -= cbReq;
400#else
401 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
402#endif
403 pExecMemAllocator->idxChunkHint = idxChunk;
404
405#ifdef RT_OS_DARWIN
406 /*
407 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
408 * on darwin. So, we mark the pages returned as read+write after alloc and
409 * expect the caller to call iemExecMemAllocatorReadyForUse when done
410 * writing to the allocation.
411 *
412 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
413 * for details.
414 */
415 /** @todo detect if this is necessary... it wasn't required on 10.15 or
416 * whatever older version it was. */
417 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
418 AssertRC(rc);
419#endif
420
421 return pvRet;
422}
423
424
425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
426static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
427 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
428{
429 /*
430 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
431 */
432 Assert(!(cToScan & 63));
433 Assert(!(idxFirst & 63));
434 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
435 pbmAlloc += idxFirst / 64;
436
437 /*
438 * Scan the bitmap for cReqUnits consecutive clear bits
439 */
440 /** @todo This can probably be done more efficiently for non-x86 systems. */
441 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
442 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
443 {
444 uint32_t idxAddBit = 1;
445 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
446 idxAddBit++;
447 if (idxAddBit >= cReqUnits)
448 {
449 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
450
451 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
452 pChunk->cFreeUnits -= cReqUnits;
453 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
454
455 void * const pvRet = (uint8_t *)pChunk->pvChunk
456 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
457
458 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
459 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
460 }
461
462 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
463 }
464 return NULL;
465}
466#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
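/* Note: the scan above is a simple first-fit search.  As an illustration, with
 * cReqUnits = 3 and a bitmap qword reading 0b...0001'0011 (units 0, 1 and 4
 * allocated), ASMBitFirstClear returns 2, the inner loop finds unit 4 set and
 * the search restarts from unit 5 via ASMBitNextClear, where three clear units
 * are found and marked allocated with ASMBitSetRange. */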
467
468
469static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
470{
471#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
472 /*
473 * Figure out how much to allocate.
474 */
475 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
476 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
477 {
478 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
479 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
480 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
481 {
482 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
483 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
484 if (pvRet)
485 return pvRet;
486 }
487 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
488 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
489 cReqUnits, idxChunk);
490 }
491#else
492 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
493 if (pvRet)
494 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
495#endif
496 return NULL;
497
498}
499
500
501/**
502 * Allocates @a cbReq bytes of executable memory.
503 *
504 * @returns Pointer to the memory, NULL if out of memory or other problem
505 * encountered.
506 * @param pVCpu The cross context virtual CPU structure of the calling
507 * thread.
508 * @param cbReq How many bytes are required.
509 */
510static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
511{
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
513 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
514 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
515
516
517 for (unsigned iIteration = 0;; iIteration++)
518 {
519 /*
520 * Adjust the request size so it'll fit the allocator alignment/whatnot.
521 *
522 * For the RTHeapSimple allocator this means following the logic described
523 * in iemExecMemAllocatorGrow and attempting to allocate it from one of the
524 * existing chunks if we think we've got sufficient free memory around.
525 *
526 * For the alternative one we just align it up to a whole unit size.
527 */
528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
529 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
530#else
531 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
532#endif
533 if (cbReq <= pExecMemAllocator->cbFree)
534 {
535 uint32_t const cChunks = pExecMemAllocator->cChunks;
536 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
537 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
544 {
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 }
549 }
550
551 /*
552 * Can we grow it with another chunk?
553 */
554 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
555 {
556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
557 AssertLogRelRCReturn(rc, NULL);
558
559 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
560 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
561 if (pvRet)
562 return pvRet;
563 AssertFailed();
564 }
565
566 /*
567 * Try prune native TBs once.
568 */
569 if (iIteration == 0)
570 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
571 else
572 {
573 /** @todo stats... */
574 return NULL;
575 }
576 }
577
578}
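/*
 * Illustrative usage sketch (an assumption for documentation purposes, not
 * code that is compiled or called anywhere): the expected lifecycle of an
 * executable allocation is allocate, emit, mark ready for use, and eventually
 * free.  The function name and the 256 byte size are made up for the example.
 */
#if 0
static void iemExecMemAllocatorUsageSketch(PVMCPUCC pVCpu)
{
    uint32_t const cbReq   = 256; /* hypothetical size of the emitted code */
    uint8_t       *pbCode  = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbReq);
    if (pbCode)
    {
        /* ... emit native instructions into pbCode (still RW on darwin) ... */
        iemExecMemAllocatorReadyForUse(pVCpu, pbCode, cbReq); /* flip to RX + flush icache */
        /* ... execute the code ... */
        iemExecMemAllocatorFree(pVCpu, pbCode, cbReq);        /* return it to the chunk */
    }
}
#endif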
579
580
581/** This is a hook that we may need later for changing memory protection back
582 * to readonly+exec */
583static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
584{
585#ifdef RT_OS_DARWIN
586 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
587 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
588 AssertRC(rc); RT_NOREF(pVCpu);
589
590 /*
591 * Flush the instruction cache:
592 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
593 */
594 /* sys_dcache_flush(pv, cb); - not necessary */
595 sys_icache_invalidate(pv, cb);
596#else
597 RT_NOREF(pVCpu, pv, cb);
598#endif
599}
600
601
602/**
603 * Frees executable memory.
604 */
605void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
606{
607 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
608 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
609 Assert(pv);
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
612#else
613 Assert(!((uintptr_t)pv & 63));
614#endif
615
616 /* Align the size as we did when allocating the block. */
617#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
618 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
619#else
620 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
621#endif
622
623 /* Free it / assert sanity. */
624#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
625 uint32_t const cChunks = pExecMemAllocator->cChunks;
626 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
627 bool fFound = false;
628 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
629 {
630 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
631 fFound = offChunk < cbChunk;
632 if (fFound)
633 {
634#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
635 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
636 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
637
638 /* Check that it's valid and free it. */
639 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
641 for (uint32_t i = 1; i < cReqUnits; i++)
642 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
643 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
644
645 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
646 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
647
648 /* Update the stats. */
649 pExecMemAllocator->cbAllocated -= cb;
650 pExecMemAllocator->cbFree += cb;
651 pExecMemAllocator->cAllocations -= 1;
652 return;
653#else
654 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
655 break;
656#endif
657 }
658 }
659# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
660 AssertFailed();
661# else
662 Assert(fFound);
663# endif
664#endif
665
666#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
667 /* Update stats while cb is freshly calculated. */
668 pExecMemAllocator->cbAllocated -= cb;
669 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
670 pExecMemAllocator->cAllocations -= 1;
671
672 /* Free it. */
673 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
674#endif
675}
676
677
678
679#ifdef IN_RING3
680# ifdef RT_OS_WINDOWS
681
682/**
683 * Initializes the unwind info structures for windows hosts.
684 */
685static int
686iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
687 void *pvChunk, uint32_t idxChunk)
688{
689 RT_NOREF(pVCpu);
690
691 /*
692 * The AMD64 unwind opcodes.
693 *
694 * This is a program that starts with RSP after a RET instruction that
695 * ends up in recompiled code, and the operations we describe here will
696 * restore all non-volatile registers and bring RSP back to where our
697 * RET address is. This means it's reverse order from what happens in
698 * the prologue.
699 *
700 * Note! Using a frame register approach here both because we have one
701 * and mainly because the UWOP_ALLOC_LARGE argument values
702 * would be a pain to write initializers for. On the positive
703 * side, we're impervious to changes in the stack variable
704 * area and can deal with dynamic stack allocations if necessary.
705 */
706 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
707 {
708 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
709 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
710 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
711 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
712 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
713 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
714 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
715 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
716 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
717 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
718 };
719 union
720 {
721 IMAGE_UNWIND_INFO Info;
722 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
723 } s_UnwindInfo =
724 {
725 {
726 /* .Version = */ 1,
727 /* .Flags = */ 0,
728 /* .SizeOfProlog = */ 16, /* whatever */
729 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
730 /* .FrameRegister = */ X86_GREG_xBP,
731 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
732 }
733 };
734 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
735 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
736
737 /*
738 * Calc how much space we need and allocate it off the exec heap.
739 */
740 unsigned const cFunctionEntries = 1;
741 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
742 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
743# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
744 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
745 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
746 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
747# else
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
749 - pExecMemAllocator->cbHeapBlockHdr;
750 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
751 32 /*cbAlignment*/);
752# endif
753 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
754 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
755
756 /*
757 * Initialize the structures.
758 */
759 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
760
761 paFunctions[0].BeginAddress = 0;
762 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
763 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
764
765 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
766 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
767
768 /*
769 * Register it.
770 */
771 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
772 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
773
774 return VINF_SUCCESS;
775}
776
777
778# else /* !RT_OS_WINDOWS */
779
780/**
781 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
782 */
783DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
784{
785 if (iValue >= 64)
786 {
787 Assert(iValue < 0x2000);
788 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
789 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
790 }
791 else if (iValue >= 0)
792 *Ptr.pb++ = (uint8_t)iValue;
793 else if (iValue > -64)
794 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
795 else
796 {
797 Assert(iValue > -0x2000);
798 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
799 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
800 }
801 return Ptr;
802}
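/* Note: a couple of illustrative encodings for the helper above: -8 encodes as
 * the single byte 0x78 (the data alignment factor used below), while 200 needs
 * two bytes, 0xc8 0x01, since it does not fit the short single-byte forms. */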
803
804
805/**
806 * Emits an ULEB128 encoded value (up to 64-bit wide).
807 */
808DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
809{
810 while (uValue >= 0x80)
811 {
812 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
813 uValue >>= 7;
814 }
815 *Ptr.pb++ = (uint8_t)uValue;
816 return Ptr;
817}
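/* Note: for illustration, iemDwarfPutUleb128 emits 300 as 0xac 0x02 and any
 * value below 0x80 as a single byte, which is why the small DWARF register
 * numbers used below all end up as one byte each. */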
818
819
820/**
821 * Emits a CFA rule as register @a uReg + offset @a off.
822 */
823DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
824{
825 *Ptr.pb++ = DW_CFA_def_cfa;
826 Ptr = iemDwarfPutUleb128(Ptr, uReg);
827 Ptr = iemDwarfPutUleb128(Ptr, off);
828 return Ptr;
829}
830
831
832/**
833 * Emits a register (@a uReg) save location:
834 * CFA + @a off * data_alignment_factor
835 */
836DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
837{
838 if (uReg < 0x40)
839 *Ptr.pb++ = DW_CFA_offset | uReg;
840 else
841 {
842 *Ptr.pb++ = DW_CFA_offset_extended;
843 Ptr = iemDwarfPutUleb128(Ptr, uReg);
844 }
845 Ptr = iemDwarfPutUleb128(Ptr, off);
846 return Ptr;
847}
848
849
850# if 0 /* unused */
851/**
852 * Emits a register (@a uReg) save location, using signed offset:
853 * CFA + @a offSigned * data_alignment_factor
854 */
855DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
856{
857 *Ptr.pb++ = DW_CFA_offset_extended_sf;
858 Ptr = iemDwarfPutUleb128(Ptr, uReg);
859 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
860 return Ptr;
861}
862# endif
863
864
865/**
866 * Initializes the unwind info section for non-windows hosts.
867 */
868static int
869iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
870 void *pvChunk, uint32_t idxChunk)
871{
872 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
873 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
874
875 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
876
877 /*
878 * Generate the CIE first.
879 */
880# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
881 uint8_t const iDwarfVer = 3;
882# else
883 uint8_t const iDwarfVer = 4;
884# endif
885 RTPTRUNION const PtrCie = Ptr;
886 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
887 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
888 *Ptr.pb++ = iDwarfVer; /* DWARF version */
889 *Ptr.pb++ = 0; /* Augmentation. */
890 if (iDwarfVer >= 4)
891 {
892 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
893 *Ptr.pb++ = 0; /* Segment selector size. */
894 }
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
897# else
898 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
899# endif
900 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
901# ifdef RT_ARCH_AMD64
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
903# elif defined(RT_ARCH_ARM64)
904 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
905# else
906# error "port me"
907# endif
908 /* Initial instructions: */
909# ifdef RT_ARCH_AMD64
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
918# elif defined(RT_ARCH_ARM64)
919# if 1
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
921# else
922 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
923# endif
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
936 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
937 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
938# else
939# error "port me"
940# endif
941 while ((Ptr.u - PtrCie.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the CIE size. */
944 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
945
946 /*
947 * Generate an FDE for the whole chunk area.
948 */
949# ifdef IEMNATIVE_USE_LIBUNWIND
950 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
951# endif
952 RTPTRUNION const PtrFde = Ptr;
953 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
954 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
955 Ptr.pu32++;
956 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
957 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
958# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
959 *Ptr.pb++ = DW_CFA_nop;
960# endif
961 while ((Ptr.u - PtrFde.u) & 3)
962 *Ptr.pb++ = DW_CFA_nop;
963 /* Finalize the FDE size. */
964 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
965
966 /* Terminator entry. */
967 *Ptr.pu32++ = 0;
968 *Ptr.pu32++ = 0; /* just to be sure... */
969 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
970
971 /*
972 * Register it.
973 */
974# ifdef IEMNATIVE_USE_LIBUNWIND
975 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
976# else
977 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
978 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
979# endif
980
981# ifdef IEMNATIVE_USE_GDB_JIT
982 /*
983 * Now for telling GDB about this (experimental).
984 *
985 * This seems to work best with ET_DYN.
986 */
987 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
988# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
989 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
990 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
991# else
992 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
993 - pExecMemAllocator->cbHeapBlockHdr;
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
995# endif
996 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
997 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
998
999 RT_ZERO(*pSymFile);
1000
1001 /*
1002 * The ELF header:
1003 */
1004 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1005 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1006 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1007 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1008 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1009 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1010 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1011 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1012# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1013 pSymFile->EHdr.e_type = ET_DYN;
1014# else
1015 pSymFile->EHdr.e_type = ET_REL;
1016# endif
1017# ifdef RT_ARCH_AMD64
1018 pSymFile->EHdr.e_machine = EM_AMD64;
1019# elif defined(RT_ARCH_ARM64)
1020 pSymFile->EHdr.e_machine = EM_AARCH64;
1021# else
1022# error "port me"
1023# endif
1024 pSymFile->EHdr.e_version = 1; /*?*/
1025 pSymFile->EHdr.e_entry = 0;
1026# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1027 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1028# else
1029 pSymFile->EHdr.e_phoff = 0;
1030# endif
1031 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1032 pSymFile->EHdr.e_flags = 0;
1033 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1035 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1036 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1037# else
1038 pSymFile->EHdr.e_phentsize = 0;
1039 pSymFile->EHdr.e_phnum = 0;
1040# endif
1041 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1042 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1043 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1044
1045 uint32_t offStrTab = 0;
1046#define APPEND_STR(a_szStr) do { \
1047 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1048 offStrTab += sizeof(a_szStr); \
1049 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1050 } while (0)
1051#define APPEND_STR_FMT(a_szStr, ...) do { \
1052 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1053 offStrTab++; \
1054 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1055 } while (0)
1056
1057 /*
1058 * Section headers.
1059 */
1060 /* Section header #0: NULL */
1061 unsigned i = 0;
1062 APPEND_STR("");
1063 RT_ZERO(pSymFile->aShdrs[i]);
1064 i++;
1065
1066 /* Section header: .eh_frame */
1067 pSymFile->aShdrs[i].sh_name = offStrTab;
1068 APPEND_STR(".eh_frame");
1069 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1070 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1071# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1072 pSymFile->aShdrs[i].sh_offset
1073 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1074# else
1075 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1076 pSymFile->aShdrs[i].sh_offset = 0;
1077# endif
1078
1079 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1080 pSymFile->aShdrs[i].sh_link = 0;
1081 pSymFile->aShdrs[i].sh_info = 0;
1082 pSymFile->aShdrs[i].sh_addralign = 1;
1083 pSymFile->aShdrs[i].sh_entsize = 0;
1084 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1085 i++;
1086
1087 /* Section header: .shstrtab */
1088 unsigned const iShStrTab = i;
1089 pSymFile->EHdr.e_shstrndx = iShStrTab;
1090 pSymFile->aShdrs[i].sh_name = offStrTab;
1091 APPEND_STR(".shstrtab");
1092 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1093 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1095 pSymFile->aShdrs[i].sh_offset
1096 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1097# else
1098 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1099 pSymFile->aShdrs[i].sh_offset = 0;
1100# endif
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1102 pSymFile->aShdrs[i].sh_link = 0;
1103 pSymFile->aShdrs[i].sh_info = 0;
1104 pSymFile->aShdrs[i].sh_addralign = 1;
1105 pSymFile->aShdrs[i].sh_entsize = 0;
1106 i++;
1107
1108 /* Section header: .symtab */
1109 pSymFile->aShdrs[i].sh_name = offStrTab;
1110 APPEND_STR(".symtab");
1111 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1112 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1113 pSymFile->aShdrs[i].sh_offset
1114 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1115 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_link = iShStrTab;
1117 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1118 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1119 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1120 i++;
1121
1122# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1123 /* Section header: .dynsym */
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".dynsym");
1126 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1128 pSymFile->aShdrs[i].sh_offset
1129 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1130 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_link = iShStrTab;
1132 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1133 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1134 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1135 i++;
1136# endif
1137
1138# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1139 /* Section header: .dynamic */
1140 pSymFile->aShdrs[i].sh_name = offStrTab;
1141 APPEND_STR(".dynamic");
1142 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1143 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1144 pSymFile->aShdrs[i].sh_offset
1145 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1146 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1147 pSymFile->aShdrs[i].sh_link = iShStrTab;
1148 pSymFile->aShdrs[i].sh_info = 0;
1149 pSymFile->aShdrs[i].sh_addralign = 1;
1150 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1151 i++;
1152# endif
1153
1154 /* Section header: .text */
1155 unsigned const iShText = i;
1156 pSymFile->aShdrs[i].sh_name = offStrTab;
1157 APPEND_STR(".text");
1158 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1159 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1161 pSymFile->aShdrs[i].sh_offset
1162 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1163# else
1164 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1165 pSymFile->aShdrs[i].sh_offset = 0;
1166# endif
1167 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1168 pSymFile->aShdrs[i].sh_link = 0;
1169 pSymFile->aShdrs[i].sh_info = 0;
1170 pSymFile->aShdrs[i].sh_addralign = 1;
1171 pSymFile->aShdrs[i].sh_entsize = 0;
1172 i++;
1173
1174 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1175
1176# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1177 /*
1178 * The program headers:
1179 */
1180 /* Everything in a single LOAD segment: */
1181 i = 0;
1182 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1183 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1184 pSymFile->aPhdrs[i].p_offset
1185 = pSymFile->aPhdrs[i].p_vaddr
1186 = pSymFile->aPhdrs[i].p_paddr = 0;
1187 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1188 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1189 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1190 i++;
1191 /* The .dynamic segment. */
1192 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1193 pSymFile->aPhdrs[i].p_flags = PF_R;
1194 pSymFile->aPhdrs[i].p_offset
1195 = pSymFile->aPhdrs[i].p_vaddr
1196 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1197 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1198 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1199 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1200 i++;
1201
1202 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1203
1204 /*
1205 * The dynamic section:
1206 */
1207 i = 0;
1208 pSymFile->aDyn[i].d_tag = DT_SONAME;
1209 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1210 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1219 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1220 i++;
1221 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1222 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1223 i++;
1224 pSymFile->aDyn[i].d_tag = DT_NULL;
1225 i++;
1226 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1227# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1228
1229 /*
1230 * Symbol tables:
1231 */
1232 /** @todo gdb doesn't seem to really like this ... */
1233 i = 0;
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1241 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1242# endif
1243 i++;
1244
1245 pSymFile->aSymbols[i].st_name = 0;
1246 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1247 pSymFile->aSymbols[i].st_value = 0;
1248 pSymFile->aSymbols[i].st_size = 0;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251 i++;
1252
1253 pSymFile->aSymbols[i].st_name = offStrTab;
1254 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1255# if 0
1256 pSymFile->aSymbols[i].st_shndx = iShText;
1257 pSymFile->aSymbols[i].st_value = 0;
1258# else
1259 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1260 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1261# endif
1262 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1263 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1264 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1265# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1266 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1267 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1268# endif
1269 i++;
1270
1271 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1272 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1273
1274 /*
1275 * The GDB JIT entry and informing GDB.
1276 */
1277 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1278# if 1
1279 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1280# else
1281 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1282# endif
1283
1284 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1285 RTCritSectEnter(&g_IemNativeGdbJitLock);
1286 pEhFrame->GdbJitEntry.pNext = NULL;
1287 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1288 if (__jit_debug_descriptor.pTail)
1289 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1290 else
1291 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1292 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1293 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1294
1295 /* Notify GDB: */
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1297 __jit_debug_register_code();
1298 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1299 RTCritSectLeave(&g_IemNativeGdbJitLock);
1300
1301# else /* !IEMNATIVE_USE_GDB_JIT */
1302 RT_NOREF(pVCpu);
1303# endif /* !IEMNATIVE_USE_GDB_JIT */
1304
1305 return VINF_SUCCESS;
1306}
1307
1308# endif /* !RT_OS_WINDOWS */
1309#endif /* IN_RING3 */
1310
1311
1312/**
1313 * Adds another chunk to the executable memory allocator.
1314 *
1315 * This is used by the init code for the initial allocation and later by the
1316 * regular allocator function when it's out of memory.
1317 */
1318static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1319{
1320 /* Check that we've room for growth. */
1321 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1322 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1323
1324 /* Allocate a chunk. */
1325#ifdef RT_OS_DARWIN
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1327#else
1328 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1329#endif
1330 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1331
1332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1333 int rc = VINF_SUCCESS;
1334#else
1335 /* Initialize the heap for the chunk. */
1336 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1337 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1338 AssertRC(rc);
1339 if (RT_SUCCESS(rc))
1340 {
1341 /*
1342 * We want the memory to be aligned on 64 byte, so the first time thru
1343 * here we do some exploratory allocations to see how we can achieve this.
1344 * On subsequent runs we only make an initial adjustment allocation, if
1345 * necessary.
1346 *
1347 * Since we own the heap implementation, we know that the internal block
1348 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1349 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1350 * to the size, align up by 64 bytes, and subtract 32 bytes.
1351 *
1352 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1353 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1354 * allocation to force subsequent allocations to return 64 byte aligned
1355 * user areas.
1356 */
1357 if (!pExecMemAllocator->cbHeapBlockHdr)
1358 {
1359 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1360 pExecMemAllocator->cbHeapAlignTweak = 64;
1361 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1362 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1364
1365 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1372 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1373 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1374 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1375 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1376
1377 RTHeapSimpleFree(hHeap, pvTest2);
1378 RTHeapSimpleFree(hHeap, pvTest1);
1379 }
1380 else
1381 {
1382 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1383 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1384 }
1385 if (RT_SUCCESS(rc))
1386#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1387 {
1388 /*
1389 * Add the chunk.
1390 *
1391 * This must be done before the unwind init so windows can allocate
1392 * memory from the chunk when using the alternative sub-allocator.
1393 */
1394 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1395#ifdef IN_RING3
1396 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1397#endif
1398#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1399 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1400#else
1401 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1402 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1403 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1404 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1405#endif
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1411 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1412 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1413#else
1414 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1415 pExecMemAllocator->cbTotal += cbFree;
1416 pExecMemAllocator->cbFree += cbFree;
1417#endif
1418
1419#ifdef IN_RING3
1420 /*
1421 * Initialize the unwind information (this cannot really fail atm).
1422 * (This sets pvUnwindInfo.)
1423 */
1424 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1425 if (RT_SUCCESS(rc))
1426#endif
1427 {
1428 return VINF_SUCCESS;
1429 }
1430
1431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 /* Just in case the impossible happens, undo the above: */
1433 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1434 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1435 pExecMemAllocator->cChunks = idxChunk;
1436 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1437 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1438 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1439 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1440#endif
1441 }
1442#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1443 }
1444#endif
1445 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1446 RT_NOREF(pVCpu);
1447 return rc;
1448}
1449
1450
1451/**
1452 * Initializes the executable memory allocator for native recompilation on the
1453 * calling EMT.
1454 *
1455 * @returns VBox status code.
1456 * @param pVCpu The cross context virtual CPU structure of the calling
1457 * thread.
1458 * @param cbMax The max size of the allocator.
1459 * @param cbInitial The initial allocator size.
1460 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1461 * dependent).
1462 */
1463int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1464{
1465 /*
1466 * Validate input.
1467 */
1468 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1469 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1470 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1471 || cbChunk == 0
1472 || ( RT_IS_POWER_OF_TWO(cbChunk)
1473 && cbChunk >= _1M
1474 && cbChunk <= _256M
1475 && cbChunk <= cbMax),
1476 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1477 VERR_OUT_OF_RANGE);
1478
1479 /*
1480 * Adjust/figure out the chunk size.
1481 */
1482 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1483 {
1484 if (cbMax >= _256M)
1485 cbChunk = _64M;
1486 else
1487 {
1488 if (cbMax < _16M)
1489 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1490 else
1491 cbChunk = (uint32_t)cbMax / 4;
1492 if (!RT_IS_POWER_OF_TWO(cbChunk))
1493 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1494 }
1495 }
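    /* For illustration: with the default cbChunk (0 or UINT32_MAX), a cbMax of
       64M takes the middle branch above, giving cbChunk = 64M / 4 = 16M (already
       a power of two); the rounding below then yields cMaxChunks = 4. */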
1496
1497 if (cbChunk > cbMax)
1498 cbMax = cbChunk;
1499 else
1500 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1501 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1502 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
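    /* Worked example (illustrative numbers only): cbMax = 40 MiB with cbChunk = 0
       gives cbChunk = cbMax / 4 = 10 MiB, which is not a power of two and is
       therefore rounded up to 16 MiB; cbMax is then rounded up to the next chunk
       multiple, 48 MiB, so cMaxChunks = 3. */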
1503
1504 /*
1505 * Allocate and initialize the allocator instance.
1506 */
1507 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1508#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1509 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1510 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1511 cbNeeded += cbBitmap * cMaxChunks;
1512 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1513 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1514#endif
1515#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1516 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1517 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1518#endif
1519 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1520 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1521 VERR_NO_MEMORY);
1522 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1523 pExecMemAllocator->cbChunk = cbChunk;
1524 pExecMemAllocator->cMaxChunks = cMaxChunks;
1525 pExecMemAllocator->cChunks = 0;
1526 pExecMemAllocator->idxChunkHint = 0;
1527 pExecMemAllocator->cAllocations = 0;
1528 pExecMemAllocator->cbTotal = 0;
1529 pExecMemAllocator->cbFree = 0;
1530 pExecMemAllocator->cbAllocated = 0;
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1533 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1534 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1535 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1536#endif
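    /* Sizing example for the alternative sub-allocator (illustrative only; the
       actual IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT value is defined elsewhere):
       assuming a unit shift of 8 (256 byte units) and cbChunk = 64 MiB, each chunk
       has 256K allocation units tracked by 4096 uint64_t bitmap words, i.e. 32 KiB
       of bitmap per chunk. */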
1537#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1538 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1539#endif
1540 for (uint32_t i = 0; i < cMaxChunks; i++)
1541 {
1542#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1543 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1544 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1545#else
1546 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1547#endif
1548 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1549#ifdef IN_RING0
1550 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1551#else
1552 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1553#endif
1554 }
1555 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1556
1557 /*
1558 * Do the initial allocations.
1559 */
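    /* Grow one chunk at a time until at least cbInitial bytes of executable
       memory are available (i.e. cChunks * cbChunk >= cbInitial). */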
1560 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1561 {
1562 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1563 AssertLogRelRCReturn(rc, rc);
1564 }
1565
1566 pExecMemAllocator->idxChunkHint = 0;
1567
1568 return VINF_SUCCESS;
1569}
1570
1571
1572/*********************************************************************************************************************************
1573* Native Recompilation *
1574*********************************************************************************************************************************/
1575
1576
1577/**
1578 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1581{
1582 pVCpu->iem.s.cInstructions += idxInstr;
1583 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1584}
1585
1586
1587/**
1588 * Used by TB code when it wants to raise a \#GP(0).
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1591{
1592 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1593#ifndef _MSC_VER
1594 return VINF_IEM_RAISED_XCPT; /* not reached */
1595#endif
1596}
1597
1598
1599/**
1600 * Used by TB code when detecting opcode changes.
1601 * @see iemThreadeFuncWorkerObsoleteTb
1602 */
1603IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1604{
1605 /* We set fSafeToFree to false because we're being called in the context
1606    of a TB callback function, which for native TBs means we cannot release
1607    the executable memory until we've returned all the way back to iemTbExec,
1608    as that return path goes via the native code generated for the TB. */
1609 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1610 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1611 return VINF_IEM_REEXEC_BREAK;
1612}
1613
1614
1615/**
1616 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1619{
1620 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1621 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1622 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1623 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1624 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1625 return VINF_IEM_REEXEC_BREAK;
1626}
1627
1628
1629/**
1630 * Used by TB code when we missed a PC check after a branch.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1633{
1634 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1635 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1636 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1637 pVCpu->iem.s.pbInstrBuf));
1638 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1639 return VINF_IEM_REEXEC_BREAK;
1640}
1641
1642
1643
1644/*********************************************************************************************************************************
1645* Helpers: Segmented memory fetches and stores. *
1646*********************************************************************************************************************************/
1647
1648/**
1649 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1650 */
1651IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1652{
1653#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1654 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1655#else
1656 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1657#endif
1658}
1659
1660
1661/**
1662 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1663 * to 16 bits.
1664 */
1665IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1666{
1667#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1668 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1669#else
1670 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1671#endif
1672}
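/* Note on the cast chain above (illustrative): a fetched byte of 0x80 becomes
   (int8_t)-128, sign extends to the 16-bit pattern 0xff80, and is then zero
   extended via uint16_t to the 64-bit return value 0x000000000000ff80, so only
   the low 16 bits carry the sign extension. The wider _Sx_ variants below work
   the same way with their respective intermediate widths. */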
1673
1674
1675/**
1676 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1677 * to 32 bits.
1678 */
1679IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1680{
1681#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1682 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1683#else
1684 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1685#endif
1686}
1687
1688/**
1689 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1690 * to 64 bits.
1691 */
1692IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1693{
1694#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1695 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1696#else
1697 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1698#endif
1699}
1700
1701
1702/**
1703 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1704 */
1705IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1706{
1707#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1708 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1709#else
1710 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1711#endif
1712}
1713
1714
1715/**
1716 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1717 * to 32 bits.
1718 */
1719IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1720{
1721#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1722 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1723#else
1724 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1725#endif
1726}
1727
1728
1729/**
1730 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1731 * to 64 bits.
1732 */
1733IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1734{
1735#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1736 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1737#else
1738 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1739#endif
1740}
1741
1742
1743/**
1744 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1745 */
1746IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1747{
1748#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1749 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1750#else
1751 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1752#endif
1753}
1754
1755
1756/**
1757 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1758 * to 64 bits.
1759 */
1760IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1761{
1762#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1763 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1764#else
1765 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1766#endif
1767}
1768
1769
1770/**
1771 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1772 */
1773IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1774{
1775#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1776 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1777#else
1778 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1779#endif
1780}
1781
1782
1783/**
1784 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1785 */
1786IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1787{
1788#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1789 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1790#else
1791 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1792#endif
1793}
1794
1795
1796/**
1797 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1798 */
1799IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1800{
1801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1802 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1803#else
1804 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1805#endif
1806}
1807
1808
1809/**
1810 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1815 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1816#else
1817 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1828 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1829#else
1830 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1831#endif
1832}
1833
1834
1835
1836/**
1837 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1838 */
1839IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1840{
1841#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1842 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1843#else
1844 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1845#endif
1846}
1847
1848
1849/**
1850 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1851 */
1852IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1853{
1854#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1855 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1856#else
1857 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1858#endif
1859}
1860
1861
1862/**
1863 * Used by TB code to store a 32-bit selector value onto a generic stack.
1864 *
1865 * Intel CPUs don't write a whole dword here, hence the special function.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1870 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1871#else
1872 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1873#endif
1874}
1875
1876
1877/**
1878 * Used by TB code to push an unsigned 64-bit value onto a generic stack.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1883 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1884#else
1885 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1886#endif
1887}
1888
1889
1890/**
1891 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1892 */
1893IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1894{
1895#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1896 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1897#else
1898 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1899#endif
1900}
1901
1902
1903/**
1904 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1905 */
1906IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1907{
1908#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1909 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1910#else
1911 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1912#endif
1913}
1914
1915
1916/**
1917 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1922 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1923#else
1924 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1925#endif
1926}
1927
1928
1929
1930/*********************************************************************************************************************************
1931* Helpers: Flat memory fetches and stores. *
1932*********************************************************************************************************************************/
1933
1934/**
1935 * Used by TB code to load unsigned 8-bit data w/ flat address.
1936 * @note Zero extending the value to 64-bit to simplify assembly.
1937 */
1938IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1939{
1940#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1941 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1942#else
1943 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1944#endif
1945}
1946
1947
1948/**
1949 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1950 * to 16 bits.
1951 * @note Zero extending the value to 64-bit to simplify assembly.
1952 */
1953IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1954{
1955#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1956 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1957#else
1958 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1959#endif
1960}
1961
1962
1963/**
1964 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1965 * to 32 bits.
1966 * @note Zero extending the value to 64-bit to simplify assembly.
1967 */
1968IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1969{
1970#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1971 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1972#else
1973 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1974#endif
1975}
1976
1977
1978/**
1979 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1980 * to 64 bits.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1983{
1984#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1985 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1986#else
1987 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1988#endif
1989}
1990
1991
1992/**
1993 * Used by TB code to load unsigned 16-bit data w/ flat address.
1994 * @note Zero extending the value to 64-bit to simplify assembly.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1999 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2000#else
2001 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2002#endif
2003}
2004
2005
2006/**
2007 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2008 * to 32 bits.
2009 * @note Zero extending the value to 64-bit to simplify assembly.
2010 */
2011IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2012{
2013#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2014 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2015#else
2016 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2017#endif
2018}
2019
2020
2021/**
2022 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2023 * to 64 bits.
2024 * @note Zero extending the value to 64-bit to simplify assembly.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2027{
2028#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2029 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2030#else
2031 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2032#endif
2033}
2034
2035
2036/**
2037 * Used by TB code to load unsigned 32-bit data w/ flat address.
2038 * @note Zero extending the value to 64-bit to simplify assembly.
2039 */
2040IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2041{
2042#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2043 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2044#else
2045 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2046#endif
2047}
2048
2049
2050/**
2051 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2052 * to 64 bits.
2053 * @note Zero extending the value to 64-bit to simplify assembly.
2054 */
2055IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2056{
2057#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2058 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2059#else
2060 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2061#endif
2062}
2063
2064
2065/**
2066 * Used by TB code to load unsigned 64-bit data w/ flat address.
2067 */
2068IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2069{
2070#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2071 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2072#else
2073 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2074#endif
2075}
2076
2077
2078/**
2079 * Used by TB code to store unsigned 8-bit data w/ flat address.
2080 */
2081IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2082{
2083#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2084 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2085#else
2086 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2087#endif
2088}
2089
2090
2091/**
2092 * Used by TB code to store unsigned 16-bit data w/ flat address.
2093 */
2094IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2095{
2096#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2097 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2098#else
2099 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2100#endif
2101}
2102
2103
2104/**
2105 * Used by TB code to store unsigned 32-bit data w/ flat address.
2106 */
2107IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2108{
2109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2110 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2111#else
2112 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2113#endif
2114}
2115
2116
2117/**
2118 * Used by TB code to store unsigned 64-bit data w/ flat address.
2119 */
2120IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2121{
2122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2123 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2124#else
2125 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2126#endif
2127}
2128
2129
2130
2131/**
2132 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2133 */
2134IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2135{
2136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2137 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2138#else
2139 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2140#endif
2141}
2142
2143
2144/**
2145 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2146 */
2147IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2148{
2149#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2150 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2151#else
2152 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2153#endif
2154}
2155
2156
2157/**
2158 * Used by TB code to store a segment selector value onto a flat stack.
2159 *
2160 * Intel CPUs don't write a whole dword here, hence the special function.
2161 */
2162IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2163{
2164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2165 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2166#else
2167 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2168#endif
2169}
2170
2171
2172/**
2173 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2174 */
2175IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2176{
2177#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2178 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2179#else
2180 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2181#endif
2182}
2183
2184
2185/**
2186 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2187 */
2188IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2189{
2190#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2191 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2192#else
2193 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2194#endif
2195}
2196
2197
2198/**
2199 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2200 */
2201IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2202{
2203#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2204 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2205#else
2206 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2207#endif
2208}
2209
2210
2211/**
2212 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2217 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2218#else
2219 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2220#endif
2221}
2222
2223
2224
2225/*********************************************************************************************************************************
2226* Helpers: Segmented memory mapping. *
2227*********************************************************************************************************************************/
2228
2229/**
2230 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2231 * segmentation.
2232 */
2233IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2234 RTGCPTR GCPtrMem, uint8_t iSegReg))
2235{
2236#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2237 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2238#else
2239 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2240#endif
2241}
2242
2243
2244/**
2245 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2246 */
2247IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2248 RTGCPTR GCPtrMem, uint8_t iSegReg))
2249{
2250#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2251 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2252#else
2253 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2254#endif
2255}
2256
2257
2258/**
2259 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2260 */
2261IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2262 RTGCPTR GCPtrMem, uint8_t iSegReg))
2263{
2264#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2265 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2266#else
2267 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2268#endif
2269}
2270
2271
2272/**
2273 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2274 */
2275IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2276 RTGCPTR GCPtrMem, uint8_t iSegReg))
2277{
2278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2279 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2280#else
2281 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2282#endif
2283}
2284
2285
2286/**
2287 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2288 * segmentation.
2289 */
2290IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2291 RTGCPTR GCPtrMem, uint8_t iSegReg))
2292{
2293#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2294 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2295#else
2296 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2297#endif
2298}
2299
2300
2301/**
2302 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2303 */
2304IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2305 RTGCPTR GCPtrMem, uint8_t iSegReg))
2306{
2307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2308 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2309#else
2310 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2311#endif
2312}
2313
2314
2315/**
2316 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2317 */
2318IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2319 RTGCPTR GCPtrMem, uint8_t iSegReg))
2320{
2321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2322 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2323#else
2324 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2325#endif
2326}
2327
2328
2329/**
2330 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2331 */
2332IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2333 RTGCPTR GCPtrMem, uint8_t iSegReg))
2334{
2335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2336 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2337#else
2338 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2339#endif
2340}
2341
2342
2343/**
2344 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2345 * segmentation.
2346 */
2347IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2348 RTGCPTR GCPtrMem, uint8_t iSegReg))
2349{
2350#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2351 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2352#else
2353 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2354#endif
2355}
2356
2357
2358/**
2359 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2360 */
2361IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2362 RTGCPTR GCPtrMem, uint8_t iSegReg))
2363{
2364#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2365 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2366#else
2367 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2368#endif
2369}
2370
2371
2372/**
2373 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2374 */
2375IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2376 RTGCPTR GCPtrMem, uint8_t iSegReg))
2377{
2378#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2379 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2380#else
2381 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2382#endif
2383}
2384
2385
2386/**
2387 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2390 RTGCPTR GCPtrMem, uint8_t iSegReg))
2391{
2392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2393 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2394#else
2395 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2396#endif
2397}
2398
2399
2400/**
2401 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2402 * segmentation.
2403 */
2404IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2405 RTGCPTR GCPtrMem, uint8_t iSegReg))
2406{
2407#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2408 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2409#else
2410 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2411#endif
2412}
2413
2414
2415/**
2416 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2417 */
2418IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2419 RTGCPTR GCPtrMem, uint8_t iSegReg))
2420{
2421#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2422 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2423#else
2424 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2425#endif
2426}
2427
2428
2429/**
2430 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2431 */
2432IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2433 RTGCPTR GCPtrMem, uint8_t iSegReg))
2434{
2435#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2436 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2437#else
2438 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2439#endif
2440}
2441
2442
2443/**
2444 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2445 */
2446IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2447 RTGCPTR GCPtrMem, uint8_t iSegReg))
2448{
2449#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2450 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2451#else
2452 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2453#endif
2454}
2455
2456
2457/**
2458 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2459 */
2460IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2461 RTGCPTR GCPtrMem, uint8_t iSegReg))
2462{
2463#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2464 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2465#else
2466 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2467#endif
2468}
2469
2470
2471/**
2472 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2473 */
2474IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2475 RTGCPTR GCPtrMem, uint8_t iSegReg))
2476{
2477#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2478 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2479#else
2480 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2481#endif
2482}
2483
2484
2485/**
2486 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2487 * segmentation.
2488 */
2489IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2490 RTGCPTR GCPtrMem, uint8_t iSegReg))
2491{
2492#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2493 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2494#else
2495 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2496#endif
2497}
2498
2499
2500/**
2501 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2502 */
2503IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2504 RTGCPTR GCPtrMem, uint8_t iSegReg))
2505{
2506#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2507 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2508#else
2509 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2510#endif
2511}
2512
2513
2514/**
2515 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2516 */
2517IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2518 RTGCPTR GCPtrMem, uint8_t iSegReg))
2519{
2520#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2521 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2522#else
2523 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2524#endif
2525}
2526
2527
2528/**
2529 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2530 */
2531IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2532 RTGCPTR GCPtrMem, uint8_t iSegReg))
2533{
2534#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2535 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2536#else
2537 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2538#endif
2539}
2540
2541
2542/*********************************************************************************************************************************
2543* Helpers: Flat memory mapping. *
2544*********************************************************************************************************************************/
2545
2546/**
2547 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2548 * address.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2551{
2552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2553 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2554#else
2555 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2556#endif
2557}
2558
2559
2560/**
2561 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2562 */
2563IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2564{
2565#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2566 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2567#else
2568 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2569#endif
2570}
2571
2572
2573/**
2574 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2575 */
2576IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2577{
2578#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2579 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2580#else
2581 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2582#endif
2583}
2584
2585
2586/**
2587 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2588 */
2589IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2590{
2591#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2592 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2593#else
2594 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2595#endif
2596}
2597
2598
2599/**
2600 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2601 * address.
2602 */
2603IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2604{
2605#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2606 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2607#else
2608 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2609#endif
2610}
2611
2612
2613/**
2614 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2615 */
2616IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2617{
2618#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2619 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2620#else
2621 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2622#endif
2623}
2624
2625
2626/**
2627 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2628 */
2629IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2630{
2631#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2632 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2633#else
2634 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2635#endif
2636}
2637
2638
2639/**
2640 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2641 */
2642IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2643{
2644#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2645 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2646#else
2647 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2648#endif
2649}
2650
2651
2652/**
2653 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2654 * address.
2655 */
2656IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2657{
2658#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2659 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2660#else
2661 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2662#endif
2663}
2664
2665
2666/**
2667 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2668 */
2669IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2670{
2671#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2672 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2673#else
2674 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2675#endif
2676}
2677
2678
2679/**
2680 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2681 */
2682IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2683{
2684#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2685 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2686#else
2687 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2688#endif
2689}
2690
2691
2692/**
2693 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2694 */
2695IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2696{
2697#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2698 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2699#else
2700 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2701#endif
2702}
2703
2704
2705/**
2706 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2707 * address.
2708 */
2709IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2710{
2711#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2712 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2713#else
2714 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2715#endif
2716}
2717
2718
2719/**
2720 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2721 */
2722IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2723{
2724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2725 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2726#else
2727 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2728#endif
2729}
2730
2731
2732/**
2733 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2734 */
2735IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2736{
2737#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2738 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2739#else
2740 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2741#endif
2742}
2743
2744
2745/**
2746 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2747 */
2748IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2749{
2750#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2751 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2752#else
2753 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2754#endif
2755}
2756
2757
2758/**
2759 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2760 */
2761IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2762{
2763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2764 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2765#else
2766 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2767#endif
2768}
2769
2770
2771/**
2772 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2773 */
2774IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2775{
2776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2777 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2778#else
2779 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2780#endif
2781}
2782
2783
2784/**
2785 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2786 * address.
2787 */
2788IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2789{
2790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2791 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2792#else
2793 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2794#endif
2795}
2796
2797
2798/**
2799 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2800 */
2801IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2802{
2803#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2804 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2805#else
2806 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2807#endif
2808}
2809
2810
2811/**
2812 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2813 */
2814IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2815{
2816#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2817 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2818#else
2819 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2820#endif
2821}
2822
2823
2824/**
2825 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2826 */
2827IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2828{
2829#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2830 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2831#else
2832 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2833#endif
2834}
2835
2836
2837/*********************************************************************************************************************************
2838* Helpers: Commit, rollback & unmap *
2839*********************************************************************************************************************************/
2840
2841 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2842 * Used by TB code to commit and unmap a read-write memory mapping.
2843 */
2844IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2845{
2846 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2847}
2848
2849
2850/**
2851 * Used by TB code to commit and unmap a read-write memory mapping.
2852 */
2853IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2854{
2855 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2856}
2857
2858
2859/**
2860 * Used by TB code to commit and unmap a write-only memory mapping.
2861 */
2862IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2863{
2864 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2865}
2866
2867
2868/**
2869 * Used by TB code to commit and unmap a read-only memory mapping.
2870 */
2871IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2872{
2873 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2874}
2875
2876
2877/**
2878 * Reinitializes the native recompiler state.
2879 *
2880 * Called before starting a new recompile job.
2881 */
2882static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2883{
2884 pReNative->cLabels = 0;
2885 pReNative->bmLabelTypes = 0;
2886 pReNative->cFixups = 0;
2887#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2888 pReNative->pDbgInfo->cEntries = 0;
2889#endif
2890 pReNative->pTbOrg = pTb;
2891 pReNative->cCondDepth = 0;
2892 pReNative->uCondSeqNo = 0;
2893 pReNative->uCheckIrqSeqNo = 0;
2894 pReNative->uTlbSeqNo = 0;
2895
2896 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2897#if IEMNATIVE_HST_GREG_COUNT < 32
2898 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2899#endif
2900 ;
2901 pReNative->Core.bmHstRegsWithGstShadow = 0;
2902 pReNative->Core.bmGstRegShadows = 0;
2903 pReNative->Core.bmVars = 0;
2904 pReNative->Core.bmStack = 0;
2905 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2906 pReNative->Core.u64ArgVars = UINT64_MAX;
2907
2908 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 9);
2909 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2910 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2911 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2912 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2913 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2914 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2915 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2916 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2917 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2918
2919 /* Full host register reinit: */
2920 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2921 {
2922 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2923 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2924 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2925 }
2926
2927 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2928 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2929#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2930 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2931#endif
2932#ifdef IEMNATIVE_REG_FIXED_TMP0
2933 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2934#endif
2935 );
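    /* Mark the remaining fixed registers (everything in IEMNATIVE_REG_FIXED_MASK
       except the pVCpu, CPUMCTX and TMP0 registers that get dedicated roles
       further down) as generally reserved. */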
2936 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2937 {
2938 fRegs &= ~RT_BIT_32(idxReg);
2939 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2940 }
2941
2942 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2943#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2944 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2945#endif
2946#ifdef IEMNATIVE_REG_FIXED_TMP0
2947 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2948#endif
2949 return pReNative;
2950}
2951
2952
2953/**
2954 * Allocates and initializes the native recompiler state.
2955 *
2956 * This is called the first time an EMT wants to recompile something.
2957 *
2958 * @returns Pointer to the new recompiler state.
2959 * @param pVCpu The cross context virtual CPU structure of the calling
2960 * thread.
2961 * @param pTb The TB that's about to be recompiled.
2962 * @thread EMT(pVCpu)
2963 */
2964static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2965{
2966 VMCPU_ASSERT_EMT(pVCpu);
2967
2968 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2969 AssertReturn(pReNative, NULL);
2970
2971 /*
2972 * Try allocate all the buffers and stuff we need.
2973 */
2974 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2975 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2976 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2977#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2978 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2979#endif
2980 if (RT_LIKELY( pReNative->pInstrBuf
2981 && pReNative->paLabels
2982 && pReNative->paFixups)
2983#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2984 && pReNative->pDbgInfo
2985#endif
2986 )
2987 {
2988 /*
2989 * Set the buffer & array sizes on success.
2990 */
2991 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2992 pReNative->cLabelsAlloc = _8K;
2993 pReNative->cFixupsAlloc = _16K;
2994#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2995 pReNative->cDbgInfoAlloc = _16K;
2996#endif
2997
2998 /* Other constant stuff: */
2999 pReNative->pVCpu = pVCpu;
3000
3001 /*
3002 * Done, just need to save it and reinit it.
3003 */
3004 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3005 return iemNativeReInit(pReNative, pTb);
3006 }
3007
3008 /*
3009 * Failed. Cleanup and return.
3010 */
3011 AssertFailed();
3012 RTMemFree(pReNative->pInstrBuf);
3013 RTMemFree(pReNative->paLabels);
3014 RTMemFree(pReNative->paFixups);
3015#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3016 RTMemFree(pReNative->pDbgInfo);
3017#endif
3018 RTMemFree(pReNative);
3019 return NULL;
3020}
3021
3022
3023/**
3024 * Creates a label
3025 *
3026 * If the label does not yet have a defined position,
3027 * call iemNativeLabelDefine() later to set it.
3028 *
3029 * @returns Label ID. Throws VBox status code on failure, so no need to check
3030 * the return value.
3031 * @param pReNative The native recompile state.
3032 * @param enmType The label type.
3033 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3034 * label is not yet defined (default).
3035 * @param uData Data associated with the label. Only applicable to
3036 * certain types of labels. Default is zero.
3037 */
3038DECL_HIDDEN_THROW(uint32_t)
3039iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3040 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3041{
3042 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3043
3044 /*
3045 * Locate existing label definition.
3046 *
3047 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3048 * and uData is zero.
3049 */
3050 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3051 uint32_t const cLabels = pReNative->cLabels;
3052 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3053#ifndef VBOX_STRICT
3054 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3055 && offWhere == UINT32_MAX
3056 && uData == 0
3057#endif
3058 )
3059 {
3060#ifndef VBOX_STRICT
3061 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3062 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3063 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3064 if (idxLabel < pReNative->cLabels)
3065 return idxLabel;
3066#else
3067 for (uint32_t i = 0; i < cLabels; i++)
3068 if ( paLabels[i].enmType == enmType
3069 && paLabels[i].uData == uData)
3070 {
3071 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3072 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3073 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3074 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3075 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3076 return i;
3077 }
3078 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3079 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3080#endif
3081 }
3082
3083 /*
3084 * Make sure we've got room for another label.
3085 */
3086 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3087 { /* likely */ }
3088 else
3089 {
3090 uint32_t cNew = pReNative->cLabelsAlloc;
3091 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3092 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3093 cNew *= 2;
3094        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3095 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3096 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3097 pReNative->paLabels = paLabels;
3098 pReNative->cLabelsAlloc = cNew;
3099 }
3100
3101 /*
3102 * Define a new label.
3103 */
3104 paLabels[cLabels].off = offWhere;
3105 paLabels[cLabels].enmType = enmType;
3106 paLabels[cLabels].uData = uData;
3107 pReNative->cLabels = cLabels + 1;
3108
3109 Assert((unsigned)enmType < 64);
3110 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3111
3112 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3113 {
3114 Assert(uData == 0);
3115 pReNative->aidxUniqueLabels[enmType] = cLabels;
3116 }
3117
3118 if (offWhere != UINT32_MAX)
3119 {
3120#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3121 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3122 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3123#endif
3124 }
3125 return cLabels;
3126}
3127
3128
3129/**
3130 * Defines the location of an existing label.
3131 *
3132 * @param pReNative The native recompile state.
3133 * @param idxLabel The label to define.
3134 * @param offWhere The position.
3135 */
3136DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3137{
3138 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3139 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3140 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3141 pLabel->off = offWhere;
3142#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3143 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3144 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3145#endif
3146}
3147
3148
3149/**
3150 * Looks up a label.
3151 *
3152 * @returns Label ID if found, UINT32_MAX if not.
3153 */
3154static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3155 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3156{
3157 Assert((unsigned)enmType < 64);
3158 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3159 {
3160 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3161 return pReNative->aidxUniqueLabels[enmType];
3162
3163 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3164 uint32_t const cLabels = pReNative->cLabels;
3165 for (uint32_t i = 0; i < cLabels; i++)
3166 if ( paLabels[i].enmType == enmType
3167 && paLabels[i].uData == uData
3168 && ( paLabels[i].off == offWhere
3169 || offWhere == UINT32_MAX
3170 || paLabels[i].off == UINT32_MAX))
3171 return i;
3172 }
3173 return UINT32_MAX;
3174}
3175
3176
3177/**
3178 * Adds a fixup.
3179 *
3180 * @throws VBox status code (int) on failure.
3181 * @param pReNative The native recompile state.
3182 * @param offWhere The instruction offset of the fixup location.
3183 * @param idxLabel The target label ID for the fixup.
3184 * @param enmType The fixup type.
3185 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3186 */
3187DECL_HIDDEN_THROW(void)
3188iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3189 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3190{
3191 Assert(idxLabel <= UINT16_MAX);
3192 Assert((unsigned)enmType <= UINT8_MAX);
3193
3194 /*
3195 * Make sure we've room.
3196 */
3197 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3198 uint32_t const cFixups = pReNative->cFixups;
3199 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3200 { /* likely */ }
3201 else
3202 {
3203 uint32_t cNew = pReNative->cFixupsAlloc;
3204 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3205 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3206 cNew *= 2;
3207 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3208 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3209 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3210 pReNative->paFixups = paFixups;
3211 pReNative->cFixupsAlloc = cNew;
3212 }
3213
3214 /*
3215 * Add the fixup.
3216 */
3217 paFixups[cFixups].off = offWhere;
3218 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3219 paFixups[cFixups].enmType = enmType;
3220 paFixups[cFixups].offAddend = offAddend;
3221 pReNative->cFixups = cFixups + 1;
3222}
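
/*
 * Illustrative sketch (not compiled): the typical forward-branch pattern built
 * from the label/fixup helpers above.  The label and fixup type enumerators
 * used below are placeholders, as the concrete types depend on the emitter and
 * the host architecture.
 */
#if 0
static uint32_t iemNativeSketchForwardBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* 1. Create the label as a forward declaration (offWhere=UINT32_MAX). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else /* placeholder type */);

    /* 2. Emit the branch and record a fixup at its displacement field so it
          can be patched once the label position is known. */
    iemNativeAddFixup(pReNative, off /* offset of the displacement to patch */, idxLabel,
                      kIemNativeFixupType_Rel32 /* placeholder type */, -4 /* emitter specific addend */);

    /* ... emit the instructions that the branch skips ... */

    /* 3. Define the label at the branch target; the final assembly pass resolves the fixup. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
    return off;
}
#endif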
3223
3224
3225/**
3226 * Slow code path for iemNativeInstrBufEnsure.
3227 */
3228DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3229{
3230 /* Double the buffer size till we meet the request. */
3231 uint32_t cNew = pReNative->cInstrBufAlloc;
3232 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3233 do
3234 cNew *= 2;
3235 while (cNew < off + cInstrReq);
3236
3237 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3238#ifdef RT_ARCH_ARM64
3239 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3240#else
3241 uint32_t const cbMaxInstrBuf = _2M;
3242#endif
3243 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3244
3245 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3246 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3247
3248#ifdef VBOX_STRICT
3249 pReNative->offInstrBufChecked = off + cInstrReq;
3250#endif
3251 pReNative->cInstrBufAlloc = cNew;
3252 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3253}
3254
3255#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3256
3257/**
3258 * Grows the static debug info array used during recompilation.
3259 *
3260 * @returns Pointer to the new debug info block; throws VBox status code on
3261 * failure, so no need to check the return value.
3262 */
3263DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3264{
3265 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3266 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3267 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3268 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3269 pReNative->pDbgInfo = pDbgInfo;
3270 pReNative->cDbgInfoAlloc = cNew;
3271 return pDbgInfo;
3272}
3273
3274
3275/**
3276 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3277 */
3278DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3279{
3280 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3281 { /* likely */ }
3282 else
3283 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3284 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3285}
3286
3287
3288/**
3289 * Debug Info: Adds a native offset record, if necessary.
3290 */
3291static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3292{
3293 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3294
3295 /*
3296 * Search backwards to see if we've got a similar record already.
3297 */
3298 uint32_t idx = pDbgInfo->cEntries;
3299 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3300 while (idx-- > idxStop)
3301 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3302 {
3303 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3304 return;
3305 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3306 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3307 break;
3308 }
3309
3310 /*
3311 * Add it.
3312 */
3313 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3314 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3315 pEntry->NativeOffset.offNative = off;
3316}
3317
3318
3319/**
3320 * Debug Info: Record info about a label.
3321 */
3322static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3323{
3324 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3325 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3326 pEntry->Label.uUnused = 0;
3327 pEntry->Label.enmLabel = (uint8_t)enmType;
3328 pEntry->Label.uData = uData;
3329}
3330
3331
3332/**
3333 * Debug Info: Record info about a threaded call.
3334 */
3335static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3336{
3337 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3338 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3339 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3340 pEntry->ThreadedCall.uUnused = 0;
3341 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3342}
3343
3344
3345/**
3346 * Debug Info: Record info about a new guest instruction.
3347 */
3348static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3349{
3350 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3351 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3352 pEntry->GuestInstruction.uUnused = 0;
3353 pEntry->GuestInstruction.fExec = fExec;
3354}
3355
3356
3357/**
3358 * Debug Info: Record info about guest register shadowing.
3359 */
3360static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3361 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3362{
3363 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3364 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3365 pEntry->GuestRegShadowing.uUnused = 0;
3366 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3367 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3368 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3369}
3370
3371#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3372
3373
3374/*********************************************************************************************************************************
3375* Register Allocator *
3376*********************************************************************************************************************************/
3377
3378/**
3379 * Register parameter indexes (indexed by argument number).
3380 */
3381DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3382{
3383 IEMNATIVE_CALL_ARG0_GREG,
3384 IEMNATIVE_CALL_ARG1_GREG,
3385 IEMNATIVE_CALL_ARG2_GREG,
3386 IEMNATIVE_CALL_ARG3_GREG,
3387#if defined(IEMNATIVE_CALL_ARG4_GREG)
3388 IEMNATIVE_CALL_ARG4_GREG,
3389# if defined(IEMNATIVE_CALL_ARG5_GREG)
3390 IEMNATIVE_CALL_ARG5_GREG,
3391# if defined(IEMNATIVE_CALL_ARG6_GREG)
3392 IEMNATIVE_CALL_ARG6_GREG,
3393# if defined(IEMNATIVE_CALL_ARG7_GREG)
3394 IEMNATIVE_CALL_ARG7_GREG,
3395# endif
3396# endif
3397# endif
3398#endif
3399};
3400
3401/**
3402 * Call register masks indexed by argument count.
3403 */
3404DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3405{
3406 0,
3407 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3408 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3409 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3410 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3411 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3412#if defined(IEMNATIVE_CALL_ARG4_GREG)
3413 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3414 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3415# if defined(IEMNATIVE_CALL_ARG5_GREG)
3416 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3417 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3418# if defined(IEMNATIVE_CALL_ARG6_GREG)
3419 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3420 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3421 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3422# if defined(IEMNATIVE_CALL_ARG7_GREG)
3423 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3424 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3425 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3426# endif
3427# endif
3428# endif
3429#endif
3430};
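
/*
 * Illustrative sketch (not compiled) of how the two tables above are indexed:
 * g_aidxIemNativeCallRegs maps an argument number to its host register, while
 * g_afIemNativeCallRegs gives the combined register mask for a given argument
 * count (e.g. for reserving or flushing all argument registers before a call).
 */
#if 0
    uint8_t  const idxRegArg2 = g_aidxIemNativeCallRegs[2]; /* host register carrying argument #2 */
    uint32_t const fArgRegs   = g_afIemNativeCallRegs[3];   /* mask covering ARG0, ARG1 and ARG2 */
#endif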
3431
3432#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3433/**
3434 * BP offset of the stack argument slots.
3435 *
3436 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3437 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3438 */
3439DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3440{
3441 IEMNATIVE_FP_OFF_STACK_ARG0,
3442# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3443 IEMNATIVE_FP_OFF_STACK_ARG1,
3444# endif
3445# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3446 IEMNATIVE_FP_OFF_STACK_ARG2,
3447# endif
3448# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3449 IEMNATIVE_FP_OFF_STACK_ARG3,
3450# endif
3451};
3452AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3453#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3454
3455/**
3456 * Info about shadowed guest register values.
3457 * @see IEMNATIVEGSTREG
3458 */
3459static struct
3460{
3461 /** Offset in VMCPU. */
3462 uint32_t off;
3463 /** The field size. */
3464 uint8_t cb;
3465 /** Name (for logging). */
3466 const char *pszName;
3467} const g_aGstShadowInfo[] =
3468{
3469#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3470 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3471 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3472 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3473 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3474 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3475 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3476 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3477 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3478 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3479 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3480 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3481 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3482 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3483 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3484 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3485 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3486 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3487 /* [kIemNativeGstReg_LivenessPadding17] = */ { UINT32_MAX / 4, 0, "pad17", },
3488 /* [kIemNativeGstReg_LivenessPadding18] = */ { UINT32_MAX / 4, 0, "pad18", },
3489 /* [kIemNativeGstReg_LivenessPadding19] = */ { UINT32_MAX / 4, 0, "pad19", },
3490 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3491 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3492 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3493 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3494 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3495 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3496 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3497 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3498 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3499 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3500 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3501 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3502 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3503 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3504 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3505 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3506 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3507 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3508 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3509 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3510 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3511 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3512 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3513 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3514 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3515#undef CPUMCTX_OFF_AND_SIZE
3516};
3517AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
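
/*
 * Illustrative sketch (not compiled): the table above gives an emitter the
 * VMCPU byte offset, field width and name of each shadowed guest field, e.g.
 * for logging or for emitting a context load/store.
 */
#if 0
    uint32_t const offVCpu = g_aGstShadowInfo[kIemNativeGstReg_Pc].off; /* byte offset of cpum.GstCtx.rip in VMCPU */
    uint8_t  const cbField = g_aGstShadowInfo[kIemNativeGstReg_Pc].cb;  /* 8 (bytes) for rip */
    Log12(("loading %s (%u bytes) from VMCPU+%#x\n", g_aGstShadowInfo[kIemNativeGstReg_Pc].pszName, cbField, offVCpu));
#endif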
3518
3519
3520/** Host CPU general purpose register names. */
3521DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3522{
3523#ifdef RT_ARCH_AMD64
3524 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3525#elif RT_ARCH_ARM64
3526 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3527 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3528#else
3529# error "port me"
3530#endif
3531};
3532
3533
3534DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3535 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3536{
3537 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3538
3539 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3540 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3541 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3542 return (uint8_t)idxReg;
3543}
3544
3545
3546#if 0 /* unused */
3547/**
3548 * Tries to locate a suitable register in the given register mask.
3549 *
3550 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3551 * failed.
3552 *
3553 * @returns Host register number on success, returns UINT8_MAX on failure.
3554 */
3555static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3556{
3557 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3558 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3559 if (fRegs)
3560 {
3561 /** @todo pick better here: */
3562 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3563
3564 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3565 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3566 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3567 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3568
3569 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3570 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3571 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3572 return idxReg;
3573 }
3574 return UINT8_MAX;
3575}
3576#endif /* unused */
3577
3578
3579/**
3580 * Locate a register, possibly freeing one up.
3581 *
3582 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3583 * failed.
3584 *
3585 * @returns Host register number on success. Returns UINT8_MAX if no registers
3586 *          found; the caller is expected to deal with this and raise an
3587 *          allocation-type-specific status code (if desired).
3588 *
3589 * @throws  VBox status code if we run into trouble spilling a variable or
3590 *          recording debug info.  Does NOT throw anything if we're out of
3591 * registers, though.
3592 */
3593static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3594 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3595{
3596 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3597 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3598 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3599
3600 /*
3601 * Try a freed register that's shadowing a guest register.
3602 */
3603 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3604 if (fRegs)
3605 {
3606 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3607
3608#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3609 /*
3610         * When we have liveness information, we use it to kick out all shadowed
3611         * guest registers that will not be needed any more in this TB.  If we're
3612 * lucky, this may prevent us from ending up here again.
3613 *
3614 * Note! We must consider the previous entry here so we don't free
3615 * anything that the current threaded function requires (current
3616 * entry is produced by the next threaded function).
3617 */
3618 uint32_t const idxCurCall = pReNative->idxCurCall;
3619 if (idxCurCall > 0)
3620 {
3621 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3622
3623 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3624 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3625# if 0
3626 IEMLIVENESSBIT Tmp = { pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64 }; /* mask of regs in either UNUSED */
3627 Tmp.fEflOther &= Tmp.fEflCf; /** @todo optimize this (pair of 3 (status), pair of 4 (in other), pair of 2, pair of 1). */
3628 Tmp.fEflOther &= Tmp.fEflPf;
3629 Tmp.fEflOther &= Tmp.fEflAf;
3630 Tmp.fEflOther &= Tmp.fEflZf;
3631 Tmp.fEflOther &= Tmp.fEflSf;
3632 Tmp.fEflOther &= Tmp.fEflOf;
3633 Tmp.fEflCf = 0; /* not necessary, but better safe. */
3634 Tmp.fEflPf = 0;
3635 Tmp.fEflAf = 0;
3636 Tmp.fEflZf = 0;
3637 Tmp.fEflSf = 0;
3638 Tmp.fEflOf = 0;
3639 uint64_t fToFreeMask = Tmp.bm64;
3640# else
3641 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
3642 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3643 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3644 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3645 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3646 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
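            /* Note: the XOR of Bit0 and Bit1 yields a set bit for states 01 (UNUSED)
               and 10 (XCPT_OR_CALL), i.e. for registers whose value is not needed as
               input further down the TB.  EFLAGS is tracked as seven sub-fields
               (Other, CF, PF, AF, ZF, SF, OF) occupying the kIemNativeGstReg_EFlags
               bit and the six bits above it, so the shift-and-AND cascade above folds
               them into a single bit: EFLAGS only becomes freeable when every
               sub-field is freeable.  The last two statements drop the raw sub-field
               bits and put the folded result back at the kIemNativeGstReg_EFlags
               position. */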
3647# endif
3648
3649 /* If it matches any shadowed registers. */
3650 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3651 {
3652 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3653 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3654 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3655
3656 /* See if we've got any unshadowed registers we can return now. */
3657 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3658 if (fUnshadowedRegs)
3659 {
3660 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3661 return (fPreferVolatile
3662 ? ASMBitFirstSetU32(fUnshadowedRegs)
3663 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3664 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3665 - 1;
3666 }
3667 }
3668 }
3669#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3670
3671 unsigned const idxReg = (fPreferVolatile
3672 ? ASMBitFirstSetU32(fRegs)
3673 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3674 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3675 - 1;
3676
3677 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3678 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3679 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3680 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3681
3682 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3683 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3684 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3685 return idxReg;
3686 }
3687
3688 /*
3689 * Try free up a variable that's in a register.
3690 *
3691 * We do two rounds here, first evacuating variables we don't need to be
3692     * We do two rounds here, first evacuating variables that don't need to be
3693     * saved to the stack, then in the second round moving things to the stack.
3694 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3695 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3696 {
3697 uint32_t fVars = pReNative->Core.bmVars;
3698 while (fVars)
3699 {
3700 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3701 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3702 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3703 && (RT_BIT_32(idxReg) & fRegMask)
3704 && ( iLoop == 0
3705 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3706 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3707 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3708 {
3709 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3710 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3711 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3712 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3713 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3714 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3715
3716 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3717 {
3718 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3719 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3720 }
3721
3722 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3723 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3724
3725 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3726 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3727 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3728 return idxReg;
3729 }
3730 fVars &= ~RT_BIT_32(idxVar);
3731 }
3732 }
3733
3734 return UINT8_MAX;
3735}
3736
3737
3738/**
3739 * Reassigns a variable to a different register specified by the caller.
3740 *
3741 * @returns The new code buffer position.
3742 * @param pReNative The native recompile state.
3743 * @param off The current code buffer position.
3744 * @param idxVar The variable index.
3745 * @param idxRegOld The old host register number.
3746 * @param idxRegNew The new host register number.
3747 * @param pszCaller The caller for logging.
3748 */
3749static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3750 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3751{
3752 Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
3753 RT_NOREF(pszCaller);
3754
3755 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3756
3757 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3758 Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
3759 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3760 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3761
3762 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3763 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3764 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3765 if (fGstRegShadows)
3766 {
3767 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3768 | RT_BIT_32(idxRegNew);
3769 while (fGstRegShadows)
3770 {
3771 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3772 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3773
3774 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3775 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3776 }
3777 }
3778
3779 pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
3780 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3781 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3782 return off;
3783}
3784
3785
3786/**
3787 * Moves a variable to a different register or spills it onto the stack.
3788 *
3789 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3790 * kinds can easily be recreated if needed later.
3791 *
3792 * @returns The new code buffer position.
3793 * @param pReNative The native recompile state.
3794 * @param off The current code buffer position.
3795 * @param idxVar The variable index.
3796 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3797 * call-volatile registers.
3798 */
3799static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3800 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3801{
3802 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3803 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
3804 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
3805
3806 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
3807 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3808 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3809 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3810 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3811 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3812 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3813 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3814 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3815
3816
3817 /** @todo Add statistics on this.*/
3818 /** @todo Implement basic variable liveness analysis (python) so variables
3819     * can be freed immediately once no longer used.  As it stands, we risk
3820     * trashing registers and stack space on dead variables.
3821 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3822
3823 /*
3824 * First try move it to a different register, as that's cheaper.
3825 */
3826 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3827 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3828 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3829 if (fRegs)
3830 {
3831 /* Avoid using shadow registers, if possible. */
3832 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3833 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3834 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3835 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3836 }
3837
3838 /*
3839 * Otherwise we must spill the register onto the stack.
3840 */
3841 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3842 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3843 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3844 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3845
3846 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3847 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3848 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3849 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3850 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3851 return off;
3852}
3853
3854
3855/**
3856 * Allocates a temporary host general purpose register.
3857 *
3858 * This may emit code to save register content onto the stack in order to free
3859 * up a register.
3860 *
3861 * @returns The host register number; throws VBox status code on failure,
3862 * so no need to check the return value.
3863 * @param pReNative The native recompile state.
3864 * @param poff Pointer to the variable with the code buffer position.
3865 * This will be update if we need to move a variable from
3866 *                          This will be updated if we need to move a variable from
3867 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3868 * registers (@c true, default) or the other way around
3869 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3870 */
3871DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3872{
3873 /*
3874 * Try find a completely unused register, preferably a call-volatile one.
3875 */
3876 uint8_t idxReg;
3877 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3878 & ~pReNative->Core.bmHstRegsWithGstShadow
3879 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3880 if (fRegs)
3881 {
3882 if (fPreferVolatile)
3883 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3884 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3885 else
3886 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3887 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3888 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3889 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3890 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3891 }
3892 else
3893 {
3894 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3895 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3896 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3897 }
3898 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3899}
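
/*
 * Illustrative sketch (not compiled): typical lifetime of a temporary register.
 * The releasing helper named below (iemNativeRegFreeTmp) is assumed to be the
 * counterpart of this allocator; only the allocation side is defined in this
 * part of the file.
 */
#if 0
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);   /* may spill a variable and advance off */
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp, 0x42);  /* use it as scratch */
    /* ... */
    iemNativeRegFreeTmp(pReNative, idxRegTmp);                         /* assumed release counterpart */
#endif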
3900
3901
3902/**
3903 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
3904 * registers.
3905 *
3906 * @returns The host register number; throws VBox status code on failure,
3907 * so no need to check the return value.
3908 * @param pReNative The native recompile state.
3909 * @param poff Pointer to the variable with the code buffer position.
3910 *                          This will be updated if we need to move a variable from
3911 * register to stack in order to satisfy the request.
3912 * @param fRegMask Mask of acceptable registers.
3913 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3914 * registers (@c true, default) or the other way around
3915 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3916 */
3917DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3918 bool fPreferVolatile /*= true*/)
3919{
3920 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3921 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3922
3923 /*
3924 * Try find a completely unused register, preferably a call-volatile one.
3925 */
3926 uint8_t idxReg;
3927 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3928 & ~pReNative->Core.bmHstRegsWithGstShadow
3929 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3930 & fRegMask;
3931 if (fRegs)
3932 {
3933 if (fPreferVolatile)
3934 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3935 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3936 else
3937 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3938 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3939 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3940 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3941 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3942 }
3943 else
3944 {
3945 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3946 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3947 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3948 }
3949 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3950}
3951
3952
3953/**
3954 * Allocates a temporary register for loading an immediate value into.
3955 *
3956 * This will emit code to load the immediate, unless there happens to be an
3957 * unused register with the value already loaded.
3958 *
3959 * The caller will not modify the returned register; it must be considered
3960 * read-only. Free using iemNativeRegFreeTmpImm.
3961 *
3962 * @returns The host register number; throws VBox status code on failure, so no
3963 * need to check the return value.
3964 * @param pReNative The native recompile state.
3965 * @param poff Pointer to the variable with the code buffer position.
3966 * @param uImm The immediate value that the register must hold upon
3967 * return.
3968 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3969 * registers (@c true, default) or the other way around
3970 * (@c false).
3971 *
3972 * @note Reusing immediate values has not been implemented yet.
3973 */
3974DECL_HIDDEN_THROW(uint8_t)
3975iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3976{
3977 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3978 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3979 return idxReg;
3980}
3981
3982
3983/**
3984 * Helper for iemNativeLivenessGetStateByGstReg.
3985 *
3986 * @returns IEMLIVENESS_STATE_XXX
3987 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
3988 * ORed together.
3989 */
3990DECL_FORCE_INLINE(uint32_t)
3991iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
3992{
3993 /* INPUT trumps anything else. */
3994 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
3995 return IEMLIVENESS_STATE_INPUT;
3996
3997 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
3998 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
3999 {
4000 /* If not all sub-fields are clobbered they must be considered INPUT. */
4001 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4002 return IEMLIVENESS_STATE_INPUT;
4003 return IEMLIVENESS_STATE_CLOBBERED;
4004 }
4005
4006 /* XCPT_OR_CALL trumps UNUSED. */
4007 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4008 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4009
4010 return IEMLIVENESS_STATE_UNUSED;
4011}
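
/*
 * Worked example for the merge above (illustrative): if all EFLAGS sub-fields
 * are CLOBBERED except PF, which is INPUT, then fMergedStateExp2 contains both
 * RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED) and RT_BIT_32(IEMLIVENESS_STATE_INPUT);
 * the INPUT check wins, so the merged EFLAGS state is INPUT - a single live
 * sub-field keeps the whole register live.
 */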
4012
4013#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4014
4015DECL_FORCE_INLINE(uint32_t)
4016iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4017{
4018 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4019 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4020}
4021
4022
4023DECL_FORCE_INLINE(uint32_t)
4024iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4025{
4026 uint32_t uRet = ((pLivenessEntry->Bit0.bm64 >> (unsigned)enmGstReg) & 1)
4027 | (((pLivenessEntry->Bit1.bm64 >> (unsigned)enmGstReg) << 1) & 2);
4028 if (enmGstReg == kIemNativeGstReg_EFlags)
4029 {
4030 /* Merge the eflags states to one. */
4031 uRet = RT_BIT_32(uRet);
4032 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4033 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4034 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4035 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4036 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4037 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4038 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4039 }
4040 return uRet;
4041}
4042
4043
4044# ifdef VBOX_STRICT
4045/** For assertions only; the caller must ensure that idxCurCall isn't zero. */
4046DECL_FORCE_INLINE(uint32_t)
4047iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4048{
4049 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4050}
4051# endif /* VBOX_STRICT */
4052
4053#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4054
4055/**
4056 * Marks host register @a idxHstReg as containing a shadow copy of guest
4057 * register @a enmGstReg.
4058 *
4059 * ASSUMES that the caller has made sure @a enmGstReg is not associated with any
4060 * host register before calling.
4061 */
4062DECL_FORCE_INLINE(void)
4063iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4064{
4065 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4066 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4067 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4068
4069 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4070 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4071 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4072 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4073#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4074 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4075 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4076#else
4077 RT_NOREF(off);
4078#endif
4079}
4080
4081
4082/**
4083 * Clear any guest register shadow claims from @a idxHstReg.
4084 *
4085 * The register does not need to be shadowing any guest registers.
4086 */
4087DECL_FORCE_INLINE(void)
4088iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4089{
4090 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4091 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4092 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4093 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4094 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4095
4096#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4097 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4098 if (fGstRegs)
4099 {
4100 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4101 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4102 while (fGstRegs)
4103 {
4104 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4105 fGstRegs &= ~RT_BIT_64(iGstReg);
4106 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4107 }
4108 }
4109#else
4110 RT_NOREF(off);
4111#endif
4112
4113 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4114 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4115 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4116}
4117
4118
4119/**
4120 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4121 * and global overview flags.
4122 */
4123DECL_FORCE_INLINE(void)
4124iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4125{
4126 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4127 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4128 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4129 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4130 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4131 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4132 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4133
4134#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4135 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4136 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4137#else
4138 RT_NOREF(off);
4139#endif
4140
4141 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4142 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4143 if (!fGstRegShadowsNew)
4144 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4145 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4146}
4147
4148
4149#if 0 /* unused */
4150/**
4151 * Clear any guest register shadow claim for @a enmGstReg.
4152 */
4153DECL_FORCE_INLINE(void)
4154iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4155{
4156 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4157 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4158 {
4159 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4160 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4161 }
4162}
4163#endif
4164
4165
4166/**
4167 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4168 * as the new shadow of it.
4169 *
4170 * Unlike the other guest reg shadow helpers, this does the logging for you.
4171 * However, the liveness state is not asserted here; the caller must do
4172 * that.
4173 */
4174DECL_FORCE_INLINE(void)
4175iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4176 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4177{
4178 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4179 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4180 {
4181 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4182 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4183 if (idxHstRegOld == idxHstRegNew)
4184 return;
4185 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4186 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4187 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4188 }
4189 else
4190 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4191 g_aGstShadowInfo[enmGstReg].pszName));
4192 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4193}
4194
4195
4196/**
4197 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4198 * to @a idxRegTo.
4199 */
4200DECL_FORCE_INLINE(void)
4201iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4202 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4203{
4204 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4205 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4206 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4207 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4208 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4209 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4210 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4211 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4212 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4213
4214 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4215 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4216 if (!fGstRegShadowsFrom)
4217 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4218 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4219 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4220 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4221#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4222 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4223 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4224#else
4225 RT_NOREF(off);
4226#endif
4227}
4228
4229
4230/**
4231 * Allocates a temporary host general purpose register for keeping a guest
4232 * register value.
4233 *
4234 * Since we may already have a register holding the guest register value,
4235 * code will be emitted to do the loading if that's not the case. Code may also
4236 * be emitted if we have to free up a register to satisfy the request.
4237 *
4238 * @returns The host register number; throws VBox status code on failure, so no
4239 * need to check the return value.
4240 * @param pReNative The native recompile state.
4241 * @param poff Pointer to the variable with the code buffer
4242 *                          position. This will be updated if we need to move a
4243 * variable from register to stack in order to satisfy
4244 * the request.
4245 * @param   enmGstReg       The guest register that is to be updated.
4246 * @param enmIntendedUse How the caller will be using the host register.
4247 * @param   fNoVolatileRegs Set if no volatile registers are allowed, clear if any
4248 * register is okay (default). The ASSUMPTION here is
4249 * that the caller has already flushed all volatile
4250 * registers, so this is only applied if we allocate a
4251 * new register.
4252 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4253 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4254 */
4255DECL_HIDDEN_THROW(uint8_t)
4256iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4257 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4258 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4259{
4260 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4261#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4262 AssertMsg( fSkipLivenessAssert
4263 || pReNative->idxCurCall == 0
4264 || enmGstReg == kIemNativeGstReg_Pc
4265 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4266 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4267 : IEMLIVENESS_STATE_IS_ACCESS_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4268 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4269#endif
4270 RT_NOREF(fSkipLivenessAssert);
4271#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4272 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4273#endif
4274 uint32_t const fRegMask = !fNoVolatileRegs
4275 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4276 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4277
4278 /*
4279 * First check if the guest register value is already in a host register.
4280 */
4281 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4282 {
4283 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4284 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4285 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4286 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4287
4288 /* It's not supposed to be allocated... */
4289 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4290 {
4291 /*
4292 * If the register will trash the guest shadow copy, try find a
4293 * completely unused register we can use instead. If that fails,
4294 * we need to disassociate the host reg from the guest reg.
4295 */
4296 /** @todo would be nice to know if preserving the register is in any way helpful. */
4297 /* If the purpose is calculations, try duplicate the register value as
4298 we'll be clobbering the shadow. */
4299 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4300 && ( ~pReNative->Core.bmHstRegs
4301 & ~pReNative->Core.bmHstRegsWithGstShadow
4302 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4303 {
4304 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4305
4306 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4307
4308 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4309 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4310 g_apszIemNativeHstRegNames[idxRegNew]));
4311 idxReg = idxRegNew;
4312 }
4313 /* If the current register matches the restrictions, go ahead and allocate
4314 it for the caller. */
4315 else if (fRegMask & RT_BIT_32(idxReg))
4316 {
4317 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4318 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4319 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4320 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4321 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4322 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4323 else
4324 {
4325 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4326 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4327 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4328 }
4329 }
4330 /* Otherwise, allocate a register that satisfies the caller and transfer
4331 the shadowing if compatible with the intended use. (This basically
4332 means the call wants a non-volatile register (RSP push/pop scenario).) */
4333 else
4334 {
4335 Assert(fNoVolatileRegs);
4336 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4337 !fNoVolatileRegs
4338 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4339 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4340 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4341 {
4342 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4343                    Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4344 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4345 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4346 }
4347 else
4348 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4349 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4350 g_apszIemNativeHstRegNames[idxRegNew]));
4351 idxReg = idxRegNew;
4352 }
4353 }
4354 else
4355 {
4356 /*
4357 * Oops. Shadowed guest register already allocated!
4358 *
4359 * Allocate a new register, copy the value and, if updating, the
4360 * guest shadow copy assignment to the new register.
4361 */
4362 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4363 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4364 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4365 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4366
4367 /** @todo share register for readonly access. */
4368 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4369 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4370
4371 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4372 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4373
4374 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4375 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4376 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4377 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4378 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4379 else
4380 {
4381 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4382 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4383 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4384 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4385 }
4386 idxReg = idxRegNew;
4387 }
4388 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4389
4390#ifdef VBOX_STRICT
4391 /* Strict builds: Check that the value is correct. */
4392 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4393#endif
4394
4395 return idxReg;
4396 }
4397
4398 /*
4399      * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4400 */
4401 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4402
4403 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4404 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4405
4406 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4407 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4408 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4409 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4410
4411 return idxRegNew;
4412}
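
/* Reading aid, a minimal usage sketch of the allocator above (illustrative only;
   see e.g. iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0 further down for a
   real call site):
       uint8_t const idxTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                                 (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
                                                                 kIemNativeGstRegUse_ForUpdate);
       // ... emit code operating on idxTmpReg ...
       iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */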
4413
4414
4415/**
4416 * Allocates a temporary host general purpose register that already holds the
4417 * given guest register value.
4418 *
4419 * The use case for this function is places where the shadowing state cannot be
4420 * modified due to branching and such. This will fail if we don't have a
4421 * current shadow copy handy or if it's incompatible. The only code that will
4422 * be emitted here is value checking code in strict builds.
4423 *
4424 * The intended use can only be readonly!
4425 *
4426 * @returns The host register number, UINT8_MAX if not present.
4427 * @param pReNative The native recompile state.
4428 * @param poff Pointer to the instruction buffer offset.
4429 * Will be updated in strict builds if a register is
4430 * found.
4431 * @param   enmGstReg       The guest register that is to be used.
4432 * @note In strict builds, this may throw instruction buffer growth failures.
4433 * Non-strict builds will not throw anything.
4434 * @sa iemNativeRegAllocTmpForGuestReg
4435 */
4436DECL_HIDDEN_THROW(uint8_t)
4437iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4438{
4439 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4440#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4441 AssertMsg( pReNative->idxCurCall == 0
4442 || IEMLIVENESS_STATE_IS_ACCESS_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4443 || enmGstReg == kIemNativeGstReg_Pc,
4444 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4445#endif
4446
4447 /*
4448 * First check if the guest register value is already in a host register.
4449 */
4450 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4451 {
4452 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4453 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4454 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4455 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4456
4457 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4458 {
4459 /*
4460 * We only do readonly use here, so easy compared to the other
4461 * variant of this code.
4462 */
4463 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4464 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4465 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4466 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4467 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4468
4469#ifdef VBOX_STRICT
4470 /* Strict builds: Check that the value is correct. */
4471 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4472#else
4473 RT_NOREF(poff);
4474#endif
4475 return idxReg;
4476 }
4477 }
4478
4479 return UINT8_MAX;
4480}
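
/* Reading aid, the intended check-and-fallback pattern for the function above
   (illustrative only):
       uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
       if (idxPcReg != UINT8_MAX)
       {   // a usable shadow copy is handy; use it without touching the shadowing state
           ...
       }
       // else: no usable shadow copy, the caller must fall back to another strategy.
 */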
4481
4482
4483DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
4484
4485
4486/**
4487 * Allocates argument registers for a function call.
4488 *
4489 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4490 * need to check the return value.
4491 * @param pReNative The native recompile state.
4492 * @param off The current code buffer offset.
4493 * @param cArgs The number of arguments the function call takes.
4494 */
4495DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4496{
4497 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4498 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4499 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4500 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4501
4502 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4503 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4504 else if (cArgs == 0)
4505         return off;
4506
4507 /*
4508      * Do we get lucky and all registers are free and not shadowing anything?
4509 */
4510 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4511 for (uint32_t i = 0; i < cArgs; i++)
4512 {
4513 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4514 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4515 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4516 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4517 }
4518 /*
4519 * Okay, not lucky so we have to free up the registers.
4520 */
4521 else
4522 for (uint32_t i = 0; i < cArgs; i++)
4523 {
4524 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4525 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4526 {
4527 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4528 {
4529 case kIemNativeWhat_Var:
4530 {
4531 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4532 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
4533 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4534 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4535 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4536
4537 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4538 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4539 else
4540 {
4541 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4542 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4543 }
4544 break;
4545 }
4546
4547 case kIemNativeWhat_Tmp:
4548 case kIemNativeWhat_Arg:
4549 case kIemNativeWhat_rc:
4550 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4551 default:
4552 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4553 }
4554
4555 }
4556 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4557 {
4558 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4559 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4560 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4561 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4562 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4563 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4564 }
4565 else
4566 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4567 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4568 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4569 }
4570 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4571     return off;
4572}
4573
4574
4575DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4576
4577
4578#if 0
4579/**
4580 * Frees a register assignment of any type.
4581 *
4582 * @param pReNative The native recompile state.
4583 * @param idxHstReg The register to free.
4584 *
4585 * @note Does not update variables.
4586 */
4587DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4588{
4589 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4590 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4591 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4592 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4593 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4594 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4595 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4596 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4597 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4598 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4599 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4600 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4601 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4602 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4603
4604 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4605 /* no flushing, right:
4606 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4607 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4608 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4609 */
4610}
4611#endif
4612
4613
4614/**
4615 * Frees a temporary register.
4616 *
4617 * Any shadow copies of guest registers assigned to the host register will not
4618 * be flushed by this operation.
4619 */
4620DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4621{
4622 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4623 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4624 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4625 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4626 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4627}
4628
4629
4630/**
4631 * Frees a temporary immediate register.
4632 *
4633 * It is assumed that the caller has not modified the register, so it still holds
4634 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4635 */
4636DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4637{
4638 iemNativeRegFreeTmp(pReNative, idxHstReg);
4639}
4640
4641
4642/**
4643 * Frees a register assigned to a variable.
4644 *
4645 * The register will be disassociated from the variable.
4646 */
4647DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4648{
4649 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4650 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4651 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4652 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4653 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4654
4655 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4656 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4657 if (!fFlushShadows)
4658 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
4659 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4660 else
4661 {
4662 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4663 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4664 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4665 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4666 uint64_t fGstRegShadows = fGstRegShadowsOld;
4667 while (fGstRegShadows)
4668 {
4669 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4670 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4671
4672 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4673 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4674 }
4675 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
4676 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4677 }
4678}
4679
4680
4681/**
4682 * Called right before emitting a call instruction to move anything important
4683 * out of call-volatile registers, free and flush the call-volatile registers,
4684 * optionally freeing argument variables.
4685 *
4686 * @returns New code buffer offset; throws VBox status code on failure.
4687 * @param pReNative The native recompile state.
4688 * @param off The code buffer offset.
4689 * @param cArgs The number of arguments the function call takes.
4690 * It is presumed that the host register part of these have
4691 * been allocated as such already and won't need moving,
4692 * just freeing.
4693 * @param fKeepVars Mask of variables that should keep their register
4694 * assignments. Caller must take care to handle these.
4695 */
4696DECL_HIDDEN_THROW(uint32_t)
4697iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4698{
4699 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4700
4701 /* fKeepVars will reduce this mask. */
4702 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4703
4704 /*
4705 * Move anything important out of volatile registers.
4706 */
4707 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4708 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4709 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4710#ifdef IEMNATIVE_REG_FIXED_TMP0
4711 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4712#endif
4713 & ~g_afIemNativeCallRegs[cArgs];
4714
4715 fRegsToMove &= pReNative->Core.bmHstRegs;
4716 if (!fRegsToMove)
4717 { /* likely */ }
4718 else
4719 {
4720 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4721 while (fRegsToMove != 0)
4722 {
4723 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4724 fRegsToMove &= ~RT_BIT_32(idxReg);
4725
4726 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4727 {
4728 case kIemNativeWhat_Var:
4729 {
4730 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4731 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
4732 Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
4733 Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
4734 if (!(RT_BIT_32(idxVar) & fKeepVars))
4735 {
4736 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
4737 idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
4738 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
4739 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4740 else
4741 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4742 }
4743 else
4744 fRegsToFree &= ~RT_BIT_32(idxReg);
4745 continue;
4746 }
4747
4748 case kIemNativeWhat_Arg:
4749 AssertMsgFailed(("What?!?: %u\n", idxReg));
4750 continue;
4751
4752 case kIemNativeWhat_rc:
4753 case kIemNativeWhat_Tmp:
4754 AssertMsgFailed(("Missing free: %u\n", idxReg));
4755 continue;
4756
4757 case kIemNativeWhat_FixedTmp:
4758 case kIemNativeWhat_pVCpuFixed:
4759 case kIemNativeWhat_pCtxFixed:
4760 case kIemNativeWhat_FixedReserved:
4761 case kIemNativeWhat_Invalid:
4762 case kIemNativeWhat_End:
4763 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4764 }
4765 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4766 }
4767 }
4768
4769 /*
4770 * Do the actual freeing.
4771 */
4772 if (pReNative->Core.bmHstRegs & fRegsToFree)
4773 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4774 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4775 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4776
4777 /* If there are guest register shadows in any call-volatile register, we
4778       have to clear the corresponding guest register masks for each register. */
4779 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4780 if (fHstRegsWithGstShadow)
4781 {
4782 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4783 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4784 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4785 do
4786 {
4787 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4788 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4789
4790 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4791 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4792 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4793 } while (fHstRegsWithGstShadow != 0);
4794 }
4795
4796 return off;
4797}
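
/* Note: the typical emission sequence around a helper call is: flush the relevant
   guest shadows (iemNativeRegFlushGuestShadows), then call the function above to
   vacate the call-volatile registers, load the arguments, emit the call and check
   the status - see iemNativeEmitCImplCall and iemNativeEmitThreadedCall below for
   concrete instances of this pattern. */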
4798
4799
4800/**
4801 * Flushes a set of guest register shadow copies.
4802 *
4803 * This is usually done after calling a threaded function or a C-implementation
4804 * of an instruction.
4805 *
4806 * @param pReNative The native recompile state.
4807 * @param fGstRegs Set of guest registers to flush.
4808 */
4809DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4810{
4811 /*
4812 * Reduce the mask by what's currently shadowed
4813 */
4814 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4815 fGstRegs &= bmGstRegShadowsOld;
4816 if (fGstRegs)
4817 {
4818 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4819 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4820 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4821 if (bmGstRegShadowsNew)
4822 {
4823 /*
4824 * Partial.
4825 */
4826 do
4827 {
4828 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4829 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4830 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4831 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4832 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4833
4834 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4835 fGstRegs &= ~fInThisHstReg;
4836 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4837 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4838 if (!fGstRegShadowsNew)
4839 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4840 } while (fGstRegs != 0);
4841 }
4842 else
4843 {
4844 /*
4845 * Clear all.
4846 */
4847 do
4848 {
4849 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4850 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4851 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4852 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4853 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4854
4855 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4856 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4857 } while (fGstRegs != 0);
4858 pReNative->Core.bmHstRegsWithGstShadow = 0;
4859 }
4860 }
4861}
4862
4863
4864/**
4865 * Flushes guest register shadow copies held by a set of host registers.
4866 *
4867 * This is used with the TLB lookup code for ensuring that we don't carry on
4868 * with any guest shadows in volatile registers, as these will get corrupted by
4869 * a TLB miss.
4870 *
4871 * @param pReNative The native recompile state.
4872 * @param fHstRegs Set of host registers to flush guest shadows for.
4873 */
4874DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4875{
4876 /*
4877 * Reduce the mask by what's currently shadowed.
4878 */
4879 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4880 fHstRegs &= bmHstRegsWithGstShadowOld;
4881 if (fHstRegs)
4882 {
4883 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4884 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4885 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4886 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4887 if (bmHstRegsWithGstShadowNew)
4888 {
4889 /*
4890 * Partial (likely).
4891 */
4892 uint64_t fGstShadows = 0;
4893 do
4894 {
4895 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4896 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4897 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4898 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4899
4900 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4901 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4902 fHstRegs &= ~RT_BIT_32(idxHstReg);
4903 } while (fHstRegs != 0);
4904 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4905 }
4906 else
4907 {
4908 /*
4909 * Clear all.
4910 */
4911 do
4912 {
4913 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4914 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4915 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4916 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4917
4918 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4919 fHstRegs &= ~RT_BIT_32(idxHstReg);
4920 } while (fHstRegs != 0);
4921 pReNative->Core.bmGstRegShadows = 0;
4922 }
4923 }
4924}
4925
4926
4927/**
4928 * Restores guest shadow copies in volatile registers.
4929 *
4930 * This is used after calling a helper function (think TLB miss) to restore the
4931 * register state of volatile registers.
4932 *
4933 * @param pReNative The native recompile state.
4934 * @param off The code buffer offset.
4935 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4936 * be active (allocated) w/o asserting. Hack.
4937 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4938 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4939 */
4940DECL_HIDDEN_THROW(uint32_t)
4941iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4942{
4943 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4944 if (fHstRegs)
4945 {
4946 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4947 do
4948 {
4949 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4950
4951 /* It's not fatal if a register is active holding a variable that
4952                is shadowing a guest register, ASSUMING all pending guest register
4953 writes were flushed prior to the helper call. However, we'll be
4954                emitting duplicate restores, so it wastes code space. */
4955 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4956 RT_NOREF(fHstRegsActiveShadows);
4957
4958 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4959 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4960 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4961 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4962
4963 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4964 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4965
4966 fHstRegs &= ~RT_BIT_32(idxHstReg);
4967 } while (fHstRegs != 0);
4968 }
4969 return off;
4970}
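
/* Note: this restore function is the counterpart of iemNativeRegFlushGuestShadowsByHostMask
   above; the TLB lookup code is expected to flush the volatile shadows before the helper
   call and restore them here afterwards (see the documentation of both functions). */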
4971
4972
4973/**
4974 * Flushes delayed write of a specific guest register.
4975 *
4976 * This must be called prior to calling CImpl functions and any helpers that use
4977 * the guest state (like raising exceptions) and such.
4978 *
4979 * This optimization has not yet been implemented. The first target would be
4980 * RIP updates, since these are the most common ones.
4981 */
4982DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4983 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
4984{
4985 RT_NOREF(pReNative, enmClass, idxReg);
4986 return off;
4987}
4988
4989
4990/**
4991 * Flushes any delayed guest register writes.
4992 *
4993 * This must be called prior to calling CImpl functions and any helpers that use
4994 * the guest state (like raising exceptions) and such.
4995 *
4996 * This optimization has not yet been implemented. The first target would be
4997 * RIP updates, since these are the most common ones.
4998 */
4999DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5000{
5001 RT_NOREF(pReNative, off);
5002 return off;
5003}
5004
5005
5006#ifdef VBOX_STRICT
5007/**
5008 * Does internal register allocator sanity checks.
5009 */
5010static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5011{
5012 /*
5013 * Iterate host registers building a guest shadowing set.
5014 */
5015 uint64_t bmGstRegShadows = 0;
5016 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5017 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5018 while (bmHstRegsWithGstShadow)
5019 {
5020 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5021 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5022 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5023
5024 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5025 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5026 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5027 bmGstRegShadows |= fThisGstRegShadows;
5028 while (fThisGstRegShadows)
5029 {
5030 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5031 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5032 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5033 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5034 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5035 }
5036 }
5037 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5038 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5039 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5040
5041 /*
5042 * Now the other way around, checking the guest to host index array.
5043 */
5044 bmHstRegsWithGstShadow = 0;
5045 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5046 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5047 while (bmGstRegShadows)
5048 {
5049 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5050 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5051 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5052
5053 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5054 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5055 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5056 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5057 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5058 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5059 }
5060 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5061 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5062 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5063}
5064#endif
5065
5066
5067/*********************************************************************************************************************************
5068* Code Emitters (larger snippets) *
5069*********************************************************************************************************************************/
5070
5071/**
5072 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5073 * extending to 64-bit width.
5074 *
5075 * @returns New code buffer offset on success; throws VBox status code on failure.
5076 * @param   pReNative       The native recompile state.
5077 * @param off The current code buffer position.
5078 * @param idxHstReg The host register to load the guest register value into.
5079 * @param enmGstReg The guest register to load.
5080 *
5081 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5082 * that is something the caller needs to do if applicable.
5083 */
5084DECL_HIDDEN_THROW(uint32_t)
5085iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5086{
5087 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
5088 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5089
5090 switch (g_aGstShadowInfo[enmGstReg].cb)
5091 {
5092 case sizeof(uint64_t):
5093 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5094 case sizeof(uint32_t):
5095 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5096 case sizeof(uint16_t):
5097 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5098#if 0 /* not present in the table. */
5099 case sizeof(uint8_t):
5100 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5101#endif
5102 default:
5103 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5104 }
5105}
5106
5107
5108#ifdef VBOX_STRICT
5109/**
5110 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5111 *
5112 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5113 * Trashes EFLAGS on AMD64.
5114 */
5115static uint32_t
5116iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5117{
5118# ifdef RT_ARCH_AMD64
5119 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5120
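    /* Approach: rotate the upper 32 bits into the low half, test them against
       0xffffffff (ZF is set iff they were all zero), trap with int3 if any bit
       was set, then rotate back so the register value is preserved. */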
5121 /* rol reg64, 32 */
5122 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5123 pbCodeBuf[off++] = 0xc1;
5124 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5125 pbCodeBuf[off++] = 32;
5126
5127 /* test reg32, ffffffffh */
5128 if (idxReg >= 8)
5129 pbCodeBuf[off++] = X86_OP_REX_B;
5130 pbCodeBuf[off++] = 0xf7;
5131 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5132 pbCodeBuf[off++] = 0xff;
5133 pbCodeBuf[off++] = 0xff;
5134 pbCodeBuf[off++] = 0xff;
5135 pbCodeBuf[off++] = 0xff;
5136
5137 /* je/jz +1 */
5138 pbCodeBuf[off++] = 0x74;
5139 pbCodeBuf[off++] = 0x01;
5140
5141 /* int3 */
5142 pbCodeBuf[off++] = 0xcc;
5143
5144 /* rol reg64, 32 */
5145 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5146 pbCodeBuf[off++] = 0xc1;
5147 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5148 pbCodeBuf[off++] = 32;
5149
5150# elif defined(RT_ARCH_ARM64)
5151 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5152 /* lsr tmp0, reg64, #32 */
5153 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5154 /* cbz tmp0, +1 */
5155 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5156 /* brk #0x1100 */
5157 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5158
5159# else
5160# error "Port me!"
5161# endif
5162 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5163 return off;
5164}
5165#endif /* VBOX_STRICT */
5166
5167
5168#ifdef VBOX_STRICT
5169/**
5170 * Emits code that checks that the content of register @a idxReg is the same
5171 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5172 * instruction if that's not the case.
5173 *
5174 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5175 * Trashes EFLAGS on AMD64.
5176 */
5177static uint32_t
5178iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5179{
5180# ifdef RT_ARCH_AMD64
5181 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5182
5183 /* cmp reg, [mem] */
5184 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5185 {
5186 if (idxReg >= 8)
5187 pbCodeBuf[off++] = X86_OP_REX_R;
5188 pbCodeBuf[off++] = 0x38;
5189 }
5190 else
5191 {
5192 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5193 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5194 else
5195 {
5196 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5197 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5198 else
5199 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5200 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5201 if (idxReg >= 8)
5202 pbCodeBuf[off++] = X86_OP_REX_R;
5203 }
5204 pbCodeBuf[off++] = 0x39;
5205 }
5206 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5207
5208 /* je/jz +1 */
5209 pbCodeBuf[off++] = 0x74;
5210 pbCodeBuf[off++] = 0x01;
5211
5212 /* int3 */
5213 pbCodeBuf[off++] = 0xcc;
5214
5215 /* For values smaller than the register size, we must check that the rest
5216 of the register is all zeros. */
5217 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5218 {
5219 /* test reg64, imm32 */
5220 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5221 pbCodeBuf[off++] = 0xf7;
5222 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5223 pbCodeBuf[off++] = 0;
5224 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5225 pbCodeBuf[off++] = 0xff;
5226 pbCodeBuf[off++] = 0xff;
5227
5228 /* je/jz +1 */
5229 pbCodeBuf[off++] = 0x74;
5230 pbCodeBuf[off++] = 0x01;
5231
5232 /* int3 */
5233 pbCodeBuf[off++] = 0xcc;
5234 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5235 }
5236 else
5237 {
5238 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5239 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5240 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5241 }
5242
5243# elif defined(RT_ARCH_ARM64)
5244 /* mov TMP0, [gstreg] */
5245 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5246
5247 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5248 /* sub tmp0, tmp0, idxReg */
5249 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5250 /* cbz tmp0, +1 */
5251 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5252 /* brk #0x1000+enmGstReg */
5253 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5254 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5255
5256# else
5257# error "Port me!"
5258# endif
5259 return off;
5260}
5261#endif /* VBOX_STRICT */
5262
5263
5264#ifdef VBOX_STRICT
5265/**
5266 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
5267 * important bits.
5268 *
5269 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5270 * Trashes EFLAGS on AMD64.
5271 */
5272static uint32_t
5273iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5274{
5275 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5276 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5277 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5278 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5279
5280# ifdef RT_ARCH_AMD64
5281 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5282
5283 /* je/jz +1 */
5284 pbCodeBuf[off++] = 0x74;
5285 pbCodeBuf[off++] = 0x01;
5286
5287 /* int3 */
5288 pbCodeBuf[off++] = 0xcc;
5289
5290# elif defined(RT_ARCH_ARM64)
5291 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5292
5293 /* b.eq +1 */
5294 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5295 /* brk #0x2000 */
5296 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5297
5298# else
5299# error "Port me!"
5300# endif
5301 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5302
5303 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5304 return off;
5305}
5306#endif /* VBOX_STRICT */
5307
5308
5309/**
5310 * Emits code for checking the return code of a call and rcPassUp, returning
5311 * from the code if either is non-zero.
5312 */
5313DECL_HIDDEN_THROW(uint32_t)
5314iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5315{
5316#ifdef RT_ARCH_AMD64
5317 /*
5318 * AMD64: eax = call status code.
5319 */
5320
5321 /* edx = rcPassUp */
5322 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5323# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5324 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5325# endif
5326
5327 /* edx = eax | rcPassUp */
5328 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5329 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5330 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5331 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5332
5333 /* Jump to non-zero status return path. */
5334 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5335
5336 /* done. */
5337
5338#elif defined(RT_ARCH_ARM64)
5339 /*
5340 * ARM64: w0 = call status code.
5341 */
5342# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5343 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5344# endif
5345 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5346
5347 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5348
5349 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5350
5351 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5352 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5353 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5354
5355#else
5356# error "port me"
5357#endif
5358 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5359 RT_NOREF_PV(idxInstr);
5360 return off;
5361}
5362
5363
5364/**
5365 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5366 * raising a \#GP(0) if it isn't.
5367 *
5368 * @returns New code buffer offset; throws VBox status code on error.
5369 * @param pReNative The native recompile state.
5370 * @param off The code buffer offset.
5371 * @param idxAddrReg The host register with the address to check.
5372 * @param idxInstr The current instruction.
5373 */
5374DECL_HIDDEN_THROW(uint32_t)
5375iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5376{
5377 /*
5378 * Make sure we don't have any outstanding guest register writes as we may
5379      * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
5380 */
5381 off = iemNativeRegFlushPendingWrites(pReNative, off);
5382
5383#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5384 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5385#else
5386 RT_NOREF(idxInstr);
5387#endif
5388
5389#ifdef RT_ARCH_AMD64
5390 /*
5391 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5392 * return raisexcpt();
5393      * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5394 */
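    /* Why this works: for a canonical address bits 63:47 are a sign extension of bit 47,
       so the upper 32 bits are either 0x00000000..0x00007fff or 0xffff8000..0xffffffff.
       Adding 0x8000 with 32-bit wrap-around maps both ranges into 0x0000..0xffff, so the
       final shift by 16 yields zero exactly for canonical addresses. */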
5395 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5396
5397 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5398 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5399 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5400 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5401 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5402
5403 iemNativeRegFreeTmp(pReNative, iTmpReg);
5404
5405#elif defined(RT_ARCH_ARM64)
5406 /*
5407 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5408 * return raisexcpt();
5409 * ----
5410 * mov x1, 0x800000000000
5411 * add x1, x0, x1
5412 * cmp xzr, x1, lsr 48
5413 * b.ne .Lraisexcpt
5414 */
5415 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5416
5417 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5418 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5419 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5420 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5421
5422 iemNativeRegFreeTmp(pReNative, iTmpReg);
5423
5424#else
5425# error "Port me"
5426#endif
5427 return off;
5428}
5429
5430
5431/**
5432 * Emits code to check if the content of @a idxAddrReg is within the limit of
5433 * idxSegReg, raising a \#GP(0) if it isn't.
5434 *
5435 * @returns New code buffer offset; throws VBox status code on error.
5436 * @param pReNative The native recompile state.
5437 * @param off The code buffer offset.
5438 * @param idxAddrReg The host register (32-bit) with the address to
5439 * check.
5440 * @param idxSegReg The segment register (X86_SREG_XXX) to check
5441 * against.
5442 * @param idxInstr The current instruction.
5443 */
5444DECL_HIDDEN_THROW(uint32_t)
5445iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5446 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
5447{
5448 /*
5449 * Make sure we don't have any outstanding guest register writes as we may
5450      * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
5451 */
5452 off = iemNativeRegFlushPendingWrites(pReNative, off);
5453
5454#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5455 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5456#else
5457 RT_NOREF(idxInstr);
5458#endif
5459
5460 AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
5461
5462 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5463 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
5464 kIemNativeGstRegUse_ForUpdate);
5465
5466 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
5467 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5468
5469 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
5470 return off;
5471}
5472
5473
5474/**
5475 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
5476 *
5477 * @returns The flush mask.
5478 * @param fCImpl The IEM_CIMPL_F_XXX flags.
5479 * @param fGstShwFlush The starting flush mask.
5480 */
5481DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
5482{
5483 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
5484 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
5485 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
5486 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
5487 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
5488 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
5489 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
5490 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
5491 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
5492 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
5493 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
5494 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
5495 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5496 return fGstShwFlush;
5497}
5498
5499
5500/**
5501 * Emits a call to a CImpl function or something similar.
5502 */
5503DECL_HIDDEN_THROW(uint32_t)
5504iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5505 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5506{
5507 /*
5508      * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5509 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5510 */
5511 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5512 fGstShwFlush
5513 | RT_BIT_64(kIemNativeGstReg_Pc)
5514 | RT_BIT_64(kIemNativeGstReg_EFlags));
5515 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5516
5517 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5518
5519 /*
5520 * Load the parameters.
5521 */
5522#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
5523     /* Special case: the hidden VBOXSTRICTRC return pointer. */
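    /* With VBOXSTRICTRC_STRICT_ENABLED on Windows, VBOXSTRICTRC is a class that is
       returned via a hidden buffer pointer in the first argument register, so pVCpu
       and the explicit parameters shift one register to the right and the status
       code is read back from the shadow stack slot after the call (see below). */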
5524 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5525 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5526 if (cAddParams > 0)
5527 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
5528 if (cAddParams > 1)
5529 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
5530 if (cAddParams > 2)
5531 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
5532 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5533
5534#else
5535 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5536 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5537 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5538 if (cAddParams > 0)
5539 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
5540 if (cAddParams > 1)
5541 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
5542 if (cAddParams > 2)
5543# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
5544 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
5545# else
5546 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
5547# endif
5548#endif
5549
5550 /*
5551 * Make the call.
5552 */
5553 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
5554
5555#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5556 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5557#endif
5558
5559 /*
5560 * Check the status code.
5561 */
5562 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5563}
5564
5565
5566/**
5567 * Emits a call to a threaded worker function.
5568 */
5569DECL_HIDDEN_THROW(uint32_t)
5570iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5571{
5572 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
5573 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5574
5575#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5576 /* The threaded function may throw / long jmp, so set current instruction
5577 number if we're counting. */
5578 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5579#endif
5580
5581 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
5582
5583#ifdef RT_ARCH_AMD64
5584 /* Load the parameters and emit the call. */
5585# ifdef RT_OS_WINDOWS
5586# ifndef VBOXSTRICTRC_STRICT_ENABLED
5587 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5588 if (cParams > 0)
5589 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
5590 if (cParams > 1)
5591 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
5592 if (cParams > 2)
5593 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
5594# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
5595 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
5596 if (cParams > 0)
5597 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
5598 if (cParams > 1)
5599 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
5600 if (cParams > 2)
5601 {
5602 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
5603 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
5604 }
5605 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5606# endif /* VBOXSTRICTRC_STRICT_ENABLED */
5607# else
5608 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5609 if (cParams > 0)
5610 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
5611 if (cParams > 1)
5612 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
5613 if (cParams > 2)
5614 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
5615# endif
5616
5617 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5618
5619# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5620 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5621# endif
5622
5623#elif defined(RT_ARCH_ARM64)
5624 /*
5625 * ARM64:
5626 */
5627 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5628 if (cParams > 0)
5629 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
5630 if (cParams > 1)
5631 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
5632 if (cParams > 2)
5633 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
5634
5635 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5636
5637#else
5638# error "port me"
5639#endif
5640
5641 /*
5642 * Check the status code.
5643 */
5644 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
5645
5646 return off;
5647}
5648
5649
5650/**
5651 * Emits the code at the CheckBranchMiss label.
5652 */
5653static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5654{
5655 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
5656 if (idxLabel != UINT32_MAX)
5657 {
5658 iemNativeLabelDefine(pReNative, idxLabel, off);
5659
5660 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5661 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5662 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5663
5664 /* jump back to the return sequence. */
5665 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5666 }
5667 return off;
5668}
5669
5670
5671/**
5672 * Emits the code at the NeedCsLimChecking label.
5673 */
5674static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5675{
5676 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5677 if (idxLabel != UINT32_MAX)
5678 {
5679 iemNativeLabelDefine(pReNative, idxLabel, off);
5680
5681 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5682 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5683 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5684
5685 /* jump back to the return sequence. */
5686 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5687 }
5688 return off;
5689}
5690
5691
5692/**
5693 * Emits the code at the ObsoleteTb label.
5694 */
5695static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5696{
5697 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5698 if (idxLabel != UINT32_MAX)
5699 {
5700 iemNativeLabelDefine(pReNative, idxLabel, off);
5701
5702 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5703 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5704 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5705
5706 /* jump back to the return sequence. */
5707 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5708 }
5709 return off;
5710}
5711
5712
5713/**
5714 * Emits the code at the RaiseGP0 label.
5715 */
5716static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5717{
5718 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5719 if (idxLabel != UINT32_MAX)
5720 {
5721 iemNativeLabelDefine(pReNative, idxLabel, off);
5722
5723 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5724 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5725 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5726
5727 /* jump back to the return sequence. */
5728 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5729 }
5730 return off;
5731}
5732
5733
5734/**
5735 * Emits the code at the ReturnWithFlags label (returns
5736 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
5737 */
5738static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5739{
5740 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
5741 if (idxLabel != UINT32_MAX)
5742 {
5743 iemNativeLabelDefine(pReNative, idxLabel, off);
5744
5745 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
5746
5747 /* jump back to the return sequence. */
5748 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5749 }
5750 return off;
5751}
5752
5753
5754/**
5755 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
5756 */
5757static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5758{
5759 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
5760 if (idxLabel != UINT32_MAX)
5761 {
5762 iemNativeLabelDefine(pReNative, idxLabel, off);
5763
5764 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
5765
5766 /* jump back to the return sequence. */
5767 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5768 }
5769 return off;
5770}
5771
5772
5773/**
5774 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
5775 */
5776static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5777{
5778 /*
5779 * Generate the rc + rcPassUp fiddling code if needed.
5780 */
5781 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5782 if (idxLabel != UINT32_MAX)
5783 {
5784 iemNativeLabelDefine(pReNative, idxLabel, off);
5785
5786 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
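    /* Calling-convention sketch (for orientation only, not authoritative): the helper takes
       (pVCpu, rc, idxInstr), i.e. RCX/RDX/R8 on Windows/AMD64 and RDI/RSI/RDX on SysV AMD64;
       the status code to fiddle with is still sitting in RAX at this point, which is why it
       is copied into the second argument register below. */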
5787#ifdef RT_ARCH_AMD64
5788# ifdef RT_OS_WINDOWS
5789# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5790 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
5791# endif
5792 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5793 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
5794# else
5795 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5796 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
5797# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5798 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
5799# endif
5800# endif
5801# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5802 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
5803# endif
5804
5805#else
5806 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
5807 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5808 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
5809#endif
5810
5811 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
5812 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5813 }
5814 return off;
5815}
5816
5817
5818/**
5819 * Emits a standard epilog.
5820 */
5821static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
5822{
5823 *pidxReturnLabel = UINT32_MAX;
5824
5825 /*
5826 * Successful return, so clear the return register (eax, w0).
5827 */
5828 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
5829
5830 /*
5831 * Define label for common return point.
5832 */
5833 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
5834 *pidxReturnLabel = idxReturn;
5835
5836 /*
5837 * Restore registers and return.
5838 */
5839#ifdef RT_ARCH_AMD64
5840 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5841
5842 /* Reposition esp at the r15 restore point. */
5843 pbCodeBuf[off++] = X86_OP_REX_W;
5844 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
5845 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
5846 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
5847
5848 /* Pop non-volatile registers and return */
5849 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
5850 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
5851 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
5852 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
5853 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
5854 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
5855 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5856 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5857# ifdef RT_OS_WINDOWS
5858 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5859 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5860# endif
5861 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5862 pbCodeBuf[off++] = 0xc9; /* leave */
5863 pbCodeBuf[off++] = 0xc3; /* ret */
5864 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5865
5866#elif defined(RT_ARCH_ARM64)
5867 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5868
5869 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
5870 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5871 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5872 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5873 IEMNATIVE_FRAME_VAR_SIZE / 8);
5874 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5875 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5876 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5877 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5878 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5879 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5880 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5881 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5882 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5883 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5884 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5885 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5886
5887 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5888 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5889 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5890 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5891
5892 /* retab / ret */
5893# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5894 if (1)
5895 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
5896 else
5897# endif
5898 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
5899
5900#else
5901# error "port me"
5902#endif
5903 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5904
5905 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
5906}
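/* Note: the tail emitters above (ReturnBreak, ReturnWithFlags, RaiseGp0, ObsoleteTb,
 * NeedCsLimChecking and the RC fiddling) all take the idxReturnLabel created here, so they
 * necessarily run after this epilog; each only produces code if its label was actually
 * requested, and they all jump back into the common register-restore + ret sequence. */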
5907
5908
5909/**
5910 * Emits a standard prolog.
5911 */
5912static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5913{
5914#ifdef RT_ARCH_AMD64
5915 /*
5916 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
5917 * reserving 64 bytes for stack variables plus 4 non-register argument
5918 * slots. Fixed register assignment: xBX = pVCpu;
5919 *
5920 * Since we always do the same register spilling, we can use the same
5921 * unwind description for all the code.
5922 */
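    /* Rough sketch of the resulting frame, assuming the usual IEMNATIVE_FRAME_* values
     * (the defines are authoritative, the offsets below are only illustrative):
     *      [rbp+08h]   return address
     *      [rbp+00h]   saved rbp
     *      [rbp-08h]   saved rbx
     *      (windows)   saved rsi, rdi
     *      ...         saved r12..r15      (r15 at rbp + IEMNATIVE_FP_OFF_LAST_PUSH)
     *      [rsp ..]    argument spill slots, variable area and alignment padding
     */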
5923 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5924 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
5925 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
5926 pbCodeBuf[off++] = 0x8b;
5927 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
5928 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
5929 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
5930# ifdef RT_OS_WINDOWS
5931 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
5932 pbCodeBuf[off++] = 0x8b;
5933 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
5934 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
5935 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
5936# else
5937 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
5938 pbCodeBuf[off++] = 0x8b;
5939 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
5940# endif
5941 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
5942 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
5943 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
5944 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
5945 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
5946 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
5947 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
5948 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
5949
5950# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
5951 /* Save the frame pointer. */
5952 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
5953# endif
5954
5955 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, <var/arg area size> */
5956 X86_GREG_xSP,
5957 IEMNATIVE_FRAME_ALIGN_SIZE
5958 + IEMNATIVE_FRAME_VAR_SIZE
5959 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
5960 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
5961 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
5962 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
5963 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
5964
5965#elif defined(RT_ARCH_ARM64)
5966 /*
5967 * We set up a stack frame exactly like on x86, only we have to push the
5968 * return address ourselves here. We save all non-volatile registers.
5969 */
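    /* Rough sketch of the register-save area being built (IEMNATIVE_FRAME_SAVE_REG_SIZE is
     * assumed to be 12*8 = 0x60 bytes per the AssertCompile below; illustrative only):
     *      [sp+00h]  x19, x20
     *      [sp+10h]  x21, x22    [sp+20h]  x23, x24
     *      [sp+30h]  x25, x26    [sp+40h]  x27, x28
     *      [sp+50h]  bp, lr      <- bp is then pointed at the saved bp slot
     * with another IEMNATIVE_FRAME_VAR_SIZE bytes of variable space allocated below it. */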
5970 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5971
5972# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
5973 * to figure out where the BRK following the AUTHB*+XPACB* stuff in libunwind comes from. It's
5974 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
5975 * in any way conditional, so we just emit this instruction now and hope for the best... */
5976 /* pacibsp */
5977 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
5978# endif
5979
5980 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
5981 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
5982 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5983 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5984 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
5985 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
5986 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5987 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5988 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5989 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5990 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5991 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5992 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5993 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5994 /* Save the BP and LR (ret address) registers at the top of the frame. */
5995 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5996 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5997 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5998 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
5999 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6000 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6001
6002 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6003 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6004
6005 /* mov r28, r0 */
6006 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6007 /* mov r27, r1 */
6008 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6009
6010# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6011 /* Save the frame pointer. */
6012 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6013 ARMV8_A64_REG_X2);
6014# endif
6015
6016#else
6017# error "port me"
6018#endif
6019 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6020 return off;
6021}
6022
6023
6024
6025
6026/*********************************************************************************************************************************
6027* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
6028*********************************************************************************************************************************/
6029
6030#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
6031 { \
6032 Assert(pReNative->Core.bmVars == 0); \
6033 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
6034 Assert(pReNative->Core.bmStack == 0); \
6035 pReNative->fMc = (a_fMcFlags); \
6036 pReNative->fCImpl = (a_fCImplFlags); \
6037 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
6038
6039/** We have to get to the end in recompilation mode, as otherwise we won't
6040 * generate code for all the IEM_MC_IF_XXX branches. */
6041#define IEM_MC_END() \
6042 iemNativeVarFreeAll(pReNative); \
6043 } return off
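/* Illustrative only (no particular instruction implied): a recompiled MC block body is
 * expected to look roughly like
 *      IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags);
 *      ... IEM_MC_XXX statements expanding to iemNativeEmitXxx calls ...
 *      IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal);
 *      IEM_MC_END();
 * where IEM_MC_BEGIN opens the scope and seeds fMc/fCImpl/cArgs, while IEM_MC_END frees
 * all variables and returns the updated code buffer offset. */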
6044
6045
6046
6047/*********************************************************************************************************************************
6048* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
6049*********************************************************************************************************************************/
6050
6051#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
6052 pReNative->fMc = 0; \
6053 pReNative->fCImpl = (a_fFlags); \
6054 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
6055
6056
6057#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6058 pReNative->fMc = 0; \
6059 pReNative->fCImpl = (a_fFlags); \
6060 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
6061
6062DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6063 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6064 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
6065{
6066 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
6067}
6068
6069
6070#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6071 pReNative->fMc = 0; \
6072 pReNative->fCImpl = (a_fFlags); \
6073 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6074 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
6075
6076DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6077 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6078 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
6079{
6080 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
6081}
6082
6083
6084#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6085 pReNative->fMc = 0; \
6086 pReNative->fCImpl = (a_fFlags); \
6087 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6088 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
6089
6090DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6091 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6092 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
6093 uint64_t uArg2)
6094{
6095 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
6096}
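/* Illustrative only: a deferral such as
 *      IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0)
 * records that the block has no MC body (fMc = 0), stores the CIMPL flags and emits a call
 * to the C implementation via iemNativeEmitCImplCall, passing the a_fGstShwFlush mask along. */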
6097
6098
6099
6100/*********************************************************************************************************************************
6101* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
6102*********************************************************************************************************************************/
6103
6104/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
6105 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
6106DECL_INLINE_THROW(uint32_t)
6107iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6108{
6109 /*
6110 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
6111 * return with a special status code and make the execution loop deal with
6112 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
6113 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
6114 * would allow continuing w/o interruption, it will probably drop us into
6115 * the debugger, so it's not worth the effort of trying to service it here;
6116 * we just lump it in with the handling of the others.
6117 *
6118 * To simplify the code and the register state management even more (wrt the
6119 * immediate in the AND operation), we always update the flags and skip the
6120 * conditional jump associated with the extra check.
6121 */
6122 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
6123 <= UINT32_MAX);
6124#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6125 AssertMsg( pReNative->idxCurCall == 0
6126 || IEMLIVENESS_STATE_IS_ACCESS_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], kIemNativeGstReg_EFlags/*_Other*/)),
6127 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], kIemNativeGstReg_EFlags/*_Other*/)));
6128#endif
6129
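    /* Pseudo-code sketch of what gets emitted below:
     *      if (eflags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
     *          goto ReturnWithFlags;
     *      eflags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
     *      pVCpu->cpum.GstCtx.eflags = eflags;
     */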
6130 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6131 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
6132 true /*fSkipLivenessAssert*/);
6133 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
6134 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
6135 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
6136 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
6137 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6138
6139 /* Free but don't flush the EFLAGS register. */
6140 iemNativeRegFreeTmp(pReNative, idxEflReg);
6141
6142 return off;
6143}
6144
6145
6146/** Handles a_rcNormal: emits nothing for VINF_SUCCESS (the dummy case), a jump to the ReturnBreak label for VINF_IEM_REEXEC_BREAK. */
6147template<int const a_rcNormal>
6148DECL_FORCE_INLINE(uint32_t)
6149iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6150{
6151 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
6152 if (a_rcNormal != VINF_SUCCESS)
6153 {
6154#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6155 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6156#else
6157 RT_NOREF_PV(idxInstr);
6158#endif
6159 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
6160 }
6161 return off;
6162}
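/* Usage note: the _THREADED finish macros below instantiate this template with their
 * a_rcNormal argument; a VINF_SUCCESS instantiation emits nothing here, while a
 * VINF_IEM_REEXEC_BREAK one stores the current instruction number in the VMCPU (when
 * IEMNATIVE_WITH_INSTRUCTION_COUNTING is defined) and jumps to the ReturnBreak label. */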
6163
6164
6165#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
6166 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6167 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6168
6169#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6170 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6171 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6172 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6173
6174/** Same as iemRegAddToRip64AndFinishingNoFlags. */
6175DECL_INLINE_THROW(uint32_t)
6176iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6177{
6178 /* Allocate a temporary PC register. */
6179 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6180
6181 /* Perform the addition and store the result. */
6182 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
6183 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6184
6185 /* Free but don't flush the PC register. */
6186 iemNativeRegFreeTmp(pReNative, idxPcReg);
6187
6188 return off;
6189}
6190
6191
6192#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
6193 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6194 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6195
6196#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6197 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6198 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6199 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6200
6201/** Same as iemRegAddToEip32AndFinishingNoFlags. */
6202DECL_INLINE_THROW(uint32_t)
6203iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6204{
6205 /* Allocate a temporary PC register. */
6206 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6207
6208 /* Perform the addition and store the result. */
6209 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6210 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6211
6212 /* Free but don't flush the PC register. */
6213 iemNativeRegFreeTmp(pReNative, idxPcReg);
6214
6215 return off;
6216}
6217
6218
6219#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
6220 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6221 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6222
6223#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6224 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6225 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6226 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6227
6228/** Same as iemRegAddToIp16AndFinishingNoFlags. */
6229DECL_INLINE_THROW(uint32_t)
6230iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6231{
6232 /* Allocate a temporary PC register. */
6233 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6234
6235 /* Perform the addition and store the result. */
6236 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6237 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6238 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6239
6240 /* Free but don't flush the PC register. */
6241 iemNativeRegFreeTmp(pReNative, idxPcReg);
6242
6243 return off;
6244}
6245
6246
6247
6248/*********************************************************************************************************************************
6249* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
6250*********************************************************************************************************************************/
6251
6252#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6253 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6254 (a_enmEffOpSize), pCallEntry->idxInstr); \
6255 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6256
6257#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6258 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6259 (a_enmEffOpSize), pCallEntry->idxInstr); \
6260 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6261 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6262
6263#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
6264 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6265 IEMMODE_16BIT, pCallEntry->idxInstr); \
6266 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6267
6268#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6269 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6270 IEMMODE_16BIT, pCallEntry->idxInstr); \
6271 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6272 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6273
6274#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
6275 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6276 IEMMODE_64BIT, pCallEntry->idxInstr); \
6277 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6278
6279#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6280 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6281 IEMMODE_64BIT, pCallEntry->idxInstr); \
6282 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6283 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6284
6285/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
6286 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
6287 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
6288DECL_INLINE_THROW(uint32_t)
6289iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6290 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6291{
6292 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
6293
6294 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6295 off = iemNativeRegFlushPendingWrites(pReNative, off);
6296
6297 /* Allocate a temporary PC register. */
6298 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6299
6300 /* Perform the addition. */
6301 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
6302
6303 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
6304 {
6305 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6306 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6307 }
6308 else
6309 {
6310 /* Just truncate the result to 16-bit IP. */
6311 Assert(enmEffOpSize == IEMMODE_16BIT);
6312 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6313 }
6314 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6315
6316 /* Free but don't flush the PC register. */
6317 iemNativeRegFreeTmp(pReNative, idxPcReg);
6318
6319 return off;
6320}
6321
6322
6323#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6324 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6325 (a_enmEffOpSize), pCallEntry->idxInstr); \
6326 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6327
6328#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6329 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6330 (a_enmEffOpSize), pCallEntry->idxInstr); \
6331 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6332 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6333
6334#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
6335 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6336 IEMMODE_16BIT, pCallEntry->idxInstr); \
6337 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6338
6339#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6340 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6341 IEMMODE_16BIT, pCallEntry->idxInstr); \
6342 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6343 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6344
6345#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
6346 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6347 IEMMODE_32BIT, pCallEntry->idxInstr); \
6348 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6349
6350#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6351 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6352 IEMMODE_32BIT, pCallEntry->idxInstr); \
6353 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6354 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6355
6356/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
6357 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
6358 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
6359DECL_INLINE_THROW(uint32_t)
6360iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6361 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6362{
6363 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
6364
6365 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6366 off = iemNativeRegFlushPendingWrites(pReNative, off);
6367
6368 /* Allocate a temporary PC register. */
6369 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6370
6371 /* Perform the addition. */
6372 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6373
6374 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
6375 if (enmEffOpSize == IEMMODE_16BIT)
6376 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6377
6378 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
6379/** @todo we can skip this in 32-bit FLAT mode. */
6380 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6381
6382 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6383
6384 /* Free but don't flush the PC register. */
6385 iemNativeRegFreeTmp(pReNative, idxPcReg);
6386
6387 return off;
6388}
6389
6390
6391#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
6392 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6393 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6394
6395#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
6396 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6397 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6398 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6399
6400#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
6401 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6402 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6403
6404#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6405 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6406 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6407 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6408
6409#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
6410 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6411 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6412
6413#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6414 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6415 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6416 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6417
6418/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
6419DECL_INLINE_THROW(uint32_t)
6420iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6421 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
6422{
6423 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6424 off = iemNativeRegFlushPendingWrites(pReNative, off);
6425
6426 /* Allocate a temporary PC register. */
6427 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6428
6429 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
6430 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6431 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6432 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6433 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6434
6435 /* Free but don't flush the PC register. */
6436 iemNativeRegFreeTmp(pReNative, idxPcReg);
6437
6438 return off;
6439}
6440
6441
6442
6443/*********************************************************************************************************************************
6444* Emitters for changing PC/RIP/EIP/IP with a indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
6445*********************************************************************************************************************************/
6446
6447/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
6448#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
6449 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6450
6451/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
6452#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
6453 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6454
6455/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
6456#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
6457 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6458
6459/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
6460 * clears flags. */
6461#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
6462 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
6463 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6464
6465/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
6466 * clears flags. */
6467#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
6468 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
6469 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6470
6471/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
6472 * clears flags. */
6473#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
6474 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
6475 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6476
6477#undef IEM_MC_SET_RIP_U16_AND_FINISH
6478
6479
6480/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
6481#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
6482 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6483
6484/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
6485#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
6486 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6487
6488/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
6489 * clears flags. */
6490#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
6491 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
6492 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6493
6494/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
6495 * and clears flags. */
6496#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
6497 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
6498 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6499
6500#undef IEM_MC_SET_RIP_U32_AND_FINISH
6501
6502
6503/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
6504#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
6505 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
6506
6507/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
6508 * and clears flags. */
6509#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
6510 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
6511 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6512
6513#undef IEM_MC_SET_RIP_U64_AND_FINISH
6514
6515
6516/** Same as iemRegRipJumpU16AndFinishNoFlags,
6517 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
6518DECL_INLINE_THROW(uint32_t)
6519iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
6520 uint8_t idxInstr, uint8_t cbVar)
6521{
6522 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
6523 Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
6524
6525 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6526 off = iemNativeRegFlushPendingWrites(pReNative, off);
6527
6528 /* Get a register with the new PC loaded from idxVarPc.
6529 Note! This ASSUMES that the high bits of the GPR are zeroed.
6530 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
6531
6532 /* Check limit (may #GP(0) + exit TB). */
6533 if (!f64Bit)
6534/** @todo we can skip this test in FLAT 32-bit mode. */
6535 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
6536 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6537 else if (cbVar > sizeof(uint32_t))
6538 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6539
6540 /* Store the result. */
6541 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6542
6543 iemNativeVarRegisterRelease(pReNative, idxVarPc);
6544 /** @todo implicitly free the variable? */
6545
6546 return off;
6547}
6548
6549
6550
6551/*********************************************************************************************************************************
6552* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
6553*********************************************************************************************************************************/
6554
6555/**
6556 * Pushes an IEM_MC_IF_XXX onto the condition stack.
6557 *
6558 * @returns Pointer to the condition stack entry.
6559 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED (via longjmp) if nested too deeply.
6560 */
6561DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
6562{
6563 uint32_t const idxStack = pReNative->cCondDepth;
6564 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
6565
6566 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
6567 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
6568
6569 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
6570 pEntry->fInElse = false;
6571 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
6572 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
6573
6574 return pEntry;
6575}
6576
6577
6578/**
6579 * Start of the if-block, snapshotting the register and variable state.
6580 */
6581DECL_INLINE_THROW(void)
6582iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
6583{
6584 Assert(offIfBlock != UINT32_MAX);
6585 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6586 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6587 Assert(!pEntry->fInElse);
6588
6589 /* Define the start of the IF block if requested or for disassembly purposes. */
6590 if (idxLabelIf != UINT32_MAX)
6591 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
6592#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6593 else
6594 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
6595#else
6596 RT_NOREF(offIfBlock);
6597#endif
6598
6599 /* Copy the initial state so we can restore it in the 'else' block. */
6600 pEntry->InitialState = pReNative->Core;
6601}
6602
6603
6604#define IEM_MC_ELSE() } while (0); \
6605 off = iemNativeEmitElse(pReNative, off); \
6606 do {
6607
6608/** Emits code related to IEM_MC_ELSE. */
6609DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6610{
6611 /* Check sanity and get the conditional stack entry. */
6612 Assert(off != UINT32_MAX);
6613 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6614 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6615 Assert(!pEntry->fInElse);
6616
6617 /* Jump to the endif */
6618 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
6619
6620 /* Define the else label and enter the else part of the condition. */
6621 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6622 pEntry->fInElse = true;
6623
6624 /* Snapshot the core state so we can do a merge at the endif and restore
6625 the snapshot we took at the start of the if-block. */
6626 pEntry->IfFinalState = pReNative->Core;
6627 pReNative->Core = pEntry->InitialState;
6628
6629 return off;
6630}
6631
6632
6633#define IEM_MC_ENDIF() } while (0); \
6634 off = iemNativeEmitEndIf(pReNative, off)
6635
6636/** Emits code related to IEM_MC_ENDIF. */
6637DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6638{
6639 /* Check sanity and get the conditional stack entry. */
6640 Assert(off != UINT32_MAX);
6641 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6642 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6643
6644 /*
6645 * Now we have to find common ground with the core state of the other branch
6646 * (the if-final state, or the initial state when there is no else block). Use
6647 * the smallest common denominator and just drop anything that isn't the same in both states.
6648 */
6649 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
6650 * which is why we're doing this at the end of the else-block.
6651 * But we'd need more info about the future for that to be worth the effort. */
6652 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
6653 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
6654 {
6655 /* shadow guest stuff first. */
6656 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
6657 if (fGstRegs)
6658 {
6659 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
6660 do
6661 {
6662 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
6663 fGstRegs &= ~RT_BIT_64(idxGstReg);
6664
6665 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6666 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
6667 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
6668 {
6669 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
6670 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
6671 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
6672 }
6673 } while (fGstRegs);
6674 }
6675 else
6676 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
6677
6678 /* Check variables next. For now we must require them to be identical
6679 or stuff we can recreate. */
6680 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
6681 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
6682 if (fVars)
6683 {
6684 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
6685 do
6686 {
6687 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
6688 fVars &= ~RT_BIT_32(idxVar);
6689
6690 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
6691 {
6692 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
6693 continue;
6694 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
6695 {
6696 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6697 if (idxHstReg != UINT8_MAX)
6698 {
6699 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6700 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6701 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
6702 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6703 }
6704 continue;
6705 }
6706 }
6707 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
6708 continue;
6709
6710 /* Irreconcilable, so drop it. */
6711 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
6712 if (idxHstReg != UINT8_MAX)
6713 {
6714 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
6715 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6716 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
6717 g_apszIemNativeHstRegNames[idxHstReg], idxVar));
6718 }
6719 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
6720 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
6721 } while (fVars);
6722 }
6723
6724 /* Finally, check that the host register allocations match. */
6725 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
6726 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
6727 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
6728 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
6729 }
6730
6731 /*
6732 * Define the endif label and maybe the else one if we're still in the 'if' part.
6733 */
6734 if (!pEntry->fInElse)
6735 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6736 else
6737 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
6738 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
6739
6740 /* Pop the condition stack. */
6741 pReNative->cCondDepth -= 1;
6742
6743 return off;
6744}
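/* Illustrative only: MC code of the shape
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...
 *      } IEM_MC_ELSE() {
 *          ...
 *      } IEM_MC_ENDIF();
 * maps onto iemNativeCondPushIf plus a conditional jump to the 'else' label, then
 * iemNativeEmitElse (jump to 'endif', define 'else', restore the snapshotted state), and
 * finally iemNativeEmitEndIf above (reconcile the two states and define 'endif'). */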
6745
6746
6747#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
6748 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
6749 do {
6750
6751/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
6752DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6753{
6754 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6755
6756 /* Get the eflags. */
6757 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6758 kIemNativeGstRegUse_ReadOnly);
6759
6760 /* Test and jump. */
6761 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6762
6763 /* Free but don't flush the EFlags register. */
6764 iemNativeRegFreeTmp(pReNative, idxEflReg);
6765
6766 /* Make a copy of the core state now as we start the if-block. */
6767 iemNativeCondStartIfBlock(pReNative, off);
6768
6769 return off;
6770}
6771
6772
6773#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
6774 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
6775 do {
6776
6777/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
6778DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
6779{
6780 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6781
6782 /* Get the eflags. */
6783 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6784 kIemNativeGstRegUse_ReadOnly);
6785
6786 /* Test and jump. */
6787 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
6788
6789 /* Free but don't flush the EFlags register. */
6790 iemNativeRegFreeTmp(pReNative, idxEflReg);
6791
6792 /* Make a copy of the core state now as we start the if-block. */
6793 iemNativeCondStartIfBlock(pReNative, off);
6794
6795 return off;
6796}
6797
6798
6799#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
6800 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
6801 do {
6802
6803/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
6804DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6805{
6806 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6807
6808 /* Get the eflags. */
6809 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6810 kIemNativeGstRegUse_ReadOnly);
6811
6812 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6813 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6814
6815 /* Test and jump. */
6816 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6817
6818 /* Free but don't flush the EFlags register. */
6819 iemNativeRegFreeTmp(pReNative, idxEflReg);
6820
6821 /* Make a copy of the core state now as we start the if-block. */
6822 iemNativeCondStartIfBlock(pReNative, off);
6823
6824 return off;
6825}
6826
6827
6828#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
6829 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
6830 do {
6831
6832/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
6833DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
6834{
6835 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6836
6837 /* Get the eflags. */
6838 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6839 kIemNativeGstRegUse_ReadOnly);
6840
6841 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6842 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6843
6844 /* Test and jump. */
6845 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6846
6847 /* Free but don't flush the EFlags register. */
6848 iemNativeRegFreeTmp(pReNative, idxEflReg);
6849
6850 /* Make a copy of the core state now as we start the if-block. */
6851 iemNativeCondStartIfBlock(pReNative, off);
6852
6853 return off;
6854}
6855
6856
6857#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
6858 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
6859 do {
6860
6861#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
6862 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
6863 do {
6864
6865/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
6866DECL_INLINE_THROW(uint32_t)
6867iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6868 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6869{
6870 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6871
6872 /* Get the eflags. */
6873 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6874 kIemNativeGstRegUse_ReadOnly);
6875
6876 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6877 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6878
6879 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6880 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6881 Assert(iBitNo1 != iBitNo2);
6882
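    /* Strategy: isolate flag bit #1, shift it to bit #2's position and XOR that with
       EFLAGS; bit #2 of the result is then set exactly when the two flags differ. */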
6883#ifdef RT_ARCH_AMD64
6884 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
6885
6886 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6887 if (iBitNo1 > iBitNo2)
6888 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6889 else
6890 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6891 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6892
6893#elif defined(RT_ARCH_ARM64)
6894 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6895 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6896
6897 /* and tmpreg, eflreg, #1<<iBitNo1 */
6898 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6899
6900 /* eor tmpreg, eflreg, tmpreg, LSL/LSR #abs(iBitNo2 - iBitNo1) */
6901 if (iBitNo1 > iBitNo2)
6902 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6903 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6904 else
6905 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6906 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
6907
6908 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6909
6910#else
6911# error "Port me"
6912#endif
6913
6914 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
6915 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
6916 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
6917
6918 /* Free but don't flush the EFlags and tmp registers. */
6919 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6920 iemNativeRegFreeTmp(pReNative, idxEflReg);
6921
6922 /* Make a copy of the core state now as we start the if-block. */
6923 iemNativeCondStartIfBlock(pReNative, off);
6924
6925 return off;
6926}
6927
6928
6929#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
6930 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
6931 do {
6932
6933#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
6934 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
6935 do {
6936
6937/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
6938 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
6939DECL_INLINE_THROW(uint32_t)
6940iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
6941 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
6942{
6943 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
6944
6945    /* We need an if-block label for the inverted variant. */
6946 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
6947 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
6948
6949 /* Get the eflags. */
6950 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6951 kIemNativeGstRegUse_ReadOnly);
6952
6953 /* Translate the flag masks to bit numbers. */
6954 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
6955 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
6956
6957 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
6958 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
6959 Assert(iBitNo1 != iBitNo);
6960
6961 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
6962 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
6963 Assert(iBitNo2 != iBitNo);
6964 Assert(iBitNo2 != iBitNo1);
6965
6966#ifdef RT_ARCH_AMD64
6967 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
6968#elif defined(RT_ARCH_ARM64)
6969 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6970#endif
6971
6972 /* Check for the lone bit first. */
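    /* For the non-inverted variant (IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ) a set
       lone bit means the condition is false, so we jump to the else-block; for
       the inverted variant (IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE) a set lone bit
       already satisfies the condition, so we jump straight to the if-block label
       created above. */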
6973 if (!fInverted)
6974 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
6975 else
6976 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
6977
6978 /* Then extract and compare the other two bits. */
6979#ifdef RT_ARCH_AMD64
6980 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6981 if (iBitNo1 > iBitNo2)
6982 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
6983 else
6984 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
6985 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
6986
6987#elif defined(RT_ARCH_ARM64)
6988 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6989
6990 /* and tmpreg, eflreg, #1<<iBitNo1 */
6991 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
6992
6993    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
6994 if (iBitNo1 > iBitNo2)
6995 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6996 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
6997 else
6998 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
6999 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7000
7001 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7002
7003#else
7004# error "Port me"
7005#endif
7006
7007    /* Test (bit iBitNo2 is set in tmpreg if the two flags differ) and jump. */
7008 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7009 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7010
7011 /* Free but don't flush the EFlags and tmp registers. */
7012 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7013 iemNativeRegFreeTmp(pReNative, idxEflReg);
7014
7015 /* Make a copy of the core state now as we start the if-block. */
7016 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
7017
7018 return off;
7019}
7020
7021
7022#define IEM_MC_IF_CX_IS_NZ() \
7023 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
7024 do {
7025
7026/** Emits code for IEM_MC_IF_CX_IS_NZ. */
7027DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7028{
7029 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7030
7031 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7032 kIemNativeGstRegUse_ReadOnly);
7033 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
7034 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7035
7036 iemNativeCondStartIfBlock(pReNative, off);
7037 return off;
7038}
7039
7040
7041#define IEM_MC_IF_ECX_IS_NZ() \
7042 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
7043 do {
7044
7045#define IEM_MC_IF_RCX_IS_NZ() \
7046 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
7047 do {
7048
7049/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
7050DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7051{
7052 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7053
7054 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7055 kIemNativeGstRegUse_ReadOnly);
7056 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
7057 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7058
7059 iemNativeCondStartIfBlock(pReNative, off);
7060 return off;
7061}
7062
7063
7064#define IEM_MC_IF_CX_IS_NOT_ONE() \
7065 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
7066 do {
7067
7068/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
7069DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7070{
7071 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7072
7073 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7074 kIemNativeGstRegUse_ReadOnly);
7075#ifdef RT_ARCH_AMD64
7076 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7077#else
7078 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7079 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7080 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7081#endif
7082 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7083
7084 iemNativeCondStartIfBlock(pReNative, off);
7085 return off;
7086}
7087
7088
7089#define IEM_MC_IF_ECX_IS_NOT_ONE() \
7090 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
7091 do {
7092
7093#define IEM_MC_IF_RCX_IS_NOT_ONE() \
7094 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
7095 do {
7096
7097/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
7098DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7099{
7100 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7101
7102 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7103 kIemNativeGstRegUse_ReadOnly);
7104 if (f64Bit)
7105 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7106 else
7107 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7108 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7109
7110 iemNativeCondStartIfBlock(pReNative, off);
7111 return off;
7112}
7113
7114
7115#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7116 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
7117 do {
7118
7119#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7120 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
7121 do {
7122
7123/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
7124 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7125DECL_INLINE_THROW(uint32_t)
7126iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
7127{
7128 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7129
7130 /* We have to load both RCX and EFLAGS before we can start branching,
7131 otherwise we'll end up in the else-block with an inconsistent
7132 register allocator state.
7133 Doing EFLAGS first as it's more likely to be loaded, right? */
7134 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7135 kIemNativeGstRegUse_ReadOnly);
7136 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7137 kIemNativeGstRegUse_ReadOnly);
7138
7139 /** @todo we could reduce this to a single branch instruction by spending a
7140 * temporary register and some setnz stuff. Not sure if loops are
7141 * worth it. */
7142 /* Check CX. */
7143#ifdef RT_ARCH_AMD64
7144 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7145#else
7146 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7147 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7148 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7149#endif
7150
7151 /* Check the EFlags bit. */
7152 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7153 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7154 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7155 !fCheckIfSet /*fJmpIfSet*/);
7156
7157 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7158 iemNativeRegFreeTmp(pReNative, idxEflReg);
7159
7160 iemNativeCondStartIfBlock(pReNative, off);
7161 return off;
7162}
7163
7164
7165#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7166 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
7167 do {
7168
7169#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7170 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
7171 do {
7172
7173#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7174 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
7175 do {
7176
7177#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7178 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
7179 do {
7180
7181/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
7182 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
7183 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
7184 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7185DECL_INLINE_THROW(uint32_t)
7186iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7187 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
7188{
7189 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7190
7191 /* We have to load both RCX and EFLAGS before we can start branching,
7192 otherwise we'll end up in the else-block with an inconsistent
7193 register allocator state.
7194 Doing EFLAGS first as it's more likely to be loaded, right? */
7195 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7196 kIemNativeGstRegUse_ReadOnly);
7197 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7198 kIemNativeGstRegUse_ReadOnly);
7199
7200 /** @todo we could reduce this to a single branch instruction by spending a
7201 * temporary register and some setnz stuff. Not sure if loops are
7202 * worth it. */
7203 /* Check RCX/ECX. */
7204 if (f64Bit)
7205 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7206 else
7207 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7208
7209 /* Check the EFlags bit. */
7210 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7211 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7212 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7213 !fCheckIfSet /*fJmpIfSet*/);
7214
7215 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7216 iemNativeRegFreeTmp(pReNative, idxEflReg);
7217
7218 iemNativeCondStartIfBlock(pReNative, off);
7219 return off;
7220}
7221
7222
7223
7224/*********************************************************************************************************************************
7225* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7226*********************************************************************************************************************************/
7227/** Number of hidden arguments for CIMPL calls.
7228 * @note We're sufferning from the usual VBOXSTRICTRC fun on Windows. */
7229#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7230# define IEM_CIMPL_HIDDEN_ARGS 3
7231#else
7232# define IEM_CIMPL_HIDDEN_ARGS 2
7233#endif
7234
7235#define IEM_MC_NOREF(a_Name) \
7236 RT_NOREF_PV(a_Name)
7237
7238#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
7239 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
7240
7241#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
7242 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
7243
7244#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
7245 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
7246
7247#define IEM_MC_LOCAL(a_Type, a_Name) \
7248 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
7249
7250#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
7251 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
7252
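/* Illustrative expansions of the two most common macros above; the names and
   types here are made up for the example, not taken from any real MC block:
       IEM_MC_ARG(uint16_t, u16Src, 1)  ->  uint8_t const u16Src = iemNativeArgAlloc(pReNative, 1, sizeof(uint16_t))
       IEM_MC_LOCAL(uint32_t, u32Tmp)   ->  uint8_t const u32Tmp = iemNativeVarAlloc(pReNative, sizeof(uint32_t))
   I.e. the C identifier of an MC variable ends up holding an index into
   pReNative->Core.aVars rather than the value itself. */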
7253
7254/**
7255 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
7256 */
7257DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
7258{
7259 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
7260 return IEM_CIMPL_HIDDEN_ARGS;
7261 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
7262 return 1;
7263 return 0;
7264}
7265
7266
7267/**
7268 * Internal worker that allocates a variable with kind set to
7269 * kIemNativeVarKind_Invalid and no current stack allocation.
7270 *
7271 * The kind will either be set by the caller or later when the variable is first
7272 * assigned a value.
7273 */
7274static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7275{
7276 Assert(cbType > 0 && cbType <= 64);
7277 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7278 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7279 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7280 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7281 pReNative->Core.aVars[idxVar].cbVar = cbType;
7282 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7283 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7284 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7285 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7286 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7287 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7288 pReNative->Core.aVars[idxVar].u.uValue = 0;
7289 return idxVar;
7290}
7291
7292
7293/**
7294 * Internal worker that allocates an argument variable w/o setting enmKind.
7295 */
7296static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7297{
7298 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7299 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7300 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7301
7302 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7303 pReNative->Core.aidxArgVars[iArgNo] = idxVar;
7304 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7305 return idxVar;
7306}
7307
7308
7309/**
7310 * Gets the stack slot for a stack variable, allocating one if necessary.
7311 *
7312 * Calling this function implies that the stack slot will contain a valid
7313 * variable value. The caller deals with any register currently assigned to the
7314 * variable, typically by spilling it into the stack slot.
7315 *
7316 * @returns The stack slot number.
7317 * @param pReNative The recompiler state.
7318 * @param idxVar The variable.
7319 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7320 */
7321DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7322{
7323 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7324 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
7325
7326 /* Already got a slot? */
7327 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7328 if (idxStackSlot != UINT8_MAX)
7329 {
7330 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7331 return idxStackSlot;
7332 }
7333
7334 /*
7335 * A single slot is easy to allocate.
7336 * Allocate them from the top end, closest to BP, to reduce the displacement.
7337 */
7338 if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
7339 {
7340 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7341 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7342 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7343 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
7344        Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
7345 return (uint8_t)iSlot;
7346 }
7347
7348 /*
7349 * We need more than one stack slot.
7350 *
7351 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
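     * cbVar -> fBitAllocMask (one bit per slot needed): 16 -> 0x3; 32 -> 0xf; 64 -> 0xff;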
7352 */
7353 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7354 Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
7355 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
7356 uint32_t fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
7357 uint32_t bmStack = ~pReNative->Core.bmStack;
7358 while (bmStack != UINT32_MAX)
7359 {
7360/** @todo allocate from the top to reduce BP displacement. */
7361 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7362 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7363 if (!(iSlot & fBitAlignMask))
7364 {
7365 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7366 {
7367 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7368 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
7369                Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
7370 idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
7371 return (uint8_t)iSlot;
7372 }
7373 }
7374 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7375 }
7376 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7377}
7378
7379
7380/**
7381 * Changes the variable to a stack variable.
7382 *
7383 * Currently this is only possible to do the first time the variable is used,
7384 * switching later can be implemented but is not done.
7385 *
7386 * @param pReNative The recompiler state.
7387 * @param idxVar The variable.
7388 * @throws VERR_IEM_VAR_IPE_2
7389 */
7390static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7391{
7392 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7393 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
7394 {
7395 /* We could in theory transition from immediate to stack as well, but it
7396 would involve the caller doing work storing the value on the stack. So,
7397 till that's required we only allow transition from invalid. */
7398 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7399 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7400 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7401 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
7402
7403 /* Note! We don't allocate a stack slot here, that's only done when a
7404 slot is actually needed to hold a variable value. */
7405 }
7406}
7407
7408
7409/**
7410 * Sets the variable to a constant value.
7411 *
7412 * This does not require stack storage as we know the value and can always
7413 * reload it, unless of course it's referenced.
7414 *
7415 * @param pReNative The recompiler state.
7416 * @param idxVar The variable.
7417 * @param uValue The immediate value.
7418 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7419 */
7420static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7421{
7422 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7423 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
7424 {
7425 /* Only simple transitions for now. */
7426 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7427 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7428 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
7429 }
7430 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7431
7432 pReNative->Core.aVars[idxVar].u.uValue = uValue;
7433 AssertMsg( pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
7434 || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
7435 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
7436}
7437
7438
7439/**
7440 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7441 *
7442 * This does not require stack storage as we know the value and can always
7443 * reload it. Loading is postponed till needed.
7444 *
7445 * @param pReNative The recompiler state.
7446 * @param idxVar The variable.
7447 * @param idxOtherVar The variable to take the (stack) address of.
7448 *
7449 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7450 */
7451static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7452{
7453 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7454 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7455
7456 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7457 {
7458 /* Only simple transitions for now. */
7459 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7460 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7461 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7462 }
7463 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7464
7465 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
7466
7467 /* Update the other variable, ensure it's a stack variable. */
7468 /** @todo handle variables with const values... that'll go boom now. */
7469 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7470 iemNativeVarSetKindToStack(pReNative, idxOtherVar);
7471}
7472
7473
7474/**
7475 * Sets the variable to a reference (pointer) to a guest register reference.
7476 *
7477 * This does not require stack storage as we know the value and can always
7478 * reload it. Loading is postponed till needed.
7479 *
7480 * @param pReNative The recompiler state.
7481 * @param idxVar The variable.
7482 * @param   enmRegClass The class of guest registers to reference.
7483 * @param idxReg The register within @a enmRegClass to reference.
7484 *
7485 * @throws VERR_IEM_VAR_IPE_2
7486 */
7487static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7488 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7489{
7490 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7491
7492 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
7493 {
7494 /* Only simple transitions for now. */
7495 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7496 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7497 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
7498 }
7499 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7500
7501 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
7502 pReNative->Core.aVars[idxVar].u.GstRegRef.idx = idxReg;
7503}
7504
7505
7506DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7507{
7508 return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7509}
7510
7511
7512DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7513{
7514 uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
7515
7516    /* Since we're using a generic uint64_t value type, we must truncate it if
7517       the variable is smaller, otherwise we may end up with a too large value when
7518       scaling up an imm8 w/ sign-extension.
7519
7520       This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7521       in the bios, bx=1) when running on arm, because clang expects 16-bit
7522       register parameters to have bits 16 and up set to zero. Instead of
7523       setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7524       CF value in the result. */
7525 switch (cbType)
7526 {
7527 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7528 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7529 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7530 }
7531 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7532 return idxVar;
7533}
7534
7535
7536DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7537{
7538 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7539 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7540 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7541 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7542
7543 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7544 iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
7545 return idxArgVar;
7546}
7547
7548
7549DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7550{
7551 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7552 /* Don't set to stack now, leave that to the first use as for instance
7553 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7554 return idxVar;
7555}
7556
7557
7558DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7559{
7560 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7561
7562    /* Since we're using a generic uint64_t value type, we must truncate it if
7563       the variable is smaller, otherwise we may end up with a too large value when
7564       scaling up an imm8 w/ sign-extension. */
7565 switch (cbType)
7566 {
7567 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7568 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7569 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7570 }
7571 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7572 return idxVar;
7573}
7574
7575
7576/**
7577 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7578 * fixed till we call iemNativeVarRegisterRelease.
7579 *
7580 * @returns The host register number.
7581 * @param pReNative The recompiler state.
7582 * @param idxVar The variable.
7583 * @param poff Pointer to the instruction buffer offset.
7584 * In case a register needs to be freed up or the value
7585 * loaded off the stack.
7586 * @param fInitialized Set if the variable must already have been initialized.
7587 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7588 * the case.
7589 * @param idxRegPref Preferred register number or UINT8_MAX.
7590 */
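/* Illustrative usage pattern (see e.g. iemNativeVarAssignToSmaller further down):
       uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
       ... emit code using idxReg ...
       iemNativeVarRegisterRelease(pReNative, idxVar);
   The host register stays pinned to the variable between the two calls. */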
7591DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7592 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7593{
7594 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7595 Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
7596 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7597
7598 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7599 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7600 {
7601 Assert( pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
7602 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7603 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7604 return idxReg;
7605 }
7606
7607 /*
7608 * If the kind of variable has not yet been set, default to 'stack'.
7609 */
7610 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
7611 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
7612 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
7613 iemNativeVarSetKindToStack(pReNative, idxVar);
7614
7615 /*
7616     * We have to allocate a register for the variable, even if it's a stack one,
7617     * as we don't know whether modifications are being made to it before it's
7618     * finalized (todo: analyze and insert hints about that?).
7619     *
7620     * If we can, we try to get the correct register for argument variables. This
7621     * assumes that most argument variables are fetched as close as possible
7622     * to the actual call, so that there aren't any interfering hidden calls
7623     * (memory accesses, etc) in between.
7624     *
7625     * If we cannot, or it's a local (non-argument) variable, we make sure no argument
7626     * registers that will be used by this MC block will be allocated here, and we always
7627     * prefer non-volatile registers to avoid needing to spill stuff for internal
7628     * calls.
7629 */
7630 /** @todo Detect too early argument value fetches and warn about hidden
7631 * calls causing less optimal code to be generated in the python script. */
7632
7633 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7634 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7635 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7636 {
7637 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7638 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7639 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7640 }
7641 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7642 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7643 {
7644 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7645 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7646 & ~pReNative->Core.bmHstRegsWithGstShadow
7647 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7648 & fNotArgsMask;
7649 if (fRegs)
7650 {
7651 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7652 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7653 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7654 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7655 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7656 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7657 }
7658 else
7659 {
7660 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7661 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7662 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7663 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7664 }
7665 }
7666 else
7667 {
7668 idxReg = idxRegPref;
7669 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7670 Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
7671 }
7672 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7673 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7674
7675 /*
7676 * Load it off the stack if we've got a stack slot.
7677 */
7678 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7679 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7680 {
7681 Assert(fInitialized);
7682 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7683 switch (pReNative->Core.aVars[idxVar].cbVar)
7684 {
7685 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7686 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7687 case 3: AssertFailed(); RT_FALL_THRU();
7688 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7689 default: AssertFailed(); RT_FALL_THRU();
7690 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7691 }
7692 }
7693 else
7694 {
7695 Assert(idxStackSlot == UINT8_MAX);
7696 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7697 }
7698 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7699 return idxReg;
7700}
7701
7702
7703/**
7704 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7705 * guest register.
7706 *
7707 * This function makes sure there is a register for it and sets it to be the
7708 * current shadow copy of @a enmGstReg.
7709 *
7710 * @returns The host register number.
7711 * @param pReNative The recompiler state.
7712 * @param idxVar The variable.
7713 * @param enmGstReg The guest register this variable will be written to
7714 * after this call.
7715 * @param poff Pointer to the instruction buffer offset.
7716 * In case a register needs to be freed up or if the
7717 * variable content needs to be loaded off the stack.
7718 *
7719 * @note We DO NOT expect @a idxVar to be an argument variable,
7720 *       because we can only be in the commit stage of an instruction when this
7721 * function is used.
7722 */
7723DECL_HIDDEN_THROW(uint8_t)
7724iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7725{
7726 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7727 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7728 AssertMsgStmt( pReNative->Core.aVars[idxVar].cbVar <= 8
7729 && ( pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
7730 || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
7731 ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
7732 pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7733 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7734
7735 /*
7736 * This shouldn't ever be used for arguments, unless it's in a weird else
7737 * branch that doesn't do any calling and even then it's questionable.
7738 *
7739 * However, in case someone writes crazy wrong MC code and does register
7740 * updates before making calls, just use the regular register allocator to
7741 * ensure we get a register suitable for the intended argument number.
7742 */
7743 AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7744
7745 /*
7746 * If there is already a register for the variable, we transfer/set the
7747 * guest shadow copy assignment to it.
7748 */
7749 uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
7750 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7751 {
7752 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7753 {
7754 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7755 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7756 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7757 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7758 }
7759 else
7760 {
7761 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7762 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7763 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7764 }
7765 /** @todo figure this one out. We need some way of making sure the register isn't
7766 * modified after this point, just in case we start writing crappy MC code. */
7767 pReNative->Core.aVars[idxVar].enmGstReg = enmGstReg;
7768 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7769 return idxReg;
7770 }
7771 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
7772
7773 /*
7774 * Because this is supposed to be the commit stage, we just tag along with the
7775 * temporary register allocator and upgrade it to a variable register.
7776 */
7777 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7778 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7779 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7780 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7781 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7782 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7783
7784 /*
7785 * Now we need to load the register value.
7786 */
7787 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
7788 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
7789 else
7790 {
7791 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7792 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7793 switch (pReNative->Core.aVars[idxVar].cbVar)
7794 {
7795 case sizeof(uint64_t):
7796 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7797 break;
7798 case sizeof(uint32_t):
7799 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7800 break;
7801 case sizeof(uint16_t):
7802 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7803 break;
7804 case sizeof(uint8_t):
7805 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7806 break;
7807 default:
7808 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7809 }
7810 }
7811
7812 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7813 return idxReg;
7814}
7815
7816
7817/**
7818 * Sets the host register for @a idxVar to @a idxReg.
7819 *
7820 * The register must not be allocated. Any guest register shadowing will be
7821 * implicitly dropped by this call.
7822 *
7823 * The variable must not have any register associated with it (causes
7824 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
7825 * implied.
7826 *
7827 * @returns idxReg
7828 * @param pReNative The recompiler state.
7829 * @param idxVar The variable.
7830 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
7831 * @param off For recording in debug info.
7832 *
7833 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
7834 */
7835DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
7836{
7837 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7838 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7839 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
7840 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
7841 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
7842
7843 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
7844 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7845
7846 iemNativeVarSetKindToStack(pReNative, idxVar);
7847 pReNative->Core.aVars[idxVar].idxReg = idxReg;
7848
7849 return idxReg;
7850}
7851
7852
7853/**
7854 * A convenient helper function.
7855 */
7856DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7857 uint8_t idxReg, uint32_t *poff)
7858{
7859 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
7860 pReNative->Core.aVars[idxVar].fRegAcquired = true;
7861 return idxReg;
7862}
7863
7864
7865/**
7866 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7867 *
7868 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7869 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7870 * requirement of flushing anything in volatile host registers when making a
7871 * call.
7872 *
7873 * @returns New @a off value.
7874 * @param pReNative The recompiler state.
7875 * @param off The code buffer position.
7876 * @param fHstRegsNotToSave Set of registers not to save & restore.
7877 */
7878DECL_HIDDEN_THROW(uint32_t)
7879iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7880{
7881 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7882 if (fHstRegs)
7883 {
7884 do
7885 {
7886 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7887 fHstRegs &= ~RT_BIT_32(idxHstReg);
7888
7889 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7890 {
7891 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7892 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7893 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7894 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7895 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7896 switch (pReNative->Core.aVars[idxVar].enmKind)
7897 {
7898 case kIemNativeVarKind_Stack:
7899 {
7900 /* Temporarily spill the variable register. */
7901 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7902 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7903 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7904 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7905 continue;
7906 }
7907
7908 case kIemNativeVarKind_Immediate:
7909 case kIemNativeVarKind_VarRef:
7910 case kIemNativeVarKind_GstRegRef:
7911 /* It is weird to have any of these loaded at this point. */
7912 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7913 continue;
7914
7915 case kIemNativeVarKind_End:
7916 case kIemNativeVarKind_Invalid:
7917 break;
7918 }
7919 AssertFailed();
7920 }
7921 else
7922 {
7923 /*
7924 * Allocate a temporary stack slot and spill the register to it.
7925 */
7926 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7927 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7928 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7929 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7930 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7931 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7932 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7933 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7934 }
7935 } while (fHstRegs);
7936 }
7937 return off;
7938}
7939
7940
7941/**
7942 * Emit code to restore volatile registers after a call to a helper.
7943 *
7944 * @returns New @a off value.
7945 * @param pReNative The recompiler state.
7946 * @param off The code buffer position.
7947 * @param fHstRegsNotToSave Set of registers not to save & restore.
7948 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7949 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7950 */
7951DECL_HIDDEN_THROW(uint32_t)
7952iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7953{
7954 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7955 if (fHstRegs)
7956 {
7957 do
7958 {
7959 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7960 fHstRegs &= ~RT_BIT_32(idxHstReg);
7961
7962 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7963 {
7964 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7965 AssertStmt( idxVar < RT_ELEMENTS(pReNative->Core.aVars)
7966 && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
7967 && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
7968 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7969 switch (pReNative->Core.aVars[idxVar].enmKind)
7970 {
7971 case kIemNativeVarKind_Stack:
7972 {
7973 /* Unspill the variable register. */
7974 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7975 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%d/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7976 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7977 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7978 continue;
7979 }
7980
7981 case kIemNativeVarKind_Immediate:
7982 case kIemNativeVarKind_VarRef:
7983 case kIemNativeVarKind_GstRegRef:
7984 /* It is weird to have any of these loaded at this point. */
7985 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7986 continue;
7987
7988 case kIemNativeVarKind_End:
7989 case kIemNativeVarKind_Invalid:
7990 break;
7991 }
7992 AssertFailed();
7993 }
7994 else
7995 {
7996 /*
7997 * Restore from temporary stack slot.
7998 */
7999 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8000 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8001 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8002 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8003
8004 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8005 }
8006 } while (fHstRegs);
8007 }
8008 return off;
8009}
8010
8011
8012/**
8013 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8014 *
8015 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8016 */
8017DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8018{
8019 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8020 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8021 {
8022 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8023 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8024 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8025 Assert(cSlots > 0);
8026 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8027 Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
8028 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8029 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8030 }
8031 else
8032 Assert(idxStackSlot == UINT8_MAX);
8033}
8034
8035
8036/**
8037 * Worker that frees a single variable.
8038 *
8039 * ASSUMES that @a idxVar is valid.
8040 */
8041DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8042{
8043 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8044 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8045 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8046
8047 /* Free the host register first if any assigned. */
8048 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8049 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8050 {
8051 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
8052 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8053 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8054 }
8055
8056 /* Free argument mapping. */
8057 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8058 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8059 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8060
8061 /* Free the stack slots. */
8062 iemNativeVarFreeStackSlots(pReNative, idxVar);
8063
8064 /* Free the actual variable. */
8065 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8066 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8067}
8068
8069
8070/**
8071 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8072 */
8073DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8074{
8075 while (bmVars != 0)
8076 {
8077 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8078 bmVars &= ~RT_BIT_32(idxVar);
8079
8080#if 1 /** @todo optimize by simplifying this later... */
8081 iemNativeVarFreeOneWorker(pReNative, idxVar);
8082#else
8083 /* Only need to free the host register, the rest is done as bulk updates below. */
8084 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8085 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8086 {
8087 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
8088 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8089 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8090 }
8091#endif
8092 }
8093#if 0 /** @todo optimize by simplifying this later... */
8094 pReNative->Core.bmVars = 0;
8095 pReNative->Core.bmStack = 0;
8096 pReNative->Core.u64ArgVars = UINT64_MAX;
8097#endif
8098}
8099
8100
8101/**
8102 * This is called by IEM_MC_END() to clean up all variables.
8103 */
8104DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
8105{
8106 uint32_t const bmVars = pReNative->Core.bmVars;
8107 if (bmVars != 0)
8108 iemNativeVarFreeAllSlow(pReNative, bmVars);
8109 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8110 Assert(pReNative->Core.bmStack == 0);
8111}
8112
8113
8114#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
8115
8116/**
8117 * This is called by IEM_MC_FREE_LOCAL.
8118 */
8119DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8120{
8121 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8122 Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
8123 iemNativeVarFreeOneWorker(pReNative, idxVar);
8124}
8125
8126
8127#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
8128
8129/**
8130 * This is called by IEM_MC_FREE_ARG.
8131 */
8132DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8133{
8134 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8135 Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
8136 iemNativeVarFreeOneWorker(pReNative, idxVar);
8137}
8138
8139
8140#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
8141
8142/**
8143 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
8144 */
8145DECL_INLINE_THROW(uint32_t)
8146iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
8147{
8148 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
8149 AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
8150 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8151 Assert( pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
8152 || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
8153
8154 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
8155 AssertStmt( pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
8156 || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
8157 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8158
8159 Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
8160
8161 /*
8162 * Special case for immediates.
8163 */
8164 if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
8165 {
8166 switch (pReNative->Core.aVars[idxVarDst].cbVar)
8167 {
8168 case sizeof(uint16_t):
8169 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
8170 break;
8171 case sizeof(uint32_t):
8172 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
8173 break;
8174 default: AssertFailed(); break;
8175 }
8176 }
8177 else
8178 {
8179 /*
8180 * The generic solution for now.
8181 */
8182 /** @todo optimize this by having the python script make sure the source
8183 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
8184 * statement. Then we could just transfer the register assignments. */
8185 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
8186 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
8187 switch (pReNative->Core.aVars[idxVarDst].cbVar)
8188 {
8189 case sizeof(uint16_t):
8190 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
8191 break;
8192 case sizeof(uint32_t):
8193 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
8194 break;
8195 default: AssertFailed(); break;
8196 }
8197 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
8198 iemNativeVarRegisterRelease(pReNative, idxVarDst);
8199 }
8200 return off;
8201}
8202
8203
8204
8205/*********************************************************************************************************************************
8206* Emitters for IEM_MC_CALL_CIMPL_XXX *
8207*********************************************************************************************************************************/
8208
8209/**
8210 * Emits code to load a reference to the given guest register into @a idxGprDst.
8211 */
8212DECL_INLINE_THROW(uint32_t)
8213iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8214 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8215{
8216 /*
8217 * Get the offset relative to the CPUMCTX structure.
8218 */
8219 uint32_t offCpumCtx;
8220 switch (enmClass)
8221 {
8222 case kIemNativeGstRegRef_Gpr:
8223 Assert(idxRegInClass < 16);
8224 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8225 break;
8226
8227 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8228 Assert(idxRegInClass < 4);
8229 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8230 break;
8231
8232 case kIemNativeGstRegRef_EFlags:
8233 Assert(idxRegInClass == 0);
8234 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8235 break;
8236
8237 case kIemNativeGstRegRef_MxCsr:
8238 Assert(idxRegInClass == 0);
8239 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8240 break;
8241
8242 case kIemNativeGstRegRef_FpuReg:
8243 Assert(idxRegInClass < 8);
8244 AssertFailed(); /** @todo what kind of indexing? */
8245 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8246 break;
8247
8248 case kIemNativeGstRegRef_MReg:
8249 Assert(idxRegInClass < 8);
8250 AssertFailed(); /** @todo what kind of indexing? */
8251 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8252 break;
8253
8254 case kIemNativeGstRegRef_XReg:
8255 Assert(idxRegInClass < 16);
8256 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8257 break;
8258
8259 default:
8260 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8261 }
8262
8263 /*
8264 * Load the value into the destination register.
8265 */
8266#ifdef RT_ARCH_AMD64
8267 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8268
8269#elif defined(RT_ARCH_ARM64)
8270 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8271 Assert(offCpumCtx < 4096);
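    /* The A64 ADD (immediate) instruction only encodes a 12-bit unsigned immediate,
       hence the assertion above; a single ADD off the fixed CPUMCTX pointer register
       is then enough to materialize the member address in idxGprDst. */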
8272 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8273
8274#else
8275# error "Port me!"
8276#endif
8277
8278 return off;
8279}
8280
8281
8282/**
8283 * Common code for CIMPL and AIMPL calls.
8284 *
8285 * These are calls that use argument variables and such. They should not be
8286 * confused with internal calls required to implement an MC operation,
8287 * like a TLB load and similar.
8288 *
8289 * Upon return all that is left to do is to load any hidden arguments and
8290 * perform the call. All argument variables are freed.
8291 *
8292 * @returns New code buffer offset; throws VBox status code on error.
8293 * @param pReNative The native recompile state.
8294 * @param off The code buffer offset.
8295 * @param cArgs The total number of arguments (includes hidden
8296 * count).
8297 * @param cHiddenArgs The number of hidden arguments. The hidden
8298 * arguments must not have any variable declared for
8299 * them, whereas all the regular arguments must
8300 * (tstIEMCheckMc ensures this).
8301 */
8302DECL_HIDDEN_THROW(uint32_t)
8303iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8304{
8305#ifdef VBOX_STRICT
8306 /*
8307 * Assert sanity.
8308 */
8309 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8310 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8311 for (unsigned i = 0; i < cHiddenArgs; i++)
8312 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8313 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8314 {
8315 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8316 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8317 }
8318 iemNativeRegAssertSanity(pReNative);
8319#endif
8320
8321 /*
8322 * Before we do anything else, go over variables that are referenced and
8323 * make sure they are not in a register.
8324 */
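 /* Note: a variable that is referenced (i.e. some other variable holds a reference
    to it) must live in its stack slot, because the referrer passes the address of
    that slot to the callee (see the VarRef cases below); a cached register copy
    would not be seen by the callee, so spill it here. */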
8325 uint32_t bmVars = pReNative->Core.bmVars;
8326 if (bmVars)
8327 {
8328 do
8329 {
8330 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8331 bmVars &= ~RT_BIT_32(idxVar);
8332
8333 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8334 {
8335 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8336 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8337 {
8338 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8339 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8340 idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8341 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8342 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8343
8344 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8345 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8346 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8347 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8348 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8349 }
8350 }
8351 } while (bmVars != 0);
8352#if 0 //def VBOX_STRICT
8353 iemNativeRegAssertSanity(pReNative);
8354#endif
8355 }
8356
8357 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8358
8359 /*
8360 * First, go over the host registers that will be used for arguments and make
8361 * sure they either hold the desired argument or are free.
8362 */
8363 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8364 {
8365 for (uint32_t i = 0; i < cRegArgs; i++)
8366 {
8367 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8368 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8369 {
8370 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8371 {
8372 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8373 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8374 Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
8375 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8376 if (uArgNo == i)
8377 { /* perfect */ }
8378 /* The variable allocator logic should make sure this is impossible,
8379 except for when the return register is used as a parameter (ARM,
8380 but not x86). */
8381#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8382 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8383 {
8384# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8385# error "Implement this"
8386# endif
8387 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8388 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8389 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8390 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8391 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8392 }
8393#endif
8394 else
8395 {
8396 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8397
8398 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
8399 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8400 else
8401 {
8402 /* just free it, can be reloaded if used again */
8403 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8404 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8405 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8406 }
8407 }
8408 }
8409 else
8410 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8411 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8412 }
8413 }
8414#if 0 //def VBOX_STRICT
8415 iemNativeRegAssertSanity(pReNative);
8416#endif
8417 }
8418
8419 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8420
8421#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8422 /*
8423 * If there are any stack arguments, make sure they are in their place as well.
8424 *
8425 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8426 * the caller) will be loading it later and it must be free (see the first loop).
8427 */
8428 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8429 {
8430 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8431 {
8432 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8433 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8434 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8435 {
8436 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8437 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
8438 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
8439 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8440 }
8441 else
8442 {
8443 /* Use ARG0 as temp for stuff we need registers for. */
8444 switch (pReNative->Core.aVars[idxVar].enmKind)
8445 {
8446 case kIemNativeVarKind_Stack:
8447 {
8448 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8449 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8450 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8451 iemNativeStackCalcBpDisp(idxStackSlot));
8452 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8453 continue;
8454 }
8455
8456 case kIemNativeVarKind_Immediate:
8457 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
8458 continue;
8459
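 /* By-reference argument: flush the referenced variable to its stack slot and
    store the address of that slot (via ARG0 as scratch) in the argument's
    stack location. */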
8460 case kIemNativeVarKind_VarRef:
8461 {
8462 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8463 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8464 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8465 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8466 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8467 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8468 {
8469 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8470 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8471 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8472 }
8473 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8474 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8475 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8476 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8477 continue;
8478 }
8479
8480 case kIemNativeVarKind_GstRegRef:
8481 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8482 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8483 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8484 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8485 continue;
8486
8487 case kIemNativeVarKind_Invalid:
8488 case kIemNativeVarKind_End:
8489 break;
8490 }
8491 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8492 }
8493 }
8494# if 0 //def VBOX_STRICT
8495 iemNativeRegAssertSanity(pReNative);
8496# endif
8497 }
8498#else
8499 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8500#endif
8501
8502 /*
8503 * Make sure the argument variables are loaded into their respective registers.
8504 *
8505 * We can optimize this by ASSUMING that any register allocations are for
8506 * registers that have already been loaded and are ready. The previous step
8507 * saw to that.
8508 */
8509 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8510 {
8511 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8512 {
8513 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8514 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8515 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
8516 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8517 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8518 else
8519 {
8520 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8521 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8522 {
8523 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
8524 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
8525 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
8526 | RT_BIT_32(idxArgReg);
8527 pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
8528 }
8529 else
8530 {
8531 /* Use ARG0 as temp for stuff we need registers for. */
8532 switch (pReNative->Core.aVars[idxVar].enmKind)
8533 {
8534 case kIemNativeVarKind_Stack:
8535 {
8536 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8537 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8538 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8539 continue;
8540 }
8541
8542 case kIemNativeVarKind_Immediate:
8543 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
8544 continue;
8545
8546 case kIemNativeVarKind_VarRef:
8547 {
8548 uint8_t const idxOtherVar = pReNative->Core.aVars[idxVar].u.idxRefVar;
8549 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8550 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
8551 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8552 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8553 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8554 {
8555 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8556 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8557 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8558 }
8559 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8560 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8561 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8562 continue;
8563 }
8564
8565 case kIemNativeVarKind_GstRegRef:
8566 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8567 pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
8568 pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
8569 continue;
8570
8571 case kIemNativeVarKind_Invalid:
8572 case kIemNativeVarKind_End:
8573 break;
8574 }
8575 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8576 }
8577 }
8578 }
8579#if 0 //def VBOX_STRICT
8580 iemNativeRegAssertSanity(pReNative);
8581#endif
8582 }
8583#ifdef VBOX_STRICT
8584 else
8585 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8586 {
8587 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8588 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8589 }
8590#endif
8591
8592 /*
8593 * Free all argument variables (simplified).
8594 * Their lifetime always expires with the call they are for.
8595 */
8596 /** @todo Make the python script check that arguments aren't used after
8597 * IEM_MC_CALL_XXXX. */
8598 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8599 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8600 * an argument value. There is also some FPU stuff. */
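 /* Note: 'freeing' here is pure bookkeeping: the argument registers are consumed
    by the call itself and anything else was spilled or dropped above, so only the
    variable bitmap entry and the stack slot need to be released. */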
8601 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8602 {
8603 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
8604 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8605
8606 /* no need to free registers: */
8607 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8608 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8609 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8610 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8611 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8612 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8613
8614 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8615 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8616 iemNativeVarFreeStackSlots(pReNative, idxVar);
8617 }
8618 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8619
8620 /*
8621 * Flush volatile registers as we make the call.
8622 */
8623 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8624
8625 return off;
8626}
8627
8628
8629/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
8630DECL_HIDDEN_THROW(uint32_t)
8631iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
8632 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
8633
8634{
8635 /*
8636 * Do all the call setup and cleanup.
8637 */
8638 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
8639
8640 /*
8641 * Load the two or three hidden arguments.
8642 */
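 /* Note: with VBOXSTRICTRC_STRICT_ENABLED on Windows/AMD64 the MSC ABI returns the
    strict status class via a hidden buffer pointer, so an extra first argument
    pointing into our frame (the incoming shadow area) is passed and the status
    code is reloaded from there after the call. */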
8643#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8644 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
8645 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8646 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
8647#else
8648 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8649 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
8650#endif
8651
8652 /*
8653 * Make the call and check the return code.
8654 *
8655 * Shadow PC copies are always flushed here, other stuff depends on flags.
8656 * Segment and general purpose registers are explicitly flushed via the
8657 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
8658 * macros.
8659 */
8660 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
8661#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8662 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
8663#endif
8664 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
8665 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
8666 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
8667 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
8668
8669 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
8670}
8671
8672
8673#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
8674 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
8675
8676/** Emits code for IEM_MC_CALL_CIMPL_1. */
8677DECL_INLINE_THROW(uint32_t)
8678iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8679 uintptr_t pfnCImpl, uint8_t idxArg0)
8680{
8681 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8682 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
8683}
8684
8685
8686#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
8687 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
8688
8689/** Emits code for IEM_MC_CALL_CIMPL_2. */
8690DECL_INLINE_THROW(uint32_t)
8691iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8692 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
8693{
8694 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8695 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8696 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
8697}
8698
8699
8700#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
8701 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8702 (uintptr_t)a_pfnCImpl, a0, a1, a2)
8703
8704/** Emits code for IEM_MC_CALL_CIMPL_3. */
8705DECL_INLINE_THROW(uint32_t)
8706iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8707 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8708{
8709 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8710 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8711 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8712 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
8713}
8714
8715
8716#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
8717 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8718 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
8719
8720/** Emits code for IEM_MC_CALL_CIMPL_4. */
8721DECL_INLINE_THROW(uint32_t)
8722iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8723 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8724{
8725 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8726 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8727 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8728 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8729 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
8730}
8731
8732
8733#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
8734 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
8735 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
8736
8737/** Emits code for IEM_MC_CALL_CIMPL_5. */
8738DECL_INLINE_THROW(uint32_t)
8739iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
8740 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
8741{
8742 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
8743 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
8744 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
8745 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
8746 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
8747 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
8748}
8749
8750
8751/** Recompiler debugging: Flush guest register shadow copies. */
8752#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
8753
8754
8755
8756/*********************************************************************************************************************************
8757* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
8758*********************************************************************************************************************************/
8759
8760/**
8761 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
8762 */
8763DECL_INLINE_THROW(uint32_t)
8764iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8765 uintptr_t pfnAImpl, uint8_t cArgs)
8766{
8767 if (idxVarRc != UINT8_MAX)
8768 {
8769 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
8770 AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
8771 AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
8772 }
8773
8774 /*
8775 * Do all the call setup and cleanup.
8776 */
8777 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
8778
8779 /*
8780 * Make the call and update the return code variable if we've got one.
8781 */
8782 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8783 if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
8784 {
8785 pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
8786 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
8787 }
8788
8789 return off;
8790}
8791
8792
8793
8794#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
8795 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
8796
8797#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
8798 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
8799
8800/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
8801DECL_INLINE_THROW(uint32_t)
8802iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
8803{
8804 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
8805}
8806
8807
8808#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
8809 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
8810
8811#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
8812 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
8813
8814/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
8815DECL_INLINE_THROW(uint32_t)
8816iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
8817{
8818 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8819 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
8820}
8821
8822
8823#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
8824 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
8825
8826#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
8827 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
8828
8829/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
8830DECL_INLINE_THROW(uint32_t)
8831iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8832 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8833{
8834 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8835 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8836 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
8837}
8838
8839
8840#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
8841 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
8842
8843#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
8844 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
8845
8846/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
8847DECL_INLINE_THROW(uint32_t)
8848iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8849 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8850{
8851 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8852 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8853 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8854 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
8855}
8856
8857
8858#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
8859 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8860
8861#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
8862 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
8863
8864/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
8865DECL_INLINE_THROW(uint32_t)
8866iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
8867 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
8868{
8869 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
8870 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
8871 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
8872 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
8873 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
8874}
8875
8876
8877
8878/*********************************************************************************************************************************
8879* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
8880*********************************************************************************************************************************/
8881
8882#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
8883 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
8884
8885#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8886 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
8887
8888#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8889 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
8890
8891#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8892 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
8893
8894
8895/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
8896 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
8897DECL_INLINE_THROW(uint32_t)
8898iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
8899{
8900 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8901 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
8902 Assert(iGRegEx < 20);
8903
8904 /* Same discussion as in iemNativeEmitFetchGregU16 */
8905 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8906 kIemNativeGstRegUse_ReadOnly);
8907
8908 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8909 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8910
8911 /* The value is zero-extended to the full 64-bit host register width. */
8912 if (iGRegEx < 16)
8913 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8914 else
8915 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8916
8917 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8918 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8919 return off;
8920}
8921
8922
8923#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
8924 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
8925
8926#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
8927 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
8928
8929#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
8930 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
8931
8932/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
8933DECL_INLINE_THROW(uint32_t)
8934iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
8935{
8936 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8937 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
8938 Assert(iGRegEx < 20);
8939
8940 /* Same discussion as in iemNativeEmitFetchGregU16 */
8941 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
8942 kIemNativeGstRegUse_ReadOnly);
8943
8944 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8945 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8946
8947 if (iGRegEx < 16)
8948 {
8949 switch (cbSignExtended)
8950 {
8951 case sizeof(uint16_t):
8952 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8953 break;
8954 case sizeof(uint32_t):
8955 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8956 break;
8957 case sizeof(uint64_t):
8958 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
8959 break;
8960 default: AssertFailed(); break;
8961 }
8962 }
8963 else
8964 {
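 /* High byte (AH/CH/DH/BH): move bits 15:8 of the guest register down into the
    variable register first, then sign-extend it from its own low byte. */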
8965 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
8966 switch (cbSignExtended)
8967 {
8968 case sizeof(uint16_t):
8969 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8970 break;
8971 case sizeof(uint32_t):
8972 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8973 break;
8974 case sizeof(uint64_t):
8975 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
8976 break;
8977 default: AssertFailed(); break;
8978 }
8979 }
8980
8981 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8982 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
8983 return off;
8984}
8985
8986
8987
8988#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
8989 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
8990
8991#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
8992 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
8993
8994#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
8995 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
8996
8997/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
8998DECL_INLINE_THROW(uint32_t)
8999iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9000{
9001 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9002 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
9003 Assert(iGReg < 16);
9004
9005 /*
9006 * We can either just load the low 16-bit of the GPR into a host register
9007 * for the variable, or we can do so via a shadow copy host register. The
9008 * latter will avoid having to reload it if it's being stored later, but
9009 * will waste a host register if it isn't touched again. Since we don't
9010 * know what's going to happen, we choose the latter for now.
9011 */
9012 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9013 kIemNativeGstRegUse_ReadOnly);
9014
9015 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9016 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9017 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9018 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9019
9020 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9021 return off;
9022}
9023
9024
9025#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
9026 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9027
9028#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
9029 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9030
9031/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
9032DECL_INLINE_THROW(uint32_t)
9033iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
9034{
9035 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9036 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
9037 Assert(iGReg < 16);
9038
9039 /*
9040 * We can either just load the low 16-bit of the GPR into a host register
9041 * for the variable, or we can do so via a shadow copy host register. The
9042 * latter will avoid having to reload it if it's being stored later, but
9043 * will waste a host register if it isn't touched again. Since we don't
9044 * know what's going to happen, we choose the latter for now.
9045 */
9046 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9047 kIemNativeGstRegUse_ReadOnly);
9048
9049 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9050 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9051 if (cbSignExtended == sizeof(uint32_t))
9052 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9053 else
9054 {
9055 Assert(cbSignExtended == sizeof(uint64_t));
9056 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9057 }
9058 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9059
9060 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9061 return off;
9062}
9063
9064
9065#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
9066 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
9067
9068#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
9069 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
9070
9071/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
9072DECL_INLINE_THROW(uint32_t)
9073iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9074{
9075 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9076 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
9077 Assert(iGReg < 16);
9078
9079 /*
9080 * We can either just load the low 32-bit of the GPR into a host register
9081 * for the variable, or we can do so via a shadow copy host register. The
9082 * latter will avoid having to reload it if it's being stored later, but
9083 * will waste a host register if it isn't touched again. Since we don't
9084 * know what's going to happen, we choose the latter for now.
9085 */
9086 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9087 kIemNativeGstRegUse_ReadOnly);
9088
9089 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9090 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9091 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9092 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9093
9094 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9095 return off;
9096}
9097
9098
9099#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
9100 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
9101
9102/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
9103DECL_INLINE_THROW(uint32_t)
9104iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9105{
9106 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9107 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
9108 Assert(iGReg < 16);
9109
9110 /*
9111 * We can either just load the low 32-bit of the GPR into a host register
9112 * for the variable, or we can do so via a shadow copy host register. The
9113 * latter will avoid having to reload it if it's being stored later, but
9114 * will waste a host register if it isn't touched again. Since we don't
9115 * know what's going to happen, we choose the latter for now.
9116 */
9117 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9118 kIemNativeGstRegUse_ReadOnly);
9119
9120 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9121 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9122 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9123 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9124
9125 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9126 return off;
9127}
9128
9129
9130#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
9131 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9132
9133#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
9134 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9135
9136/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
9137 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
9138DECL_INLINE_THROW(uint32_t)
9139iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9140{
9141 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9142 Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
9143 Assert(iGReg < 16);
9144
9145 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9146 kIemNativeGstRegUse_ReadOnly);
9147
9148 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9149 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9150 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
9151 /** @todo name the register a shadow one already? */
9152 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9153
9154 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9155 return off;
9156}
9157
9158
9159
9160/*********************************************************************************************************************************
9161* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
9162*********************************************************************************************************************************/
9163
9164#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
9165 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
9166
9167/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
9168DECL_INLINE_THROW(uint32_t)
9169iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
9170{
9171 Assert(iGRegEx < 20);
9172 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9173 kIemNativeGstRegUse_ForUpdate);
9174#ifdef RT_ARCH_AMD64
9175 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9176
9177 /* To the lowest byte of the register: mov r8, imm8 */
9178 if (iGRegEx < 16)
9179 {
9180 if (idxGstTmpReg >= 8)
9181 pbCodeBuf[off++] = X86_OP_REX_B;
9182 else if (idxGstTmpReg >= 4)
9183 pbCodeBuf[off++] = X86_OP_REX;
9184 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9185 pbCodeBuf[off++] = u8Value;
9186 }
9187 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
9188 else if (idxGstTmpReg < 4)
9189 {
9190 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
9191 pbCodeBuf[off++] = u8Value;
9192 }
9193 else
9194 {
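 /* The destination is a high byte, but the host register holding the guest value
    has no directly addressable second byte (only AH/CH/DH/BH exist, and not in
    combination with a REX prefix). So rotate the 64-bit register right by 8,
    patch what is now the low byte, and rotate back. */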
9195 /* ror reg64, 8 */
9196 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9197 pbCodeBuf[off++] = 0xc1;
9198 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9199 pbCodeBuf[off++] = 8;
9200
9201 /* mov reg8, imm8 */
9202 if (idxGstTmpReg >= 8)
9203 pbCodeBuf[off++] = X86_OP_REX_B;
9204 else if (idxGstTmpReg >= 4)
9205 pbCodeBuf[off++] = X86_OP_REX;
9206 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9207 pbCodeBuf[off++] = u8Value;
9208
9209 /* rol reg64, 8 */
9210 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9211 pbCodeBuf[off++] = 0xc1;
9212 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9213 pbCodeBuf[off++] = 8;
9214 }
9215
9216#elif defined(RT_ARCH_ARM64)
9217 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
9218 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9219 if (iGRegEx < 16)
9220 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
9221 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
9222 else
9223 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
9224 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
9225 iemNativeRegFreeTmp(pReNative, idxImmReg);
9226
9227#else
9228# error "Port me!"
9229#endif
9230
9231 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9232
9233 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9234
9235 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9236 return off;
9237}
9238
9239
9240#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
9241 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
9242
9243/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
9244DECL_INLINE_THROW(uint32_t)
9245iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
9246{
9247 Assert(iGRegEx < 20);
9248 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9249
9250 /*
9251 * If it's a constant value (unlikely), we treat this as an
9252 * IEM_MC_STORE_GREG_U8_CONST statement.
9253 */
9254 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9255 { /* likely */ }
9256 else
9257 {
9258 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9259 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9260 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9261 }
9262
9263 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9264 kIemNativeGstRegUse_ForUpdate);
9265 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9266
9267#ifdef RT_ARCH_AMD64
9268 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
9269 if (iGRegEx < 16)
9270 {
9271 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
9272 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9273 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9274 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9275 pbCodeBuf[off++] = X86_OP_REX;
9276 pbCodeBuf[off++] = 0x8a;
9277 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9278 }
9279 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
9280 else if (idxGstTmpReg < 4 && idxVarReg < 4)
9281 {
9282 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
9283 pbCodeBuf[off++] = 0x8a;
9284 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
9285 }
9286 else
9287 {
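 /* Same rotate/patch/rotate trick as in iemNativeEmitStoreGregU8Const: with these
    register assignments the high byte cannot be addressed directly. */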
9288 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
9289
9290 /* ror reg64, 8 */
9291 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9292 pbCodeBuf[off++] = 0xc1;
9293 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9294 pbCodeBuf[off++] = 8;
9295
9296 /* mov reg8, reg8(r/m) */
9297 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9298 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9299 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9300 pbCodeBuf[off++] = X86_OP_REX;
9301 pbCodeBuf[off++] = 0x8a;
9302 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9303
9304 /* rol reg64, 8 */
9305 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9306 pbCodeBuf[off++] = 0xc1;
9307 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9308 pbCodeBuf[off++] = 8;
9309 }
9310
9311#elif defined(RT_ARCH_ARM64)
9312 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
9313 or
9314 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
9315 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9316 if (iGRegEx < 16)
9317 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
9318 else
9319 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
9320
9321#else
9322# error "Port me!"
9323#endif
9324 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9325
9326 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9327
9328 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9329 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9330 return off;
9331}
9332
9333
9334
9335#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
9336 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
9337
9338/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
9339DECL_INLINE_THROW(uint32_t)
9340iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
9341{
9342 Assert(iGReg < 16);
9343 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9344 kIemNativeGstRegUse_ForUpdate);
9345#ifdef RT_ARCH_AMD64
9346 /* mov reg16, imm16 */
9347 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9348 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9349 if (idxGstTmpReg >= 8)
9350 pbCodeBuf[off++] = X86_OP_REX_B;
9351 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
9352 pbCodeBuf[off++] = RT_BYTE1(uValue);
9353 pbCodeBuf[off++] = RT_BYTE2(uValue);
9354
9355#elif defined(RT_ARCH_ARM64)
9356 /* movk xdst, #uValue, lsl #0 */
9357 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9358 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
9359
9360#else
9361# error "Port me!"
9362#endif
9363
9364 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9365
9366 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9367 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9368 return off;
9369}
9370
9371
9372#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
9373 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
9374
9375/** Emits code for IEM_MC_STORE_GREG_U16. */
9376DECL_INLINE_THROW(uint32_t)
9377iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9378{
9379 Assert(iGReg < 16);
9380 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9381
9382 /*
9383 * If it's a constant value (unlikely), we treat this as an
9384 * IEM_MC_STORE_GREG_U16_CONST statement.
9385 */
9386 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9387 { /* likely */ }
9388 else
9389 {
9390 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9391 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9392 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9393 }
9394
9395 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9396 kIemNativeGstRegUse_ForUpdate);
9397
9398#ifdef RT_ARCH_AMD64
9399 /* mov reg16, reg16 or [mem16] */
9400 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9401 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9402 if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9403 {
9404 if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
9405 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
9406 | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
9407 pbCodeBuf[off++] = 0x8b;
9408 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
9409 }
9410 else
9411 {
9412 uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
9413 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9414 if (idxGstTmpReg >= 8)
9415 pbCodeBuf[off++] = X86_OP_REX_R;
9416 pbCodeBuf[off++] = 0x8b;
9417 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9418 }
9419
9420#elif defined(RT_ARCH_ARM64)
9421 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
9422 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9423 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9424 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
9425 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9426
9427#else
9428# error "Port me!"
9429#endif
9430
9431 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9432
9433 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9434 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9435 return off;
9436}
9437
9438
9439#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
9440 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
9441
9442/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
9443DECL_INLINE_THROW(uint32_t)
9444iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
9445{
9446 Assert(iGReg < 16);
9447 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9448 kIemNativeGstRegUse_ForFullWrite);
9449 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9450 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9451 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9452 return off;
9453}
9454
9455
9456#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
9457 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
9458
9459/** Emits code for IEM_MC_STORE_GREG_U32. */
9460DECL_INLINE_THROW(uint32_t)
9461iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9462{
9463 Assert(iGReg < 16);
9464 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9465
9466 /*
9467 * If it's a constant value (unlikely), we treat this as an
9468 * IEM_MC_STORE_GREG_U32_CONST statement.
9469 */
9470 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9471 { /* likely */ }
9472 else
9473 {
9474 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9475 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9476 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
9477 }
9478
9479 /*
9480 * For the rest we allocate a guest register for the variable and write
9481 * it to the CPUMCTX structure.
9482 */
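 /* Note: the variable's host register is expected to hold the 32-bit value already
    zero extended (the strict check below asserts this), so the full 64-bit store
    also implements the architectural clearing of bits 63:32. */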
9483 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9484 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9485#ifdef VBOX_STRICT
9486 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
9487#endif
9488 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9489 return off;
9490}
9491
9492
9493#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
9494 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
9495
9496/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
9497DECL_INLINE_THROW(uint32_t)
9498iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
9499{
9500 Assert(iGReg < 16);
9501 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9502 kIemNativeGstRegUse_ForFullWrite);
9503 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9504 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9505 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9506 return off;
9507}
9508
9509
9510#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
9511 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
9512
9513/** Emits code for IEM_MC_STORE_GREG_U64. */
9514DECL_INLINE_THROW(uint32_t)
9515iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9516{
9517 Assert(iGReg < 16);
9518 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9519
9520 /*
9521 * If it's a constant value (unlikely), we treat this as an
9522 * IEM_MC_STORE_GREG_U64_CONST statement.
9523 */
9524 if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
9525 { /* likely */ }
9526 else
9527 {
9528 AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
9529 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9530 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
9531 }
9532
9533 /*
9534 * For the rest we allocate a guest register for the variable and write
9535 * it to the CPUMCTX structure.
9536 */
9537 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9538 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9539 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9540 return off;
9541}
9542
9543
9544#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
9545 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
9546
9547/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
9548DECL_INLINE_THROW(uint32_t)
9549iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
9550{
9551 Assert(iGReg < 16);
9552 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9553 kIemNativeGstRegUse_ForUpdate);
9554 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
9555 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9556 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9557 return off;
9558}
9559
9560
9561/*********************************************************************************************************************************
9562* General purpose register manipulation (add, sub). *
9563*********************************************************************************************************************************/
9564
9565#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
9566 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
9567
9568/** Emits code for IEM_MC_ADD_GREG_U16. */
9569DECL_INLINE_THROW(uint32_t)
9570iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
9571{
9572 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9573 kIemNativeGstRegUse_ForUpdate);
9574
9575#ifdef RT_ARCH_AMD64
9576 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9577 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9578 if (idxGstTmpReg >= 8)
9579 pbCodeBuf[off++] = X86_OP_REX_B;
9580 if (uAddend == 1)
9581 {
9582 pbCodeBuf[off++] = 0xff; /* inc */
9583 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9584 }
9585 else
9586 {
9587 pbCodeBuf[off++] = 0x81;
9588 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9589 pbCodeBuf[off++] = uAddend;
9590 pbCodeBuf[off++] = 0;
9591 }
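    /* Illustrative encodings (hypothetical host register assignment, value in ebx/bx):
       uAddend == 1 emits 66 ff c3 ('inc bx'), while e.g. uAddend == 5 emits
       66 81 c3 05 00 ('add bx, 5') - 0x81 /0 takes a full imm16 under the operand-size prefix. */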
9592
9593#else
9594 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9595 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9596
9597 /* add tmp, gstgrp, uAddend */
9598 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
9599
9600 /* bfi gstgrp, tmp, 0, 16 - copies bits 15:0 of the temporary into the guest register. */
9601 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
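    /* Doing the 16-bit addition into a temporary and BFI'ing only bits 15:0 back keeps
       bits 63:16 of the guest register unchanged, matching x86 16-bit operand-size semantics. */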
9602
9603 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9604#endif
9605
9606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9607
9608 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9609
9610 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9611 return off;
9612}
9613
9614
9615#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
9616 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9617
9618#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
9619 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9620
9621/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
9622DECL_INLINE_THROW(uint32_t)
9623iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
9624{
9625 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9626 kIemNativeGstRegUse_ForUpdate);
9627
9628#ifdef RT_ARCH_AMD64
9629 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9630 if (f64Bit)
9631 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9632 else if (idxGstTmpReg >= 8)
9633 pbCodeBuf[off++] = X86_OP_REX_B;
9634 if (uAddend == 1)
9635 {
9636 pbCodeBuf[off++] = 0xff; /* inc */
9637 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9638 }
9639 else if (uAddend < 128)
9640 {
9641 pbCodeBuf[off++] = 0x83; /* add */
9642 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9643 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9644 }
9645 else
9646 {
9647 pbCodeBuf[off++] = 0x81; /* add */
9648 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9649 pbCodeBuf[off++] = RT_BYTE1(uAddend);
9650 pbCodeBuf[off++] = 0;
9651 pbCodeBuf[off++] = 0;
9652 pbCodeBuf[off++] = 0;
9653 }
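    /* Note: the 0x83 form sign-extends its imm8, which is why addends >= 0x80 must use the
       0x81 form with a full, zero-padded imm32. */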
9654
9655#else
9656 /* add gstgrp, gstgrp, uAddend */
9657 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9658 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
9659
9660#endif
9661
9662 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9663
9664 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9665
9666 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9667 return off;
9668}
9669
9670
9671
9672#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
9673 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
9674
9675/** Emits code for IEM_MC_SUB_GREG_U16. */
9676DECL_INLINE_THROW(uint32_t)
9677iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
9678{
9679 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9680 kIemNativeGstRegUse_ForUpdate);
9681
9682#ifdef RT_ARCH_AMD64
9683 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9684 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9685 if (idxGstTmpReg >= 8)
9686 pbCodeBuf[off++] = X86_OP_REX_B;
9687 if (uSubtrahend == 1)
9688 {
9689 pbCodeBuf[off++] = 0xff; /* dec */
9690 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9691 }
9692 else
9693 {
9694 pbCodeBuf[off++] = 0x81;
9695 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9696 pbCodeBuf[off++] = uSubtrahend;
9697 pbCodeBuf[off++] = 0;
9698 }
9699
9700#else
9701 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9702 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9703
9704 /* sub tmp, gstgrp, uSubtrahend */
9705 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
9706
9707 /* bfi gstgrp, tmp, 0, 16 - copies bits 15:0 of the temporary into the guest register. */
9708 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9709
9710 iemNativeRegFreeTmp(pReNative, idxTmpReg);
9711#endif
9712
9713 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9714
9715 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9716
9717 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9718 return off;
9719}
9720
9721
9722#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
9723 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
9724
9725#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
9726 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
9727
9728/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
9729DECL_INLINE_THROW(uint32_t)
9730iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
9731{
9732 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9733 kIemNativeGstRegUse_ForUpdate);
9734
9735#ifdef RT_ARCH_AMD64
9736 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9737 if (f64Bit)
9738 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
9739 else if (idxGstTmpReg >= 8)
9740 pbCodeBuf[off++] = X86_OP_REX_B;
9741 if (uSubtrahend == 1)
9742 {
9743 pbCodeBuf[off++] = 0xff; /* dec */
9744 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9745 }
9746 else if (uSubtrahend < 128)
9747 {
9748 pbCodeBuf[off++] = 0x83; /* sub */
9749 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9750 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9751 }
9752 else
9753 {
9754 pbCodeBuf[off++] = 0x81; /* sub */
9755 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
9756 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
9757 pbCodeBuf[off++] = 0;
9758 pbCodeBuf[off++] = 0;
9759 pbCodeBuf[off++] = 0;
9760 }
9761
9762#else
9763 /* sub gstgrp, gstgrp, uSubtrahend */
9764 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9765 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
9766
9767#endif
9768
9769 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9770
9771 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9772
9773 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9774 return off;
9775}
9776
9777
9778
9779/*********************************************************************************************************************************
9780* EFLAGS *
9781*********************************************************************************************************************************/
9782
9783#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9784# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
9785#else
9786# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
9787 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
9788
9789DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
9790{
9791 if (fEflOutput)
9792 {
9793 PVMCPUCC const pVCpu = pReNative->pVCpu;
9794 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
9795 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
9796 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
9797# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
9798 if (fEflOutput & (a_fEfl)) \
9799 { \
9800 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
9801 STAM_COUNTER_INC(&pVCpu->iem.s. a_CoreStatName ## Required); \
9802 else \
9803 STAM_COUNTER_INC(&pVCpu->iem.s. a_CoreStatName ## Skippable); \
9804 } else do { } while (0)
9805 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
9806 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
9807 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
9808 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
9809 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
9810 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
9811 CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
9812# undef CHECK_FLAG_AND_UPDATE_STATS
9813 }
9814 RT_NOREF(fEflInput);
9815}
9816#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
9817
9818#undef IEM_MC_FETCH_EFLAGS /* should not be used */
9819#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
9820 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
9821
9822/** Handles IEM_MC_FETCH_EFLAGS_EX. */
9823DECL_INLINE_THROW(uint32_t)
9824iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
9825 uint32_t fEflInput, uint32_t fEflOutput)
9826{
9827 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
9828 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
9829 RT_NOREF(fEflInput, fEflOutput);
9830
9831#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9832# ifdef VBOX_STRICT
9833 if ( pReNative->idxCurCall != 0
9834 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
9835 {
9836 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
9837 uint32_t const fBoth = fEflInput | fEflOutput;
9838# define ASSERT_ONE_EFL(a_fEflConst, a_offField) \
9839 AssertMsg( !(fBoth & (a_fEflConst)) \
9840 || (!(fEflInput & (a_fEflConst)) \
9841 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, kIemNativeGstReg_EFlags + (a_offField))) \
9842 : IEMLIVENESS_STATE_IS_ACCESS_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, kIemNativeGstReg_EFlags + (a_offField))) ), \
9843 ("%s - %u\n", #a_fEflConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, kIemNativeGstReg_EFlags + (a_offField))))
9844 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, 0);
9845 ASSERT_ONE_EFL(X86_EFL_CF, 1);
9846 ASSERT_ONE_EFL(X86_EFL_PF, 2);
9847 ASSERT_ONE_EFL(X86_EFL_AF, 3);
9848 ASSERT_ONE_EFL(X86_EFL_ZF, 4);
9849 ASSERT_ONE_EFL(X86_EFL_SF, 5);
9850 ASSERT_ONE_EFL(X86_EFL_OF, 6);
9851# undef ASSERT_ONE_EFL
9852 }
9853# endif
9854#endif
9855
9856 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
9857 * the existing shadow copy. */
9858 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
9859 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
9860 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
9861 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
9862 return off;
9863}
9864
9865
9866
9867/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
9868 * start using it with custom native code emission (inlining assembly
9869 * instruction helpers). */
9870#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
9871#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
9872 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
9873 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
9874
9875/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
9876DECL_INLINE_THROW(uint32_t)
9877iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
9878{
9879 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
9880 Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
9881 RT_NOREF(fEflOutput);
9882
9883 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
9884
9885#ifdef VBOX_STRICT
9886 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
9887 uint32_t offFixup = off;
9888 off = iemNativeEmitJnzToFixed(pReNative, off, off);
9889 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
9890 iemNativeFixupFixedJump(pReNative, offFixup, off);
9891
9892 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
9893 offFixup = off;
9894 off = iemNativeEmitJzToFixed(pReNative, off, off);
9895 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
9896 iemNativeFixupFixedJump(pReNative, offFixup, off);
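    /* The two checks above hit a breakpoint (0x2001 / 0x2002) if the must-be-one EFLAGS bit
       is clear or if any reserved must-be-zero hardware bit is set, i.e. whenever the
       variable does not hold a plausible EFLAGS value. */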
9897
9898 /** @todo validate that only bits in the fEflOutput mask changed. */
9899#endif
9900
9901 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
9902 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
9903 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
9904 return off;
9905}
9906
9907
9908
9909/*********************************************************************************************************************************
9910* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
9911*********************************************************************************************************************************/
9912
9913#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
9914 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
9915
9916#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
9917 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
9918
9919#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
9920 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
9921
9922
9923/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
9924 * IEM_MC_FETCH_SREG_ZX_U64. */
9925DECL_INLINE_THROW(uint32_t)
9926iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
9927{
9928 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9929 Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
9930 Assert(iSReg < X86_SREG_COUNT);
9931
9932 /*
9933 * For now, we will not create a shadow copy of a selector. The rationale
9934 * is that since we do not recompile the popping and loading of segment
9935 * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
9936 * pushing and moving to registers, there is only a small chance that the
9937 * shadow copy will be accessed again before the register is reloaded. One
9938 * scenario would be nested calls in 16-bit code, but I doubt it's worth
9939 * the extra register pressure atm.
9940 *
9941 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
9942 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
9943 * store scenario covered at present (r160730).
9944 */
9945 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9946 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9947 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
9948 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9949 return off;
9950}
9951
9952
9953
9954/*********************************************************************************************************************************
9955* Register references. *
9956*********************************************************************************************************************************/
9957
9958#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
9959 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
9960
9961#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
9962 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
9963
9964/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
9965DECL_INLINE_THROW(uint32_t)
9966iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
9967{
9968 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
9969 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
9970 Assert(iGRegEx < 20);
9971
9972 if (iGRegEx < 16)
9973 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
9974 else
9975 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
9976
9977 /* If we've delayed writing back the register value, flush it now. */
9978 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
9979
9980 /* If it's not a const reference we need to flush the shadow copy of the register now. */
9981 if (!fConst)
9982 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
9983
9984 return off;
9985}
9986
9987#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
9988 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
9989
9990#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
9991 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
9992
9993#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
9994 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
9995
9996#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
9997 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
9998
9999#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
10000 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
10001
10002#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
10003 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
10004
10005#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
10006 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
10007
10008#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
10009 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
10010
10011#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
10012 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
10013
10014#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
10015 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
10016
10017/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
10018DECL_INLINE_THROW(uint32_t)
10019iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
10020{
10021 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10022 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
10023 Assert(iGReg < 16);
10024
10025 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
10026
10027 /* If we've delayed writing back the register value, flush it now. */
10028 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
10029
10030 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10031 if (!fConst)
10032 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
10033
10034 return off;
10035}
10036
10037
10038#undef IEM_MC_REF_EFLAGS /* should not be used. */
10039#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
10040 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10041 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
10042
10043/** Handles IEM_MC_REF_EFLAGS. */
10044DECL_INLINE_THROW(uint32_t)
10045iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
10046{
10047 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10048 Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
10049
10050 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
10051
10052 /* If we've delayed writing back the register value, flush it now. */
10053 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
10054
10055 /* If there is a shadow copy of guest EFLAGS, flush it now. */
10056 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
10057
10058 return off;
10059}
10060
10061
10062/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
10063 * different code from the threaded recompiler, maybe it would be helpful. For now
10064 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
10065#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
10066
10067
10068
10069/*********************************************************************************************************************************
10070* Effective Address Calculation *
10071*********************************************************************************************************************************/
10072#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
10073 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
10074
10075/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
10076 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
10077DECL_INLINE_THROW(uint32_t)
10078iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10079 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
10080{
10081 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10082
10083 /*
10084 * Handle the disp16 form with no registers first.
10085 *
10086 * Convert to an immediate value, as that'll delay the register allocation
10087 * and assignment till the memory access / call / whatever and we can use
10088 * a more appropriate register (or none at all).
10089 */
10090 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
10091 {
10092 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
10093 return off;
10094 }
10095
10096 /* Determine the displacement. */
10097 uint16_t u16EffAddr;
10098 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10099 {
10100 case 0: u16EffAddr = 0; break;
10101 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
10102 case 2: u16EffAddr = u16Disp; break;
10103 default: AssertFailedStmt(u16EffAddr = 0);
10104 }
10105
10106 /* Determine the registers involved. */
10107 uint8_t idxGstRegBase;
10108 uint8_t idxGstRegIndex;
10109 switch (bRm & X86_MODRM_RM_MASK)
10110 {
10111 case 0:
10112 idxGstRegBase = X86_GREG_xBX;
10113 idxGstRegIndex = X86_GREG_xSI;
10114 break;
10115 case 1:
10116 idxGstRegBase = X86_GREG_xBX;
10117 idxGstRegIndex = X86_GREG_xDI;
10118 break;
10119 case 2:
10120 idxGstRegBase = X86_GREG_xBP;
10121 idxGstRegIndex = X86_GREG_xSI;
10122 break;
10123 case 3:
10124 idxGstRegBase = X86_GREG_xBP;
10125 idxGstRegIndex = X86_GREG_xDI;
10126 break;
10127 case 4:
10128 idxGstRegBase = X86_GREG_xSI;
10129 idxGstRegIndex = UINT8_MAX;
10130 break;
10131 case 5:
10132 idxGstRegBase = X86_GREG_xDI;
10133 idxGstRegIndex = UINT8_MAX;
10134 break;
10135 case 6:
10136 idxGstRegBase = X86_GREG_xBP;
10137 idxGstRegIndex = UINT8_MAX;
10138 break;
10139#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
10140 default:
10141#endif
10142 case 7:
10143 idxGstRegBase = X86_GREG_xBX;
10144 idxGstRegIndex = UINT8_MAX;
10145 break;
10146 }
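    /* Illustrative example: bRm=0x42 decodes to mod=1, r/m=2, i.e. BP+SI with a disp8,
       giving (uint16_t)(BP + SI + (int8_t)u16Disp). */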
10147
10148 /*
10149 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
10150 */
10151 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10152 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10153 kIemNativeGstRegUse_ReadOnly);
10154 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
10155 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10156 kIemNativeGstRegUse_ReadOnly)
10157 : UINT8_MAX;
10158#ifdef RT_ARCH_AMD64
10159 if (idxRegIndex == UINT8_MAX)
10160 {
10161 if (u16EffAddr == 0)
10162 {
10163 /* movzx ret, base */
10164 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
10165 }
10166 else
10167 {
10168 /* lea ret32, [base64 + disp32] */
10169 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10170 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10171 if (idxRegRet >= 8 || idxRegBase >= 8)
10172 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
10173 pbCodeBuf[off++] = 0x8d;
10174 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10175 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
10176 else
10177 {
10178 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
10179 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10180 }
10181 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
10182 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
10183 pbCodeBuf[off++] = 0;
10184 pbCodeBuf[off++] = 0;
10185 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10186
10187 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
10188 }
10189 }
10190 else
10191 {
10192 /* lea ret32, [index64 + base64 (+ disp32)] */
10193 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10194 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10195 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10196 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10197 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10198 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10199 pbCodeBuf[off++] = 0x8d;
10200 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
10201 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10202 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
10203 if (bMod == X86_MOD_MEM4)
10204 {
10205 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
10206 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
10207 pbCodeBuf[off++] = 0;
10208 pbCodeBuf[off++] = 0;
10209 }
10210 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10211 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
10212 }
10213
10214#elif defined(RT_ARCH_ARM64)
10215 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
10216 if (u16EffAddr == 0)
10217 {
10218 if (idxRegIndex == UINT8_MAX)
10219 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
10220 else
10221 {
10222 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
10223 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10224 }
10225 }
10226 else
10227 {
10228 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
10229 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
10230 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
10231 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10232 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
10233 else
10234 {
10235 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
10236 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10237 }
10238 if (idxRegIndex != UINT8_MAX)
10239 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
10240 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10241 }
10242
10243#else
10244# error "port me"
10245#endif
10246
10247 if (idxRegIndex != UINT8_MAX)
10248 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10249 iemNativeRegFreeTmp(pReNative, idxRegBase);
10250 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10251 return off;
10252}
10253
10254
10255#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
10256 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
10257
10258/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
10259 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
10260DECL_INLINE_THROW(uint32_t)
10261iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10262 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
10263{
10264 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10265
10266 /*
10267 * Handle the disp32 form with no registers first.
10268 *
10269 * Convert to an immediate value, as that'll delay the register allocation
10270 * and assignment till the memory access / call / whatever and we can use
10271 * a more appropriate register (or none at all).
10272 */
10273 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10274 {
10275 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
10276 return off;
10277 }
10278
10279 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
10280 uint32_t u32EffAddr = 0;
10281 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10282 {
10283 case 0: break;
10284 case 1: u32EffAddr = (int8_t)u32Disp; break;
10285 case 2: u32EffAddr = u32Disp; break;
10286 default: AssertFailed();
10287 }
10288
10289 /* Get the register (or SIB) value. */
10290 uint8_t idxGstRegBase = UINT8_MAX;
10291 uint8_t idxGstRegIndex = UINT8_MAX;
10292 uint8_t cShiftIndex = 0;
10293 switch (bRm & X86_MODRM_RM_MASK)
10294 {
10295 case 0: idxGstRegBase = X86_GREG_xAX; break;
10296 case 1: idxGstRegBase = X86_GREG_xCX; break;
10297 case 2: idxGstRegBase = X86_GREG_xDX; break;
10298 case 3: idxGstRegBase = X86_GREG_xBX; break;
10299 case 4: /* SIB */
10300 {
10301 /* index w/ scaling. */
10302 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10303 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10304 {
10305 case 0: idxGstRegIndex = X86_GREG_xAX; break;
10306 case 1: idxGstRegIndex = X86_GREG_xCX; break;
10307 case 2: idxGstRegIndex = X86_GREG_xDX; break;
10308 case 3: idxGstRegIndex = X86_GREG_xBX; break;
10309 case 4: cShiftIndex = 0; /*no index*/ break;
10310 case 5: idxGstRegIndex = X86_GREG_xBP; break;
10311 case 6: idxGstRegIndex = X86_GREG_xSI; break;
10312 case 7: idxGstRegIndex = X86_GREG_xDI; break;
10313 }
10314
10315 /* base */
10316 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
10317 {
10318 case 0: idxGstRegBase = X86_GREG_xAX; break;
10319 case 1: idxGstRegBase = X86_GREG_xCX; break;
10320 case 2: idxGstRegBase = X86_GREG_xDX; break;
10321 case 3: idxGstRegBase = X86_GREG_xBX; break;
10322 case 4:
10323 idxGstRegBase = X86_GREG_xSP;
10324 u32EffAddr += uSibAndRspOffset >> 8;
10325 break;
10326 case 5:
10327 if ((bRm & X86_MODRM_MOD_MASK) != 0)
10328 idxGstRegBase = X86_GREG_xBP;
10329 else
10330 {
10331 Assert(u32EffAddr == 0);
10332 u32EffAddr = u32Disp;
10333 }
10334 break;
10335 case 6: idxGstRegBase = X86_GREG_xSI; break;
10336 case 7: idxGstRegBase = X86_GREG_xDI; break;
10337 }
10338 break;
10339 }
10340 case 5: idxGstRegBase = X86_GREG_xBP; break;
10341 case 6: idxGstRegBase = X86_GREG_xSI; break;
10342 case 7: idxGstRegBase = X86_GREG_xDI; break;
10343 }
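    /* Illustrative example: bRm=0x44 (mod=1, r/m=4 -> SIB follows) with SIB byte 0x88 decodes
       to base=EAX, index=ECX, scale=4 (cShiftIndex=2), i.e. EA = EAX + ECX*4 + disp8. */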
10344
10345 /*
10346 * If no registers are involved (SIB.B=5, SIB.X=4) we repeat what we did at
10347 * the start of the function.
10348 */
10349 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
10350 {
10351 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
10352 return off;
10353 }
10354
10355 /*
10356 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10357 */
10358 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10359 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
10360 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10361 kIemNativeGstRegUse_ReadOnly);
10362 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
10363 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10364 kIemNativeGstRegUse_ReadOnly);
10365
10366 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
10367 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
10368 {
10369 idxRegBase = idxRegIndex;
10370 idxRegIndex = UINT8_MAX;
10371 }
10372
10373#ifdef RT_ARCH_AMD64
10374 if (idxRegIndex == UINT8_MAX)
10375 {
10376 if (u32EffAddr == 0)
10377 {
10378 /* mov ret, base */
10379 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10380 }
10381 else
10382 {
10383 /* lea ret32, [base64 + disp32] */
10384 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10385 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10386 if (idxRegRet >= 8 || idxRegBase >= 8)
10387 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
10388 pbCodeBuf[off++] = 0x8d;
10389 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10390 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10391 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
10392 else
10393 {
10394 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10395 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10396 }
10397 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10398 if (bMod == X86_MOD_MEM4)
10399 {
10400 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10401 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10402 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10403 }
10404 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10405 }
10406 }
10407 else
10408 {
10409 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10410 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10411 if (idxRegBase == UINT8_MAX)
10412 {
10413 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
10414 if (idxRegRet >= 8 || idxRegIndex >= 8)
10415 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10416 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10417 pbCodeBuf[off++] = 0x8d;
10418 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
10419 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
10420 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10421 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10422 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10423 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10424 }
10425 else
10426 {
10427 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
10428 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10429 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10430 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10431 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10432 pbCodeBuf[off++] = 0x8d;
10433 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
10434 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10435 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10436 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
10437 if (bMod != X86_MOD_MEM0)
10438 {
10439 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10440 if (bMod == X86_MOD_MEM4)
10441 {
10442 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10443 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10444 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10445 }
10446 }
10447 }
10448 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10449 }
10450
10451#elif defined(RT_ARCH_ARM64)
10452 if (u32EffAddr == 0)
10453 {
10454 if (idxRegIndex == UINT8_MAX)
10455 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10456 else if (idxRegBase == UINT8_MAX)
10457 {
10458 if (cShiftIndex == 0)
10459 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
10460 else
10461 {
10462 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10463 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
10464 }
10465 }
10466 else
10467 {
10468 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10469 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
10470 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
10471 }
10472 }
10473 else
10474 {
10475 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
10476 {
10477 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10478 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
10479 }
10480 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
10481 {
10482 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10483 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10484 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
10485 }
10486 else
10487 {
10488 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
10489 if (idxRegBase != UINT8_MAX)
10490 {
10491 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10492 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10493 }
10494 }
10495 if (idxRegIndex != UINT8_MAX)
10496 {
10497 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10498 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
10499 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
10500 }
10501 }
10502
10503#else
10504# error "port me"
10505#endif
10506
10507 if (idxRegIndex != UINT8_MAX)
10508 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10509 if (idxRegBase != UINT8_MAX)
10510 iemNativeRegFreeTmp(pReNative, idxRegBase);
10511 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10512 return off;
10513}
10514
10515
10516#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10517 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10518 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10519
10520#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10521 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10522 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
10523
10524#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
10525 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
10526 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
10527
10528/**
10529 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
10530 *
10531 * @returns New code buffer offset (off).
10532 * @param pReNative The native recompiler state.
10533 * @param off The current code buffer offset.
10534 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
10535 * bit 4 to REX.X. The two bits are part of the
10536 * REG sub-field, which isn't needed in this
10537 * function.
10538 * @param uSibAndRspOffset Two parts:
10539 * - The first 8 bits make up the SIB byte.
10540 * - The next 8 bits are the fixed RSP/ESP offset
10541 * in case of a pop [xSP].
10542 * @param u32Disp The displacement byte/word/dword, if any.
10543 * @param cbInstr The size of the fully decoded instruction. Used
10544 * for RIP relative addressing.
10545 * @param idxVarRet The result variable number.
10546 * @param f64Bit Whether to use a 64-bit or 32-bit address size
10547 * when calculating the address.
10548 *
10549 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
10550 */
10551DECL_INLINE_THROW(uint32_t)
10552iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
10553 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
10554{
10555 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10556
10557 /*
10558 * Special case the rip + disp32 form first.
10559 */
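    /* (The effective address here is the address of the *next* instruction plus the signed
       disp32, which is why cbInstr is added to the PC value below.) */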
10560 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10561 {
10562 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10563 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
10564 kIemNativeGstRegUse_ReadOnly);
10565#ifdef RT_ARCH_AMD64
10566 if (f64Bit)
10567 {
10568 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
10569 if ((int32_t)offFinalDisp == offFinalDisp)
10570 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
10571 else
10572 {
10573 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
10574 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
10575 }
10576 }
10577 else
10578 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
10579
10580#elif defined(RT_ARCH_ARM64)
10581 if (f64Bit)
10582 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10583 (int64_t)(int32_t)u32Disp + cbInstr);
10584 else
10585 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
10586 (int32_t)u32Disp + cbInstr);
10587
10588#else
10589# error "Port me!"
10590#endif
10591 iemNativeRegFreeTmp(pReNative, idxRegPc);
10592 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10593 return off;
10594 }
10595
10596 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
10597 int64_t i64EffAddr = 0;
10598 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10599 {
10600 case 0: break;
10601 case 1: i64EffAddr = (int8_t)u32Disp; break;
10602 case 2: i64EffAddr = (int32_t)u32Disp; break;
10603 default: AssertFailed();
10604 }
10605
10606 /* Get the register (or SIB) value. */
10607 uint8_t idxGstRegBase = UINT8_MAX;
10608 uint8_t idxGstRegIndex = UINT8_MAX;
10609 uint8_t cShiftIndex = 0;
10610 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
10611 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
10612 else /* SIB: */
10613 {
10614 /* index w/ scaling. */
10615 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10616 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10617 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
10618 if (idxGstRegIndex == 4)
10619 {
10620 /* no index */
10621 cShiftIndex = 0;
10622 idxGstRegIndex = UINT8_MAX;
10623 }
10624
10625 /* base */
10626 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
10627 if (idxGstRegBase == 4)
10628 {
10629 /* pop [rsp] hack */
10630 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
10631 }
10632 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
10633 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
10634 {
10635 /* mod=0 and base=5 -> disp32, no base reg. */
10636 Assert(i64EffAddr == 0);
10637 i64EffAddr = (int32_t)u32Disp;
10638 idxGstRegBase = UINT8_MAX;
10639 }
10640 }
10641
10642 /*
10643 * If no registers are involved (SIB.B=5, SIB.X=4) we repeat what we did at
10644 * the start of the function.
10645 */
10646 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
10647 {
10648 if (f64Bit)
10649 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
10650 else
10651 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
10652 return off;
10653 }
10654
10655 /*
10656 * Now emit code that calculates:
10657 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10658 * or if !f64Bit:
10659 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10660 */
10661 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10662 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
10663 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10664 kIemNativeGstRegUse_ReadOnly);
10665 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
10666 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10667 kIemNativeGstRegUse_ReadOnly);
10668
10669 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
10670 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
10671 {
10672 idxRegBase = idxRegIndex;
10673 idxRegIndex = UINT8_MAX;
10674 }
10675
10676#ifdef RT_ARCH_AMD64
10677 uint8_t bFinalAdj;
10678 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
10679 bFinalAdj = 0; /* likely */
10680 else
10681 {
10682 /* pop [rsp] with a problematic disp32 value. Split out the
10683 RSP offset and add it separately afterwards (bFinalAdj). */
10684 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
10685 Assert(idxGstRegBase == X86_GREG_xSP);
10686 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
10687 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
10688 Assert(bFinalAdj != 0);
10689 i64EffAddr -= bFinalAdj;
10690 Assert((int32_t)i64EffAddr == i64EffAddr);
10691 }
10692 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
10693//pReNative->pInstrBuf[off++] = 0xcc;
10694
10695 if (idxRegIndex == UINT8_MAX)
10696 {
10697 if (u32EffAddr == 0)
10698 {
10699 /* mov ret, base */
10700 if (f64Bit)
10701 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
10702 else
10703 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10704 }
10705 else
10706 {
10707 /* lea ret, [base + disp32] */
10708 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10709 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10710 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
10711 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10712 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10713 | (f64Bit ? X86_OP_REX_W : 0);
10714 pbCodeBuf[off++] = 0x8d;
10715 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10716 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10717 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
10718 else
10719 {
10720 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10721 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10722 }
10723 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10724 if (bMod == X86_MOD_MEM4)
10725 {
10726 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10727 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10728 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10729 }
10730 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10731 }
10732 }
10733 else
10734 {
10735 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10736 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10737 if (idxRegBase == UINT8_MAX)
10738 {
10739 /* lea ret, [(index64 << cShiftIndex) + disp32] */
10740 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
10741 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10742 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
10743 | (f64Bit ? X86_OP_REX_W : 0);
10744 pbCodeBuf[off++] = 0x8d;
10745 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
10746 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
10747 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10748 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10749 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10750 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10751 }
10752 else
10753 {
10754 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
10755 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10756 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10757 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10758 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
10759 | (f64Bit ? X86_OP_REX_W : 0);
10760 pbCodeBuf[off++] = 0x8d;
10761 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
10762 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10763 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10764 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
10765 if (bMod != X86_MOD_MEM0)
10766 {
10767 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10768 if (bMod == X86_MOD_MEM4)
10769 {
10770 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10771 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10772 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10773 }
10774 }
10775 }
10776 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10777 }
10778
10779 if (!bFinalAdj)
10780 { /* likely */ }
10781 else
10782 {
10783 Assert(f64Bit);
10784 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
10785 }
10786
10787#elif defined(RT_ARCH_ARM64)
10788 if (i64EffAddr == 0)
10789 {
10790 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10791 if (idxRegIndex == UINT8_MAX)
10792 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
10793 else if (idxRegBase != UINT8_MAX)
10794 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
10795 f64Bit, false /*fSetFlags*/, cShiftIndex);
10796 else
10797 {
10798 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
10799 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
10800 }
10801 }
10802 else
10803 {
10804 if (f64Bit)
10805 { /* likely */ }
10806 else
10807 i64EffAddr = (int32_t)i64EffAddr;
10808
10809 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
10810 {
10811 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10812 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
10813 }
10814 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
10815 {
10816 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10817 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
10818 }
10819 else
10820 {
10821 if (f64Bit)
10822 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
10823 else
10824 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
10825 if (idxRegBase != UINT8_MAX)
10826 {
10827 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10828 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
10829 }
10830 }
10831 if (idxRegIndex != UINT8_MAX)
10832 {
10833 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10834 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
10835 f64Bit, false /*fSetFlags*/, cShiftIndex);
10836 }
10837 }
10838
10839#else
10840# error "port me"
10841#endif
10842
10843 if (idxRegIndex != UINT8_MAX)
10844 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10845 if (idxRegBase != UINT8_MAX)
10846 iemNativeRegFreeTmp(pReNative, idxRegBase);
10847 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10848 return off;
10849}
10850
10851
10852/*********************************************************************************************************************************
10853* TLB Lookup. *
10854*********************************************************************************************************************************/
10855
10856/**
10857 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
10858 */
10859DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
10860{
10861 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
10862 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
10863 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
10864 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
10865
10866 /* Do the lookup manually. */
10867 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
10868 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
10869 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
10870 if (RT_LIKELY(pTlbe->uTag == uTag))
10871 {
10872 /*
10873 * Check TLB page table level access flags.
10874 */
10875 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
10876 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
10877 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
10878 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
10879 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
10880 | IEMTLBE_F_PG_UNASSIGNED
10881 | IEMTLBE_F_PT_NO_ACCESSED
10882 | fNoWriteNoDirty | fNoUser);
10883 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
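    /* Note: (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER is non-zero only for CPL 3, so
       user-mode accesses require the NO_USER bit to be clear. The single compare below works
       because all the masked NO_* bits must be zero, leaving only the physical revision to match. */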
10884 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
10885 {
10886 /*
10887 * Return the address.
10888 */
10889 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
10890 if ((uintptr_t)pbAddr == uResult)
10891 return;
10892 RT_NOREF(cbMem);
10893 AssertFailed();
10894 }
10895 else
10896 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
10897 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
10898 }
10899 else
10900 AssertFailed();
10901 RT_BREAKPOINT();
10902}
10903
10904/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
10905
10906
10907/*********************************************************************************************************************************
10908* Memory fetches and stores common *
10909*********************************************************************************************************************************/
10910
10911typedef enum IEMNATIVEMITMEMOP
10912{
10913 kIemNativeEmitMemOp_Store = 0,
10914 kIemNativeEmitMemOp_Fetch,
10915 kIemNativeEmitMemOp_Fetch_Zx_U16,
10916 kIemNativeEmitMemOp_Fetch_Zx_U32,
10917 kIemNativeEmitMemOp_Fetch_Zx_U64,
10918 kIemNativeEmitMemOp_Fetch_Sx_U16,
10919 kIemNativeEmitMemOp_Fetch_Sx_U32,
10920 kIemNativeEmitMemOp_Fetch_Sx_U64
10921} IEMNATIVEMITMEMOP;
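/* Note: the _Zx_/_Sx_ fetch variants zero- respectively sign-extend the cbMem-sized
   memory value into the wider destination register; e.g. kIemNativeEmitMemOp_Fetch_Sx_U64
   with cbMem=2 loads 16 bits and sign-extends them to 64 bits (see the TLB-hit emitters
   further down). */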
10922
10923/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
10924 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
10925 * (with iSegReg = UINT8_MAX). */
10926DECL_INLINE_THROW(uint32_t)
10927iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
10928 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
10929 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
10930{
10931 /*
10932 * Assert sanity.
10933 */
10934 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
10935 Assert( enmOp != kIemNativeEmitMemOp_Store
10936 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
10937 || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
10938 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
10939 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
10940 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
10941 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10942 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
10943 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
10944 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
10945#ifdef VBOX_STRICT
10946 if (iSegReg == UINT8_MAX)
10947 {
10948 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
10949 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
10950 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
10951 switch (cbMem)
10952 {
10953 case 1:
10954 Assert( pfnFunction
10955 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
10956 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10957 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10958 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10959 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
10960 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
10961 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
10962 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
10963 : UINT64_C(0xc000b000a0009000) ));
10964 break;
10965 case 2:
10966 Assert( pfnFunction
10967 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
10968 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10969 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10970 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
10971 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
10972 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
10973 : UINT64_C(0xc000b000a0009000) ));
10974 break;
10975 case 4:
10976 Assert( pfnFunction
10977 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
10978 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
10979 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
10980 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
10981 : UINT64_C(0xc000b000a0009000) ));
10982 break;
10983 case 8:
10984 Assert( pfnFunction
10985 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
10986 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
10987 : UINT64_C(0xc000b000a0009000) ));
10988 break;
10989 }
10990 }
10991 else
10992 {
10993 Assert(iSegReg < 6);
10994 switch (cbMem)
10995 {
10996 case 1:
10997 Assert( pfnFunction
10998 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
10999 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
11000 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11001 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11002 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11003 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
11004 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
11005 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
11006 : UINT64_C(0xc000b000a0009000) ));
11007 break;
11008 case 2:
11009 Assert( pfnFunction
11010 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
11011 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
11012 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11013 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11014 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
11015 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
11016 : UINT64_C(0xc000b000a0009000) ));
11017 break;
11018 case 4:
11019 Assert( pfnFunction
11020 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
11021 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
11022 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
11023 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
11024 : UINT64_C(0xc000b000a0009000) ));
11025 break;
11026 case 8:
11027 Assert( pfnFunction
11028 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
11029 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
11030 : UINT64_C(0xc000b000a0009000) ));
11031 break;
11032 }
11033 }
11034#endif
11035
11036#ifdef VBOX_STRICT
11037 /*
11038 * Check that the fExec flags we've got make sense.
11039 */
11040 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11041#endif
11042
11043 /*
11044 * To keep things simple we have to commit any pending writes first as we
11045 * may end up making calls.
11046 */
11047 /** @todo we could postpone this till we make the call and reload the
11048 * registers after returning from the call. Not sure if that's sensible or
11049 * not, though. */
11050 off = iemNativeRegFlushPendingWrites(pReNative, off);
11051
11052#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11053 /*
11054 * Move/spill/flush stuff out of call-volatile registers.
11055 * This is the easy way out. We could contain this to the tlb-miss branch
11056 * by saving and restoring active stuff here.
11057 */
11058 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
11059#endif
11060
11061 /*
11062 * Define labels and allocate the result register (trying for the return
11063 * register if we can).
11064 */
11065 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11066 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
11067 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
11068 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
11069 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
11070 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
11071 uint8_t const idxRegValueStore = !TlbState.fSkip
11072 && enmOp == kIemNativeEmitMemOp_Store
11073 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
11074 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
11075 : UINT8_MAX;
11076 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11077 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11078 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11079 : UINT32_MAX;
11080
11081 /*
11082 * Jump to the TLB lookup code.
11083 */
11084 if (!TlbState.fSkip)
11085 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11086
11087 /*
11088 * TlbMiss:
11089 *
11090 * Call helper to do the fetching.
11091 * We flush all guest register shadow copies here.
11092 */
11093 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
11094
11095#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11096 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11097#else
11098 RT_NOREF(idxInstr);
11099#endif
11100
11101#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11102 /* Save variables in volatile registers. */
11103 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11104 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
11105 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
11106 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11107#endif
11108
11109 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
11110 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
11111 if (enmOp == kIemNativeEmitMemOp_Store)
11112 {
11113 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
11114 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*offAddend*/,
11115#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11116 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11117#else
11118 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
11119 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
11120#endif
11121 }
11122
11123 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
11124 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*offAddend*/,
11125#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11126 fVolGregMask);
11127#else
11128 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
11129#endif
11130
11131 if (iSegReg != UINT8_MAX)
11132 {
11133 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
11134 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
11135 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
11136 }
11137
11138 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11139 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11140
11141 /* Done setting up parameters, make the call. */
11142 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11143
11144 /*
11145 * Put the result in the right register if this is a fetch.
11146 */
11147 if (enmOp != kIemNativeEmitMemOp_Store)
11148 {
11149 Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
11150 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
11151 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
11152 }
11153
11154#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11155 /* Restore variables and guest shadow registers to volatile registers. */
11156 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11157 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11158#endif
11159
11160#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11161 if (!TlbState.fSkip)
11162 {
11163 /* end of TlbMiss - Jump to the done label. */
11164 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11165 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11166
11167 /*
11168 * TlbLookup:
11169 */
11170 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
11171 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
11172 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
11173
11174 /*
11175 * Emit code to do the actual storing / fetching.
11176 */
11177 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
11178# ifdef VBOX_WITH_STATISTICS
11179 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
11180 enmOp == kIemNativeEmitMemOp_Store
11181                                               ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
11182                                               : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
11183# endif
11184 switch (enmOp)
11185 {
11186 case kIemNativeEmitMemOp_Store:
11187 if (pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate)
11188 {
11189 switch (cbMem)
11190 {
11191 case 1:
11192 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11193 break;
11194 case 2:
11195 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11196 break;
11197 case 4:
11198 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11199 break;
11200 case 8:
11201 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11202 break;
11203 default:
11204 AssertFailed();
11205 }
11206 }
11207 else
11208 {
11209 switch (cbMem)
11210 {
11211 case 1:
11212 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off,
11213 (uint8_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11214 idxRegMemResult, TlbState.idxReg1);
11215 break;
11216 case 2:
11217 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
11218 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11219 idxRegMemResult, TlbState.idxReg1);
11220 break;
11221 case 4:
11222 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
11223 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11224 idxRegMemResult, TlbState.idxReg1);
11225 break;
11226 case 8:
11227 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
11228 idxRegMemResult, TlbState.idxReg1);
11229 break;
11230 default:
11231 AssertFailed();
11232 }
11233 }
11234 break;
11235
11236 case kIemNativeEmitMemOp_Fetch:
11237 case kIemNativeEmitMemOp_Fetch_Zx_U16:
11238 case kIemNativeEmitMemOp_Fetch_Zx_U32:
11239 case kIemNativeEmitMemOp_Fetch_Zx_U64:
11240 switch (cbMem)
11241 {
11242 case 1:
11243 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11244 break;
11245 case 2:
11246 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11247 break;
11248 case 4:
11249 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11250 break;
11251 case 8:
11252 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11253 break;
11254 default:
11255 AssertFailed();
11256 }
11257 break;
11258
11259 case kIemNativeEmitMemOp_Fetch_Sx_U16:
11260 Assert(cbMem == 1);
11261 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11262 break;
11263
11264 case kIemNativeEmitMemOp_Fetch_Sx_U32:
11265 Assert(cbMem == 1 || cbMem == 2);
11266 if (cbMem == 1)
11267 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11268 else
11269 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11270 break;
11271
11272 case kIemNativeEmitMemOp_Fetch_Sx_U64:
11273 switch (cbMem)
11274 {
11275 case 1:
11276 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11277 break;
11278 case 2:
11279 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11280 break;
11281 case 4:
11282 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11283 break;
11284 default:
11285 AssertFailed();
11286 }
11287 break;
11288
11289 default:
11290 AssertFailed();
11291 }
11292
11293 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
11294
11295 /*
11296 * TlbDone:
11297 */
11298 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11299
11300 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
11301
11302# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11303 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
11304 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11305# endif
11306 }
11307#else
11308 RT_NOREF(fAlignMask, idxLabelTlbMiss);
11309#endif
11310
11311 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
11312 iemNativeVarRegisterRelease(pReNative, idxVarValue);
11313 return off;
11314}
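/* Rough shape of the native code generated by the function above when the inline
 * TLB lookup is enabled (a sketch for orientation, not a literal disassembly):
 *          jmp     TlbLookup_N
 *      TlbMiss_N:
 *          ; load pVCpu, GCPtrMem and, for stores, the value into the argument registers
 *          call    pfnFunction
 *          jmp     TlbDone_N
 *      TlbLookup_N:
 *          ; inline TLB probe (IEMN8veRecompilerTlbLookup.h), branches to TlbMiss_N on a miss,
 *          ; otherwise does the load/store directly through the host mapping
 *      TlbDone_N:
 */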
11315
11316
11317
11318/*********************************************************************************************************************************
11319* Memory fetches (IEM_MEM_FETCH_XXX). *
11320*********************************************************************************************************************************/
11321
11322/* 8-bit segmented: */
11323#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
11324 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
11325 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11326 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11327
11328#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11329 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11330 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11331 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11332
11333#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11334 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11335 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11336 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11337
11338#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11339 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11340 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11341 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11342
11343#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11344 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11345 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11346 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11347
11348#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11349 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11350 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11351 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11352
11353#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11354 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11355 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11356 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11357
11358/* 16-bit segmented: */
11359#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11360 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11361 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11362 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11363
11364#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11365 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11366 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11367 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11368
11369#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11370 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11371 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11372 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11373
11374#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11375 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11376 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11377 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11378
11379#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11380 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11381 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11382 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11383
11384#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11385 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11386 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11387 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11388
11389
11390/* 32-bit segmented: */
11391#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11392 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11393 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11394 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11395
11396#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11397 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11398 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11399 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11400
11401#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11402 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11403 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11404 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11405
11406#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11407 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11408 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11409 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11410
11411
11412/* 64-bit segmented: */
11413#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11414 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11415 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11416 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
11417
11418
11419
11420/* 8-bit flat: */
11421#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
11422 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
11423 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11424 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11425
11426#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
11427 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11428 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11429 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11430
11431#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
11432 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11433 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11434 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11435
11436#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
11437 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11438 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11439 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11440
11441#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
11442 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11443 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11444 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11445
11446#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
11447 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11448 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11449 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11450
11451#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
11452 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11453 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11454 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11455
11456
11457/* 16-bit flat: */
11458#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
11459 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11460 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11461 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11462
11463#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
11464 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11465 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11466 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11467
11468#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
11469 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11470 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11471 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11472
11473#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
11474 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11475 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11476 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11477
11478#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
11479 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11480 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11481 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11482
11483#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
11484 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11485 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11486 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11487
11488/* 32-bit flat: */
11489#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
11490 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11491 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11492 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11493
11494#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
11495 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11496 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11497 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11498
11499#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
11500 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11501 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11502 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
11503
11504#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
11505 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11506 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11507 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11508
11509/* 64-bit flat: */
11510#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
11511 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11512 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11513 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
11514
11515
11516
11517/*********************************************************************************************************************************
11518* Memory stores (IEM_MEM_STORE_XXX). *
11519*********************************************************************************************************************************/
11520
11521#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
11522 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
11523 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11524 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11525
11526#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
11527 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
11528 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11529 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11530
11531#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
11532 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
11533 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11534 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11535
11536#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
11537 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
11538 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11539 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11540
11541
11542#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
11543 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
11544 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
11545 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11546
11547#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
11548 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
11549 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
11550 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11551
11552#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
11553 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
11554 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
11555 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11556
11557#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
11558 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
11559 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
11560 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11561
11562
11563#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
11564 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
11565 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
11566
11567#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
11568 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
11569 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
11570
11571#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
11572 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
11573 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
11574
11575#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
11576 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
11577 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
11578
11579
11580#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
11581 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
11582 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
11583
11584#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
11585 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
11586 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
11587
11588#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
11589 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
11590 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
11591
11592#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
11593 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
11594 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
11595
11596/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
11597 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
11598DECL_INLINE_THROW(uint32_t)
11599iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
11600 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
11601{
11602 /*
11603 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
11604 * to do the grunt work.
11605 */
11606 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
11607 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
11608 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
11609 pfnFunction, idxInstr);
11610 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
11611 return off;
11612}
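/* Note: iemNativeVarAllocConst presumably yields a kIemNativeVarKind_Immediate variable,
   which lets the TLB-hit path in iemNativeEmitMemFetchStoreDataCommon use the
   store-immediate emitters instead of first loading the constant into a register. */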
11613
11614
11615
11616/*********************************************************************************************************************************
11617* Stack Accesses. *
11618*********************************************************************************************************************************/
11619/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
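/* Example: IEM_MC_FLAT64_PUSH_U64 below passes RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
   i.e. a 64-bit value pushed with a flat 64-bit stack pointer, while
   IEM_MC_PUSH_U32_SREG passes RT_MAKE_U32_FROM_U8(32, 0, 1, 0) to mark a segment
   register push on a segmented (non-flat) stack. */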
11620#define IEM_MC_PUSH_U16(a_u16Value) \
11621 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
11622 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
11623#define IEM_MC_PUSH_U32(a_u32Value) \
11624 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
11625 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
11626#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
11627 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
11628 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
11629#define IEM_MC_PUSH_U64(a_u64Value) \
11630 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
11631 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
11632
11633#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
11634 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
11635 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11636#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
11637 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
11638 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
11639#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
11640 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
11641 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
11642
11643#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
11644 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
11645 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
11646#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
11647 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
11648 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
11649
11650
11651DECL_FORCE_INLINE_THROW(uint32_t)
11652iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11653{
11654 /* Use16BitSp: */
11655#ifdef RT_ARCH_AMD64
11656 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
11657 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11658#else
11659 /* sub regeff, regrsp, #cbMem */
11660 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
11661 /* and regeff, regeff, #0xffff */
11662 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
11663 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
11664 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into idxRegRsp bits 15:0, keeping the other RSP bits as is. */
11665 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
11666#endif
11667 return off;
11668}
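/* Worked example for the 16-bit SP update above, assuming RSP=0x00010008 and cbMem=2:
   idxRegEffSp ends up as 0x0006 and only the low 16 bits of idxRegRsp change,
   giving 0x00010006 - bits 63:16 of RSP are preserved on both code paths. */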
11669
11670
11671DECL_FORCE_INLINE(uint32_t)
11672iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
11673{
11674 /* Use32BitSp: */
11675 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
11676 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
11677 return off;
11678}
11679
11680
11681/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
11682DECL_INLINE_THROW(uint32_t)
11683iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
11684 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
11685{
11686 /*
11687 * Assert sanity.
11688 */
11689 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11690#ifdef VBOX_STRICT
11691 if (RT_BYTE2(cBitsVarAndFlat) != 0)
11692 {
11693 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11694 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11695 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11696 Assert( pfnFunction
11697 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
11698 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
11699 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
11700 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
11701 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
11702 : UINT64_C(0xc000b000a0009000) ));
11703 }
11704 else
11705 Assert( pfnFunction
11706 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
11707 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
11708 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
11709 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
11710 : UINT64_C(0xc000b000a0009000) ));
11711#endif
11712
11713#ifdef VBOX_STRICT
11714 /*
11715 * Check that the fExec flags we've got make sense.
11716 */
11717 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11718#endif
11719
11720 /*
11721 * To keep things simple we have to commit any pending writes first as we
11722 * may end up making calls.
11723 */
11724 /** @todo we could postpone this till we make the call and reload the
11725 * registers after returning from the call. Not sure if that's sensible or
11726 * not, though. */
11727 off = iemNativeRegFlushPendingWrites(pReNative, off);
11728
11729 /*
11730 * First we calculate the new RSP and the effective stack pointer value.
11731 * For 64-bit mode and flat 32-bit these two are the same.
11732 * (Code structure is very similar to that of PUSH)
11733 */
11734 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
11735 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
11736 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
11737 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
11738 ? cbMem : sizeof(uint16_t);
11739 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
11740 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
11741 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
11742 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
11743 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
11744 if (cBitsFlat != 0)
11745 {
11746 Assert(idxRegEffSp == idxRegRsp);
11747 Assert(cBitsFlat == 32 || cBitsFlat == 64);
11748 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
11749 if (cBitsFlat == 64)
11750 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
11751 else
11752 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
11753 }
11754 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
11755 {
11756 Assert(idxRegEffSp != idxRegRsp);
11757 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
11758 kIemNativeGstRegUse_ReadOnly);
11759#ifdef RT_ARCH_AMD64
11760 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11761#else
11762 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11763#endif
11764 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
11765 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
11766 offFixupJumpToUseOtherBitSp = off;
11767 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11768 {
11769 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
11770 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11771 }
11772 else
11773 {
11774 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
11775 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11776 }
11777 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11778 }
11779 /* SpUpdateEnd: */
11780 uint32_t const offLabelSpUpdateEnd = off;
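    /* Note: the conditional jump emitted above is fixed up later via
       offFixupJumpToUseOtherBitSp: the inline path handles the SP width matching
       the current code bitness, while the opposite width is emitted out of line
       after the TLB-miss jump below and branches back to the SpUpdateEnd offset
       recorded here. */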
11781
11782 /*
11783 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
11784 * we're skipping lookup).
11785 */
11786 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
11787 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
11788 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11789 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
11790 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11791 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11792 : UINT32_MAX;
11793 uint8_t const idxRegValue = !TlbState.fSkip
11794 && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
11795 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
11796 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
11797 : UINT8_MAX;
11798 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11799
11800
11801 if (!TlbState.fSkip)
11802 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11803 else
11804 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
11805
11806 /*
11807 * Use16BitSp / Use32BitSp - whichever SP width the inline path above did not handle:
11808 */
11809 if (cBitsFlat == 0)
11810 {
11811#ifdef RT_ARCH_AMD64
11812 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
11813#else
11814 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
11815#endif
11816 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
11817 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
11818 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11819 else
11820 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
11821 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
11822 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11823 }
11824
11825 /*
11826 * TlbMiss:
11827 *
11828 * Call helper to do the pushing.
11829 */
11830 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
11831
11832#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11833 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11834#else
11835 RT_NOREF(idxInstr);
11836#endif
11837
11838 /* Save variables in volatile registers. */
11839 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11840 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
11841 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
11842 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
11843 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11844
11845 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
11846 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
11847 {
11848 /* Swap them using ARG0 as temp register: */
11849 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
11850 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
11851 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
11852 }
11853 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
11854 {
11855 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
11856 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
11857 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11858
11859 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
11860 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
11861 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
11862 }
11863 else
11864 {
11865 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
11866 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
11867
11868 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
11869 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
11870 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
11871 }
11872
11873 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11874 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11875
11876 /* Done setting up parameters, make the call. */
11877 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11878
11879 /* Restore variables and guest shadow registers to volatile registers. */
11880 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11881 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11882
11883#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11884 if (!TlbState.fSkip)
11885 {
11886 /* end of TlbMiss - Jump to the done label. */
11887 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11888 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11889
11890 /*
11891 * TlbLookup:
11892 */
11893 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
11894 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
11895
11896 /*
11897 * Emit code to do the actual storing / fetching.
11898 */
11899 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
11900# ifdef VBOX_WITH_STATISTICS
11901 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
11902 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
11903# endif
11904 if (idxRegValue != UINT8_MAX)
11905 {
11906 switch (cbMemAccess)
11907 {
11908 case 2:
11909 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
11910 break;
11911 case 4:
11912 if (!fIsIntelSeg)
11913 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
11914 else
11915 {
11916 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
11917 PUSH FS in real mode, so we have to try to emulate that here.
11918 We borrow the now unused idxReg1 from the TLB lookup code here. */
11919 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
11920 kIemNativeGstReg_EFlags);
11921 if (idxRegEfl != UINT8_MAX)
11922 {
11923#ifdef RT_ARCH_AMD64
11924 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
11925 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
11926 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
11927#else
11928 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
11929 off, TlbState.idxReg1, idxRegEfl,
11930 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
11931#endif
11932 iemNativeRegFreeTmp(pReNative, idxRegEfl);
11933 }
11934 else
11935 {
11936 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
11937 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
11938 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
11939 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
11940 }
11941 /* ASSUMES the upper half of idxRegValue is ZERO. */
11942 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
11943 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
11944 }
11945 break;
11946 case 8:
11947 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
11948 break;
11949 default:
11950 AssertFailed();
11951 }
11952 }
11953 else
11954 {
11955 switch (cbMemAccess)
11956 {
11957 case 2:
11958 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
11959 (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11960 idxRegMemResult, TlbState.idxReg1);
11961 break;
11962 case 4:
11963 Assert(!fIsSegReg);
11964 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
11965 (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
11966 idxRegMemResult, TlbState.idxReg1);
11967 break;
11968 case 8:
11969 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
11970 idxRegMemResult, TlbState.idxReg1);
11971 break;
11972 default:
11973 AssertFailed();
11974 }
11975 }
11976
11977 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
11978 TlbState.freeRegsAndReleaseVars(pReNative);
11979
11980 /*
11981 * TlbDone:
11982 *
11983 * Commit the new RSP value.
11984 */
11985 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11986 }
11987#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
11988
11989 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
11990 iemNativeRegFreeTmp(pReNative, idxRegRsp);
11991 if (idxRegEffSp != idxRegRsp)
11992 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
11993
11994 /* The value variable is implicitly flushed. */
11995 if (idxRegValue != UINT8_MAX)
11996 iemNativeVarRegisterRelease(pReNative, idxVarValue);
11997 iemNativeVarFreeLocal(pReNative, idxVarValue);
11998
11999 return off;
12000}
12001
12002
12003
12004/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
12005#define IEM_MC_POP_GREG_U16(a_iGReg) \
12006 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12007 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
12008#define IEM_MC_POP_GREG_U32(a_iGReg) \
12009 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12010 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
12011#define IEM_MC_POP_GREG_U64(a_iGReg) \
12012 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12013 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
12014
12015#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
12016 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12017 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12018#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
12019 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12020 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
12021
12022#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
12023 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12024 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12025#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
12026 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12027 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
12028
12029
12030DECL_FORCE_INLINE_THROW(uint32_t)
12031iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
12032 uint8_t idxRegTmp)
12033{
12034 /* Use16BitSp: */
12035#ifdef RT_ARCH_AMD64
12036 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12037 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12038 RT_NOREF(idxRegTmp);
12039#else
12040 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
12041 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
12042 /* add tmp, regrsp, #cbMem */
12043 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
12044 /* and tmp, tmp, #0xffff */
12045 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12046 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
12047 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
12048 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
12049#endif
12050 return off;
12051}
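/* Worked example for the 16-bit SP pop update above, assuming RSP=0x0001FFFF and cbMem=2:
   idxRegEffSp becomes 0xFFFF (the old low 16 bits of RSP) and RSP becomes 0x00010001,
   i.e. the 16-bit stack pointer wraps around while bits 63:16 stay unchanged. */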
12052
12053
12054DECL_FORCE_INLINE(uint32_t)
12055iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12056{
12057 /* Use32BitSp: */
12058 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12059 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12060 return off;
12061}
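
/*
 * Reader aid (not emitted or compiled): the 32-bit variant above amounts to
 * roughly the following, the upper RSP bits ending up zero as with any 32-bit
 * register write:
 *
 *      uint64_t uEffSp = uRsp & UINT64_C(0xffffffff);  // load address = SS:ESP
 *      uRsp            = (uint32_t)(uRsp + cbMem);     // ESP += cbMem
 */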
12062
12063
12064/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
12065DECL_INLINE_THROW(uint32_t)
12066iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
12067 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12068{
12069 /*
12070 * Assert sanity.
12071 */
12072 Assert(idxGReg < 16);
12073#ifdef VBOX_STRICT
12074 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12075 {
12076 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12077 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12078 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12079 Assert( pfnFunction
12080 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12081 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
12082 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12083 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
12084 : UINT64_C(0xc000b000a0009000) ));
12085 }
12086 else
12087 Assert( pfnFunction
12088 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
12089 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
12090 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
12091 : UINT64_C(0xc000b000a0009000) ));
12092#endif
12093
12094#ifdef VBOX_STRICT
12095 /*
12096 * Check that the fExec flags we've got make sense.
12097 */
12098 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12099#endif
12100
12101 /*
12102 * To keep things simple we have to commit any pending writes first as we
12103 * may end up making calls.
12104 */
12105 off = iemNativeRegFlushPendingWrites(pReNative, off);
12106
12107 /*
12108 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
12109 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
12110 * directly as the effective stack pointer.
12111 * (Code structure is very similar to that of PUSH)
12112 */
12113 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12114 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12115 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12116 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12117 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12118 /** @todo can do a better job picking the register here. For cbMem >= 4 this
12119 * will be the resulting register value. */
12120 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
12121
12122 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12123 if (cBitsFlat != 0)
12124 {
12125 Assert(idxRegEffSp == idxRegRsp);
12126 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12127 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12128 }
12129 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12130 {
12131 Assert(idxRegEffSp != idxRegRsp);
12132 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12133 kIemNativeGstRegUse_ReadOnly);
12134#ifdef RT_ARCH_AMD64
12135 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12136#else
12137 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12138#endif
12139 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12140 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12141 offFixupJumpToUseOtherBitSp = off;
12142 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12143 {
12144/** @todo can skip idxRegRsp updating when popping ESP. */
12145 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12146 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12147 }
12148 else
12149 {
12150 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12151 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12152 }
12153 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12154 }
12155 /* SpUpdateEnd: */
12156 uint32_t const offLabelSpUpdateEnd = off;
12157
12158 /*
12159 * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or to
12160 * the TlbMiss code if we're skipping the lookup).
12161 */
12162 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12163 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
12164 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12165 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12166 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12167 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12168 : UINT32_MAX;
12169
12170 if (!TlbState.fSkip)
12171 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12172 else
12173 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12174
12175 /*
12176 * Use16BitSp:
12177 */
12178 if (cBitsFlat == 0)
12179 {
12180#ifdef RT_ARCH_AMD64
12181 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12182#else
12183 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12184#endif
12185 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12186 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12187 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12188 else
12189 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12190 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12191 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12192 }
12193
12194 /*
12195 * TlbMiss:
12196 *
12197 * Call helper to do the fetching.
12198 */
12199 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12200
12201#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12202 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12203#else
12204 RT_NOREF(idxInstr);
12205#endif
12206
12207 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12208 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12209 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
12210 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12211
12212
12213 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
12214 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12215 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12216
12217 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12218 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12219
12220 /* Done setting up parameters, make the call. */
12221 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12222
12223 /* Move the return register content to idxRegMemResult. */
12224 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12225 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12226
12227 /* Restore variables and guest shadow registers to volatile registers. */
12228 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12229 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12230
12231#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12232 if (!TlbState.fSkip)
12233 {
12234 /* end of TlbMiss - Jump to the done label. */
12235 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12236 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12237
12238 /*
12239 * TlbLookup:
12240 */
12241 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
12242 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12243
12244 /*
12245 * Emit code to load the value (dereferencing the pointer in idxRegMemResult and putting the result back into idxRegMemResult).
12246 */
12247 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12248# ifdef VBOX_WITH_STATISTICS
12249 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12250 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12251# endif
12252 switch (cbMem)
12253 {
12254 case 2:
12255 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12256 break;
12257 case 4:
12258 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12259 break;
12260 case 8:
12261 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12262 break;
12263 default:
12264 AssertFailed();
12265 }
12266
12267 TlbState.freeRegsAndReleaseVars(pReNative);
12268
12269 /*
12270 * TlbDone:
12271 *
12272 * Set the new RSP value (FLAT accesses need to calculate it first) and
12273 * commit the popped register value.
12274 */
12275 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12276 }
12277#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12278
12279 if (idxGReg != X86_GREG_xSP)
12280 {
12281 /* Set the register. */
12282 if (cbMem >= sizeof(uint32_t))
12283 {
12284#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
12285 AssertMsg( pReNative->idxCurCall == 0
12286 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
12287 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
12288#endif
12289 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
12290 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
12291 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12292 }
12293 else
12294 {
12295 Assert(cbMem == sizeof(uint16_t));
12296 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
12297 kIemNativeGstRegUse_ForUpdate);
12298 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
12299 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12300 iemNativeRegFreeTmp(pReNative, idxRegDst);
12301 }
12302
12303 /* Complete RSP calculation for FLAT mode. */
12304 if (idxRegEffSp == idxRegRsp)
12305 {
12306 if (cBitsFlat == 64)
12307 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12308 else
12309 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12310 }
12311 }
12312 else
12313 {
12314 /* We're popping RSP, ESP or SP. Only the last one (SP) requires a bit of extra work, of course. */
12315 if (cbMem == sizeof(uint64_t))
12316 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
12317 else if (cbMem == sizeof(uint32_t))
12318 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
12319 else
12320 {
12321 if (idxRegEffSp == idxRegRsp)
12322 {
12323 if (cBitsFlat == 64)
12324 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12325 else
12326 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12327 }
12328 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
12329 }
12330 }
12331 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
12332
12333 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12334 if (idxRegEffSp != idxRegRsp)
12335 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12336 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12337
12338 return off;
12339}
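
/*
 * Reader aid (not emitted or compiled): a simplified guest-level view of the
 * code generated by iemNativeEmitStackPopGReg above, with TLB handling,
 * statistics and register shadowing omitted:
 *
 *      uValue = ReadStack(SS:EffSp, cbMem);    // TLB hit: inline load; TLB miss: helper call
 *      RSP    = <advanced as emitted above, per 16/32-bit segmented or FLAT mode>;
 *      if (iGReg != X86_GREG_xSP)
 *          GReg[iGReg] = cbMem >= 4 ? uValue : (GReg[iGReg] & ~(uint64_t)0xffff) | uValue;
 *      else
 *          RSP         = cbMem >= 4 ? uValue : (RSP         & ~(uint64_t)0xffff) | uValue;
 */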
12340
12341
12342
12343/*********************************************************************************************************************************
12344* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
12345*********************************************************************************************************************************/
12346
12347#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12348 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12349 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
12350 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
12351
12352#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12353 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12354 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
12355 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
12356
12357#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12358 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12359 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
12360 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
12361
12362#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12363 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12364 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
12365 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
12366
12367
12368#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12369 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12370 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12371 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
12372
12373#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12374 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12375 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12376 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
12377
12378#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12379 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12380 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12381 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12382
12383#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12384 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12385 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12386 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
12387
12388#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12389 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
12390 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12391 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12392
12393
12394#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12395 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12396 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12397 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
12398
12399#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12400 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12401 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12402 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
12403
12404#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12405 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12406 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12407 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12408
12409#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12410 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12411 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12412 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
12413
12414#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12415 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
12416 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12417 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12418
12419
12420#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12421 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12422 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12423 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
12424
12425#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12426 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12427 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12428 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
12429#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12430 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12431 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12432 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12433
12434#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12435 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12436 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12437 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
12438
12439#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12440 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
12441 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12442 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12443
12444
12445#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12446 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12447 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12448 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
12449
12450#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12451 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12452 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12453 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
12454
12455
12456#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12457 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12458 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12459 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
12460
12461#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12462 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12463 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12464 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
12465
12466#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12467 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12468 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12469 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
12470
12471#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12472 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12473 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12474 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
12475
12476
12477
12478#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12479 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12480 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
12481 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
12482
12483#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12484 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12485 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
12486 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
12487
12488#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12489 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12490 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
12491 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
12492
12493#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
12494 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12495 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
12496 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
12497
12498
12499#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12500 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12501 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12502 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
12503
12504#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12505 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12506 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12507 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
12508
12509#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12510 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12511 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12512 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12513
12514#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
12515 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12516 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12517 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
12518
12519#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
12520 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
12521 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12522 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
12523
12524
12525#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12526 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12527 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12528 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
12529
12530#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12531 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12532 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12533 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
12534
12535#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12536 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12537 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12538 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12539
12540#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
12541 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12542 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12543 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
12544
12545#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
12546 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
12547 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12548 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
12549
12550
12551#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12552 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12553 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12554 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
12555
12556#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12557 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12558 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12559 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
12560
12561#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12562 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12563 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12564 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12565
12566#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
12567 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12568 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12569 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
12570
12571#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
12572 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
12573 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12574 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
12575
12576
12577#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
12578 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12579 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12580 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
12581
12582#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
12583 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
12584 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12585 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
12586
12587
12588#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12589 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12590 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12591 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
12592
12593#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12594 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12595 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12596 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
12597
12598#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12599 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12600 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12601 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
12602
12603#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
12604 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
12605 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12606 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
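
/*
 * Reader aid (not part of the original source): fAlignMask is sizeof(type) - 1
 * for the integer/SIMD types above, i.e. natural alignment, while the 80-bit
 * types use an 8-byte mask.  Presumably the TLB lookup emitter applies it along
 * these lines (assumption; see iemNativeEmitTlbLookup for the real check):
 *
 *      if (GCPtrMem & fAlignMask)   // not sufficiently aligned
 *          goto TlbMiss;            // let the helper handle splits, #AC, atomics, ...
 */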
12607
12608
12609DECL_INLINE_THROW(uint32_t)
12610iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
12611 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
12612 uintptr_t pfnFunction, uint8_t idxInstr)
12613{
12614 /*
12615 * Assert sanity.
12616 */
12617 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
12618 AssertStmt( pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
12619 && pReNative->Core.aVars[idxVarMem].cbVar == sizeof(void *),
12620 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12621
12622 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
12623 AssertStmt( pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
12624 && pReNative->Core.aVars[idxVarUnmapInfo].cbVar == sizeof(uint8_t),
12625 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12626
12627 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12628 AssertStmt( pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
12629 || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
12630 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12631
12632 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12633
12634 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12635
12636#ifdef VBOX_STRICT
12637# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
12638 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
12639 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
12640 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
12641 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
12642# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
12643 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
12644 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
12645 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
12646
12647 if (iSegReg == UINT8_MAX)
12648 {
12649 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12650 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12651 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12652 switch (cbMem)
12653 {
12654 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
12655 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
12656 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
12657 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
12658 case 10:
12659 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
12660 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
12661 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
12662 break;
12663 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
12664# if 0
12665 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
12666 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
12667# endif
12668 default: AssertFailed(); break;
12669 }
12670 }
12671 else
12672 {
12673 Assert(iSegReg < 6);
12674 switch (cbMem)
12675 {
12676 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
12677 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
12678 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
12679 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
12680 case 10:
12681 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
12682 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
12683 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
12684 break;
12685 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
12686# if 0
12687 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
12688 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
12689# endif
12690 default: AssertFailed(); break;
12691 }
12692 }
12693# undef IEM_MAP_HLP_FN
12694# undef IEM_MAP_HLP_FN_NO_AT
12695#endif
12696
12697#ifdef VBOX_STRICT
12698 /*
12699 * Check that the fExec flags we've got make sense.
12700 */
12701 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12702#endif
12703
12704 /*
12705 * To keep things simple we have to commit any pending writes first as we
12706 * may end up making calls.
12707 */
12708 off = iemNativeRegFlushPendingWrites(pReNative, off);
12709
12710#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12711 /*
12712 * Move/spill/flush stuff out of call-volatile registers.
12713 * This is the easy way out. We could contain this to the tlb-miss branch
12714 * by saving and restoring active stuff here.
12715 */
12716 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
12717 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
12718#endif
12719
12720 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
12721 while the tlb-miss codepath will temporarily put it on the stack.
12722 Set the type to stack here so we don't need to do it twice below. */
12723 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
12724 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
12725 /** @todo use a tmp register from TlbState, since they'll be free after tlb
12726 * lookup is done. */
12727
12728 /*
12729 * Define labels and allocate the result register (trying for the return
12730 * register if we can).
12731 */
12732 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12733 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
12734 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
12735 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
12736 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
12737 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12738 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12739 : UINT32_MAX;
12740//off=iemNativeEmitBrk(pReNative, off, 0);
12741 /*
12742 * Jump to the TLB lookup code.
12743 */
12744 if (!TlbState.fSkip)
12745 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12746
12747 /*
12748 * TlbMiss:
12749 *
12750 * Call helper to do the fetching.
12751 * We flush all guest register shadow copies here.
12752 */
12753 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
12754
12755#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12756 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12757#else
12758 RT_NOREF(idxInstr);
12759#endif
12760
12761#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12762 /* Save variables in volatile registers. */
12763 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
12764 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12765#endif
12766
12767 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
12768 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
12769#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12770 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
12771#else
12772 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12773#endif
12774
12775 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
12776 if (iSegReg != UINT8_MAX)
12777 {
12778 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
12779 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
12780 }
12781
12782 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
12783 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
12784 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
12785
12786 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12787 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12788
12789 /* Done setting up parameters, make the call. */
12790 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12791
12792 /*
12793 * Put the output in the right registers.
12794 */
12795 Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
12796 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12797 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12798
12799#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12800 /* Restore variables and guest shadow registers to volatile registers. */
12801 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12802 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12803#endif
12804
12805 Assert(pReNative->Core.aVars[idxVarUnmapInfo].idxReg == idxRegUnmapInfo);
12806 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
12807
12808#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12809 if (!TlbState.fSkip)
12810 {
12811 /* end of TlbMiss - Jump to the done label. */
12812 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12813 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12814
12815 /*
12816 * TlbLookup:
12817 */
12818 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
12819 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12820# ifdef VBOX_WITH_STATISTICS
12821 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
12822 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
12823# endif
12824
12825 /* [idxVarUnmapInfo] = 0; */
12826 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
12827
12828 /*
12829 * TlbDone:
12830 */
12831 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12832
12833 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
12834
12835# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12836 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
12837 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12838# endif
12839 }
12840#else
12841 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
12842#endif
12843
12844 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
12845 iemNativeVarRegisterRelease(pReNative, idxVarMem);
12846
12847 return off;
12848}
12849
12850
12851#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
12852 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
12853 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
12854
12855#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
12856 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
12857 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
12858
12859#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
12860 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
12861 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
12862
12863#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
12864 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
12865 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
12866
12867DECL_INLINE_THROW(uint32_t)
12868iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
12869 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
12870{
12871 /*
12872 * Assert sanity.
12873 */
12874 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
12875 Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
12876 Assert( pReNative->Core.aVars[idxVarUnmapInfo].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
12877 || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
12878#ifdef VBOX_STRICT
12879 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
12880 {
12881 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
12882 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
12883 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
12884 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
12885 case IEM_ACCESS_TYPE_WRITE:
12886 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
12887 case IEM_ACCESS_TYPE_READ:
12888 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
12889 default: AssertFailed();
12890 }
12891#else
12892 RT_NOREF(fAccess);
12893#endif
12894
12895 /*
12896 * To keep things simple we have to commit any pending writes first as we
12897 * may end up making calls (there shouldn't be any at this point, so this
12898 * is just for consistency).
12899 */
12900 /** @todo we could postpone this till we make the call and reload the
12901 * registers after returning from the call. Not sure if that's sensible or
12902 * not, though. */
12903 off = iemNativeRegFlushPendingWrites(pReNative, off);
12904
12905 /*
12906 * Move/spill/flush stuff out of call-volatile registers.
12907 *
12908 * We exclude any register holding the bUnmapInfo variable, as we'll be
12909 * checking it after returning from the call and will free it afterwards.
12910 */
12911 /** @todo save+restore active registers and maybe guest shadows in miss
12912 * scenario. */
12913 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
12914
12915 /*
12916 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
12917 * to call the unmap helper function.
12918 *
12919 * The likelihood of it being zero is higher than for the TLB hit when doing
12920 * the mapping, as a TLB miss for a well aligned and unproblematic memory
12921 * access should also end up with a mapping that won't need special unmapping.
12922 */
12923 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
12924 * should speed up things for the pure interpreter as well when TLBs
12925 * are enabled. */
12926#ifdef RT_ARCH_AMD64
12927 if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
12928 {
12929 /* test byte [rbp - xxx], 0ffh */
12930 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
12931 pbCodeBuf[off++] = 0xf6;
12932 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
12933 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
12934 pbCodeBuf[off++] = 0xff;
12935 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12936 }
12937 else
12938#endif
12939 {
12940 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
12941 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
12942 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
12943 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
12944 }
12945 uint32_t const offJmpFixup = off;
12946 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
12947
12948 /*
12949 * Call the unmap helper function.
12950 */
12951#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
12952 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12953#else
12954 RT_NOREF(idxInstr);
12955#endif
12956
12957 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
12958 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
12959 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12960
12961 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12962 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12963
12964 /* Done setting up parameters, make the call. */
12965 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12966
12967 /* The bUnmapInfo variable is implicitly freed by these MCs. */
12968 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
12969
12970 /*
12971 * Done, just fixup the jump for the non-call case.
12972 */
12973 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
12974
12975 return off;
12976}
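
/*
 * Reader aid (not part of the original source): the bUnmapInfo convention tying
 * the map and commit+unmap emitters together - the TLB-hit path of the mapping
 * code stores zero into the variable, so only the TLB-miss path ever needs the
 * helper call emitted above.  Roughly (sketch, not the actual emitted code):
 *
 *      if (bUnmapInfo != 0)    // set by the TLB-miss map helper
 *          iemNativeHlpMemCommitAndUnmapXxx(pVCpu, bUnmapInfo);
 *      // else: direct guest memory mapping, nothing to commit or unmap
 */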
12977
12978
12979
12980/*********************************************************************************************************************************
12981* State and Exceptions *
12982*********************************************************************************************************************************/
12983
12984#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12985#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
12986
12987#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12988#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12989#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
12990
12991#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12992#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
12993#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
12994
12995
12996DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
12997{
12998 /** @todo this needs a lot more work later. */
12999 RT_NOREF(pReNative, fForChange);
13000 return off;
13001}
13002
13003
13004/*********************************************************************************************************************************
13005* The native code generator functions for each MC block. *
13006*********************************************************************************************************************************/
13007
13008
13009/*
13010 * Include g_apfnIemNativeRecompileFunctions and associated functions.
13011 *
13012 * This should probably live in its own file later, but let's see what the
13013 * compile times turn out to be first.
13014 */
13015#include "IEMNativeFunctions.cpp.h"
13016
13017
13018
13019/*********************************************************************************************************************************
13020* Recompiler Core. *
13021*********************************************************************************************************************************/
13022
13023
13024/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
13025static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
13026{
13027 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
13028 pDis->cbCachedInstr += cbMaxRead;
13029 RT_NOREF(cbMinRead);
13030 return VERR_NO_DATA;
13031}
13032
13033
13034/**
13035 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
13036 * @returns pszBuf.
13037 * @param fFlags The flags.
13038 * @param pszBuf The output buffer.
13039 * @param cbBuf The output buffer size. At least 32 bytes.
13040 */
13041DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
13042{
13043 Assert(cbBuf >= 32);
13044 static RTSTRTUPLE const s_aModes[] =
13045 {
13046 /* [00] = */ { RT_STR_TUPLE("16BIT") },
13047 /* [01] = */ { RT_STR_TUPLE("32BIT") },
13048 /* [02] = */ { RT_STR_TUPLE("!2!") },
13049 /* [03] = */ { RT_STR_TUPLE("!3!") },
13050 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
13051 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
13052 /* [06] = */ { RT_STR_TUPLE("!6!") },
13053 /* [07] = */ { RT_STR_TUPLE("!7!") },
13054 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
13055 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
13056 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
13057 /* [0b] = */ { RT_STR_TUPLE("!b!") },
13058 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
13059 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
13060 /* [0e] = */ { RT_STR_TUPLE("!e!") },
13061 /* [0f] = */ { RT_STR_TUPLE("!f!") },
13062 /* [10] = */ { RT_STR_TUPLE("!10!") },
13063 /* [11] = */ { RT_STR_TUPLE("!11!") },
13064 /* [12] = */ { RT_STR_TUPLE("!12!") },
13065 /* [13] = */ { RT_STR_TUPLE("!13!") },
13066 /* [14] = */ { RT_STR_TUPLE("!14!") },
13067 /* [15] = */ { RT_STR_TUPLE("!15!") },
13068 /* [16] = */ { RT_STR_TUPLE("!16!") },
13069 /* [17] = */ { RT_STR_TUPLE("!17!") },
13070 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
13071 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
13072 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
13073 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
13074 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
13075 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
13076 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
13077 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
13078 };
13079 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
13080 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
13081 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
13082
13083 pszBuf[off++] = ' ';
13084 pszBuf[off++] = 'C';
13085 pszBuf[off++] = 'P';
13086 pszBuf[off++] = 'L';
13087 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
13088 Assert(off < 32);
13089
13090 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
13091
13092 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
13093 {
13094 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
13095 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
13096 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
13097 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
13098 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
13099 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
13100 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
13101 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
13102 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
13103 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
13104 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
13105 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
13106 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
13107 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
13108 };
13109 if (fFlags)
13110 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
13111 if (s_aFlags[i].fFlag & fFlags)
13112 {
13113 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
13114 pszBuf[off++] = ' ';
13115 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
13116 off += s_aFlags[i].cchName;
13117 fFlags &= ~s_aFlags[i].fFlag;
13118 if (!fFlags)
13119 break;
13120 }
13121 pszBuf[off] = '\0';
13122
13123 return pszBuf;
13124}
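
/*
 * Reader aid (not part of the original source): typical usage of the formatter
 * above, as done by the TB disassembler below - any buffer of 32 bytes or more
 * will do, the disassembler simply reuses its 512 byte scratch buffer:
 *
 *      char szBuf[512];
 *      pHlp->pfnPrintf(pHlp, "fFlags=%#010x %s\n", pTb->fFlags,
 *                      iemTbFlagsToString(pTb->fFlags, szBuf, sizeof(szBuf)));
 */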
13125
13126
13127DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
13128{
13129 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
13130#if defined(RT_ARCH_AMD64)
13131 static const char * const a_apszMarkers[] =
13132 {
13133 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
13134 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
13135 };
13136#endif
13137
13138 char szDisBuf[512];
13139 DISSTATE Dis;
13140 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
13141 uint32_t const cNative = pTb->Native.cInstructions;
13142 uint32_t offNative = 0;
13143#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13144 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
13145#endif
13146 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
13147 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
13148 : DISCPUMODE_64BIT;
13149#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13150 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
13151#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13152 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
13153#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13154# error "Port me"
13155#else
13156 csh hDisasm = ~(size_t)0;
13157# if defined(RT_ARCH_AMD64)
13158 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
13159# elif defined(RT_ARCH_ARM64)
13160 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
13161# else
13162# error "Port me"
13163# endif
13164 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
13165#endif
13166
13167 /*
13168 * Print TB info.
13169 */
13170 pHlp->pfnPrintf(pHlp,
13171 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
13172 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
13173 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
13174 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
13175#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13176 if (pDbgInfo && pDbgInfo->cEntries > 1)
13177 {
13178 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
13179
13180 /*
13181 * This disassembly is driven by the debug info which follows the native
13182 * code and indicates when it starts with the next guest instructions,
13183 * where labels are and such things.
13184 */
13185 uint32_t idxThreadedCall = 0;
13186 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
13187 uint8_t idxRange = UINT8_MAX;
13188 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
13189 uint32_t offRange = 0;
13190 uint32_t offOpcodes = 0;
13191 uint32_t const cbOpcodes = pTb->cbOpcodes;
13192 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
13193 uint32_t const cDbgEntries = pDbgInfo->cEntries;
13194 uint32_t iDbgEntry = 1;
13195 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
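        /* aEntries[0] is always a NativeOffset record (asserted above); offDbgNativeNext holds
           the native-code offset at which the next batch of debug entries applies and is
           refreshed whenever a NativeOffset entry is consumed in the loop below. */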
13196
13197 while (offNative < cNative)
13198 {
13199 /* If we're at or have passed the point where the next chunk of debug
13200 info starts, process it. */
13201 if (offDbgNativeNext <= offNative)
13202 {
13203 offDbgNativeNext = UINT32_MAX;
13204 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
13205 {
13206 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
13207 {
13208 case kIemTbDbgEntryType_GuestInstruction:
13209 {
13210 /* Did the exec flag change? */
13211 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
13212 {
13213 pHlp->pfnPrintf(pHlp,
13214 " fExec change %#08x -> %#08x %s\n",
13215 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
13216 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
13217 szDisBuf, sizeof(szDisBuf)));
13218 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
13219 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
13220 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
13221 : DISCPUMODE_64BIT;
13222 }
13223
13224                             /* New opcode range? We need to fend off a spurious debug info entry here for cases
13225 where the compilation was aborted before the opcode was recorded and the actual
13226 instruction was translated to a threaded call. This may happen when we run out
13227 of ranges, or when some complicated interrupts/FFs are found to be pending or
13228 similar. So, we just deal with it here rather than in the compiler code as it
13229 is a lot simpler to do here. */
13230 if ( idxRange == UINT8_MAX
13231 || idxRange >= cRanges
13232 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
13233 {
13234 idxRange += 1;
13235 if (idxRange < cRanges)
13236 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
13237 else
13238 continue;
13239 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
13240 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
13241 + (pTb->aRanges[idxRange].idxPhysPage == 0
13242 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
13243 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
13244 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
13245 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
13246 pTb->aRanges[idxRange].idxPhysPage);
13247 GCPhysPc += offRange;
13248 }
13249
13250 /* Disassemble the instruction. */
13251 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
13252 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
13253 uint32_t cbInstr = 1;
13254 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
13255 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
13256 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13257 if (RT_SUCCESS(rc))
13258 {
13259 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13260 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13261 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13262 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13263
13264 static unsigned const s_offMarker = 55;
13265 static char const s_szMarker[] = " ; <--- guest";
13266 if (cch < s_offMarker)
13267 {
13268 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
13269 cch = s_offMarker;
13270 }
13271 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
13272 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
13273
13274 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
13275 }
13276 else
13277 {
13278 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
13279 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
13280 cbInstr = 1;
13281 }
13282 GCPhysPc += cbInstr;
13283 offOpcodes += cbInstr;
13284 offRange += cbInstr;
13285 continue;
13286 }
13287
13288 case kIemTbDbgEntryType_ThreadedCall:
13289 pHlp->pfnPrintf(pHlp,
13290 " Call #%u to %s (%u args) - %s\n",
13291 idxThreadedCall,
13292 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
13293 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
13294 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
13295 idxThreadedCall++;
13296 continue;
13297
13298 case kIemTbDbgEntryType_GuestRegShadowing:
13299 {
13300 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
13301 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
13302 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
13303 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
13304 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
13305 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
13306 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
13307 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
13308 else
13309 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
13310 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
13311 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
13312 continue;
13313 }
13314
13315 case kIemTbDbgEntryType_Label:
13316 {
13317 const char *pszName = "what_the_fudge";
13318 const char *pszComment = "";
13319 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
13320 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
13321 {
13322 case kIemNativeLabelType_Return:
13323 pszName = "Return";
13324 break;
13325 case kIemNativeLabelType_ReturnBreak:
13326 pszName = "ReturnBreak";
13327 break;
13328 case kIemNativeLabelType_ReturnWithFlags:
13329 pszName = "ReturnWithFlags";
13330 break;
13331 case kIemNativeLabelType_NonZeroRetOrPassUp:
13332 pszName = "NonZeroRetOrPassUp";
13333 break;
13334 case kIemNativeLabelType_RaiseGp0:
13335 pszName = "RaiseGp0";
13336 break;
13337 case kIemNativeLabelType_ObsoleteTb:
13338 pszName = "ObsoleteTb";
13339 break;
13340 case kIemNativeLabelType_NeedCsLimChecking:
13341 pszName = "NeedCsLimChecking";
13342 break;
13343 case kIemNativeLabelType_CheckBranchMiss:
13344 pszName = "CheckBranchMiss";
13345 break;
13346 case kIemNativeLabelType_If:
13347 pszName = "If";
13348 fNumbered = true;
13349 break;
13350 case kIemNativeLabelType_Else:
13351 pszName = "Else";
13352 fNumbered = true;
13353 pszComment = " ; regs state restored pre-if-block";
13354 break;
13355 case kIemNativeLabelType_Endif:
13356 pszName = "Endif";
13357 fNumbered = true;
13358 break;
13359 case kIemNativeLabelType_CheckIrq:
13360 pszName = "CheckIrq_CheckVM";
13361 fNumbered = true;
13362 break;
13363 case kIemNativeLabelType_TlbLookup:
13364 pszName = "TlbLookup";
13365 fNumbered = true;
13366 break;
13367 case kIemNativeLabelType_TlbMiss:
13368 pszName = "TlbMiss";
13369 fNumbered = true;
13370 break;
13371 case kIemNativeLabelType_TlbDone:
13372 pszName = "TlbDone";
13373 fNumbered = true;
13374 break;
13375 case kIemNativeLabelType_Invalid:
13376 case kIemNativeLabelType_End:
13377 break;
13378 }
13379 if (fNumbered)
13380 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
13381 else
13382 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
13383 continue;
13384 }
13385
13386 case kIemTbDbgEntryType_NativeOffset:
13387 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
13388 Assert(offDbgNativeNext > offNative);
13389 break;
13390
13391 default:
13392 AssertFailed();
13393 }
13394 iDbgEntry++;
13395 break;
13396 }
13397 }
13398
13399 /*
13400 * Disassemble the next native instruction.
13401 */
13402 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
13403# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13404 uint32_t cbInstr = sizeof(paNative[0]);
13405 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
13406 if (RT_SUCCESS(rc))
13407 {
13408# if defined(RT_ARCH_AMD64)
13409 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
13410 {
13411 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
13412 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
13413 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
13414 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
13415 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
13416 uInfo & 0x8000 ? "recompiled" : "todo");
13417 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
13418 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
13419 else
13420 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
13421 }
13422 else
13423# endif
13424 {
13425# ifdef RT_ARCH_AMD64
13426 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13427 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13428 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13429 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13430# elif defined(RT_ARCH_ARM64)
13431 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
13432 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13433 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13434# else
13435# error "Port me"
13436# endif
13437 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
13438 }
13439 }
13440 else
13441 {
13442# if defined(RT_ARCH_AMD64)
13443 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
13444 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
13445# elif defined(RT_ARCH_ARM64)
13446 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
13447# else
13448# error "Port me"
13449# endif
13450 cbInstr = sizeof(paNative[0]);
13451 }
13452 offNative += cbInstr / sizeof(paNative[0]);
13453
13454# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13455 cs_insn *pInstr;
13456 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
13457 (uintptr_t)pNativeCur, 1, &pInstr);
13458 if (cInstrs > 0)
13459 {
13460 Assert(cInstrs == 1);
13461# if defined(RT_ARCH_AMD64)
13462 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
13463 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
13464# else
13465 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
13466 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
13467# endif
13468 offNative += pInstr->size / sizeof(*pNativeCur);
13469 cs_free(pInstr, cInstrs);
13470 }
13471 else
13472 {
13473# if defined(RT_ARCH_AMD64)
13474 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
13475                                     pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
13476# else
13477 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
13478# endif
13479 offNative++;
13480 }
13481# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13482 }
13483 }
13484 else
13485#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
13486 {
13487 /*
13488 * No debug info, just disassemble the x86 code and then the native code.
13489 *
13490 * First the guest code:
13491 */
13492 for (unsigned i = 0; i < pTb->cRanges; i++)
13493 {
13494 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
13495 + (pTb->aRanges[i].idxPhysPage == 0
13496 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
13497 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
13498 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
13499 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
13500 unsigned off = pTb->aRanges[i].offOpcodes;
13501 /** @todo this ain't working when crossing pages! */
13502 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
13503 while (off < cbOpcodes)
13504 {
13505 uint32_t cbInstr = 1;
13506 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
13507 &pTb->pabOpcodes[off], cbOpcodes - off,
13508 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13509 if (RT_SUCCESS(rc))
13510 {
13511 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13512 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13513 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13514 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13515 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
13516 GCPhysPc += cbInstr;
13517 off += cbInstr;
13518 }
13519 else
13520 {
13521 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
13522 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
13523 break;
13524 }
13525 }
13526 }
13527
13528 /*
13529 * Then the native code:
13530 */
13531 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
13532 while (offNative < cNative)
13533 {
13534 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
13535# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13536 uint32_t cbInstr = sizeof(paNative[0]);
13537 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
13538 if (RT_SUCCESS(rc))
13539 {
13540# if defined(RT_ARCH_AMD64)
13541 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
13542 {
13543 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
13544 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
13545 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
13546 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
13547 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
13548 uInfo & 0x8000 ? "recompiled" : "todo");
13549 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
13550 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
13551 else
13552 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
13553 }
13554 else
13555# endif
13556 {
13557# ifdef RT_ARCH_AMD64
13558 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13559 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13560 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13561 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13562# elif defined(RT_ARCH_ARM64)
13563 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
13564 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13565 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13566# else
13567# error "Port me"
13568# endif
13569 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
13570 }
13571 }
13572 else
13573 {
13574# if defined(RT_ARCH_AMD64)
13575 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
13576 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
13577# else
13578 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
13579# endif
13580 cbInstr = sizeof(paNative[0]);
13581 }
13582 offNative += cbInstr / sizeof(paNative[0]);
13583
13584# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13585 cs_insn *pInstr;
13586 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
13587 (uintptr_t)pNativeCur, 1, &pInstr);
13588 if (cInstrs > 0)
13589 {
13590 Assert(cInstrs == 1);
13591# if defined(RT_ARCH_AMD64)
13592 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
13593 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
13594# else
13595 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
13596 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
13597# endif
13598 offNative += pInstr->size / sizeof(*pNativeCur);
13599 cs_free(pInstr, cInstrs);
13600 }
13601 else
13602 {
13603# if defined(RT_ARCH_AMD64)
13604 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
13605                                    pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
13606# else
13607 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
13608# endif
13609 offNative++;
13610 }
13611# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
13612 }
13613 }
13614
13615#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
13616 /* Cleanup. */
13617 cs_close(&hDisasm);
13618#endif
13619}
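/* Typical use: dumping a freshly recompiled TB to the log, e.g.
       iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
   as done at the end of iemNativeRecompile() below when level 3 logging is enabled. */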
13620
13621
13622/**
13623 * Recompiles the given threaded TB into a native one.
13624 *
13625 * In case of failure the translation block will be returned as-is.
13626 *
13627 * @returns pTb.
13628 * @param pVCpu The cross context virtual CPU structure of the calling
13629 * thread.
13630 * @param   pTb     The threaded translation block to recompile to native.
13631 */
13632DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
13633{
13634 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
13635
13636 /*
13637 * The first time thru, we allocate the recompiler state, the other times
13638 * we just need to reset it before using it again.
13639 */
13640 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
13641 if (RT_LIKELY(pReNative))
13642 iemNativeReInit(pReNative, pTb);
13643 else
13644 {
13645 pReNative = iemNativeInit(pVCpu, pTb);
13646 AssertReturn(pReNative, pTb);
13647 }
13648
13649#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
13650 /*
13651 * First do liveness analysis. This is done backwards.
13652 */
13653 {
13654 uint32_t idxCall = pTb->Thrd.cCalls;
13655 if (idxCall <= pReNative->cLivenessEntriesAlloc)
13656 { /* likely */ }
13657 else
13658 {
13659 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
13660 while (idxCall > cAlloc)
13661 cAlloc *= 2;
13662 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
13663 AssertReturn(pvNew, pTb);
13664 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
13665 pReNative->cLivenessEntriesAlloc = cAlloc;
13666 }
13667 AssertReturn(idxCall > 0, pTb);
13668 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
13669
13670 /* The initial (final) entry. */
13671 idxCall--;
13672 paLivenessEntries[idxCall].Bit0.bm64 = IEMLIVENESSBIT0_ALL_UNUSED;
13673 paLivenessEntries[idxCall].Bit1.bm64 = IEMLIVENESSBIT1_ALL_UNUSED;
13674
13675 /* Loop backwards thru the calls and fill in the other entries. */
13676 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
13677 while (idxCall > 0)
13678 {
13679 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
13680 if (pfnLiveness)
13681 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
13682 else
13683 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
13684 pCallEntry--;
13685 idxCall--;
13686 }
13687
13688# ifdef VBOX_WITH_STATISTICS
13689         /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
13690            to 'clobbered' rather than 'input'. */
13691 /** @todo */
13692# endif
13693 }
13694#endif
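    /* The liveness array is filled from the last call backwards, starting from an "all unused"
       state; roughly speaking, entry i then describes which parts of the guest state the calls
       following call i still require, which the recompiler consults via pReNative->idxCurCall
       and paLivenessEntries while emitting code below. */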
13695
13696 /*
13697 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
13698 * for aborting if an error happens.
13699 */
13700 uint32_t cCallsLeft = pTb->Thrd.cCalls;
13701#ifdef LOG_ENABLED
13702 uint32_t const cCallsOrg = cCallsLeft;
13703#endif
13704 uint32_t off = 0;
13705 int rc = VINF_SUCCESS;
13706 IEMNATIVE_TRY_SETJMP(pReNative, rc)
13707 {
13708 /*
13709 * Emit prolog code (fixed).
13710 */
13711 off = iemNativeEmitProlog(pReNative, off);
13712
13713 /*
13714 * Convert the calls to native code.
13715 */
13716#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13717 int32_t iGstInstr = -1;
13718#endif
13719#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
13720 uint32_t cThreadedCalls = 0;
13721 uint32_t cRecompiledCalls = 0;
13722#endif
13723#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
13724 uint32_t idxCurCall = 0;
13725#endif
13726 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
13727 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
13728 while (cCallsLeft-- > 0)
13729 {
13730 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
13731#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
13732 pReNative->idxCurCall = idxCurCall;
13733#endif
13734
13735 /*
13736 * Debug info and assembly markup.
13737 */
13738#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
13739 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
13740 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
13741#endif
13742#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13743 iemNativeDbgInfoAddNativeOffset(pReNative, off);
13744 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
13745 {
13746 if (iGstInstr < (int32_t)pTb->cInstructions)
13747 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
13748 else
13749 Assert(iGstInstr == pTb->cInstructions);
13750 iGstInstr = pCallEntry->idxInstr;
13751 }
13752 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
13753#endif
13754#if defined(VBOX_STRICT)
13755 off = iemNativeEmitMarker(pReNative, off,
13756 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
13757#endif
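            /* The marker dword packs the call index (with bit 15 flagging 'recompiled') into the
               low word and the threaded function number into the high word, which is exactly the
               layout iemNativeDisassembleTb() decodes from the resulting 7-byte NOP. */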
13758#if defined(VBOX_STRICT)
13759 iemNativeRegAssertSanity(pReNative);
13760#endif
13761
13762 /*
13763 * Actual work.
13764 */
13765 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
13766 pfnRecom ? "(recompiled)" : "(todo)"));
13767 if (pfnRecom) /** @todo stats on this. */
13768 {
13769 off = pfnRecom(pReNative, off, pCallEntry);
13770 STAM_REL_STATS({cRecompiledCalls++;});
13771 }
13772 else
13773 {
13774 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
13775 STAM_REL_STATS({cThreadedCalls++;});
13776 }
13777 Assert(off <= pReNative->cInstrBufAlloc);
13778 Assert(pReNative->cCondDepth == 0);
13779
13780#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
13781 if (LogIs2Enabled())
13782 {
13783 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
13784 static const char s_achState[] = "CUXI";
13785
13786 char szGpr[17];
13787 for (unsigned i = 0; i < 16; i++)
13788 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
13789 szGpr[16] = '\0';
13790
13791 char szSegBase[X86_SREG_COUNT + 1];
13792 char szSegLimit[X86_SREG_COUNT + 1];
13793 char szSegAttrib[X86_SREG_COUNT + 1];
13794 char szSegSel[X86_SREG_COUNT + 1];
13795 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
13796 {
13797 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
13798 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
13799 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
13800 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
13801 }
13802 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
13803 = szSegSel[X86_SREG_COUNT] = '\0';
13804
13805 char szEFlags[8];
13806 for (unsigned i = 0; i < 7; i++)
13807 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
13808 szEFlags[7] = '\0';
13809
13810                 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
13811 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
13812 }
13813#endif
13814
13815 /*
13816 * Advance.
13817 */
13818 pCallEntry++;
13819#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
13820 idxCurCall++;
13821#endif
13822 }
13823
13824 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
13825 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
13826 if (!cThreadedCalls)
13827 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
13828
13829 /*
13830 * Emit the epilog code.
13831 */
13832 uint32_t idxReturnLabel;
13833 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
13834
13835 /*
13836 * Generate special jump labels.
13837 */
13838 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
13839 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
13840 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
13841 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
13842 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
13843 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
13844 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
13845 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
13846 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
13847 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
13848 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
13849 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
13850 }
13851 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
13852 {
13853 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
13854 return pTb;
13855 }
13856 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
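    /* If any emitter failed, it threw / longjmp'ed back into the catch block above and the
       unmodified threaded TB has already been returned; from here on code generation succeeded. */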
13857 Assert(off <= pReNative->cInstrBufAlloc);
13858
13859 /*
13860      * Make sure all labels have been defined.
13861 */
13862 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
13863#ifdef VBOX_STRICT
13864 uint32_t const cLabels = pReNative->cLabels;
13865 for (uint32_t i = 0; i < cLabels; i++)
13866 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
13867#endif
13868
13869 /*
13870 * Allocate executable memory, copy over the code we've generated.
13871 */
13872 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
13873 if (pTbAllocator->pDelayedFreeHead)
13874 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
13875
13876 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
13877 AssertReturn(paFinalInstrBuf, pTb);
13878 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
13879
13880 /*
13881 * Apply fixups.
13882 */
13883 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
13884 uint32_t const cFixups = pReNative->cFixups;
13885 for (uint32_t i = 0; i < cFixups; i++)
13886 {
13887 Assert(paFixups[i].off < off);
13888 Assert(paFixups[i].idxLabel < cLabels);
13889 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
13890 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
13891 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
13892 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
13893 switch (paFixups[i].enmType)
13894 {
13895#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
13896 case kIemNativeFixupType_Rel32:
13897 Assert(paFixups[i].off + 4 <= off);
13898 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13899 continue;
13900
13901#elif defined(RT_ARCH_ARM64)
13902 case kIemNativeFixupType_RelImm26At0:
13903 {
13904 Assert(paFixups[i].off < off);
13905 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13906 Assert(offDisp >= -262144 && offDisp < 262144);
13907 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
13908 continue;
13909 }
13910
13911 case kIemNativeFixupType_RelImm19At5:
13912 {
13913 Assert(paFixups[i].off < off);
13914 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13915 Assert(offDisp >= -262144 && offDisp < 262144);
13916 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
13917 continue;
13918 }
13919
13920 case kIemNativeFixupType_RelImm14At5:
13921 {
13922 Assert(paFixups[i].off < off);
13923 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
13924 Assert(offDisp >= -8192 && offDisp < 8192);
13925 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
13926 continue;
13927 }
13928
13929#endif
13930 case kIemNativeFixupType_Invalid:
13931 case kIemNativeFixupType_End:
13932 break;
13933 }
13934 AssertFailed();
13935 }
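    /* Worked example (hypothetical offsets): a Rel32 fixup recorded at buffer offset 0x80
       (offsets are bytes on AMD64) against a label bound at offset 0x100 with offAddend = -4
       stores 0x100 - 0x80 + (-4) = 0x7c as the 32-bit displacement, per the calculation above. */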
13936
13937 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
13938 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
13939
13940 /*
13941 * Convert the translation block.
13942 */
13943 RTMemFree(pTb->Thrd.paCalls);
13944 pTb->Native.paInstructions = paFinalInstrBuf;
13945 pTb->Native.cInstructions = off;
13946 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
13947#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13948     pTb->pDbgInfo       = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
13949 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
13950#endif
13951
13952 Assert(pTbAllocator->cThreadedTbs > 0);
13953 pTbAllocator->cThreadedTbs -= 1;
13954 pTbAllocator->cNativeTbs += 1;
13955 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
13956
13957#ifdef LOG_ENABLED
13958 /*
13959 * Disassemble to the log if enabled.
13960 */
13961 if (LogIs3Enabled())
13962 {
13963 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
13964 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
13965# ifdef DEBUG_bird
13966 RTLogFlush(NULL);
13967# endif
13968 }
13969#endif
13970 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
13971
13972 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
13973 return pTb;
13974}
13975