source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@106432

Last change on this file: r106432, checked in by vboxsync

VMM/IEM: Reduced the arguments for iemNativeEmitStackPushRip and eliminated some unused 'push sreg' code from a copy&paste. bugref:10720

1/* $Id: IEMAllN8veRecompFuncs.h 106432 2024-10-17 11:30:41Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62#include "target-x86/IEMAllN8veEmit-x86.h"
63
64
65/*
66 * Narrow down configs here to avoid wasting time on unused configs.
67 * Note! Same checks in IEMAllThrdRecompiler.cpp.
68 */
69
70#ifndef IEM_WITH_CODE_TLB
71# error The code TLB must be enabled for the recompiler.
72#endif
73
74#ifndef IEM_WITH_DATA_TLB
75# error The data TLB must be enabled for the recompiler.
76#endif
77
78#ifndef IEM_WITH_SETJMP
79# error The setjmp approach must be enabled for the recompiler.
80#endif
81
82#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
83# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
84#endif
85
86
87/*********************************************************************************************************************************
88* Code emitters for flushing pending guest register writes and sanity checks *
89*********************************************************************************************************************************/
90
91#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
92
93# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
94/**
95 * Updates IEMCPU::uPcUpdatingDebug.
96 */
97DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
98{
99# ifdef RT_ARCH_AMD64
100 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
101 {
102 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
103 if ((int32_t)offDisp == offDisp || cBits != 64)
104 {
105 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
106 if (cBits == 64)
107 pCodeBuf[off++] = X86_OP_REX_W;
108 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
109 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
110 if ((int8_t)offDisp == offDisp)
111 pCodeBuf[off++] = (int8_t)offDisp;
112 else
113 {
114 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
115 off += sizeof(int32_t);
116 }
117 }
118 else
119 {
120 /* mov tmp0, imm64 */
121 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
122
123 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
124 if (cBits == 64)
125 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
126 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
127 pCodeBuf[off++] = X86_OP_REX_R;
128 pCodeBuf[off++] = 0x01;
129 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
130 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
131 }
132 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
133 return off;
134 }
135# endif
136
137 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
138 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
139
140 if (pReNative->Core.fDebugPcInitialized)
141 {
142 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
143 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
144 }
145 else
146 {
147 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
148 pReNative->Core.fDebugPcInitialized = true;
149 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
150 }
151
152 if (cBits == 64)
153 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
154 else
155 {
156 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
157 if (cBits == 16)
158 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
159 }
160
161 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
162 IEMNATIVE_REG_FIXED_TMP0);
163
164 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
165 iemNativeRegFreeTmp(pReNative, idxTmpReg);
166 return off;
167}
168
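/*
 * Illustrative sketch (not emitted verbatim) of what the AMD64 fast path above
 * produces, assuming pVCpu addressing via the fixed VMCPU register:
 *      add [q]word [pVCpu + RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug)], imm8/imm32   ; REX.W when cBits == 64
 * or, when offDisp needs a full 64-bit immediate:
 *      mov tmp0, imm64
 *      add [pVCpu + RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug)], tmp0
 * The generic tail of the function loads uPcUpdatingDebug (or cpum.GstCtx.rip on
 * first use) into a temporary, adds offDisp (masking to 16 bits when cBits == 16)
 * and stores the result back.
 */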
169
170# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
171DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
172{
173 /* Compare the shadow with the context value, they should match. */
174 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
175 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
176 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
177 return off;
178}
179# endif
180
181#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
182
183/**
184 * Flushes delayed write of a specific guest register.
185 *
186 * This must be called prior to calling CImpl functions and any helpers that use
187 * the guest state (like raising exceptions) and such.
188 *
189 * This optimization has not yet been implemented. The first target would be
190 * RIP updates, since these are the most common ones.
191 */
192DECL_INLINE_THROW(uint32_t)
193iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
194{
195#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
196 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
197#endif
198
199#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
200#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
201 if ( enmClass == kIemNativeGstRegRef_EFlags
202 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
203 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
204#else
205 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
206#endif
207
208 if ( enmClass == kIemNativeGstRegRef_Gpr
209 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
210 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
211#endif
212
213#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
214 if ( enmClass == kIemNativeGstRegRef_XReg
215 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
216 {
217 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
218 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
219 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
220
221 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
222 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
223 }
224#endif
225 RT_NOREF(pReNative, enmClass, idxReg);
226 return off;
227}
228
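/*
 * Hypothetical usage sketch: a caller about to pass a register reference to a
 * CImpl helper would flush any delayed write of that particular register first,
 * e.g.
 *      off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegBeingReferenced);
 * where iGRegBeingReferenced is a placeholder for whatever guest GPR index the
 * caller is referencing (not an identifier from this file).
 */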
229
230
231/*********************************************************************************************************************************
232* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
233*********************************************************************************************************************************/
234
235#undef IEM_MC_BEGIN /* unused */
236#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
237 { \
238 Assert(pReNative->Core.bmVars == 0); \
239 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
240 Assert(pReNative->Core.bmStack == 0); \
241 pReNative->fMc = (a_fMcFlags); \
242 pReNative->fCImpl = (a_fCImplFlags); \
243 pReNative->cArgsX = (a_cArgsIncludingHidden)
244
245/** We have to get to the end in recompilation mode, as otherwise we won't
246 * generate code for all the IEM_MC_IF_XXX branches. */
247#define IEM_MC_END() \
248 iemNativeVarFreeAll(pReNative); \
249 } return off
250
251
252
253/*********************************************************************************************************************************
254* Liveness Stubs *
255*********************************************************************************************************************************/
256
257#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
258#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
259#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
260
261#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
262#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
263#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
264
265#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
266#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
267#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
268
269#define IEM_MC_LIVENESS_MXCSR_INPUT() ((void)0)
270#define IEM_MC_LIVENESS_MXCSR_CLOBBER() ((void)0)
271#define IEM_MC_LIVENESS_MXCSR_MODIFY() ((void)0)
272
273
274/*********************************************************************************************************************************
275* Native Emitter Support. *
276*********************************************************************************************************************************/
277
278#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
279
280#define IEM_MC_NATIVE_ELSE() } else {
281
282#define IEM_MC_NATIVE_ENDIF() } ((void)0)
283
284
285#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
286 off = a_fnEmitter(pReNative, off)
287
288#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
289 off = a_fnEmitter(pReNative, off, (a0))
290
291#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
292 off = a_fnEmitter(pReNative, off, (a0), (a1))
293
294#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
295 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
296
297#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
298 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
299
300#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
301 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
302
303#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
304 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
305
306#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
307 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
308
309#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
310 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
311
312#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
313 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
314
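/*
 * Hypothetical usage sketch of the IEM_MC_NATIVE_IF/ELSE/ENDIF and EMIT macros
 * above (iemNativeEmit_placeholder and the variable names are made up, not actual
 * emitters from this file):
 *      IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *          IEM_MC_NATIVE_EMIT_2(iemNativeEmit_placeholder, idxVarDst, idxVarSrc);
 *      IEM_MC_NATIVE_ELSE()
 *          // ... generic IEM_MC_* statements for other hosts ...
 *      IEM_MC_NATIVE_ENDIF();
 * IEM_MC_NATIVE_IF opens the brace itself, so the branch bodies need none of
 * their own.
 */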
315
316#ifndef RT_ARCH_AMD64
317# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
318#else
319/** @note This is a naive approach that ASSUMES that the register isn't
320 * allocated, so it only works safely for the first allocation(s) in
321 * an MC block. */
322# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
323 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
324
325DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
326 uint32_t off, bool fAllocated);
327
328DECL_INLINE_THROW(uint32_t)
329iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
330{
331 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
332 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
333 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
334
335# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
336 /* Must flush the register if it holds pending writes. */
337 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
338 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
339 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
340# endif
341
342 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
343 return off;
344}
345
346#endif /* RT_ARCH_AMD64 */
347
348
349
350/*********************************************************************************************************************************
351* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
352*********************************************************************************************************************************/
353
354#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
355 pReNative->fMc = 0; \
356 pReNative->fCImpl = (a_fFlags); \
357 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
358 a_cbInstr) /** @todo not used ... */
359
360
361#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
362 pReNative->fMc = 0; \
363 pReNative->fCImpl = (a_fFlags); \
364 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
365
366DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
367 uint8_t idxInstr, uint64_t a_fGstShwFlush,
368 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
369{
370 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
371}
372
373
374#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
375 pReNative->fMc = 0; \
376 pReNative->fCImpl = (a_fFlags); \
377 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
378 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
379
380DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
381 uint8_t idxInstr, uint64_t a_fGstShwFlush,
382 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
383{
384 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
385}
386
387
388#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
389 pReNative->fMc = 0; \
390 pReNative->fCImpl = (a_fFlags); \
391 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
392 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
393
394DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
395 uint8_t idxInstr, uint64_t a_fGstShwFlush,
396 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
397 uint64_t uArg2)
398{
399 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
400}
401
402
403
404/*********************************************************************************************************************************
405* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
406*********************************************************************************************************************************/
407
408/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
409 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
410DECL_INLINE_THROW(uint32_t)
411iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
412{
413 /*
414 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
415 * return with a special status code and make the execution loop deal with
416 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
417 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
418 * could continue w/o interruption, it probably will drop into the
419 * debugger, so it is not worth the effort of trying to service it here and we
420 * just lump it in with the handling of the others.
421 *
422 * To simplify the code and the register state management even more (wrt
423 * the immediate in the AND operation), we always update the flags and skip
424 * the extra check and its associated conditional jump.
425 */
426 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
427 <= UINT32_MAX);
428#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
429 AssertMsg( pReNative->idxCurCall == 0
430 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
431 IEMLIVENESSBIT_IDX_EFL_OTHER)),
432 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
433 IEMLIVENESSBIT_IDX_EFL_OTHER)));
434#endif
435
436 /*
437 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
438 * any pending register writes must be flushed.
439 */
440 off = iemNativeRegFlushPendingWrites(pReNative, off);
441
442 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
443 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER),
444 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER));
445 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_ReturnWithFlags>(pReNative, off, idxEflReg,
446 X86_EFL_TF
447 | CPUMCTX_DBG_HIT_DRX_MASK
448 | CPUMCTX_DBG_DBGF_MASK);
449 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
450 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
451
452 /* Free but don't flush the EFLAGS register. */
453 iemNativeRegFreeTmp(pReNative, idxEflReg);
454
455 return off;
456}
457
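/*
 * Rough pseudo-sequence of what the above emits (illustrative only):
 *      efl = <guest EFLAGS, allocated for update>
 *      if (efl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          exit TB via ReturnWithFlags;
 *      efl &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 *      cpum.GstCtx.eflags = efl;            // unconditional writeback
 */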
458
459/** Helper for iemNativeEmitFinishInstructionWithStatus. */
460DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
461{
462 unsigned const offOpcodes = pCallEntry->offOpcode;
463 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
464 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
465 {
466 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
467 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
468 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
469 }
470 AssertFailedReturn(NIL_RTGCPHYS);
471}
472
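/*
 * Worked example with made-up numbers: given two ranges { offOpcodes=0, cbOpcodes=0x30 }
 * and { offOpcodes=0x30, cbOpcodes=0x10 }, a call entry with offOpcode=0x35 misses the
 * first range (0x35 >= 0x30) and hits the second (0x35 - 0x30 = 5 < 0x10), yielding
 * that range's physical page address + offPhysPage + 5.
 */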
473
474/** The VINF_SUCCESS dummy. */
475template<int const a_rcNormal, bool const a_fIsJump>
476DECL_FORCE_INLINE_THROW(uint32_t)
477iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
478 int32_t const offJump)
479{
480 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
481 if (a_rcNormal != VINF_SUCCESS)
482 {
483#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
484 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
485#else
486 RT_NOREF_PV(pCallEntry);
487#endif
488
489 /* As this code returns from the TB any pending register writes must be flushed. */
490 off = iemNativeRegFlushPendingWrites(pReNative, off);
491
492 /*
493 * If we're in a conditional, mark the current branch as exiting so we
494 * can disregard its state when we hit the IEM_MC_ENDIF.
495 */
496 iemNativeMarkCurCondBranchAsExiting(pReNative);
497
498 /*
499 * Use the lookup table for getting to the next TB quickly.
500 * Note! In this code path there can only be one entry at present.
501 */
502 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
503 PCIEMTB const pTbOrg = pReNative->pTbOrg;
504 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
505 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
506
507#if 0
508 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
509 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
510 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
511 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
512 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
513
514 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
515
516#else
517 /* Load the index as argument #1 for the helper call at the given label. */
518 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
519
520 /*
521 * Figure out the physical address of the current instruction and see
522 * whether the next instruction we're about to execute is in the same
523 * page so we can optimistically skip TLB loading.
524 *
525 * - This is safe for all cases in FLAT mode.
526 * - In segmented modes it is complicated, given that a negative
527 * jump may underflow EIP and a forward jump may overflow or run into
528 * CS.LIM and trigger a #GP. The only thing we can get away with
529 * now at compile time is forward jumps w/o CS.LIM checks, since the
530 * lack of CS.LIM checks means we're good for the entire physical page
531 * we're executing on and another 15 bytes before we run into CS.LIM.
532 */
533 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
534# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
535 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
536# endif
537 )
538 {
539 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
540 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
541 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
542 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
543
544 {
545 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
546 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
547
548 /* Load the key lookup flags into the 2nd argument for the helper call.
549 - This is safe wrt CS limit checking since we're only here for FLAT modes.
550 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
551 interrupt shadow.
552 - The NMI inhibiting is more questionable, though... */
553 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
554 * Should we copy it into fExec to simplify this? OTOH, it's just a
555 * couple of extra instructions if EFLAGS are already in a register. */
556 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
557 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
558
559 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
560 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookup>(pReNative, off);
561 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithIrq>(pReNative, off);
562 }
563 }
564 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
565 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlb>(pReNative, off);
566 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq>(pReNative, off);
567#endif
568 }
569 return off;
570}
571
572
573#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
574 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
575 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
576
577#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
578 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
579 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
580 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
581
582/** Same as iemRegAddToRip64AndFinishingNoFlags. */
583DECL_INLINE_THROW(uint32_t)
584iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
585{
586#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
587# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
588 if (!pReNative->Core.offPc)
589 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
590# endif
591
592 /* Allocate a temporary PC register. */
593 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
594
595 /* Perform the addition and store the result. */
596 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
597 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
598
599 /* Free but don't flush the PC register. */
600 iemNativeRegFreeTmp(pReNative, idxPcReg);
601#endif
602
603#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
604 pReNative->Core.offPc += cbInstr;
605 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
606# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
607 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
608 off = iemNativeEmitPcDebugCheck(pReNative, off);
609# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
610 off = iemNativePcAdjustCheck(pReNative, off);
611# endif
612 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
613#endif
614
615 return off;
616}
617
618
619#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
620 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
621 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
622
623#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
624 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
625 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
626 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
627
628/** Same as iemRegAddToEip32AndFinishingNoFlags. */
629DECL_INLINE_THROW(uint32_t)
630iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
631{
632#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
633# ifdef IEMNATIVE_REG_FIXED_PC_DBG
634 if (!pReNative->Core.offPc)
635 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
636# endif
637
638 /* Allocate a temporary PC register. */
639 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
640
641 /* Perform the addition and store the result. */
642 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
643 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
644
645 /* Free but don't flush the PC register. */
646 iemNativeRegFreeTmp(pReNative, idxPcReg);
647#endif
648
649#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
650 pReNative->Core.offPc += cbInstr;
651 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
652# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
653 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
654 off = iemNativeEmitPcDebugCheck(pReNative, off);
655# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
656 off = iemNativePcAdjustCheck(pReNative, off);
657# endif
658 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
659#endif
660
661 return off;
662}
663
664
665#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
666 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
667 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
668
669#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
670 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
671 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
672 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
673
674/** Same as iemRegAddToIp16AndFinishingNoFlags. */
675DECL_INLINE_THROW(uint32_t)
676iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
677{
678#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
679# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
680 if (!pReNative->Core.offPc)
681 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
682# endif
683
684 /* Allocate a temporary PC register. */
685 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
686
687 /* Perform the addition and store the result. */
688 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
689 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
690 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
691
692 /* Free but don't flush the PC register. */
693 iemNativeRegFreeTmp(pReNative, idxPcReg);
694#endif
695
696#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
697 pReNative->Core.offPc += cbInstr;
698 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
699# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
700 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
701 off = iemNativeEmitPcDebugCheck(pReNative, off);
702# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
703 off = iemNativePcAdjustCheck(pReNative, off);
704# endif
705 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
706#endif
707
708 return off;
709}
710
711
712/*********************************************************************************************************************************
713* Common code for changing PC/RIP/EIP/IP. *
714*********************************************************************************************************************************/
715
716/**
717 * Emits code to check if the content of @a idxAddrReg is a canonical address,
718 * raising a \#GP(0) if it isn't.
719 *
720 * @returns New code buffer offset, UINT32_MAX on failure.
721 * @param pReNative The native recompile state.
722 * @param off The code buffer offset.
723 * @param idxAddrReg The host register with the address to check.
724 * @param idxInstr The current instruction.
725 */
726DECL_FORCE_INLINE_THROW(uint32_t)
727iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
728{
729 /*
730 * Make sure we don't have any outstanding guest register writes as we may
731 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
732 */
733 off = iemNativeRegFlushPendingWrites(pReNative, off);
734
735#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
736 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
737#else
738 RT_NOREF(idxInstr);
739#endif
740
741#ifdef RT_ARCH_AMD64
742 /*
743 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
744 * return raisexcpt();
745 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
746 */
747 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
748
749 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
750 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
751 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
752 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
753 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
754
755 iemNativeRegFreeTmp(pReNative, iTmpReg);
756
757#elif defined(RT_ARCH_ARM64)
758 /*
759 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
760 * return raisexcpt();
761 * ----
762 * mov x1, 0x800000000000
763 * add x1, x0, x1
764 * cmp xzr, x1, lsr 48
765 * b.ne .Lraisexcpt
766 */
767 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
768
769 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
770 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
771 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
772 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
773
774 iemNativeRegFreeTmp(pReNative, iTmpReg);
775
776#else
777# error "Port me"
778#endif
779 return off;
780}
781
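/*
 * Both variants above rely on the same trick: a 64-bit address is canonical iff
 * bits 63:47 are all equal, so adding 2^47 maps the two canonical ranges onto
 * 0x0000000000000000..0x0000ffffffffffff (mod 2^64) and the upper 16 bits of the
 * sum are zero exactly for canonical addresses. Plain C sketch of the ARM64
 * variant (illustrative only):
 *      bool const fCanonical = ((uAddr + UINT64_C(0x800000000000)) >> 48) == 0;
 * e.g. 0x00007fffffffffff and 0xffff800000000000 pass, 0x0000800000000000 fails.
 */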
782
783/**
784 * Emits code to check if the content of @a idxAddrReg is a canonical address,
785 * raising a \#GP(0) if it isn't.
786 *
787 * Caller makes sure everything is flushed, except maybe PC.
788 *
789 * @returns New code buffer offset, UINT32_MAX on failure.
790 * @param pReNative The native recompile state.
791 * @param off The code buffer offset.
792 * @param idxAddrReg The host register with the address to check.
793 * @param offDisp The relative displacement that has already been
794 * added to idxAddrReg and must be subtracted if
795 * raising a \#GP(0).
796 * @param idxInstr The current instruction.
797 */
798DECL_FORCE_INLINE_THROW(uint32_t)
799iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
800 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
801{
802#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
803 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
804#endif
805
806#ifdef RT_ARCH_AMD64
807 /*
808 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
809 * return raisexcpt();
810 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
811 */
812 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
813
814 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
815 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
816 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
817 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
818
819#elif defined(RT_ARCH_ARM64)
820 /*
821 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
822 * return raisexcpt();
823 * ----
824 * mov x1, 0x800000000000
825 * add x1, x0, x1
826 * cmp xzr, x1, lsr 48
827 * b.ne .Lraisexcpt
828 */
829 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
830
831 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
832 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
833 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
834#else
835# error "Port me"
836#endif
837
838 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
839 uint32_t const offFixup1 = off;
840 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
841
842 /* jump .Lnoexcept; Skip the #GP code. */
843 uint32_t const offFixup2 = off;
844 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
845
846 /* .Lraisexcpt: */
847 iemNativeFixupFixedJump(pReNative, offFixup1, off);
848#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
849 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
850#else
851 RT_NOREF(idxInstr);
852#endif
853
854 /* Undo the PC adjustment and store the old PC value. */
855 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
856 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxAddrReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
857
858 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
859
860 /* .Lnoexcept: */
861 iemNativeFixupFixedJump(pReNative, offFixup2, off);
862
863 iemNativeRegFreeTmp(pReNative, iTmpReg);
864 return off;
865}
866
867
868/**
869 * Emits code to check if the content of @a idxAddrReg is a canonical address,
870 * raising a \#GP(0) if it isn't.
871 *
872 * Caller makes sure everything is flushed, except maybe PC.
873 *
874 * @returns New code buffer offset, UINT32_MAX on failure.
875 * @param pReNative The native recompile state.
876 * @param off The code buffer offset.
877 * @param idxAddrReg The host register with the address to check.
878 * @param idxOldPcReg Register holding the old PC that offPc is relative
879 * to if available, otherwise UINT8_MAX.
880 * @param idxInstr The current instruction.
881 */
882DECL_FORCE_INLINE_THROW(uint32_t)
883iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
884 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
885{
886#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
887 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
888#endif
889
890#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
891# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
892 if (!pReNative->Core.offPc)
893# endif
894 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
895#else
896 RT_NOREF(idxInstr);
897#endif
898
899#ifdef RT_ARCH_AMD64
900 /*
901 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
902 * return raisexcpt();
903 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
904 */
905 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
906
907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
908 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
909 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
910 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
911
912#elif defined(RT_ARCH_ARM64)
913 /*
914 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
915 * return raisexcpt();
916 * ----
917 * mov x1, 0x800000000000
918 * add x1, x0, x1
919 * cmp xzr, x1, lsr 48
920 * b.ne .Lraisexcpt
921 */
922 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
923
924 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
925 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
926 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
927#else
928# error "Port me"
929#endif
930
931#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
932 if (pReNative->Core.offPc)
933 {
934 /** @todo On x86, it is said that conditional jumps forward are statically
935 * predicted as not taken, so this isn't a very good construct.
936 * Investigate whether it makes sense to invert it and add another
937 * jump. Also, find out wtf the static predictor does here on arm! */
938 uint32_t const offFixup = off;
939 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
940
941 /* .Lraisexcpt: */
942# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
943 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
944# endif
945 /* We need to update cpum.GstCtx.rip. */
946 if (idxOldPcReg == UINT8_MAX)
947 {
948 idxOldPcReg = iTmpReg;
949 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
950 }
951 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
952 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
953
954 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
955 iemNativeFixupFixedJump(pReNative, offFixup, off);
956 }
957 else
958#endif
959 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
960
961 iemNativeRegFreeTmp(pReNative, iTmpReg);
962
963 return off;
964}
965
966
967/**
968 * Emits code to check that the content of @a idxAddrReg is within the limit
969 * of CS, raising a \#GP(0) if it isn't.
970 *
971 * @returns New code buffer offset; throws VBox status code on error.
972 * @param pReNative The native recompile state.
973 * @param off The code buffer offset.
974 * @param idxAddrReg The host register (32-bit) with the address to
975 * check.
976 * @param idxInstr The current instruction.
977 */
978DECL_FORCE_INLINE_THROW(uint32_t)
979iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
980 uint8_t idxAddrReg, uint8_t idxInstr)
981{
982 /*
983 * Make sure we don't have any outstanding guest register writes as we may
984 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
985 */
986 off = iemNativeRegFlushPendingWrites(pReNative, off);
987
988#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
989 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
990#else
991 RT_NOREF(idxInstr);
992#endif
993
994 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
995 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
996 kIemNativeGstRegUse_ReadOnly);
997
998 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
999 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1000
1001 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1002 return off;
1003}
1004
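/*
 * Note that the exit above uses an unsigned 'above' condition, i.e. #GP(0) is only
 * raised when the 32-bit address is strictly greater than CS.LIM; an address equal
 * to the limit is still accepted, matching the inclusive expand-up segment limit
 * (valid offsets are 0..limit).
 */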
1005
1006
1007
1008/**
1009 * Emits code to check that the content of @a idxAddrReg is within the limit
1010 * of CS, raising a \#GP(0) if it isn't.
1011 *
1012 * Caller makes sure everything is flushed, except maybe PC.
1013 *
1014 * @returns New code buffer offset; throws VBox status code on error.
1015 * @param pReNative The native recompile state.
1016 * @param off The code buffer offset.
1017 * @param idxAddrReg The host register (32-bit) with the address to
1018 * check.
1019 * @param idxOldPcReg Register holding the old PC that offPc is relative
1020 * to if available, otherwise UINT8_MAX.
1021 * @param idxInstr The current instruction.
1022 */
1023DECL_FORCE_INLINE_THROW(uint32_t)
1024iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1025 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1026{
1027#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1028 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1029#endif
1030
1031#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1032# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1033 if (!pReNative->Core.offPc)
1034# endif
1035 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1036#else
1037 RT_NOREF(idxInstr);
1038#endif
1039
1040 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1041 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1042 kIemNativeGstRegUse_ReadOnly);
1043
1044 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1045#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1046 if (pReNative->Core.offPc)
1047 {
1048 uint32_t const offFixup = off;
1049 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1050
1051 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1052 if (idxOldPcReg == UINT8_MAX)
1053 {
1054 idxOldPcReg = idxAddrReg;
1055 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1056 }
1057 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1058 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1059# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1060 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1061# endif
1062 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
1063 iemNativeFixupFixedJump(pReNative, offFixup, off);
1064 }
1065 else
1066#endif
1067 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1068
1069 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1070 return off;
1071}
1072
1073
1074/*********************************************************************************************************************************
1075* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1076*********************************************************************************************************************************/
1077
1078#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1079 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1080 (a_enmEffOpSize), pCallEntry->idxInstr); \
1081 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1082
1083#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1084 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1085 (a_enmEffOpSize), pCallEntry->idxInstr); \
1086 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1087 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1088
1089#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1090 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1091 IEMMODE_16BIT, pCallEntry->idxInstr); \
1092 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1093
1094#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1095 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1096 IEMMODE_16BIT, pCallEntry->idxInstr); \
1097 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1098 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1099
1100#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1101 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1102 IEMMODE_64BIT, pCallEntry->idxInstr); \
1103 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1104
1105#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1106 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1107 IEMMODE_64BIT, pCallEntry->idxInstr); \
1108 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1109 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1110
1111
1112#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1113 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1114 (a_enmEffOpSize), pCallEntry->idxInstr); \
1115 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1116
1117#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1118 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1119 (a_enmEffOpSize), pCallEntry->idxInstr); \
1120 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1121 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1122
1123#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1124 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1125 IEMMODE_16BIT, pCallEntry->idxInstr); \
1126 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1127
1128#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1129 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1130 IEMMODE_16BIT, pCallEntry->idxInstr); \
1131 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1132 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1133
1134#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1135 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1136 IEMMODE_64BIT, pCallEntry->idxInstr); \
1137 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1138
1139#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1140 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1141 IEMMODE_64BIT, pCallEntry->idxInstr); \
1142 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1143 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1144
1145/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1146 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1147 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1148template<bool const a_fWithinPage>
1149DECL_INLINE_THROW(uint32_t)
1150iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1151 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1152{
1153 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1154#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1155 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1156 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1157 {
1158 /* No #GP checking required, just update offPc and get on with it. */
1159 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1160# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1161 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1162# endif
1163 }
1164 else
1165#endif
1166 {
1167 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1168 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1169 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1170
1171 /* Allocate a temporary PC register. */
1172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1173 kIemNativeGstRegUse_ForUpdate);
1174
1175 /* Perform the addition. */
1176 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1177
1178 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1179 {
1180 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1181 We can skip this if the target is within the same page. */
1182 if (!a_fWithinPage)
1183 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1184 (int64_t)offDisp + cbInstr, idxInstr);
1185 }
1186 else
1187 {
1188 /* Just truncate the result to 16-bit IP. */
1189 Assert(enmEffOpSize == IEMMODE_16BIT);
1190 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1191 }
1192
1193#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1194# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1195 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1196 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1197# endif
1198 /* Since we've already got the new PC value in idxPcReg, we can just as
1199 well write it out and reset offPc to zero. Otherwise, we'd need to use
1200 a copy of the shadow PC, which will cost another move instruction here. */
1201# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1202 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1203 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1204 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1205 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1206 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1207 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1208# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1209 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1210 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1211# endif
1212# endif
1213 pReNative->Core.offPc = 0;
1214#endif
1215
1216 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1217
1218 /* Free but don't flush the PC register. */
1219 iemNativeRegFreeTmp(pReNative, idxPcReg);
1220 }
1221 return off;
1222}
1223
1224
1225#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1226 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1227 (a_enmEffOpSize), pCallEntry->idxInstr); \
1228 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1229
1230#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1231 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1232 (a_enmEffOpSize), pCallEntry->idxInstr); \
1233 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1234 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1235
1236#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1237 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1238 IEMMODE_16BIT, pCallEntry->idxInstr); \
1239 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1240
1241#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1242 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1243 IEMMODE_16BIT, pCallEntry->idxInstr); \
1244 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1245 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1246
1247#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1248 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1249 IEMMODE_32BIT, pCallEntry->idxInstr); \
1250 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1251
1252#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1253 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1254 IEMMODE_32BIT, pCallEntry->idxInstr); \
1255 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1256 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1257
1258
1259#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1260 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1261 (a_enmEffOpSize), pCallEntry->idxInstr); \
1262 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1263
1264#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1265 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1266 (a_enmEffOpSize), pCallEntry->idxInstr); \
1267 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1268 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1269
1270#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1271 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1272 IEMMODE_16BIT, pCallEntry->idxInstr); \
1273 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1274
1275#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1276 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1277 IEMMODE_16BIT, pCallEntry->idxInstr); \
1278 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1279 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1280
1281#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1282 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1283 IEMMODE_32BIT, pCallEntry->idxInstr); \
1284 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1285
1286#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1287 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1288 IEMMODE_32BIT, pCallEntry->idxInstr); \
1289 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1290 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1291
1292/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1293 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1294 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1295template<bool const a_fFlat>
1296DECL_INLINE_THROW(uint32_t)
1297iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1298 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1299{
1300 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1301#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1302 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1303#endif
1304
1305 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1306 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1307 {
1308 off = iemNativeRegFlushPendingWrites(pReNative, off);
1309#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1310 Assert(pReNative->Core.offPc == 0);
1311#endif
1312 }
1313
1314 /* Allocate a temporary PC register. */
1315 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1316
1317 /* Perform the addition. */
1319 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1323
1324 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1325 if (enmEffOpSize == IEMMODE_16BIT)
1326 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1327
1328 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
1329 if (!a_fFlat)
1330 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1331
1332 /* Commit it. */
1333#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1334 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1335 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1336#endif
1337
1338 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1339#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1340 pReNative->Core.offPc = 0;
1341#endif
1342
1343 /* Free but don't flush the PC register. */
1344 iemNativeRegFreeTmp(pReNative, idxPcReg);
1345
1346 return off;
1347}
1348
1349
1350#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1351 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1352 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1353
1354#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1355 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1356 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1357 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1358
1359#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1360 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1361 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1362
1363#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1364 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1365 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1366 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1367
1368#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1369 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1370 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1371
1372#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1373 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1374 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1375 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1376
1377/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1378DECL_INLINE_THROW(uint32_t)
1379iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1380 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1381{
1382 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1383 off = iemNativeRegFlushPendingWrites(pReNative, off);
1384
1385#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1386 Assert(pReNative->Core.offPc == 0);
1387 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1388#endif
1389
1390 /* Allocate a temporary PC register. */
1391 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1392
1393 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1394 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1395 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1396 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1397#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1398 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1399 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1400#endif
1401 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1402
1403 /* Free but don't flush the PC register. */
1404 iemNativeRegFreeTmp(pReNative, idxPcReg);
1405
1406 return off;
1407}
1408
1409
1410
1411/*********************************************************************************************************************************
1412* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
1413*********************************************************************************************************************************/
1414
1415/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1416#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1417 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1418
1419/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1420#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1421 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1422
1423/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1424#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1425 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1426
1427/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1428 * clears flags. */
1429#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1430 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1431 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1432
1433/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1434 * clears flags. */
1435#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1436 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1437 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1438
1439/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1440 * clears flags. */
1441#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1442 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1443 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1444
1445#undef IEM_MC_SET_RIP_U16_AND_FINISH
1446
1447
1448/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1449#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1450 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1451
1452/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1453#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1454 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1455
1456/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1457 * clears flags. */
1458#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1459 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1460 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1461
1462/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1463 * and clears flags. */
1464#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1465 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1466 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1467
1468#undef IEM_MC_SET_RIP_U32_AND_FINISH
1469
1470
1471/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1472#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1473 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1474
1475/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1476 * and clears flags. */
1477#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1478 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1479 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1480
1481#undef IEM_MC_SET_RIP_U64_AND_FINISH
1482
1483
1484/** Same as iemRegRipJumpU16AndFinishNoFlags,
1485 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1486DECL_INLINE_THROW(uint32_t)
1487iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1488 uint8_t idxInstr, uint8_t cbVar)
1489{
1490 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1491 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1492
1493 /* If we can't rule out a #GP(0) below, flush all dirty registers except for
1494 PC, which will be handled specially by the two workers below if they raise a #GP(0). */
1495 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
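/* A #GP(0) is only possible when a 64-bit target needs the canonical check (value wider than 32 bits) or when a non-flat 16/32-bit target must be checked against CS.LIM. */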
1496 uint8_t const idxOldPcReg = fMayRaiseGp0
1497 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1498 : UINT8_MAX;
1499 if (fMayRaiseGp0)
1500 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1501
1502 /* Get a register with the new PC loaded from idxVarPc.
1503 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1504 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1505
1506 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1507 if (fMayRaiseGp0)
1508 {
1509 if (f64Bit)
1510 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1511 else
1512 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1513 }
1514
1515 /* Store the result. */
1516 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1517
1518#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1519 pReNative->Core.offPc = 0;
1520 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1521# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1522 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1523 pReNative->Core.fDebugPcInitialized = true;
1524 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1525# endif
1526#endif
1527
1528 if (idxOldPcReg != UINT8_MAX)
1529 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1530 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1531 /** @todo implicitly free the variable? */
1532
1533 return off;
1534}
1535
1536
1537
1538/*********************************************************************************************************************************
1539* Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
1540*********************************************************************************************************************************/
1541
1542/** @todo These helpers belong to the stack push API naturally but we already need them up here (we could of course move
1543 * this below the stack emitters but then this is not close to the rest of the PC/RIP handling...). */
1544DECL_FORCE_INLINE_THROW(uint32_t)
1545iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1546{
1547 /* Use16BitSp: */
1548#ifdef RT_ARCH_AMD64
1549 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1550 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1551#else
1552 /* sub regeff, regrsp, #cbMem */
1553 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1554 /* and regeff, regeff, #0xffff */
1555 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1556 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1557 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into idxRegRsp bits 15:0, keeping the other RSP bits as is. */
1558 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1559#endif
1560 return off;
1561}
1562
1563
1564DECL_FORCE_INLINE(uint32_t)
1565iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1566{
1567 /* Use32BitSp: */
1568 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1569 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1570 return off;
1571}
1572
1573
1574template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
1575DECL_INLINE_THROW(uint32_t)
1576iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1577 uintptr_t pfnFunction, uint8_t idxInstr)
1578{
1579 AssertCompile(a_cBitsVar == 16 || a_cBitsVar == 32 || a_cBitsVar == 64);
1580 AssertCompile(a_cBitsFlat == 0 || a_cBitsFlat == 32 || a_cBitsFlat == 64);
1581
1582 /*
1583 * Assert sanity.
1584 */
1585#ifdef VBOX_STRICT
1586 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1587 {
1588 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1589 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1590 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1591 Assert( pfnFunction
1592 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1593 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1594 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1595 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1596 : UINT64_C(0xc000b000a0009000) ));
1597 }
1598 else
1599 Assert( pfnFunction
1600 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1601 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1602 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1603 : UINT64_C(0xc000b000a0009000) ));
1604#endif
1605
1606#ifdef VBOX_STRICT
1607 /*
1608 * Check that the fExec flags we've got make sense.
1609 */
1610 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1611#endif
1612
1613 /*
1614 * To keep things simple we have to commit any pending writes first as we
1615 * may end up making calls.
1616 */
1617 /** @todo we could postpone this till we make the call and reload the
1618 * registers after returning from the call. Not sure if that's sensible or
1619 * not, though. */
1620 off = iemNativeRegFlushPendingWrites(pReNative, off);
1621
1622 /*
1623 * First we calculate the new RSP and the effective stack pointer value.
1624 * For 64-bit mode and flat 32-bit these two are the same.
1625 * (Code structure is very similar to that of PUSH)
1626 */
1627 RT_CONSTEXPR
1628 uint8_t const cbMem = a_cBitsVar / 8;
1629 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1630 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1631 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
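/* In flat modes RSP itself is the effective stack pointer; otherwise a separate temporary is needed because only the low 16 or 32 bits of RSP are used for addressing, depending on SS.ATTR.D. */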
1632 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1633 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1634 {
1635 Assert(idxRegEffSp == idxRegRsp);
1636 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
1637 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1638 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
1639 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1640 else
1641 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1642 }
1643 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1644 {
1645 Assert(idxRegEffSp != idxRegRsp);
1646 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1647 kIemNativeGstRegUse_ReadOnly);
1648#ifdef RT_ARCH_AMD64
1649 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1650#else
1651 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1652#endif
1653 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1654 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1655 offFixupJumpToUseOtherBitSp = off;
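/* The conditional jump emitted just below is recorded (offFixupJumpToUseOtherBitSp) and later patched to branch to the out-of-line code handling the other stack pointer width (see the Use16BitSp block further down). */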
1656 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1657 {
1658 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1659 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1660 }
1661 else
1662 {
1663 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1664 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1665 }
1666 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1667 }
1668 /* SpUpdateEnd: */
1669 uint32_t const offLabelSpUpdateEnd = off;
1670
1671 /*
1672 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1673 * we're skipping lookup).
1674 */
1675 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1676 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1677 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1678 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1679 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1680 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1681 : UINT32_MAX;
1682 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
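/* If TlbState.fSkip is set, no inline TLB lookup is emitted and we branch straight to the TlbMiss helper call; otherwise the lookup code follows at the TlbLookup label. */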
1683
1684
1685 if (!TlbState.fSkip)
1686 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1687 else
1688 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1689
1690 /*
1691 * Use16BitSp:
1692 */
1693 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
1694 {
1695#ifdef RT_ARCH_AMD64
1696 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1697#else
1698 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1699#endif
1700 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1701 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1702 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1703 else
1704 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1705 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1706 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1707 }
1708
1709 /*
1710 * TlbMiss:
1711 *
1712 * Call helper to do the pushing.
1713 */
1714 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1715
1716#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1717 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1718#else
1719 RT_NOREF(idxInstr);
1720#endif
1721
1722 /* Save variables in volatile registers. */
1723 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1724 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1725 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1726 | (RT_BIT_32(idxRegPc));
1727 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1728
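/* Load the helper arguments: ARG1 = effective stack pointer, ARG2 = the RIP value being pushed; the ordering/temporary below ensures we never clobber a source register that is itself an argument register. */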
1729 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1730 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1731 {
1732 /* Swap them using ARG0 as temp register: */
1733 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1734 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1735 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1736 }
1737 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1738 {
1739 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1740 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1741
1742 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1743 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1745 }
1746 else
1747 {
1748 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1749 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1750
1751 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1752 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1753 }
1754
1755#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
1756 /* Do delayed EFLAGS calculations. */
1757 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
1758 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
1759#endif
1760
1761 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1762 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1763
1764 /* Done setting up parameters, make the call. */
1765 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
1766
1767 /* Restore variables and guest shadow registers to volatile registers. */
1768 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1769 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1770
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1772 if (!TlbState.fSkip)
1773 {
1774 /* end of TlbMiss - Jump to the done label. */
1775 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1776 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1777
1778 /*
1779 * TlbLookup:
1780 */
1781 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1,
1782 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1783
1784 /*
1785 * Emit code to do the actual storing / fetching.
1786 */
1787 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1788# ifdef IEM_WITH_TLB_STATISTICS
1789 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1790 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1791# endif
1792 AssertCompile(cbMem == 2 || cbMem == 4 || cbMem == 8);
1793 if RT_CONSTEXPR_IF(cbMem == 2)
1794 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1795 else if RT_CONSTEXPR_IF(cbMem == 4)
1796 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1797 else
1798 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1799
1800 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1801 TlbState.freeRegsAndReleaseVars(pReNative);
1802
1803 /*
1804 * TlbDone:
1805 *
1806 * Commit the new RSP value.
1807 */
1808 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1809 }
1810#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1811
1812#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1813 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1814#endif
1815 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1816 if (idxRegEffSp != idxRegRsp)
1817 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1818
1819 return off;
1820}
1821
1822
1823/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1824#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1825 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1826
1827/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1828 * clears flags. */
1829#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1830 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1831 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1832
1833/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1834#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1835 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1836
1837/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1838 * clears flags. */
1839#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1840 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1841 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1842
1843#undef IEM_MC_IND_CALL_U16_AND_FINISH
1844
1845
1846/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1847#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1848 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1849
1850/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1851 * clears flags. */
1852#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1853 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1854 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1855
1856#undef IEM_MC_IND_CALL_U32_AND_FINISH
1857
1858
1859/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1860 * an extra parameter, for use in 64-bit code. */
1861#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1862 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1863
1864
1865/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1866 * an extra parameter, for use in 64-bit code and we need to check and clear
1867 * flags. */
1868#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1869 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1870 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1871
1872#undef IEM_MC_IND_CALL_U64_AND_FINISH
1873
1874/** Worker for the IEM_MC_IND_CALL_U16/U32/U64_AND_FINISH_THREADED_PCxx variants
1875 * above: pushes the return address and sets RIP from the variable. */
1876DECL_INLINE_THROW(uint32_t)
1877iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1878 uint8_t idxInstr, uint8_t cbVar)
1879{
1880 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1881 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1882
1883 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1884 off = iemNativeRegFlushPendingWrites(pReNative, off);
1885
1886#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1887 Assert(pReNative->Core.offPc == 0);
1888 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1889#endif
1890
1891 /* Get a register with the new PC loaded from idxVarPc.
1892 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1893 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1894
1895 /* Check limit (may #GP(0) + exit TB). */
1896 if (!f64Bit)
1897/** @todo we can skip this test in FLAT 32-bit mode. */
1898 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1899 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1900 else if (cbVar > sizeof(uint32_t))
1901 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1902
1903#if 1
1904 /* Allocate a temporary PC register, we don't want it shadowed. */
1905 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1906 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1907#else
1908 /* Allocate a temporary PC register. */
1909 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1910 true /*fNoVolatileRegs*/);
1911#endif
1912
1913 /* Perform the addition and push the variable to the guest stack. */
1914 /** @todo Flat variants for PC32 variants. */
1915 switch (cbVar)
1916 {
1917 case sizeof(uint16_t):
1918 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1919 /* Truncate the result to 16-bit IP. */
1920 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1921 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1922 break;
1923 case sizeof(uint32_t):
1924 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1925 /** @todo In FLAT mode we can use the flat variant. */
1926 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1927 break;
1928 case sizeof(uint64_t):
1929 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1930 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1931 break;
1932 default:
1933 AssertFailed();
1934 }
1935
1936 /* RSP got changed, so do this again. */
1937 off = iemNativeRegFlushPendingWrites(pReNative, off);
1938
1939 /* Store the result. */
1940 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1941#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1942 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1943 pReNative->Core.fDebugPcInitialized = true;
1944 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1945#endif
1946
1947#if 1
1948 /* Need to transfer the shadow information to the new RIP register. */
1949 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1950#else
1951 /* Sync the new PC. */
1952 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1953#endif
1954 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1955 iemNativeRegFreeTmp(pReNative, idxPcReg);
1956 /** @todo implicitly free the variable? */
1957
1958 return off;
1959}
1960
1961
1962/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1963 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1964#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1965 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1966
1967/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1968 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1969 * flags. */
1970#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1971 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1972 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1973
1974/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1975 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1976#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1977 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1978
1979/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1980 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1981 * flags. */
1982#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1983 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1984 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1985
1986/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1987 * an extra parameter, for use in 64-bit code. */
1988#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1989 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1990
1991/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1992 * an extra parameter, for use in 64-bit code and we need to check and clear
1993 * flags. */
1994#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1995 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1996 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1997
1998#undef IEM_MC_REL_CALL_S16_AND_FINISH
1999
2000/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2001 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2002DECL_INLINE_THROW(uint32_t)
2003iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2004 uint8_t idxInstr)
2005{
2006 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2007 off = iemNativeRegFlushPendingWrites(pReNative, off);
2008
2009#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2010 Assert(pReNative->Core.offPc == 0);
2011 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2012#endif
2013
2014 /* Allocate a temporary PC register. */
2015 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2016 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2017 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
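/* idxPcRegOld ends up holding the (16-bit) return address that gets pushed, while idxPcRegNew receives the call target and takes over the PC shadowing at the end. */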
2018
2019 /* Calculate the return address and the new IP. */
2020 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2021 /* Truncate the return address to 16-bit IP. */
2022 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2023 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2024 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2025
2026 /* Truncate the new IP to 16 bits. */
2027 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2028
2029 /* Check limit (may #GP(0) + exit TB). */
2030 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2031
2032 /* Push the return address onto the guest stack. */
2033 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2034
2035 /* RSP got changed, so flush again. */
2036 off = iemNativeRegFlushPendingWrites(pReNative, off);
2037
2038 /* Store the result. */
2039 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2040#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2041 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2042 pReNative->Core.fDebugPcInitialized = true;
2043 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2044#endif
2045
2046 /* Need to transfer the shadow information to the new RIP register. */
2047 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2048 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2049 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2050
2051 return off;
2052}
2053
2054
2055/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2056 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2057#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2058 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2059
2060/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2061 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2062 * flags. */
2063#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2064 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2065 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2066
2067#undef IEM_MC_REL_CALL_S32_AND_FINISH
2068
2069/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2070 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2071DECL_INLINE_THROW(uint32_t)
2072iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2073 uint8_t idxInstr)
2074{
2075 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2076 off = iemNativeRegFlushPendingWrites(pReNative, off);
2077
2078#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2079 Assert(pReNative->Core.offPc == 0);
2080 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2081#endif
2082
2083 /* Allocate a temporary PC register. */
2084 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2085 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2086 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2087
2088 /* Update the EIP to get the return address. */
2089 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2090
2091 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2092 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2093 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2094 /** @todo we can skip this test in FLAT 32-bit mode. */
2095 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2096
2097 /* Push the return address onto the guest stack. */
2098 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2099 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2100
2101 /* RSP got changed, so do this again. */
2102 off = iemNativeRegFlushPendingWrites(pReNative, off);
2103
2104 /* Store the result. */
2105 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2106#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2107 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2108 pReNative->Core.fDebugPcInitialized = true;
2109 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2110#endif
2111
2112 /* Need to transfer the shadow information to the new RIP register. */
2113 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2114 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2115 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2116
2117 return off;
2118}
2119
2120
2121/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2122 * an extra parameter, for use in 64-bit code. */
2123#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2124 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2125
2126/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2127 * an extra parameter, for use in 64-bit code and we need to check and clear
2128 * flags. */
2129#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2130 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2131 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2132
2133#undef IEM_MC_REL_CALL_S64_AND_FINISH
2134
2135/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2136 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2137DECL_INLINE_THROW(uint32_t)
2138iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2139 uint8_t idxInstr)
2140{
2141 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2142 off = iemNativeRegFlushPendingWrites(pReNative, off);
2143
2144#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2145 Assert(pReNative->Core.offPc == 0);
2146 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2147#endif
2148
2149 /* Allocate a temporary PC register. */
2150 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2151 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2152 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2153
2154 /* Update the RIP to get the return address. */
2155 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2156
2157 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2158 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2159 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2160 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2161
2162 /* Push the return address onto the guest stack. */
2163 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2164
2165 /* RSP got changed, so do this again. */
2166 off = iemNativeRegFlushPendingWrites(pReNative, off);
2167
2168 /* Store the result. */
2169 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2170#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2171 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2172 pReNative->Core.fDebugPcInitialized = true;
2173 Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%RI64\n", off, offDisp));
2174#endif
2175
2176 /* Need to transfer the shadow information to the new RIP register. */
2177 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2178 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2179 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2180
2181 return off;
2182}
2183
2184
2185/*********************************************************************************************************************************
2186* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
2187*********************************************************************************************************************************/
2188
2189DECL_FORCE_INLINE_THROW(uint32_t)
2190iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2191 uint16_t cbPopAdd, uint8_t idxRegTmp)
2192{
2193 /* Use16BitSp: */
2194#ifdef RT_ARCH_AMD64
2195 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2196 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2197 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2198 RT_NOREF(idxRegTmp);
2199
2200#elif defined(RT_ARCH_ARM64)
2201 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2202 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2203 /* add tmp, regrsp, #(cbMem + cbPopAdd) */
2204 uint16_t const cbCombined = cbMem + cbPopAdd;
2205 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
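/* ARM64 ADD (immediate) only encodes a 12-bit immediate, so when cbMem + cbPopAdd exceeds 4095 the upper part is added with a second, LSL #12 shifted immediate. */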
2206 if (cbCombined >= RT_BIT_32(12))
2207 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2208 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2209 /* and tmp, tmp, #0xffff */
2210 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2211 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2212 /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2213 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2214
2215#else
2216# error "Port me"
2217#endif
2218 return off;
2219}
2220
2221
2222DECL_FORCE_INLINE_THROW(uint32_t)
2223iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2224 uint16_t cbPopAdd)
2225{
2226 /* Use32BitSp: */
2227 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2228 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2229 return off;
2230}
2231
2232
2233/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2234#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr) \
2235 off = iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2236
2237/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2238#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2239 Assert((a_enmEffOpSize) == IEMMODE_32BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2240 off = (a_enmEffOpSize) == IEMMODE_32BIT \
2241 ? iemNativeEmitRetn<IEMMODE_32BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2242 : iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2243
2244/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2245#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2246 Assert((a_enmEffOpSize) == IEMMODE_64BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2247 off = (a_enmEffOpSize) == IEMMODE_64BIT \
2248 ? iemNativeEmitRetn<IEMMODE_64BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2249 : iemNativeEmitRetn<IEMMODE_16BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2250
2251/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2252 * clears flags. */
2253#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbPopArgs, a_cbInstr) \
2254 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr); \
2255 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2256
2257/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2258 * clears flags. */
2259#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2260 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2261 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2262
2263/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2264 * clears flags. */
2265#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2266 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2267 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2268
2269/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2270template<IEMMODE const a_enmEffOpSize, bool const a_f64Bit>
2271DECL_INLINE_THROW(uint32_t)
2272iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPopArgs, uint8_t idxInstr)
2273{
2274 RT_NOREF(cbInstr);
2275 AssertCompile(a_enmEffOpSize == IEMMODE_64BIT || a_enmEffOpSize == IEMMODE_32BIT || a_enmEffOpSize == IEMMODE_16BIT);
2276
2277#ifdef VBOX_STRICT
2278 /*
2279 * Check that the fExec flags we've got make sense.
2280 */
2281 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2282#endif
2283
2284 /*
2285 * To keep things simple we have to commit any pending writes first as we
2286 * may end up making calls.
2287 */
2288 off = iemNativeRegFlushPendingWrites(pReNative, off);
2289
2290 /*
2291 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
2292 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2293 * directly as the effective stack pointer.
2294 *
2295 * (Code structure is very similar to that of PUSH)
2296 *
2297 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2298 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2299 * aren't commonly used (or useful) and thus not in need of optimizing.
2300 *
2301 * Note! For non-flat modes the guest RSP is not allocated for update but
2302 * rather for calculation, as the shadowed register would otherwise remain
2303 * modified even if the return address raises #GP(0) for being outside
2304 * the CS limit, leaving a wrong stack pointer value in the guest (see
2305 * the near return testcase in bs3-cpu-basic-2). If no exception is
2306 * thrown, the shadowing is transferred to the new register returned by
2307 * iemNativeRegAllocTmpForGuestReg() at the end.
2308 */
2309 RT_CONSTEXPR
2310 uint8_t const cbMem = a_enmEffOpSize == IEMMODE_64BIT
2311 ? sizeof(uint64_t)
2312 : a_enmEffOpSize == IEMMODE_32BIT
2313 ? sizeof(uint32_t)
2314 : sizeof(uint16_t);
2315/** @todo the basic flatness could be detected by the threaded compiler step
2316 * like for the other macros... worth it? */
2317 bool const fFlat = a_enmEffOpSize == IEMMODE_64BIT
2318 || (a_enmEffOpSize == IEMMODE_32BIT /* see note */ && IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
2319 uintptr_t const pfnFunction = a_enmEffOpSize == IEMMODE_64BIT
2320 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2321 : fFlat
2322 ? (uintptr_t)iemNativeHlpStackFlatFetchU32
2323 : a_enmEffOpSize == IEMMODE_32BIT
2324 ? (uintptr_t)iemNativeHlpStackFetchU32
2325 : (uintptr_t)iemNativeHlpStackFetchU16;
2326 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2327 fFlat ? kIemNativeGstRegUse_ForUpdate
2328 : kIemNativeGstRegUse_Calculation,
2329 true /*fNoVolatileRegs*/);
2330 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2331 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2332 * will be the resulting register value. */
2333 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2334
2335 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2336 if (fFlat)
2337 Assert(idxRegEffSp == idxRegRsp);
2338 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2339 {
2340 Assert(idxRegEffSp != idxRegRsp);
2341 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2342 kIemNativeGstRegUse_ReadOnly);
2343#ifdef RT_ARCH_AMD64
2344 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2345#else
2346 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2347#endif
2348 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2349 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2350 offFixupJumpToUseOtherBitSp = off;
2351 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_32BIT)
2352 {
2353 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2354 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2355 }
2356 else
2357 {
2358 Assert(a_enmEffOpSize == IEMMODE_16BIT);
2359 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2360 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2361 idxRegMemResult);
2362 }
2363 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2364 }
2365 /* SpUpdateEnd: */
2366 uint32_t const offLabelSpUpdateEnd = off;
2367
2368 /*
2369 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
2370 * we're skipping lookup).
2371 */
2372 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2373 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2374 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2375 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2376 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2377 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2378 : UINT32_MAX;
2379
2380 if (!TlbState.fSkip)
2381 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2382 else
2383 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2384
2385 /*
2386 * Use16BitSp:
2387 */
2388 if (!fFlat)
2389 {
2390#ifdef RT_ARCH_AMD64
2391 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2392#else
2393 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2394#endif
2395 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2396 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2397 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2398 idxRegMemResult);
2399 else
2400 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2401 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2403 }
2404
2405 /*
2406 * TlbMiss:
2407 *
2408 * Call helper to do the stack fetch.
2409 */
2410 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2411
2412#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2413 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2414#else
2415 RT_NOREF(idxInstr);
2416#endif
2417
2418 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2419 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2420 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2421 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2422
2423
2424 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2425 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2426 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2427
2428#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2429 /* Do delayed EFLAGS calculations. */
2430 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
2431#endif
2432
2433 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2434 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2435
2436 /* Done setting up parameters, make the call. */
2437 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
2438
2439 /* Move the return register content to idxRegMemResult. */
2440 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2441 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2442
2443 /* Restore variables and guest shadow registers to volatile registers. */
2444 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2445 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2446
2447#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2448 if (!TlbState.fSkip)
2449 {
2450 /* end of TlbMiss - Jump to the done label. */
2451 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2452 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2453
2454 /*
2455 * TlbLookup:
2456 */
2457 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
2458 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
2459
2460 /*
2461 * Emit code to load the value (idxRegMemResult holds the pointer on input and the value on output).
2462 */
2463 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2464# ifdef IEM_WITH_TLB_STATISTICS
2465 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2466 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2467# endif
2468 switch (cbMem)
2469 {
2470 case 2:
2471 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2472 break;
2473 case 4:
2474 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2475 break;
2476 case 8:
2477 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2478 break;
2479 default:
2480 AssertFailed();
2481 }
2482
2483 TlbState.freeRegsAndReleaseVars(pReNative);
2484
2485 /*
2486 * TlbDone:
2487 *
2488 * Set the new RSP value (FLAT accesses need to calculate it first) and
2489 * commit the popped register value.
2490 */
2491 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2492 }
2493#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2494
2495 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2496 if RT_CONSTEXPR_IF(!a_f64Bit)
2497/** @todo we can skip this test in FLAT 32-bit mode. */
2498 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2499 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2500 else if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2501 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2502
2503 /* Complete RSP calculation for FLAT mode. */
2504 if (idxRegEffSp == idxRegRsp)
2505 {
2506 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2507 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPopArgs);
2508 else
2509 {
2510 Assert(a_enmEffOpSize == IEMMODE_32BIT);
2511 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPopArgs);
2512 }
2513 }
2514
2515 /* Commit the result and clear any current guest shadows for RIP. */
2516 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2517 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2518 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2519#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2520 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2521 pReNative->Core.fDebugPcInitialized = true;
2522 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2523#endif
2524
2525 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2526 if (!fFlat)
2527 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2528
2529 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2530 if (idxRegEffSp != idxRegRsp)
2531 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2532 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2533 return off;
2534}
2535
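/*
 * Rough sketch of what the code emitted above ends up doing for the common
 * flat 64-bit near return (simplified; variable names are illustrative and
 * the real code goes through the TLB lookup/miss paths, raising the fault
 * via a TB exit):
 *
 *     uint64_t const uNewRip = *(uint64_t const *)uRsp;    // TLB-hit load or iemNativeHlpStackFlatFetchU64
 *     if (!X86_IS_CANONICAL(uNewRip))
 *         -> #GP(0) + exit TB;
 *     uRsp += sizeof(uint64_t) + cbPopArgs;                 // drop return address + imm16 operand bytes
 *     pVCpu->cpum.GstCtx.rsp = uRsp;
 *     pVCpu->cpum.GstCtx.rip = uNewRip;
 */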
2536
2537/*********************************************************************************************************************************
2538* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2539*********************************************************************************************************************************/
2540
2541#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2542 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2543
2544/**
2545 * Emits code to check if an \#NM exception should be raised.
2546 *
2547 * @returns New code buffer offset, UINT32_MAX on failure.
2548 * @param pReNative The native recompile state.
2549 * @param off The code buffer offset.
2550 * @param idxInstr The current instruction.
2551 */
2552DECL_INLINE_THROW(uint32_t)
2553iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2554{
2555#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2556 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2557
2558 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2559 {
2560#endif
2561 /*
2562 * Make sure we don't have any outstanding guest register writes as we may
2563 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2564 */
2565 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2566 off = iemNativeRegFlushPendingWrites(pReNative, off);
2567
2568#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2569 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2570#else
2571 RT_NOREF(idxInstr);
2572#endif
2573
2574 /* Allocate a temporary CR0 register. */
2575 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2576 kIemNativeGstRegUse_ReadOnly);
2577
2578 /*
2579 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2580 * return raisexcpt();
2581 */
2582 /* Test and jump. */
2583 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg,
2584 X86_CR0_EM | X86_CR0_TS);
2585
2586 /* Free but don't flush the CR0 register. */
2587 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2588
2589#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2590 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2591 }
2592 else
2593 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2594#endif
2595
2596 return off;
2597}
2598
2599
2600#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2601 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2602
2603/**
2604 * Emits code to check if an \#NM exception should be raised for WAIT/FWAIT.
2605 *
2606 * @returns New code buffer offset, UINT32_MAX on failure.
2607 * @param pReNative The native recompile state.
2608 * @param off The code buffer offset.
2609 * @param idxInstr The current instruction.
2610 */
2611DECL_INLINE_THROW(uint32_t)
2612iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2613{
2614#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2615 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2616
2617 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2618 {
2619#endif
2620 /*
2621 * Make sure we don't have any outstanding guest register writes as we may
2622 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2623 */
2624 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2625 off = iemNativeRegFlushPendingWrites(pReNative, off);
2626
2627#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2628 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2629#else
2630 RT_NOREF(idxInstr);
2631#endif
2632
2633 /* Allocate a temporary CR0 register. */
2634 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2635 kIemNativeGstRegUse_Calculation);
2636
2637 /*
2638 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2639 * return raisexcpt();
2640 */
2641 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2642 /* Test and jump. */
2643 off = iemNativeEmitTbExitIfGpr32EqualsImm<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2644
2645 /* Free the CR0 register. */
2646 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2647
2648#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2649 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2650 }
2651 else
2652 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2653#endif
2654
2655 return off;
2656}
2657
2658
2659#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2660 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2661
2662/**
2663 * Emits code to check if an \#MF exception should be raised.
2664 *
2665 * @returns New code buffer offset, UINT32_MAX on failure.
2666 * @param pReNative The native recompile state.
2667 * @param off The code buffer offset.
2668 * @param idxInstr The current instruction.
2669 */
2670DECL_INLINE_THROW(uint32_t)
2671iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2672{
2673 /*
2674 * Make sure we don't have any outstanding guest register writes as we may
2675 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2676 */
2677 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2678 off = iemNativeRegFlushPendingWrites(pReNative, off);
2679
2680#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2681 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2682#else
2683 RT_NOREF(idxInstr);
2684#endif
2685
2686 /* Allocate a temporary FSW register. */
2687 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2688 kIemNativeGstRegUse_ReadOnly);
2689
2690 /*
2691 * if ((FSW & X86_FSW_ES) != 0)
2692 * return raisexcpt();
2693 */
2694 /* Test and jump. */
2695 off = iemNativeEmitTbExitIfBitSetInGpr<kIemNativeLabelType_RaiseMf>(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT);
2696
2697 /* Free but don't flush the FSW register. */
2698 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2699
2700 return off;
2701}
2702
2703
2704#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2705 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2706
2707/**
2708 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2709 *
2710 * @returns New code buffer offset, UINT32_MAX on failure.
2711 * @param pReNative The native recompile state.
2712 * @param off The code buffer offset.
2713 * @param idxInstr The current instruction.
2714 */
2715DECL_INLINE_THROW(uint32_t)
2716iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2717{
2718#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2719 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2720
2721 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2722 {
2723#endif
2724 /*
2725 * Make sure we don't have any outstanding guest register writes as we may
2726 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2727 */
2728 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2729 off = iemNativeRegFlushPendingWrites(pReNative, off);
2730
2731#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2732 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2733#else
2734 RT_NOREF(idxInstr);
2735#endif
2736
2737 /* Allocate a temporary CR0 and CR4 register. */
2738 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2739 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2740 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2741
2742 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2743#ifdef RT_ARCH_AMD64
2744 /*
2745 * We do a modified test here:
2746 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2747 * else { goto RaiseSseRelated; }
2748 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2749 * all targets except the 386, which doesn't support SSE anyway, so this
2750 * should be a safe assumption.
2751 */
2752 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2753 1+6+3+3+7+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2754 //pCodeBuf[off++] = 0xcc;
2755 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2756 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2757 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2758 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2759 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2760 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2761
2762#elif defined(RT_ARCH_ARM64)
2763 /*
2764 * We do a modified test here:
2765 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2766 * else { goto RaiseSseRelated; }
2767 */
2768 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2769 1+5 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2770 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2771 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2772 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2773 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2774 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2775 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2776 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2777 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2778 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off,
2779 idxTmpReg, false /*f64Bit*/);
2780
2781#else
2782# error "Port me!"
2783#endif
2784
2785 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2786 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2787 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2788 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2789
2790#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2791 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2792 }
2793 else
2794 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2795#endif
2796
2797 return off;
2798}
2799
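/*
 * For reference, the modified test above is equivalent to this plain C check
 * (sketch only, relying on the CR0[bit 9] == 0 assumption noted above;
 * variable names are illustrative):
 *
 *     uint32_t fTmp = (uCr4 & X86_CR4_OSFXSR) | uCr0;
 *     fTmp &= X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR;
 *     if (fTmp != X86_CR4_OSFXSR)   // i.e. EM or TS set, or OSFXSR clear
 *         -> RaiseSseRelated (#UD or #NM);
 *
 * That is, SSE is only usable when CR4.OSFXSR is set and both CR0.EM and
 * CR0.TS are clear.
 */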
2800
2801#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2802 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2803
2804/**
2805 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2806 *
2807 * @returns New code buffer offset, UINT32_MAX on failure.
2808 * @param pReNative The native recompile state.
2809 * @param off The code buffer offset.
2810 * @param idxInstr The current instruction.
2811 */
2812DECL_INLINE_THROW(uint32_t)
2813iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2814{
2815#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2816 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2817
2818 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2819 {
2820#endif
2821 /*
2822 * Make sure we don't have any outstanding guest register writes as we may
2823 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2824 */
2825 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2826 off = iemNativeRegFlushPendingWrites(pReNative, off);
2827
2828#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2829 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2830#else
2831 RT_NOREF(idxInstr);
2832#endif
2833
2834 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2835 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2836 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2837 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2838 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2839
2840 /*
2841 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2842 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2843 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2844 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2845 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2846 * { likely }
2847 * else { goto RaiseAvxRelated; }
2848 */
2849#ifdef RT_ARCH_AMD64
2850 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2851 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2852 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2853 ^ 0x1a) ) { likely }
2854 else { goto RaiseAvxRelated; } */
2855 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2856 1+6+3+5+3+5+3+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2857 //pCodeBuf[off++] = 0xcc;
2858 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2859 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2860 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2861 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2862 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2863 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2864 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2865 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2866 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2867 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2868 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2869
2870#elif defined(RT_ARCH_ARM64)
2871 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2872 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2873 else { goto RaiseAvxRelated; } */
2874 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2875 1+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2876 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2877 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2878 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2879 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2880 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2881 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2882 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2883 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2884 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2885 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2886 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2887 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off,
2888 idxTmpReg, false /*f64Bit*/);
2889
2890#else
2891# error "Port me!"
2892#endif
2893
2894 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2895 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2896 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2897 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2898#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2899 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2900 }
2901 else
2902 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2903#endif
2904
2905 return off;
2906}
2907
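/*
 * For reference, the bit gymnastics above boil down to this plain C check
 * (sketch only; variable names are illustrative):
 *
 *     if (   (uXcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
 *         || !(uCr4 & X86_CR4_OSXSAVE)
 *         ||  (uCr0 & X86_CR0_TS))
 *         -> RaiseAvxRelated (#UD or #NM);
 *
 * That is, AVX is only usable when XCR0 enables both SSE and YMM state,
 * CR4.OSXSAVE is set and CR0.TS is clear.
 */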
2908
2909#define IEM_MC_RAISE_DIVIDE_ERROR_IF_LOCAL_IS_ZERO(a_uVar) \
2910 off = iemNativeEmitRaiseDivideErrorIfLocalIsZero(pReNative, off, a_uVar, pCallEntry->idxInstr)
2911
2912/**
2913 * Emits code to raise a \#DE if a local variable is zero.
2914 *
2915 * @returns New code buffer offset, UINT32_MAX on failure.
2916 * @param pReNative The native recompile state.
2917 * @param off The code buffer offset.
2918 * @param idxVar The variable to check. This must be 32-bit.
2919 * @param idxInstr The current instruction.
2920 */
2921DECL_INLINE_THROW(uint32_t)
2922iemNativeEmitRaiseDivideErrorIfLocalIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxInstr)
2923{
2924 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2925 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, sizeof(uint32_t));
2926
2927 /* Make sure we don't have any outstanding guest register writes as we may raise a #DE. */
2928 off = iemNativeRegFlushPendingWrites(pReNative, off);
2929
2930 /* Set the instruction number if we're counting. */
2931#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2932 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2933#else
2934 RT_NOREF(idxInstr);
2935#endif
2936
2937 /* Do the job we're here for. */
2938 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
2939 off = iemNativeEmitTbExitIfGprIsZero<kIemNativeLabelType_RaiseDe>(pReNative, off, idxVarReg, false /*f64Bit*/);
2940 iemNativeVarRegisterRelease(pReNative, idxVar);
2941
2942 return off;
2943}
2944
2945
2946#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2947 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2948
2949/**
2950 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2951 *
2952 * @returns New code buffer offset, UINT32_MAX on failure.
2953 * @param pReNative The native recompile state.
2954 * @param off The code buffer offset.
2955 * @param idxInstr The current instruction.
2956 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2957 * @param cbAlign The alignment in bytes to check against.
2958 */
2959DECL_INLINE_THROW(uint32_t)
2960iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2961 uint8_t idxVarEffAddr, uint8_t cbAlign)
2962{
2963 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2964 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2965
2966 /*
2967 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2968 */
2969 off = iemNativeRegFlushPendingWrites(pReNative, off);
2970
2971#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2972 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2973#else
2974 RT_NOREF(idxInstr);
2975#endif
2976
2977 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2978 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseGp0>(pReNative, off, idxVarReg, cbAlign - 1);
2979 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2980
2981 return off;
2982}
2983
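/*
 * Conceptually the check above is a simple mask test (sketch, assuming
 * cbAlign is a power of two; names are illustrative):
 *
 *     if (GCPtrEff & (cbAlign - 1))    // e.g. cbAlign=16 -> mask 0x000f
 *         -> #GP(0) + exit TB;
 */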
2984
2985/*********************************************************************************************************************************
2986* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2987*********************************************************************************************************************************/
2988
2989/**
2990 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2991 *
2992 * @returns Pointer to the condition stack entry on success, NULL on failure
2993 * (too many nestings)
2994 */
2995DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
2996{
2997 uint32_t const idxStack = pReNative->cCondDepth;
2998 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2999
3000 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
3001 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
3002
3003 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
3004 pEntry->fInElse = false;
3005 pEntry->fIfExitTb = false;
3006 pEntry->fElseExitTb = false;
3007 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
3008 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
3009
3010 return pEntry;
3011}
3012
3013
3014/**
3015 * Start of the if-block, snapshotting the register and variable state.
3016 */
3017DECL_INLINE_THROW(void)
3018iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3019{
3020 Assert(offIfBlock != UINT32_MAX);
3021 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3022 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3023 Assert(!pEntry->fInElse);
3024
3025 /* Define the start of the IF block if requested or for disassembly purposes. */
3026 if (idxLabelIf != UINT32_MAX)
3027 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3028#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3029 else
3030 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3031#else
3032 RT_NOREF(offIfBlock);
3033#endif
3034
3035 /* Copy the initial state so we can restore it in the 'else' block. */
3036 pEntry->InitialState = pReNative->Core;
3037}
3038
3039
3040#define IEM_MC_ELSE() } while (0); \
3041 off = iemNativeEmitElse(pReNative, off); \
3042 do {
3043
3044/** Emits code related to IEM_MC_ELSE. */
3045DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3046{
3047 /* Check sanity and get the conditional stack entry. */
3048 Assert(off != UINT32_MAX);
3049 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3050 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3051 Assert(!pEntry->fInElse);
3052
3053 /* We can skip the dirty register flushing and the jump to the endif label if
3054 the branch already jumped to a TB exit. */
3055 if (!pEntry->fIfExitTb)
3056 {
3057#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3058 /* Writeback any dirty shadow registers. */
3059 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3060 * in one of the branches and leave guest registers already dirty before the start of the if
3061 * block alone. */
3062 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3063#endif
3064
3065 /* Jump to the endif. */
3066 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3067 }
3068# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3069 else
3070 Assert(pReNative->Core.offPc == 0);
3071# endif
3072
3073 /* Define the else label and enter the else part of the condition. */
3074 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3075 pEntry->fInElse = true;
3076
3077 /* Snapshot the core state so we can do a merge at the endif and restore
3078 the snapshot we took at the start of the if-block. */
3079 pEntry->IfFinalState = pReNative->Core;
3080 pReNative->Core = pEntry->InitialState;
3081
3082 return off;
3083}
3084
3085
3086#define IEM_MC_ENDIF() } while (0); \
3087 off = iemNativeEmitEndIf(pReNative, off)
3088
3089/** Emits code related to IEM_MC_ENDIF. */
3090DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3091{
3092 /* Check sanity and get the conditional stack entry. */
3093 Assert(off != UINT32_MAX);
3094 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3095 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3096
3097#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3098 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3099#endif
3100
3101 /*
3102 * If either of the branches exited the TB, we can take the state from the
3103 * other branch and skip all the merging headache.
3104 */
3105 bool fDefinedLabels = false;
3106 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3107 {
3108#ifdef VBOX_STRICT
3109 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3110 Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3111 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3112 ? &pEntry->IfFinalState : &pReNative->Core;
3113# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3114 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3115# endif
3116# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3117 Assert(pExitCoreState->offPc == 0);
3118# endif
3119 RT_NOREF(pExitCoreState);
3120#endif
3121
3122 if (!pEntry->fIfExitTb)
3123 {
3124 Assert(pEntry->fInElse);
3125 pReNative->Core = pEntry->IfFinalState;
3126 }
3127 }
3128 else
3129 {
3130 /*
3131 * Now we have to find common ground with the core state at the end of the
3132 * if-block. Use the smallest common denominator and just drop anything
3133 * that isn't the same in both states.
3134 */
3135 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3136 * which is why we're doing this at the end of the else-block.
3137 * But we'd need more info about the future for that to be worth the effort. */
3138 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3139#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3140 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3141 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3142 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3143#endif
3144
3145 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3146 {
3147#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3148 /*
3149 * If the branch has differences in dirty shadow registers, we will flush
3150 * the register only dirty in the current branch and dirty any that's only
3151 * dirty in the other one.
3152 */
3153 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3154 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3155 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3156 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3157 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3158 if (!fGstRegDirtyDiff)
3159 { /* likely */ }
3160 else
3161 {
3162 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3163 if (fGstRegDirtyHead)
3164 {
3165 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3166 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3167 }
3168 }
3169#endif
3170
3171 /*
3172 * Shadowed guest registers.
3173 *
3174 * We drop any shadows where the two states disagree about where
3175 * things are kept. We may end up flushing more dirty registers
3176 * here if the two branches keep things in different registers.
3177 */
3178 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3179 if (fGstRegs)
3180 {
3181 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3182 do
3183 {
3184 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3185 fGstRegs &= ~RT_BIT_64(idxGstReg);
3186
3187 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3188 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3189 if ( idxCurHstReg != idxOtherHstReg
3190 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3191 {
3192#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3193 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3194 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3195 idxOtherHstReg, pOther->bmGstRegShadows));
3196#else
3197 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3198 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3199 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3200 idxOtherHstReg, pOther->bmGstRegShadows,
3201 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3202 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3203 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3204 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3205 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3206#endif
3207 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3208 }
3209 } while (fGstRegs);
3210 }
3211 else
3212 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3213
3214#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3215 /*
3216 * Generate jumpy code for flushing dirty registers from the other
3217 * branch that aren't dirty in the current one.
3218 */
3219 if (!fGstRegDirtyTail)
3220 { /* likely */ }
3221 else
3222 {
3223 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3224 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3225
3226 /* First the current branch has to jump over the dirty flushing from the other branch. */
3227 uint32_t const offFixup1 = off;
3228 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3229
3230 /* Put the endif and maybe else label here so the other branch ends up here. */
3231 if (!pEntry->fInElse)
3232 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3233 else
3234 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3235 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3236 fDefinedLabels = true;
3237
3238 /* Flush the dirty guest registers from the other branch. */
3239 while (fGstRegDirtyTail)
3240 {
3241 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3242 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3243 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3244 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3245 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3246
3247 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3248
3249 /* Mismatching shadowing should've been dropped in the previous step already. */
3250 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3251 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3252 }
3253
3254 /* Here is the actual endif label, fixup the above jump to land here. */
3255 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3256 }
3257#endif
3258
3259 /*
3260 * Check variables next. For now we must require them to be identical
3261 * or stuff we can recreate. (No code is emitted here.)
3262 */
3263 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3264#ifdef VBOX_STRICT
3265 uint32_t const offAssert = off;
3266#endif
3267 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3268 if (fVars)
3269 {
3270 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3271 do
3272 {
3273 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3274 fVars &= ~RT_BIT_32(idxVar);
3275
3276 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3277 {
3278 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3279 continue;
3280 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3281 {
3282 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3283 if (idxHstReg != UINT8_MAX)
3284 {
3285 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3286 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3287 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3288 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3289 }
3290 continue;
3291 }
3292 }
3293 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3294 continue;
3295
3296 /* Irreconcilable, so drop it. */
3297 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3298 if (idxHstReg != UINT8_MAX)
3299 {
3300 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3301 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3302 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3303 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3304 }
3305 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3306 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3307 } while (fVars);
3308 }
3309 Assert(off == offAssert);
3310
3311 /*
3312 * Finally, check that the host register allocations match.
3313 */
3314 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3315 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3316 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3317 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3318 }
3319 }
3320
3321 /*
3322 * Define the endif label and maybe the else one if we're still in the 'if' part.
3323 */
3324 if (!fDefinedLabels)
3325 {
3326 if (!pEntry->fInElse)
3327 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3328 else
3329 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3330 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3331 }
3332
3333 /* Pop the conditional stack. */
3334 pReNative->cCondDepth -= 1;
3335
3336 return off;
3337}
3338
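/*
 * For orientation, a conditional MC block driven by the IEM_MC_IF_XXX,
 * IEM_MC_ELSE and IEM_MC_ENDIF emitters in this section has roughly this
 * shape (sketch):
 *
 *     IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {   // push cond-stack entry; jump to the Else label if ZF is clear
 *         ... if-block emitters ...
 *     } IEM_MC_ELSE() {                     // jump to Endif; define the Else label; restore the initial state
 *         ... else-block emitters ...
 *     } IEM_MC_ENDIF();                     // reconcile the branch states; define Endif (and Else if no else)
 */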
3339
3340/**
3341 * Helper function to convert X86_EFL_xxx masks to liveness masks.
3342 *
3343 * The compiler should be able to figure this out at compile time, so we sprinkle
3344 * constexpr wherever possible here to nudge it along.
3345 */
3346template<uint32_t const a_fEfl>
3347RT_CONSTEXPR uint64_t iemNativeEflagsToLivenessMask(void)
3348{
3349 return (a_fEfl & ~X86_EFL_STATUS_BITS ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER) : 0)
3350 | (a_fEfl & X86_EFL_CF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) : 0)
3351 | (a_fEfl & X86_EFL_PF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_PF) : 0)
3352 | (a_fEfl & X86_EFL_AF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_AF) : 0)
3353 | (a_fEfl & X86_EFL_ZF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) : 0)
3354 | (a_fEfl & X86_EFL_SF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_SF) : 0)
3355 | (a_fEfl & X86_EFL_OF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OF) : 0);
3356}
3357
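/*
 * Example (sketch): iemNativeEflagsToLivenessMask<X86_EFL_ZF | X86_EFL_CF>()
 * yields RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF),
 * while any non-status bit in the input adds IEMLIVENESSBIT_IDX_EFL_OTHER.
 */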
3358
3359/**
3360 * Helper function to convert a single X86_EFL_xxxx value to bit number.
3361 *
3362 * The compiler should be able to figure this out at compile time, so we sprinkle
3363 * constexpr wherever possible here to nudge it along.
3364 */
3365template<uint32_t const a_fEfl>
3366RT_CONSTEXPR unsigned iemNativeEflagsToSingleBitNo(void)
3367{
3368 AssertCompile( a_fEfl == X86_EFL_CF
3369 || a_fEfl == X86_EFL_PF
3370 || a_fEfl == X86_EFL_AF
3371 || a_fEfl == X86_EFL_ZF
3372 || a_fEfl == X86_EFL_SF
3373 || a_fEfl == X86_EFL_OF
3374 || a_fEfl == X86_EFL_DF);
3375 return a_fEfl == X86_EFL_CF ? X86_EFL_CF_BIT
3376 : a_fEfl == X86_EFL_PF ? X86_EFL_PF_BIT
3377 : a_fEfl == X86_EFL_AF ? X86_EFL_AF_BIT
3378 : a_fEfl == X86_EFL_ZF ? X86_EFL_ZF_BIT
3379 : a_fEfl == X86_EFL_SF ? X86_EFL_SF_BIT
3380 : a_fEfl == X86_EFL_OF ? X86_EFL_OF_BIT
3381 : X86_EFL_DF_BIT;
3382}
3383
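/*
 * Example (sketch): iemNativeEflagsToSingleBitNo<X86_EFL_ZF>() evaluates to
 * X86_EFL_ZF_BIT (6); anything other than the seven flags listed in the
 * AssertCompile fails to compile.
 */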
3384
3385#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3386 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3387 do {
3388
3389/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3390DECL_INLINE_THROW(uint32_t)
3391iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3392{
3393 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3394 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3395 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3396
3397 /* Get the eflags. */
3398 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3399
3400 /* Test and jump. */
3401 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3402
3403 /* Free but don't flush the EFlags register. */
3404 iemNativeRegFreeTmp(pReNative, idxEflReg);
3405
3406 /* Make a copy of the core state now as we start the if-block. */
3407 iemNativeCondStartIfBlock(pReNative, off);
3408
3409 return off;
3410}
3411
3412
3413#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3414 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3415 do {
3416
3417/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3418DECL_INLINE_THROW(uint32_t)
3419iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3420{
3421 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3422 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3423 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3424
3425 /* Get the eflags. */
3426 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3427
3428 /* Test and jump. */
3429 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3430
3431 /* Free but don't flush the EFlags register. */
3432 iemNativeRegFreeTmp(pReNative, idxEflReg);
3433
3434 /* Make a copy of the core state now as we start the if-block. */
3435 iemNativeCondStartIfBlock(pReNative, off);
3436
3437 return off;
3438}
3439
3440
3441#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3442 off = iemNativeEmitIfEflagsBitSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3443 iemNativeEflagsToLivenessMask<a_fBit>()); \
3444 do {
3445
3446/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3447DECL_INLINE_THROW(uint32_t)
3448iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3449{
3450 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3451 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3452 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3453
3454 /* Get the eflags. */
3455 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3456
3457 /* Test and jump. */
3458 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3459
3460 /* Free but don't flush the EFlags register. */
3461 iemNativeRegFreeTmp(pReNative, idxEflReg);
3462
3463 /* Make a copy of the core state now as we start the if-block. */
3464 iemNativeCondStartIfBlock(pReNative, off);
3465
3466 return off;
3467}
3468
3469
3470#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3471 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3472 iemNativeEflagsToLivenessMask<a_fBit>()); \
3473 do {
3474
3475/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3476DECL_INLINE_THROW(uint32_t)
3477iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3478{
3479 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3480 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3481 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3482
3483 /* Get the eflags. */
3484 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3485
3486 /* Test and jump. */
3487 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3488
3489 /* Free but don't flush the EFlags register. */
3490 iemNativeRegFreeTmp(pReNative, idxEflReg);
3491
3492 /* Make a copy of the core state now as we start the if-block. */
3493 iemNativeCondStartIfBlock(pReNative, off);
3494
3495 return off;
3496}
3497
3498
3499#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3500 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3501 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3502 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3503 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3504 do {
3505
3506#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3507 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3508 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3509 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3510 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3511 do {
3512
3513/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3514DECL_INLINE_THROW(uint32_t)
3515iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3516 bool fInverted, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3517{
3518 Assert(iBitNo1 != iBitNo2);
3519 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3520 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3521 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3522
3523 /* Get the eflags. */
3524 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3525
3526#ifdef RT_ARCH_AMD64
3527 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1));
3528
3529 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3530 if (iBitNo1 > iBitNo2)
3531 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3532 else
3533 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3534 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3535
3536#elif defined(RT_ARCH_ARM64)
3537 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3538 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3539
3540 /* and tmpreg, eflreg, #1<<iBitNo1 */
3541 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3542
3543 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3544 if (iBitNo1 > iBitNo2)
3545 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3546 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3547 else
3548 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3549 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3550
3551 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3552
3553#else
3554# error "Port me"
3555#endif
3556
3557 /* Test (bit iBitNo2 is set in tmpreg if the two flags are not equal) and jump. */
3558 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3559 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3560
3561 /* Free but don't flush the EFlags and tmp registers. */
3562 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3563 iemNativeRegFreeTmp(pReNative, idxEflReg);
3564
3565 /* Make a copy of the core state now as we start the if-block. */
3566 iemNativeCondStartIfBlock(pReNative, off);
3567
3568 return off;
3569}
3570
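/*
 * The trick used above, in plain C (sketch; names are illustrative): isolate
 * flag bit iBitNo1, shift it to the position of iBitNo2 and XOR with EFLAGS,
 * so bit iBitNo2 of the result is set exactly when the two flags differ:
 *
 *     uint32_t fTmp = fEFlags & RT_BIT_32(iBitNo1);
 *     fTmp = iBitNo1 > iBitNo2 ? fTmp >> (iBitNo1 - iBitNo2) : fTmp << (iBitNo2 - iBitNo1);
 *     fTmp ^= fEFlags;
 *     bool const fNotEqual = RT_BOOL(fTmp & RT_BIT_32(iBitNo2));
 */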
3571
3572#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3573 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3574 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3575 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3576 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3577 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3578 do {
3579
3580#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3581 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3582 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3583 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3584 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3585 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3586 do {
3587
3588/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3589 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3590DECL_INLINE_THROW(uint32_t)
3591iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fInverted,
3592 unsigned iBitNo, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3593{
3594 Assert(iBitNo1 != iBitNo);
3595 Assert(iBitNo2 != iBitNo);
3596 Assert(iBitNo2 != iBitNo1);
3597 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3598 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3599 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3600
3601 /* We need an if-block label for the inverted variant. */
3602 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3603 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3604
3605 /* Get the eflags. */
3606 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3607
3608#ifdef RT_ARCH_AMD64
3609 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1)); /* This must come before we jump anywhere! */
3610#elif defined(RT_ARCH_ARM64)
3611 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3612#endif
3613
3614 /* Check for the lone bit first. */
3615 if (!fInverted)
3616 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3617 else
3618 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3619
3620 /* Then extract and compare the other two bits. */
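    /* The idea on both paths: isolate bit iBitNo1, shift it so it lines up with bit iBitNo2, then XOR
       the result with EFLAGS.  Bit iBitNo2 of the temporary register thus ends up set exactly when the
       two flags differ, which is what the final bit test below branches on. */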
3621#ifdef RT_ARCH_AMD64
3622 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3623 if (iBitNo1 > iBitNo2)
3624 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3625 else
3626 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3627 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3628
3629#elif defined(RT_ARCH_ARM64)
3630 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3631
3632 /* and tmpreg, eflreg, #1<<iBitNo1 */
3633 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3634
3635 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3636 if (iBitNo1 > iBitNo2)
3637 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3638 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3639 else
3640 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3641 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3642
3643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3644
3645#else
3646# error "Port me"
3647#endif
3648
3649 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3650 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3651 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3652
3653 /* Free but don't flush the EFlags and tmp registers. */
3654 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3655 iemNativeRegFreeTmp(pReNative, idxEflReg);
3656
3657 /* Make a copy of the core state now as we start the if-block. */
3658 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3659
3660 return off;
3661}
3662
3663
3664#define IEM_MC_IF_CX_IS_NZ() \
3665 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3666 do {
3667
3668/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3669DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3670{
3671 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3672
3673 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3674 kIemNativeGstRegUse_ReadOnly);
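    /* Only the low 16 bits (CX) matter, hence the UINT16_MAX mask: branch to the else-block when CX is zero. */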
3675 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3676 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3677
3678 iemNativeCondStartIfBlock(pReNative, off);
3679 return off;
3680}
3681
3682
3683#define IEM_MC_IF_ECX_IS_NZ() \
3684 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3685 do {
3686
3687#define IEM_MC_IF_RCX_IS_NZ() \
3688 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3689 do {
3690
3691/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3692DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3693{
3694 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3695
3696 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3697 kIemNativeGstRegUse_ReadOnly);
3698 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3699 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3700
3701 iemNativeCondStartIfBlock(pReNative, off);
3702 return off;
3703}
3704
3705
3706#define IEM_MC_IF_CX_IS_NOT_ONE() \
3707 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3708 do {
3709
3710/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3711DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3712{
3713 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3714
3715 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3716 kIemNativeGstRegUse_ReadOnly);
3717#ifdef RT_ARCH_AMD64
3718 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3719#else
3720 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3721 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3722 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3723#endif
3724 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3725
3726 iemNativeCondStartIfBlock(pReNative, off);
3727 return off;
3728}
3729
3730
3731#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3732 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3733 do {
3734
3735#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3736 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3737 do {
3738
3739/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3740DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3741{
3742 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3743
3744 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3745 kIemNativeGstRegUse_ReadOnly);
3746 if (f64Bit)
3747 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3748 else
3749 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3750 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3751
3752 iemNativeCondStartIfBlock(pReNative, off);
3753 return off;
3754}
3755
3756
3757#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3758 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, \
3759 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3760 iemNativeEflagsToLivenessMask<a_fBit>()); \
3761 do {
3762
3763#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3764 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, \
3765 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3766 iemNativeEflagsToLivenessMask<a_fBit>()); \
3767 do {
3768
3769/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3770 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3771DECL_INLINE_THROW(uint32_t)
3772iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3773 bool fCheckIfSet, unsigned iBitNo, uint64_t fLivenessEflBit)
3774{
3775 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3776 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3777 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3778
3779 /* We have to load both RCX and EFLAGS before we can start branching,
3780 otherwise we'll end up in the else-block with an inconsistent
3781 register allocator state.
3782 Doing EFLAGS first as it's more likely to be loaded, right? */
3783 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3784 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3785 kIemNativeGstRegUse_ReadOnly);
3786
3787 /** @todo we could reduce this to a single branch instruction by spending a
3788 * temporary register and some setnz stuff. Not sure if loops are
3789 * worth it. */
3790 /* Check CX. */
3791#ifdef RT_ARCH_AMD64
3792 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3793#else
3794 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3795 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3796 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3797#endif
3798
3799 /* Check the EFlags bit. */
3800 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3801 !fCheckIfSet /*fJmpIfSet*/);
3802
3803 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3804 iemNativeRegFreeTmp(pReNative, idxEflReg);
3805
3806 iemNativeCondStartIfBlock(pReNative, off);
3807 return off;
3808}
3809
3810
3811#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3812 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, false /*f64Bit*/, \
3813 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3814 iemNativeEflagsToLivenessMask<a_fBit>()); \
3815 do {
3816
3817#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3818 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, false /*f64Bit*/, \
3819 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3820 iemNativeEflagsToLivenessMask<a_fBit>()); \
3821 do {
3822
3823#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3824 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, true /*f64Bit*/, \
3825 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3826 iemNativeEflagsToLivenessMask<a_fBit>()); \
3827 do {
3828
3829#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3830 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, true /*f64Bit*/, \
3831 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3832 iemNativeEflagsToLivenessMask<a_fBit>()); \
3833 do {
3834
3835/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3836 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3837 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3838 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3839DECL_INLINE_THROW(uint32_t)
3840iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fCheckIfSet, bool f64Bit,
3841 unsigned iBitNo, uint64_t fLivenessEFlBit)
3842
3843{
3844 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3845 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3846 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3847
3848 /* We have to load both RCX and EFLAGS before we can start branching,
3849 otherwise we'll end up in the else-block with an inconsistent
3850 register allocator state.
3851 Doing EFLAGS first as it's more likely to be loaded, right? */
3852 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEFlBit);
3853 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3854 kIemNativeGstRegUse_ReadOnly);
3855
3856 /** @todo we could reduce this to a single branch instruction by spending a
3857 * temporary register and some setnz stuff. Not sure if loops are
3858 * worth it. */
3859 /* Check RCX/ECX. */
3860 if (f64Bit)
3861 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3862 else
3863 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3864
3865 /* Check the EFlags bit. */
3866 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3867 !fCheckIfSet /*fJmpIfSet*/);
3868
3869 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3870 iemNativeRegFreeTmp(pReNative, idxEflReg);
3871
3872 iemNativeCondStartIfBlock(pReNative, off);
3873 return off;
3874}
3875
3876
3877#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3878 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3879 do {
3880
3881/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3882DECL_INLINE_THROW(uint32_t)
3883iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3884{
3885 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3886
3887 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3888 PIEMNATIVEVAR const pVarLocal = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3889 AssertStmt(pVarLocal->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3890 AssertStmt(pVarLocal->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3891
3892 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3893
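    /* The if-block handles the zero case, so branch to the else-block when the local is not zero. */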
3894 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3895
3896 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3897
3898 iemNativeCondStartIfBlock(pReNative, off);
3899 return off;
3900}
3901
3902
3903#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3904 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3905 do {
3906
3907/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3908DECL_INLINE_THROW(uint32_t)
3909iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3910{
3911 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3912 Assert(iGReg < 16);
3913
3914 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3915 kIemNativeGstRegUse_ReadOnly);
3916
3917 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3918
3919 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3920
3921 iemNativeCondStartIfBlock(pReNative, off);
3922 return off;
3923}
3924
3925
3926
3927/*********************************************************************************************************************************
3928* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3929*********************************************************************************************************************************/
3930
3931#define IEM_MC_NOREF(a_Name) \
3932 RT_NOREF_PV(a_Name)
3933
3934#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3935 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3936
3937#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3938 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3939
3940#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3941 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3942
3943#define IEM_MC_LOCAL(a_Type, a_Name) \
3944 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3945
3946#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3947 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3948
3949#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3950 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3951
3952
3953/**
3954 * Sets the host register for @a idxVar to @a idxReg.
3955 *
3956 * Any guest register shadowing will be implicitly dropped by this call.
3957 *
3958 * The variable must not have any register associated with it (causes
3959 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3960 * implied.
3961 *
3962 * @returns idxReg
3963 * @param pReNative The recompiler state.
3964 * @param idxVar The variable.
3965 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3966 * @param off For recording in debug info.
3967 * @param fAllocated Set if the register is already allocated, false if not.
3968 *
3969 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3970 */
3971DECL_INLINE_THROW(uint8_t)
3972iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
3973{
3974 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3975 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3976 Assert(!pVar->fRegAcquired);
3977 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3978 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3979 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
3980 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3981
3982 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3983 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3984
3985 iemNativeVarSetKindToStack(pReNative, idxVar);
3986 pVar->idxReg = idxReg;
3987
3988 return idxReg;
3989}
3990
3991
3992/**
3993 * Convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
3994 */
3995DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3996 uint8_t idxReg, uint32_t *poff)
3997{
3998 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
3999 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
4000 return idxReg;
4001}
4002
4003
4004/**
4005 * This is called by IEM_MC_END() to clean up all variables.
4006 */
4007DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
4008{
4009 uint32_t const bmVars = pReNative->Core.bmVars;
4010 if (bmVars != 0)
4011 iemNativeVarFreeAllSlow(pReNative, bmVars);
4012 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
4013 Assert(pReNative->Core.bmStack == 0);
4014}
4015
4016
4017#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
4018
4019/**
4020 * This is called by IEM_MC_FREE_LOCAL.
4021 */
4022DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4023{
4024 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4025 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
4026 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4027}
4028
4029
4030#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
4031
4032/**
4033 * This is called by IEM_MC_FREE_ARG.
4034 */
4035DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4036{
4037 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4038 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
4039 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4040}
4041
4042
4043#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
4044
4045/**
4046 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4047 */
4048DECL_INLINE_THROW(uint32_t)
4049iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4050{
4051 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4052 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4053 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4054 Assert( pVarDst->cbVar == sizeof(uint16_t)
4055 || pVarDst->cbVar == sizeof(uint32_t));
4056
4057 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4058 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4059 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4060 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4061 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4062
4063 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4064
4065 /*
4066 * Special case for immediates.
4067 */
4068 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4069 {
4070 switch (pVarDst->cbVar)
4071 {
4072 case sizeof(uint16_t):
4073 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4074 break;
4075 case sizeof(uint32_t):
4076 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4077 break;
4078 default: AssertFailed(); break;
4079 }
4080 }
4081 else
4082 {
4083 /*
4084 * The generic solution for now.
4085 */
4086 /** @todo optimize this by having the python script make sure the source
4087 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4088 * statement. Then we could just transfer the register assignments. */
4089 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4090 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4091 switch (pVarDst->cbVar)
4092 {
4093 case sizeof(uint16_t):
4094 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4095 break;
4096 case sizeof(uint32_t):
4097 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4098 break;
4099 default: AssertFailed(); break;
4100 }
4101 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4102 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4103 }
4104 return off;
4105}
4106
4107
4108
4109/*********************************************************************************************************************************
4110* Emitters for IEM_MC_CALL_CIMPL_XXX *
4111*********************************************************************************************************************************/
4112
4113/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4114DECL_INLINE_THROW(uint32_t)
4115iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4116 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4117
4118{
4119 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
4120 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4121
4122#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4123 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4124 when a call clobbers any of the relevant control registers. */
4125# if 1
4126 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4127 {
4128 /* Likely as long as call+ret are done via cimpl. */
4129 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4130 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4131 }
4132 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4133 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4134 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4135 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4136 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4137 else
4138 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4139 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4140 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4141
4142# else
4143 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4144 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4145 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4146 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4147 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4148 || pfnCImpl == (uintptr_t)iemCImpl_callf
4149 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4150 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4151 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4152 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4153 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4154# endif
4155
4156# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4157 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4158 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4159 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4160# endif
4161#endif
4162
4163 /*
4164 * Do all the call setup and cleanup.
4165 */
4166 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4167
4168 /*
4169 * Load the two or three hidden arguments.
4170 */
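    /* The hidden arguments are the pVCpu pointer and the instruction length (cbInstr); strict Windows/AMD64
       builds additionally pass the address of a frame slot receiving the VBOXSTRICTRC as the first argument. */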
4171#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4172 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4173 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4174 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4175#else
4176 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4177 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4178#endif
4179
4180 /*
4181 * Make the call and check the return code.
4182 *
4183 * Shadow PC copies are always flushed here; other stuff depends on flags.
4184 * Segment and general purpose registers are explicitly flushed via the
4185 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4186 * macros.
4187 */
4188 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4189#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4190 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4191#endif
4192 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4193 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4194 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4195 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4196
4197#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4198 pReNative->Core.fDebugPcInitialized = false;
4199 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4200#endif
4201
4202 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4203}
4204
4205
4206#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4207 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4208
4209/** Emits code for IEM_MC_CALL_CIMPL_1. */
4210DECL_INLINE_THROW(uint32_t)
4211iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4212 uintptr_t pfnCImpl, uint8_t idxArg0)
4213{
4214 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4215 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4216}
4217
4218
4219#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4220 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4221
4222/** Emits code for IEM_MC_CALL_CIMPL_2. */
4223DECL_INLINE_THROW(uint32_t)
4224iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4225 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4226{
4227 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4228 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4229 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4230}
4231
4232
4233#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4234 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4235 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4236
4237/** Emits code for IEM_MC_CALL_CIMPL_3. */
4238DECL_INLINE_THROW(uint32_t)
4239iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4240 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4241{
4242 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4243 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4244 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4245 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4246}
4247
4248
4249#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4250 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4251 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4252
4253/** Emits code for IEM_MC_CALL_CIMPL_4. */
4254DECL_INLINE_THROW(uint32_t)
4255iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4256 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4257{
4258 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4259 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4260 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4261 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4262 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4263}
4264
4265
4266#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4267 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4268 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4269
4270/** Emits code for IEM_MC_CALL_CIMPL_5. */
4271DECL_INLINE_THROW(uint32_t)
4272iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4273 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4274{
4275 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4276 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4277 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4278 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4279 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4280 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4281}
4282
4283
4284/** Recompiler debugging: Flush guest register shadow copies. */
4285#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4286
4287
4288
4289/*********************************************************************************************************************************
4290* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4291*********************************************************************************************************************************/
4292
4293/**
4294 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4295 */
4296DECL_INLINE_THROW(uint32_t)
4297iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4298 uintptr_t pfnAImpl, uint8_t cArgs)
4299{
4300 if (idxVarRc != UINT8_MAX)
4301 {
4302 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4303 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4304 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4305 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4306 }
4307
4308 /*
4309 * Do all the call setup and cleanup.
4310 *
4311 * It is only necessary to flush pending guest register writes in call-volatile registers here, as
4312 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only access their
4313 * parameters. Call-volatile registers are always flushed by iemNativeEmitCallCommon(), regardless
4314 * of the fFlushPendingWrites parameter.
4315 */
4316 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4317
4318 /*
4319 * Make the call and update the return code variable if we've got one.
4320 */
4321 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
4322 if (idxVarRc != UINT8_MAX)
4323 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4324
4325 return off;
4326}
4327
4328
4329
4330#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4331 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4332
4333#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4334 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4335
4336/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4337DECL_INLINE_THROW(uint32_t)
4338iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4339{
4340 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4341}
4342
4343
4344#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4345 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4346
4347#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4348 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4349
4350/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4351DECL_INLINE_THROW(uint32_t)
4352iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4353{
4354 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4355 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4356}
4357
4358
4359#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4360 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4361
4362#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4363 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4364
4365/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4366DECL_INLINE_THROW(uint32_t)
4367iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4368 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4369{
4370 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4371 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4372 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4373}
4374
4375
4376#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4377 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4378
4379#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4380 IEM_MC_LOCAL(a_rcType, a_rc); \
4381 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4382
4383/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4384DECL_INLINE_THROW(uint32_t)
4385iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4386 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4387{
4388 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4389 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4390 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4391 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4392}
4393
4394
4395#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4396 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4397
4398#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4399 IEM_MC_LOCAL(a_rcType, a_rc); \
4400 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4401
4402/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4403DECL_INLINE_THROW(uint32_t)
4404iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4405 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4406{
4407 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4408 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4409 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4410 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4411 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4412}
4413
4414
4415
4416/*********************************************************************************************************************************
4417* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4418*********************************************************************************************************************************/
4419
4420#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4421 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4422
4423#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4424 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4425
4426#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4427 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4428
4429#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4430 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4431
4432
4433/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4434 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4435DECL_INLINE_THROW(uint32_t)
4436iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4437{
4438 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4439 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4440 Assert(iGRegEx < 20);
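    /* iGRegEx values 16..19 select AH, CH, DH and BH, i.e. bits 15:8 of the first four GPRs. */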
4441
4442 /* Same discussion as in iemNativeEmitFetchGregU16 */
4443 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4444 kIemNativeGstRegUse_ReadOnly);
4445
4446 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4447 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4448
4449 /* The value is zero-extended to the full 64-bit host register width. */
4450 if (iGRegEx < 16)
4451 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4452 else
4453 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4454
4455 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4456 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4457 return off;
4458}
4459
4460
4461#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4462 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4463
4464#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4465 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4466
4467#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4468 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4469
4470/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4471DECL_INLINE_THROW(uint32_t)
4472iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4473{
4474 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4475 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4476 Assert(iGRegEx < 20);
4477
4478 /* Same discussion as in iemNativeEmitFetchGregU16 */
4479 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4480 kIemNativeGstRegUse_ReadOnly);
4481
4482 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4483 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4484
4485 if (iGRegEx < 16)
4486 {
4487 switch (cbSignExtended)
4488 {
4489 case sizeof(uint16_t):
4490 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4491 break;
4492 case sizeof(uint32_t):
4493 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4494 break;
4495 case sizeof(uint64_t):
4496 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4497 break;
4498 default: AssertFailed(); break;
4499 }
4500 }
4501 else
4502 {
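        /* For a high-byte register: first move bits 15:8 down into bits 7:0, then sign-extend that byte in place. */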
4503 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4504 switch (cbSignExtended)
4505 {
4506 case sizeof(uint16_t):
4507 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4508 break;
4509 case sizeof(uint32_t):
4510 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4511 break;
4512 case sizeof(uint64_t):
4513 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4514 break;
4515 default: AssertFailed(); break;
4516 }
4517 }
4518
4519 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4520 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4521 return off;
4522}
4523
4524
4525
4526#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4527 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4528
4529#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4530 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4531
4532#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4533 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4534
4535/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4536DECL_INLINE_THROW(uint32_t)
4537iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4538{
4539 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4540 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4541 Assert(iGReg < 16);
4542
4543 /*
4544 * We can either just load the low 16-bit of the GPR into a host register
4545 * for the variable, or we can do so via a shadow copy host register. The
4546 * latter will avoid having to reload it if it's being stored later, but
4547 * will waste a host register if it isn't touched again. Since we don't
4548 * know what's going to happen, we choose the latter for now.
4549 */
4550 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4551 kIemNativeGstRegUse_ReadOnly);
4552
4553 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4554 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4555 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4556 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4557
4558 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4559 return off;
4560}
4561
4562#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4563 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4564
4565/** Emits code for IEM_MC_FETCH_GREG_I16. */
4566DECL_INLINE_THROW(uint32_t)
4567iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4568{
4569 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4570 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4571 Assert(iGReg < 16);
4572
4573 /*
4574 * We can either just load the low 16-bit of the GPR into a host register
4575 * for the variable, or we can do so via a shadow copy host register. The
4576 * latter will avoid having to reload it if it's being stored later, but
4577 * will waste a host register if it isn't touched again. Since we don't
4578 * know what's going to happen, we choose the latter for now.
4579 */
4580 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4581 kIemNativeGstRegUse_ReadOnly);
4582
4583 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4584 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4585#ifdef RT_ARCH_AMD64
4586 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4587#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM; we emulate them through 32-bit registers, which requires sign extension. */
4588 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4589#endif
4590 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4591
4592 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4593 return off;
4594}
4595
4596
4597#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4598 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4599
4600#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4601 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4602
4603/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4604DECL_INLINE_THROW(uint32_t)
4605iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4606{
4607 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4608 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4609 Assert(iGReg < 16);
4610
4611 /*
4612 * We can either just load the low 16-bit of the GPR into a host register
4613 * for the variable, or we can do so via a shadow copy host register. The
4614 * latter will avoid having to reload it if it's being stored later, but
4615 * will waste a host register if it isn't touched again. Since we don't
4616 * know what's going to happen, we choose the latter for now.
4617 */
4618 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4619 kIemNativeGstRegUse_ReadOnly);
4620
4621 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4622 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4623 if (cbSignExtended == sizeof(uint32_t))
4624 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4625 else
4626 {
4627 Assert(cbSignExtended == sizeof(uint64_t));
4628 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4629 }
4630 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4631
4632 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4633 return off;
4634}
4635
4636
4637#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4638 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4639
4640#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4641 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4642
4643#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4644 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4645
4646/** Emits code for IEM_MC_FETCH_GREG_I32, IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4647DECL_INLINE_THROW(uint32_t)
4648iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4649{
4650 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4651 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4652 Assert(iGReg < 16);
4653
4654 /*
4655 * We can either just load the low 32-bit of the GPR into a host register
4656 * for the variable, or we can do so via a shadow copy host register. The
4657 * latter will avoid having to reload it if it's being stored later, but
4658 * will waste a host register if it isn't touched again. Since we don't
4659 * know what's going to happen, we choose the latter for now.
4660 */
4661 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4662 kIemNativeGstRegUse_ReadOnly);
4663
4664 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4665 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4666 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4667 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4668
4669 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4670 return off;
4671}
4672
4673
4674#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4675 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4676
4677/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4678DECL_INLINE_THROW(uint32_t)
4679iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4680{
4681 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4682 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4683 Assert(iGReg < 16);
4684
4685 /*
4686 * We can either just load the low 32-bit of the GPR into a host register
4687 * for the variable, or we can do so via a shadow copy host register. The
4688 * latter will avoid having to reload it if it's being stored later, but
4689 * will waste a host register if it isn't touched again. Since we don't
4690 * know what's going to happen, we choose the latter for now.
4691 */
4692 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4693 kIemNativeGstRegUse_ReadOnly);
4694
4695 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4696 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4697 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4698 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4699
4700 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4701 return off;
4702}
4703
4704
4705#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4706 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4707
4708#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4709 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4710
4711/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4712 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4713DECL_INLINE_THROW(uint32_t)
4714iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4715{
4716 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4717 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4718 Assert(iGReg < 16);
4719
4720 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4721 kIemNativeGstRegUse_ReadOnly);
4722
4723 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4724 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4725 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4726 /** @todo name the register a shadow one already? */
4727 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4728
4729 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4730 return off;
4731}
4732
4733
4734#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4735#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4736 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4737
4738/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4739DECL_INLINE_THROW(uint32_t)
4740iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4741{
4742 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4743 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4744 Assert(iGRegLo < 16 && iGRegHi < 16);
4745
4746 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4747 kIemNativeGstRegUse_ReadOnly);
4748 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4749 kIemNativeGstRegUse_ReadOnly);
4750
4751 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4752 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
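    /* The low GPR goes into 64-bit lane 0 and the high GPR into lane 1 of the 128-bit variable. */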
4753 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4754 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4755
4756 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4757 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4758 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4759 return off;
4760}
4761#endif
4762
4763
4764/*********************************************************************************************************************************
4765* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4766*********************************************************************************************************************************/
4767
4768#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4769 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4770
4771/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4772DECL_INLINE_THROW(uint32_t)
4773iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4774{
4775 Assert(iGRegEx < 20);
4776 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4777 kIemNativeGstRegUse_ForUpdate);
4778#ifdef RT_ARCH_AMD64
4779 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4780
4781 /* To the lowest byte of the register: mov r8, imm8 */
4782 if (iGRegEx < 16)
4783 {
4784 if (idxGstTmpReg >= 8)
4785 pbCodeBuf[off++] = X86_OP_REX_B;
4786 else if (idxGstTmpReg >= 4)
4787 pbCodeBuf[off++] = X86_OP_REX;
4788 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4789 pbCodeBuf[off++] = u8Value;
4790 }
4791 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
4792 else if (idxGstTmpReg < 4)
4793 {
4794 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4795 pbCodeBuf[off++] = u8Value;
4796 }
4797 else
4798 {
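        /* There is no byte-register encoding for bits 15:8 of host registers other than rAX..rBX, so
           rotate the 64-bit register right by 8, write the immediate into the low byte, and rotate back. */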
4799 /* ror reg64, 8 */
4800 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4801 pbCodeBuf[off++] = 0xc1;
4802 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4803 pbCodeBuf[off++] = 8;
4804
4805 /* mov reg8, imm8 */
4806 if (idxGstTmpReg >= 8)
4807 pbCodeBuf[off++] = X86_OP_REX_B;
4808 else if (idxGstTmpReg >= 4)
4809 pbCodeBuf[off++] = X86_OP_REX;
4810 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4811 pbCodeBuf[off++] = u8Value;
4812
4813 /* rol reg64, 8 */
4814 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4815 pbCodeBuf[off++] = 0xc1;
4816 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4817 pbCodeBuf[off++] = 8;
4818 }
4819
4820#elif defined(RT_ARCH_ARM64)
4821 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4822 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4823 if (iGRegEx < 16)
4824 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4825 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4826 else
4827 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4828 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4829 iemNativeRegFreeTmp(pReNative, idxImmReg);
4830
4831#else
4832# error "Port me!"
4833#endif
4834
4835 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4836
4837#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4838 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4839#endif
4840
4841 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4842 return off;
4843}
4844
4845
4846#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4847 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4848
4849/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4850DECL_INLINE_THROW(uint32_t)
4851iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4852{
4853 Assert(iGRegEx < 20);
4854 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4855
4856 /*
4857     * If it's a constant value (unlikely), we treat this as an
4858     * IEM_MC_STORE_GREG_U8_CONST statement.
4859 */
4860 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4861 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4862 { /* likely */ }
4863 else
4864 {
4865 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4866 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4867 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4868 }
4869
4870 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4871 kIemNativeGstRegUse_ForUpdate);
4872 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
4873
4874#ifdef RT_ARCH_AMD64
4875 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4876 if (iGRegEx < 16)
4877 {
4878 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4879 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4880 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4881 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4882 pbCodeBuf[off++] = X86_OP_REX;
4883 pbCodeBuf[off++] = 0x8a;
4884 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4885 }
4886    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4887 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4888 {
4889 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4890 pbCodeBuf[off++] = 0x8a;
4891 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4892 }
4893 else
4894 {
4895 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4896
4897 /* ror reg64, 8 */
4898 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4899 pbCodeBuf[off++] = 0xc1;
4900 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4901 pbCodeBuf[off++] = 8;
4902
4903 /* mov reg8, reg8(r/m) */
4904 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4905 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4906 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4907 pbCodeBuf[off++] = X86_OP_REX;
4908 pbCodeBuf[off++] = 0x8a;
4909 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4910
4911 /* rol reg64, 8 */
4912 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4913 pbCodeBuf[off++] = 0xc1;
4914 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4915 pbCodeBuf[off++] = 8;
4916 }
4917
4918#elif defined(RT_ARCH_ARM64)
4919 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4920 or
4921 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4922 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4923 if (iGRegEx < 16)
4924 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4925 else
4926 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4927
4928#else
4929# error "Port me!"
4930#endif
4931 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4932
4933 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4934
4935#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4936 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4937#endif
4938 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4939 return off;
4940}
4941
4942
4943
4944#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4945 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4946
4947/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4948DECL_INLINE_THROW(uint32_t)
4949iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4950{
4951 Assert(iGReg < 16);
4952 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4953 kIemNativeGstRegUse_ForUpdate);
4954#ifdef RT_ARCH_AMD64
4955 /* mov reg16, imm16 */
4956 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4957 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4958 if (idxGstTmpReg >= 8)
4959 pbCodeBuf[off++] = X86_OP_REX_B;
4960 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4961 pbCodeBuf[off++] = RT_BYTE1(uValue);
4962 pbCodeBuf[off++] = RT_BYTE2(uValue);
4963
4964#elif defined(RT_ARCH_ARM64)
4965 /* movk xdst, #uValue, lsl #0 */
4966 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4967 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4968
4969#else
4970# error "Port me!"
4971#endif
4972
4973 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4974
4975#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4976 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4977#endif
4978 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4979 return off;
4980}
4981
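/* Usage sketch (operands are hypothetical, added for illustration): a statement
 * such as IEM_MC_STORE_GREG_U16_CONST(X86_GREG_xAX, 0x1234) ends up as a single
 * 'mov ax, 0x1234' (66 B8 34 12) on an AMD64 host when the guest register is
 * shadowed in a low host register, or as 'movk x<hst>, #0x1234' on ARM64; either
 * way only bits 15:0 of the shadow register are modified. */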
4982
4983#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4984 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4985
4986/** Emits code for IEM_MC_STORE_GREG_U16. */
4987DECL_INLINE_THROW(uint32_t)
4988iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4989{
4990 Assert(iGReg < 16);
4991 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4992
4993 /*
4994     * If it's a constant value (unlikely), we treat this as an
4995     * IEM_MC_STORE_GREG_U16_CONST statement.
4996 */
4997 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4998 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4999 { /* likely */ }
5000 else
5001 {
5002 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5003 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5004 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
5005 }
5006
5007 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5008 kIemNativeGstRegUse_ForUpdate);
5009
5010#ifdef RT_ARCH_AMD64
5011 /* mov reg16, reg16 or [mem16] */
5012 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5013 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5014 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5015 {
5016 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
5017 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
5018 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
5019 pbCodeBuf[off++] = 0x8b;
5020 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
5021 }
5022 else
5023 {
5024 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
5025 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
5026 if (idxGstTmpReg >= 8)
5027 pbCodeBuf[off++] = X86_OP_REX_R;
5028 pbCodeBuf[off++] = 0x8b;
5029 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
5030 }
5031
5032#elif defined(RT_ARCH_ARM64)
5033 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
5034 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
5035 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5036 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
5037 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5038
5039#else
5040# error "Port me!"
5041#endif
5042
5043 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5044
5045#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5046 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5047#endif
5048 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5049 return off;
5050}
5051
5052
5053#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5054 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5055
5056/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5057DECL_INLINE_THROW(uint32_t)
5058iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5059{
5060 Assert(iGReg < 16);
5061 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5062 kIemNativeGstRegUse_ForFullWrite);
5063 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5064#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5065 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5066#endif
5067 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5068 return off;
5069}
5070
5071
5072#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5073 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5074
5075#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5076 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5077
5078/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5079DECL_INLINE_THROW(uint32_t)
5080iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5081{
5082 Assert(iGReg < 16);
5083 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5084
5085 /*
5086     * If it's a constant value (unlikely), we treat this as an
5087     * IEM_MC_STORE_GREG_U32_CONST statement.
5088 */
5089 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5090 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5091 { /* likely */ }
5092 else
5093 {
5094 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5095 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5096 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5097 }
5098
5099 /*
5100     * For the rest we allocate a guest register for the variable and write
5101     * it to the CPUMCTX structure.
5102 */
5103 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5104#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5105 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5106#else
5107 RT_NOREF(idxVarReg);
5108#endif
5109#ifdef VBOX_STRICT
5110 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5111#endif
5112 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5113 return off;
5114}
5115
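/* Note added for clarity: unlike the 8- and 16-bit stores above, a 32-bit store
 * follows the x86-64 rule that writing a 32-bit GPR zero-extends into bits 63:32.
 * The constant variant therefore allocates the guest shadow register for a full
 * 64-bit write, and the variable variant checks in strict builds
 * (iemNativeEmitTop32BitsClearCheck) that the upper half of the value register is
 * already clear. */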
5116
5117#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5118 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5119
5120/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5121DECL_INLINE_THROW(uint32_t)
5122iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5123{
5124 Assert(iGReg < 16);
5125 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5126 kIemNativeGstRegUse_ForFullWrite);
5127 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5128#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5129 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5130#endif
5131 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5132 return off;
5133}
5134
5135
5136#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5137 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5138
5139#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5140 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5141
5142/** Emits code for IEM_MC_STORE_GREG_U64/IEM_MC_STORE_GREG_I64. */
5143DECL_INLINE_THROW(uint32_t)
5144iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5145{
5146 Assert(iGReg < 16);
5147 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5148
5149 /*
5150     * If it's a constant value (unlikely), we treat this as an
5151     * IEM_MC_STORE_GREG_U64_CONST statement.
5152 */
5153 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5154 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5155 { /* likely */ }
5156 else
5157 {
5158 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5159 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5160 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5161 }
5162
5163 /*
5164     * For the rest we allocate a guest register for the variable and write
5165     * it to the CPUMCTX structure.
5166 */
5167 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5168#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5169 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5170#else
5171 RT_NOREF(idxVarReg);
5172#endif
5173 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5174 return off;
5175}
5176
5177
5178#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5179 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5180
5181/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5182DECL_INLINE_THROW(uint32_t)
5183iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5184{
5185 Assert(iGReg < 16);
5186 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5187 kIemNativeGstRegUse_ForUpdate);
5188 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
5189#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5190 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5191#endif
5192 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5193 return off;
5194}
5195
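/* Note added for clarity: the emitter above clears bits 63:32 of the guest register
 * simply by copying the host shadow register to itself as a 32-bit move; on both
 * AMD64 and ARM64 a 32-bit register-to-register move zeroes the upper half of the
 * destination, so no explicit AND with 0xffffffff is needed. */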
5196
5197#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5198#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5199 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5200
5201/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5202DECL_INLINE_THROW(uint32_t)
5203iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5204{
5205 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5206 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5207 Assert(iGRegLo < 16 && iGRegHi < 16);
5208
5209 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5210 kIemNativeGstRegUse_ForFullWrite);
5211 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5212 kIemNativeGstRegUse_ForFullWrite);
5213
5214 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5215 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5216 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5217 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
5218
5219 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5220 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5221 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5222 return off;
5223}
5224#endif
5225
5226
5227/*********************************************************************************************************************************
5228* General purpose register manipulation (add, sub). *
5229*********************************************************************************************************************************/
5230
5231#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
5232    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
5233
5234/** Emits code for IEM_MC_ADD_GREG_U16. */
5235DECL_INLINE_THROW(uint32_t)
5236iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5237{
5238 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5239 kIemNativeGstRegUse_ForUpdate);
5240
5241#ifdef RT_ARCH_AMD64
5242 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5243 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5244 if (idxGstTmpReg >= 8)
5245 pbCodeBuf[off++] = X86_OP_REX_B;
5246 if (uAddend == 1)
5247 {
5248 pbCodeBuf[off++] = 0xff; /* inc */
5249 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5250 }
5251 else
5252 {
5253 pbCodeBuf[off++] = 0x81;
5254 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5255 pbCodeBuf[off++] = uAddend;
5256 pbCodeBuf[off++] = 0;
5257 }
5258
5259#else
5260 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5261 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5262
5263    /* add tmp, gstgrp, uAddend */
5264 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5265
5266    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
5267 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5268
5269 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5270#endif
5271
5272 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5273
5274#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5275 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5276#endif
5277
5278 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5279 return off;
5280}
5281
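/* Illustrative note (added, not from the original source): only bits 15:0 of the
 * guest register may change here, so the AMD64 path uses an operand-size prefixed
 * 'inc r16' / 'add r16, imm16', while the ARM64 path adds into a scratch register
 * and then merges the low 16 bits back, conceptually:
 *     add  w<tmp>, w<gst>, #uAddend
 *     bfi  w<gst>, w<tmp>, #0, #16
 */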
5282
5283#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5284 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5285
5286#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5287 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5288
5289/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5290DECL_INLINE_THROW(uint32_t)
5291iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5292{
5293 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5294 kIemNativeGstRegUse_ForUpdate);
5295
5296#ifdef RT_ARCH_AMD64
5297 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5298 if (f64Bit)
5299 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5300 else if (idxGstTmpReg >= 8)
5301 pbCodeBuf[off++] = X86_OP_REX_B;
5302 if (uAddend == 1)
5303 {
5304 pbCodeBuf[off++] = 0xff; /* inc */
5305 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5306 }
5307 else if (uAddend < 128)
5308 {
5309 pbCodeBuf[off++] = 0x83; /* add */
5310 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5311 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5312 }
5313 else
5314 {
5315 pbCodeBuf[off++] = 0x81; /* add */
5316 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5317 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5318 pbCodeBuf[off++] = 0;
5319 pbCodeBuf[off++] = 0;
5320 pbCodeBuf[off++] = 0;
5321 }
5322
5323#else
5324    /* add gstgrp, gstgrp, uAddend */
5325 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5326 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5327
5328#endif
5329
5330 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5331
5332#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5333 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5334#endif
5335
5336 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5337 return off;
5338}
5339
5340
5341
5342#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5343 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5344
5345/** Emits code for IEM_MC_SUB_GREG_U16. */
5346DECL_INLINE_THROW(uint32_t)
5347iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5348{
5349 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5350 kIemNativeGstRegUse_ForUpdate);
5351
5352#ifdef RT_ARCH_AMD64
5353 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5354 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5355 if (idxGstTmpReg >= 8)
5356 pbCodeBuf[off++] = X86_OP_REX_B;
5357 if (uSubtrahend == 1)
5358 {
5359 pbCodeBuf[off++] = 0xff; /* dec */
5360 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5361 }
5362 else
5363 {
5364 pbCodeBuf[off++] = 0x81;
5365 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5366 pbCodeBuf[off++] = uSubtrahend;
5367 pbCodeBuf[off++] = 0;
5368 }
5369
5370#else
5371 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5372 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5373
5374 /* sub tmp, gstgrp, uSubtrahend */
5375 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5376
5377 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5378    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
5379
5380 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5381#endif
5382
5383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5384
5385#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5386 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5387#endif
5388
5389 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5390 return off;
5391}
5392
5393
5394#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5395 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5396
5397#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5398 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5399
5400/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5401DECL_INLINE_THROW(uint32_t)
5402iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5403{
5404 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5405 kIemNativeGstRegUse_ForUpdate);
5406
5407#ifdef RT_ARCH_AMD64
5408 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5409 if (f64Bit)
5410 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5411 else if (idxGstTmpReg >= 8)
5412 pbCodeBuf[off++] = X86_OP_REX_B;
5413 if (uSubtrahend == 1)
5414 {
5415 pbCodeBuf[off++] = 0xff; /* dec */
5416 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5417 }
5418 else if (uSubtrahend < 128)
5419 {
5420 pbCodeBuf[off++] = 0x83; /* sub */
5421 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5422 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5423 }
5424 else
5425 {
5426 pbCodeBuf[off++] = 0x81; /* sub */
5427 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5428 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5429 pbCodeBuf[off++] = 0;
5430 pbCodeBuf[off++] = 0;
5431 pbCodeBuf[off++] = 0;
5432 }
5433
5434#else
5435 /* sub tmp, gstgrp, uSubtrahend */
5436 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5437 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5438
5439#endif
5440
5441 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5442
5443#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5444 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5445#endif
5446
5447 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5448 return off;
5449}
5450
5451
5452#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5453 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5454
5455#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5456 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5457
5458#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5459 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5460
5461#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5462 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5463
5464/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5465DECL_INLINE_THROW(uint32_t)
5466iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5467{
5468#ifdef VBOX_STRICT
5469 switch (cbMask)
5470 {
5471 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5472 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5473 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5474 case sizeof(uint64_t): break;
5475 default: AssertFailedBreak();
5476 }
5477#endif
5478
5479 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5480 kIemNativeGstRegUse_ForUpdate);
5481
5482 switch (cbMask)
5483 {
5484 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5485 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5486 break;
5487 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5488 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5489 break;
5490 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5491 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5492 break;
5493 case sizeof(uint64_t):
5494 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5495 break;
5496 default: AssertFailedBreak();
5497 }
5498
5499 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5500
5501#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5502 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5503#endif
5504
5505 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5506 return off;
5507}
5508
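/* Worked example (mask value is hypothetical, added for illustration): for
 * IEM_MC_AND_GREG_U8(a_iGReg, 0x0f) the emitter widens the mask to
 * 0xffffffffffffff0f, so the 64-bit AND can only clear bits inside the low byte of
 * the guest register; the 32-bit case instead uses a 32-bit AND on purpose, so
 * bits 63:32 end up zero in accordance with the x86-64 zero-extension rule for
 * 32-bit destinations. */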
5509
5510#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5511 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5512
5513#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5514 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5515
5516#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5517 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5518
5519#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5520 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5521
5522/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5523DECL_INLINE_THROW(uint32_t)
5524iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5525{
5526#ifdef VBOX_STRICT
5527 switch (cbMask)
5528 {
5529 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5530 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5531 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5532 case sizeof(uint64_t): break;
5533 default: AssertFailedBreak();
5534 }
5535#endif
5536
5537 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5538 kIemNativeGstRegUse_ForUpdate);
5539
5540 switch (cbMask)
5541 {
5542 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5543 case sizeof(uint16_t):
5544 case sizeof(uint64_t):
5545 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5546 break;
5547 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5548 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5549 break;
5550 default: AssertFailedBreak();
5551 }
5552
5553 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5554
5555#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5556 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5557#endif
5558
5559 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5560 return off;
5561}
5562
5563
5564/*********************************************************************************************************************************
5565* Local/Argument variable manipulation (add, sub, and, or). *
5566*********************************************************************************************************************************/
5567
5568#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5569 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5570
5571#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5572 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5573
5574#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5575 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5576
5577#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5578 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5579
5580
5581#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5582 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5583
5584#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5585 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5586
5587#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5588 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5589
5590/** Emits code for AND'ing a local and a constant value. */
5591DECL_INLINE_THROW(uint32_t)
5592iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5593{
5594#ifdef VBOX_STRICT
5595 switch (cbMask)
5596 {
5597 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5598 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5599 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5600 case sizeof(uint64_t): break;
5601 default: AssertFailedBreak();
5602 }
5603#endif
5604
5605 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5606 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5607
5608 if (cbMask <= sizeof(uint32_t))
5609 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5610 else
5611 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5612
5613 iemNativeVarRegisterRelease(pReNative, idxVar);
5614 return off;
5615}
5616
5617
5618#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5619 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5620
5621#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5622 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5623
5624#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5625 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5626
5627#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5628 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5629
5630/** Emits code for OR'ing a local and a constant value. */
5631DECL_INLINE_THROW(uint32_t)
5632iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5633{
5634#ifdef VBOX_STRICT
5635 switch (cbMask)
5636 {
5637 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5638 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5639 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5640 case sizeof(uint64_t): break;
5641 default: AssertFailedBreak();
5642 }
5643#endif
5644
5645 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5646 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5647
5648 if (cbMask <= sizeof(uint32_t))
5649 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5650 else
5651 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5652
5653 iemNativeVarRegisterRelease(pReNative, idxVar);
5654 return off;
5655}
5656
5657
5658#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5659 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5660
5661#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5662 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5663
5664#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5665 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5666
5667/** Emits code for reversing the byte order in a local value. */
5668DECL_INLINE_THROW(uint32_t)
5669iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5670{
5671 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5672 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5673
5674 switch (cbLocal)
5675 {
5676 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5677 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5678 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5679 default: AssertFailedBreak();
5680 }
5681
5682 iemNativeVarRegisterRelease(pReNative, idxVar);
5683 return off;
5684}
5685
5686
5687#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5688 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5689
5690#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5691 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5692
5693#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5694 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5695
5696/** Emits code for shifting left a local value. */
5697DECL_INLINE_THROW(uint32_t)
5698iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5699{
5700#ifdef VBOX_STRICT
5701 switch (cbLocal)
5702 {
5703 case sizeof(uint8_t): Assert(cShift < 8); break;
5704 case sizeof(uint16_t): Assert(cShift < 16); break;
5705 case sizeof(uint32_t): Assert(cShift < 32); break;
5706 case sizeof(uint64_t): Assert(cShift < 64); break;
5707 default: AssertFailedBreak();
5708 }
5709#endif
5710
5711 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5712 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5713
5714 if (cbLocal <= sizeof(uint32_t))
5715 {
5716 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5717 if (cbLocal < sizeof(uint32_t))
5718 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5719 cbLocal == sizeof(uint16_t)
5720 ? UINT32_C(0xffff)
5721 : UINT32_C(0xff));
5722 }
5723 else
5724 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5725
5726 iemNativeVarRegisterRelease(pReNative, idxVar);
5727 return off;
5728}
5729
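/* Note added for clarity: the host only provides 32- and 64-bit shifts, so an 8- or
 * 16-bit local is shifted as a 32-bit value and then masked back to its declared
 * width; e.g. shifting a 16-bit local left by 4 is emitted, conceptually, as
 * 'shl reg32, 4' followed by 'and reg32, 0xffff'. */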
5730
5731#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5732 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5733
5734#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5735 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5736
5737#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5738 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5739
5740/** Emits code for arithmetically shifting a local value to the right. */
5741DECL_INLINE_THROW(uint32_t)
5742iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5743{
5744#ifdef VBOX_STRICT
5745 switch (cbLocal)
5746 {
5747 case sizeof(int8_t): Assert(cShift < 8); break;
5748 case sizeof(int16_t): Assert(cShift < 16); break;
5749 case sizeof(int32_t): Assert(cShift < 32); break;
5750 case sizeof(int64_t): Assert(cShift < 64); break;
5751 default: AssertFailedBreak();
5752 }
5753#endif
5754
5755 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5756 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5757
5758 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5759 if (cbLocal == sizeof(uint8_t))
5760 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5761 else if (cbLocal == sizeof(uint16_t))
5762 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5763
5764 if (cbLocal <= sizeof(uint32_t))
5765 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5766 else
5767 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5768
5769 iemNativeVarRegisterRelease(pReNative, idxVar);
5770 return off;
5771}
5772
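/* Note added for clarity: the arithmetic right shift above first sign-extends an
 * 8- or 16-bit local to 32 bits so the host shift sees the correct sign bit;
 * without that step a negative int16_t would be shifted as if it were a positive
 * 32-bit value. */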
5773
5774#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5775 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5776
5777#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5778 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5779
5780#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5781 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5782
5783/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5784DECL_INLINE_THROW(uint32_t)
5785iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5786{
5787 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5788 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5789 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5790 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5791
5792 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5793 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquireInited(pReNative, idxVarEffAddr, &off);
5794
5795 /* Need to sign extend the value. */
5796 if (cbLocal <= sizeof(uint32_t))
5797 {
5798/** @todo ARM64: In case of boredom, the extended add instruction can do the
5799 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5800 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5801
5802 switch (cbLocal)
5803 {
5804 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5805 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5806 default: AssertFailed();
5807 }
5808
5809 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5810 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5811 }
5812 else
5813 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5814
5815 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5816 iemNativeVarRegisterRelease(pReNative, idxVar);
5817 return off;
5818}
5819
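/* Sketch of the emitted sequence (added for illustration, register names are
 * placeholders): for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR the displacement is first
 * sign-extended into a scratch register and then added to the effective address,
 * conceptually:
 *     movsx  tmp, value16       ; sign-extend the local
 *     add    effaddr, tmp
 * A 64-bit local needs no extension and is added directly. */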
5820
5821
5822/*********************************************************************************************************************************
5823* EFLAGS *
5824*********************************************************************************************************************************/
5825
5826#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5827# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5828#else
5829# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5830 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5831
5832DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5833{
5834 if (fEflOutput)
5835 {
5836 PVMCPUCC const pVCpu = pReNative->pVCpu;
5837# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5838 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5839 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5840 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5841# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5842 if (fEflOutput & (a_fEfl)) \
5843 { \
5844 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5845 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5846 else \
5847 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5848 } else do { } while (0)
5849# else
5850 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5851 IEMLIVENESSBIT const LivenessClobbered = { IEMLIVENESS_STATE_GET_WILL_BE_CLOBBERED_SET(pLivenessEntry) };
5852 IEMLIVENESSBIT const LivenessDelayable = { IEMLIVENESS_STATE_GET_CAN_BE_POSTPONED_SET(pLivenessEntry) };
5853# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5854 if (fEflOutput & (a_fEfl)) \
5855 { \
5856 if (LivenessClobbered.a_fLivenessMember) \
5857 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5858 else if (LivenessDelayable.a_fLivenessMember) \
5859 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5860 else \
5861 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5862 } else do { } while (0)
5863# endif
5864 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5865 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5866 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5867 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5868 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5869 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5870 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5871# undef CHECK_FLAG_AND_UPDATE_STATS
5872 }
5873 RT_NOREF(fEflInput);
5874}
5875#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5876
5877#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5878#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5879 off = iemNativeEmitFetchEFlags<a_fEflInput, iemNativeEflagsToLivenessMask<a_fEflInput>(),\
5880 a_fEflOutput, iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags)
5881
5882/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5883template<uint32_t const a_fEflInput, uint64_t const a_fLivenessEflInput,
5884 uint32_t const a_fEflOutput, uint64_t const a_fLivenessEflOutput>
5885DECL_INLINE_THROW(uint32_t)
5886iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
5887{
5888 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5889 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5890 /** @todo fix NOT AssertCompile(a_fEflInput != 0 || a_fEflOutput != 0); */
5891
5892#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5893# ifdef VBOX_STRICT
5894 if ( pReNative->idxCurCall != 0
5895 && (a_fEflInput != 0 || a_fEflOutput != 0) /* for NOT these are both zero for now. */)
5896 {
5897 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5898 RT_CONSTEXPR uint32_t const fBoth = a_fEflInput | a_fEflOutput;
5899# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5900 AssertMsg( !(fBoth & (a_fElfConst)) \
5901 || (!(a_fEflInput & (a_fElfConst)) \
5902 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5903 : !(a_fEflOutput & (a_fElfConst)) \
5904 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5905 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5906 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5907 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5908 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5909 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5910 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5911 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5912 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5913 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5914# undef ASSERT_ONE_EFL
5915 }
5916# endif
5917#endif
5918
5919 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, a_fEflInput);
5920 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, a_fEflInput);
5921
5922 /** @todo This could be prettier...*/
5923 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5924 * problematic, but I'll try tackle that soon (@bugref{10720}). */
5925 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5926 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5927 Assert(pVar->idxReg == UINT8_MAX);
5928 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5929 {
5930 /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fOutput is
5931 * zero, but since iemNativeVarRegisterSet clears the shadowing,
5932         * that's counterproductive... */
5933 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
5934 a_fLivenessEflInput, a_fLivenessEflOutput);
5935 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5936 }
5937 else
5938 {
5939 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5940 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off);
5941 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(pReNative, &off,
5942 a_fLivenessEflInput, a_fLivenessEflOutput);
5943 if (idxGstReg != UINT8_MAX)
5944 {
5945 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5946 iemNativeRegFreeTmp(pReNative, idxGstReg);
5947 }
5948 else
5949 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxVarReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5950 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5951 }
5952 return off;
5953}
5954
5955
5956
5957/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5958 * start using it with custom native code emission (inlining assembly
5959 * instruction helpers). */
5960#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5961#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5962 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5963 off = iemNativeEmitCommitEFlags<true /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5964 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5965 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5966
5967#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5968#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5969 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5970 off = iemNativeEmitCommitEFlags<false /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5971 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5972 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5973
5974/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5975template<bool const a_fUpdateSkippingAndPostponing, uint32_t const a_fEflOutput,
5976 uint64_t const a_fLivenessEflInputBits, uint64_t const a_fLivenessEflOutputBits>
5977DECL_INLINE_THROW(uint32_t)
5978iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflInput)
5979{
5980 uint8_t const idxReg = iemNativeVarRegisterAcquireInited(pReNative, idxVarEFlags, &off);
5981 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5982
5983#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5984# ifdef VBOX_STRICT
5985 if ( pReNative->idxCurCall != 0
5986 && (a_fLivenessEflInputBits != 0 || a_fLivenessEflOutputBits != 0) /* for NOT these are both zero for now. */)
5987 {
5988 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5989# define ASSERT_ONE_EFL(a_idxField) \
5990 if RT_CONSTEXPR_IF(((a_fLivenessEflInputBits | a_fLivenessEflOutputBits) & RT_BIT_64(a_idxField)) != 0) \
5991 AssertMsg(!(a_fLivenessEflInputBits & RT_BIT_64(a_idxField)) \
5992 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5993 : !(a_fLivenessEflOutputBits & RT_BIT_64(a_idxField)) \
5994 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5995 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)), \
5996 ("%s - %u\n", #a_idxField, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5997 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
5998 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
5999 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
6000 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
6001 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
6002 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
6003 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
6004# undef ASSERT_ONE_EFL
6005 }
6006# endif
6007#endif
6008
6009#ifdef VBOX_STRICT
6010 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
6011 uint32_t offFixup = off;
6012 off = iemNativeEmitJnzToFixed(pReNative, off, off);
6013 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
6014 iemNativeFixupFixedJump(pReNative, offFixup, off);
6015
6016 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
6017 offFixup = off;
6018 off = iemNativeEmitJzToFixed(pReNative, off, off);
6019 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
6020 iemNativeFixupFixedJump(pReNative, offFixup, off);
6021
6022 /** @todo validate that only bits in the a_fEflOutput mask changed. */
6023#endif
6024
6025#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6026 if RT_CONSTEXPR_IF(a_fUpdateSkippingAndPostponing)
6027 {
6028        Assert(!(pReNative->fSkippingEFlags & fEflInput)); RT_NOREF(fEflInput);
6029 if (pReNative->fSkippingEFlags)
6030 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitCommitEFlags)\n",
6031 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~(a_fEflOutput & X86_EFL_STATUS_BITS) ));
6032 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6033 pReNative->fSkippingEFlags = 0;
6034 else
6035 pReNative->fSkippingEFlags &= ~(a_fEflOutput & X86_EFL_STATUS_BITS);
6036# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6037 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6038 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6039 else
6040 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6041 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6042# endif
6043 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6044 }
6045#endif
6046
6047 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
6048 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
6049 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
6050 return off;
6051}
6052
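/* Note added for clarity: the VBOX_STRICT block above emits sanity checks into the
 * translated code itself - a breakpoint with magic value 0x2001 fires if the
 * always-one EFLAGS bit (X86_EFL_RA1_MASK) is clear and 0x2002 fires if any
 * reserved-as-zero bit is set - so corrupted flag values are caught before they
 * are committed to cpum.GstCtx.eflags. */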
6053
6054typedef enum IEMNATIVEMITEFLOP
6055{
6056 kIemNativeEmitEflOp_Set,
6057 kIemNativeEmitEflOp_Clear,
6058 kIemNativeEmitEflOp_Flip
6059} IEMNATIVEMITEFLOP;
6060
6061#define IEM_MC_SET_EFL_BIT(a_fBit) \
6062 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6063
6064#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
6065 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6066
6067#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
6068 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6069
6070/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
6071template<IEMNATIVEMITEFLOP const a_enmOp, uint32_t const a_fEflBit, uint64_t const a_fLivenessEflBit>
6072DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6073{
6074 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
6075 a_enmOp == kIemNativeEmitEflOp_Flip
6076 ? a_fLivenessEflBit : 0,
6077 a_fLivenessEflBit);
6078
6079 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6080 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6081 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6082 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6083 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~a_fEflBit);
6084 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6085 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6086 else
6087 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6088 || a_enmOp == kIemNativeEmitEflOp_Clear
6089 || a_enmOp == kIemNativeEmitEflOp_Flip);
6090
6091 /** @todo No delayed writeback for EFLAGS right now. */
6092 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6093
6094 /* Free but don't flush the EFLAGS register. */
6095 iemNativeRegFreeTmp(pReNative, idxEflReg);
6096
6097#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6098 /* Clear the bit in the skipped mask if we're clobbering and it's a status bit. */
6099 if RT_CONSTEXPR_IF( (a_enmOp == kIemNativeEmitEflOp_Set || a_enmOp == kIemNativeEmitEflOp_Clear)
6100 && (a_fEflBit & X86_EFL_STATUS_BITS))
6101 {
6102 if (pReNative->fSkippingEFlags)
6103 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitModifyEFlagsBit)\n",
6104 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflBit ));
6105 pReNative->fSkippingEFlags &= ~a_fEflBit;
6106# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6107 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~a_fEflBit, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6108# endif
6109 }
6110#endif
6111
6112 return off;
6113}
6114
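/* Usage sketch (added for illustration): IEM_MC_SET_EFL_BIT(X86_EFL_CF) instantiates
 * the template above with a_enmOp = kIemNativeEmitEflOp_Set; the 'if constexpr'
 * chain then reduces the body to an 'or reg32, X86_EFL_CF' on the EFLAGS shadow
 * register followed by the store back to cpum.GstCtx.eflags, and the clear/flip
 * variants select the AND/XOR emitters the same way. */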
6115
6116/*********************************************************************************************************************************
6117* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6118*********************************************************************************************************************************/
6119
6120#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6121 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6122
6123#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6124 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6125
6126#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6127 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6128
6129
6130/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6131 * IEM_MC_FETCH_SREG_ZX_U64. */
6132DECL_INLINE_THROW(uint32_t)
6133iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6134{
6135 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6136 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6137 Assert(iSReg < X86_SREG_COUNT);
6138
6139 /*
6140     * For now, we will not create a shadow copy of a selector. The rationale
6141     * is that since we do not recompile the popping and loading of segment
6142     * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
6143 * pushing and moving to registers, there is only a small chance that the
6144 * shadow copy will be accessed again before the register is reloaded. One
6145     * scenario would be nested calls in 16-bit code, but I doubt it's worth
6146 * the extra register pressure atm.
6147 *
6148 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6149 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
6150     * store scenario covered at present (r160730).
6151 */
6152 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6153 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6154 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6155 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6156 return off;
6157}
6158
6159
6160
6161/*********************************************************************************************************************************
6162* Register references. *
6163*********************************************************************************************************************************/
6164
6165#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6166 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6167
6168#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6169 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6170
6171/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6172DECL_INLINE_THROW(uint32_t)
6173iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6174{
6175 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6176 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6177 Assert(iGRegEx < 20);
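    /* iGRegEx values 16 thru 19 select the high byte registers (AH, CH, DH and BH). */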
6178
6179 if (iGRegEx < 16)
6180 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6181 else
6182 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6183
6184 /* If we've delayed writing back the register value, flush it now. */
6185 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6186
6187 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6188 if (!fConst)
6189 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6190
6191 return off;
6192}
6193
6194#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6195 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6196
6197#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6198 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6199
6200#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6201 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6202
6203#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6204 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6205
6206#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6207 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6208
6209#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6210 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6211
6212#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6213 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6214
6215#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6216 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6217
6218#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6219 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6220
6221#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6222 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6223
6224/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6225DECL_INLINE_THROW(uint32_t)
6226iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6227{
6228 Assert(iGReg < 16);
6229 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6230 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6231
6232 /* If we've delayed writing back the register value, flush it now. */
6233 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
6234
6235 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6236 if (!fConst)
6237 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6238
6239 return off;
6240}
6241
6242
6243#undef IEM_MC_REF_EFLAGS /* should not be used. */
6244#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6245 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6246 off = iemNativeEmitRefEFlags<a_fEflOutput>(pReNative, off, a_pEFlags, a_fEflInput)
6247
6248/** Handles IEM_MC_REF_EFLAGS. */
6249template<uint32_t const a_fEflOutput>
6250DECL_INLINE_THROW(uint32_t)
6251iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput)
6252{
6253 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6254 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6255
6256#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6257 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fEflInput);
6258 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6259 if (pReNative->fSkippingEFlags)
6260 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitRefEFlags)\n",
6261 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflOutput ));
6262 pReNative->fSkippingEFlags &= ~a_fEflOutput;
6263# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6264
6265 /* Updating the skipping according to the outputs is a little early, but
6266 we don't have any other hooks for references atm. */
6267 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6268 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6269 else if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) != 0)
6270 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6271 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6272# endif
6273
6274 /* This ASSUMES that EFLAGS references are not taken before use. */
6275 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6276
6277#endif
6278 RT_NOREF(fEflInput);
6279
6280 /* If we've delayed writing back the register value, flush it now. */
6281 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
6282
6283 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6284 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6285
6286 return off;
6287}
6288
6289
6290/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6291 * different code from the threaded recompiler, maybe it would be helpful. For now
6292 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6293#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6294
6295
6296#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6297 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6298
6299#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6300 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6301
6302#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6303 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6304
6305#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6306 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6307
6308#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6309/* Just being paranoid here. */
6310# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6311AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6312AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6313AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6314AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6315# endif
6316AssertCompileMemberOffset(X86XMMREG, au64, 0);
6317AssertCompileMemberOffset(X86XMMREG, au32, 0);
6318AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6319AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6320
6321# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6322 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6323# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6324 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6325# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6326 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6327# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6328 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6329#endif
6330
6331/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6332DECL_INLINE_THROW(uint32_t)
6333iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6334{
6335 Assert(iXReg < 16);
6336 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6337 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6338
6339 /* If we've delayed writing back the register value, flush it now. */
6340 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
6341
6342#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6343 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6344 if (!fConst)
6345 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6346#else
6347 RT_NOREF(fConst);
6348#endif
6349
6350 return off;
6351}
6352
6353
6354
6355/*********************************************************************************************************************************
6356* Effective Address Calculation *
6357*********************************************************************************************************************************/
6358#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6359 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6360
6361/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6362 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6363DECL_INLINE_THROW(uint32_t)
6364iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6365 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6366{
6367 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6368
6369 /*
6370 * Handle the disp16 form with no registers first.
6371 *
6372 * Convert to an immediate value, as that'll delay the register allocation
6373 * and assignment till the memory access / call / whatever and we can use
6374 * a more appropriate register (or none at all).
6375 */
6376 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6377 {
6378 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6379 return off;
6380 }
6381
6382    /* Determine the displacement. */
6383 uint16_t u16EffAddr;
6384 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6385 {
6386 case 0: u16EffAddr = 0; break;
6387 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6388 case 2: u16EffAddr = u16Disp; break;
6389 default: AssertFailedStmt(u16EffAddr = 0);
6390 }
6391
6392 /* Determine the registers involved. */
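    /* (The eight 16-bit r/m encodings map to BX+SI, BX+DI, BP+SI, BP+DI, SI, DI, BP and BX, in that order.) */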
6393 uint8_t idxGstRegBase;
6394 uint8_t idxGstRegIndex;
6395 switch (bRm & X86_MODRM_RM_MASK)
6396 {
6397 case 0:
6398 idxGstRegBase = X86_GREG_xBX;
6399 idxGstRegIndex = X86_GREG_xSI;
6400 break;
6401 case 1:
6402 idxGstRegBase = X86_GREG_xBX;
6403 idxGstRegIndex = X86_GREG_xDI;
6404 break;
6405 case 2:
6406 idxGstRegBase = X86_GREG_xBP;
6407 idxGstRegIndex = X86_GREG_xSI;
6408 break;
6409 case 3:
6410 idxGstRegBase = X86_GREG_xBP;
6411 idxGstRegIndex = X86_GREG_xDI;
6412 break;
6413 case 4:
6414 idxGstRegBase = X86_GREG_xSI;
6415 idxGstRegIndex = UINT8_MAX;
6416 break;
6417 case 5:
6418 idxGstRegBase = X86_GREG_xDI;
6419 idxGstRegIndex = UINT8_MAX;
6420 break;
6421 case 6:
6422 idxGstRegBase = X86_GREG_xBP;
6423 idxGstRegIndex = UINT8_MAX;
6424 break;
6425#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6426 default:
6427#endif
6428 case 7:
6429 idxGstRegBase = X86_GREG_xBX;
6430 idxGstRegIndex = UINT8_MAX;
6431 break;
6432 }
6433
6434 /*
6435 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6436 */
6437 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6438 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6439 kIemNativeGstRegUse_ReadOnly);
6440 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6441 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6442 kIemNativeGstRegUse_ReadOnly)
6443 : UINT8_MAX;
6444#ifdef RT_ARCH_AMD64
6445 if (idxRegIndex == UINT8_MAX)
6446 {
6447 if (u16EffAddr == 0)
6448 {
6449            /* movzx ret, base */
6450 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6451 }
6452 else
6453 {
6454 /* lea ret32, [base64 + disp32] */
6455 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6456 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6457 if (idxRegRet >= 8 || idxRegBase >= 8)
6458 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6459 pbCodeBuf[off++] = 0x8d;
6460 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6461 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6462 else
6463 {
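                /* r12 shares the low three ModR/M bits with rSP and therefore needs an explicit SIB byte. */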
6464 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6465 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6466 }
6467 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6468 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6469 pbCodeBuf[off++] = 0;
6470 pbCodeBuf[off++] = 0;
6471 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6472
6473 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6474 }
6475 }
6476 else
6477 {
6478 /* lea ret32, [index64 + base64 (+ disp32)] */
6479 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6480 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6481 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6482 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6483 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6484 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6485 pbCodeBuf[off++] = 0x8d;
6486 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
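        /* (mod=0 with SIB base rBP/r13 means 'disp32, no base', so those bases must always encode a displacement.) */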
6487 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6488 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6489 if (bMod == X86_MOD_MEM4)
6490 {
6491 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6492 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6493 pbCodeBuf[off++] = 0;
6494 pbCodeBuf[off++] = 0;
6495 }
6496 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6497 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6498 }
6499
6500#elif defined(RT_ARCH_ARM64)
6501 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
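    /* Note: the trailing UXTH zero-extends the result to 16 bits, giving the 16-bit address wrap-around. */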
6502 if (u16EffAddr == 0)
6503 {
6504 if (idxRegIndex == UINT8_MAX)
6505 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6506 else
6507 {
6508 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6509 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6510 }
6511 }
6512 else
6513 {
6514 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6515 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6516 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6517 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6518 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6519 else
6520 {
6521 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6522 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6523 }
6524 if (idxRegIndex != UINT8_MAX)
6525 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6526 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6527 }
6528
6529#else
6530# error "port me"
6531#endif
6532
6533 if (idxRegIndex != UINT8_MAX)
6534 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6535 iemNativeRegFreeTmp(pReNative, idxRegBase);
6536 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6537 return off;
6538}
6539
6540
6541#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6542 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6543
6544/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6545 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6546DECL_INLINE_THROW(uint32_t)
6547iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6548 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6549{
6550 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6551
6552 /*
6553 * Handle the disp32 form with no registers first.
6554 *
6555 * Convert to an immediate value, as that'll delay the register allocation
6556 * and assignment till the memory access / call / whatever and we can use
6557 * a more appropriate register (or none at all).
6558 */
6559 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6560 {
6561 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6562 return off;
6563 }
6564
6565    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
6566 uint32_t u32EffAddr = 0;
6567 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6568 {
6569 case 0: break;
6570 case 1: u32EffAddr = (int8_t)u32Disp; break;
6571 case 2: u32EffAddr = u32Disp; break;
6572 default: AssertFailed();
6573 }
6574
6575 /* Get the register (or SIB) value. */
6576 uint8_t idxGstRegBase = UINT8_MAX;
6577 uint8_t idxGstRegIndex = UINT8_MAX;
6578 uint8_t cShiftIndex = 0;
6579 switch (bRm & X86_MODRM_RM_MASK)
6580 {
6581 case 0: idxGstRegBase = X86_GREG_xAX; break;
6582 case 1: idxGstRegBase = X86_GREG_xCX; break;
6583 case 2: idxGstRegBase = X86_GREG_xDX; break;
6584 case 3: idxGstRegBase = X86_GREG_xBX; break;
6585 case 4: /* SIB */
6586 {
6587            /* index w/ scaling. */
6588 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6589 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6590 {
6591 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6592 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6593 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6594 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6595 case 4: cShiftIndex = 0; /*no index*/ break;
6596 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6597 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6598 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6599 }
6600
6601 /* base */
6602 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6603 {
6604 case 0: idxGstRegBase = X86_GREG_xAX; break;
6605 case 1: idxGstRegBase = X86_GREG_xCX; break;
6606 case 2: idxGstRegBase = X86_GREG_xDX; break;
6607 case 3: idxGstRegBase = X86_GREG_xBX; break;
6608 case 4:
6609 idxGstRegBase = X86_GREG_xSP;
6610 u32EffAddr += uSibAndRspOffset >> 8;
6611 break;
6612 case 5:
6613 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6614 idxGstRegBase = X86_GREG_xBP;
6615 else
6616 {
6617 Assert(u32EffAddr == 0);
6618 u32EffAddr = u32Disp;
6619 }
6620 break;
6621 case 6: idxGstRegBase = X86_GREG_xSI; break;
6622 case 7: idxGstRegBase = X86_GREG_xDI; break;
6623 }
6624 break;
6625 }
6626 case 5: idxGstRegBase = X86_GREG_xBP; break;
6627 case 6: idxGstRegBase = X86_GREG_xSI; break;
6628 case 7: idxGstRegBase = X86_GREG_xDI; break;
6629 }
6630
6631 /*
6632 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6633 * the start of the function.
6634 */
6635 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6636 {
6637 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6638 return off;
6639 }
6640
6641 /*
6642 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6643 */
6644 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6645 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6646 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6647 kIemNativeGstRegUse_ReadOnly);
6648 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6649 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6650 kIemNativeGstRegUse_ReadOnly);
6651
6652 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6653 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6654 {
6655 idxRegBase = idxRegIndex;
6656 idxRegIndex = UINT8_MAX;
6657 }
6658
6659#ifdef RT_ARCH_AMD64
6660 if (idxRegIndex == UINT8_MAX)
6661 {
6662 if (u32EffAddr == 0)
6663 {
6664 /* mov ret, base */
6665 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6666 }
6667 else
6668 {
6669 /* lea ret32, [base64 + disp32] */
6670 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6671 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6672 if (idxRegRet >= 8 || idxRegBase >= 8)
6673 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6674 pbCodeBuf[off++] = 0x8d;
6675 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6676 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6677 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6678 else
6679 {
6680 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6681 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6682 }
6683 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6684 if (bMod == X86_MOD_MEM4)
6685 {
6686 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6687 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6688 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6689 }
6690 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6691 }
6692 }
6693 else
6694 {
6695 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6696 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6697 if (idxRegBase == UINT8_MAX)
6698 {
6699 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6700 if (idxRegRet >= 8 || idxRegIndex >= 8)
6701 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6702 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6703 pbCodeBuf[off++] = 0x8d;
6704 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6705 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6706 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6707 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6708 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6709 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6710 }
6711 else
6712 {
6713 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6714 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6715 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6716 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6717 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6718 pbCodeBuf[off++] = 0x8d;
6719 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6720 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6721 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6722 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6723 if (bMod != X86_MOD_MEM0)
6724 {
6725 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6726 if (bMod == X86_MOD_MEM4)
6727 {
6728 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6729 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6730 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6731 }
6732 }
6733 }
6734 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6735 }
6736
6737#elif defined(RT_ARCH_ARM64)
6738 if (u32EffAddr == 0)
6739 {
6740 if (idxRegIndex == UINT8_MAX)
6741 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6742 else if (idxRegBase == UINT8_MAX)
6743 {
6744 if (cShiftIndex == 0)
6745 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6746 else
6747 {
6748 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6749 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6750 }
6751 }
6752 else
6753 {
6754 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6755 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6756 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6757 }
6758 }
6759 else
6760 {
6761 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6762 {
6763 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6764 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6765 }
6766 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6767 {
6768 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6769 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6770 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6771 }
6772 else
6773 {
6774 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6775 if (idxRegBase != UINT8_MAX)
6776 {
6777 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6778 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6779 }
6780 }
6781 if (idxRegIndex != UINT8_MAX)
6782 {
6783 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6784 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6785 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6786 }
6787 }
6788
6789#else
6790# error "port me"
6791#endif
6792
6793 if (idxRegIndex != UINT8_MAX)
6794 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6795 if (idxRegBase != UINT8_MAX)
6796 iemNativeRegFreeTmp(pReNative, idxRegBase);
6797 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6798 return off;
6799}
6800
6801
6802#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6803 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6804 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6805
6806#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6807 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6808 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6809
6810#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6811 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6812 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6813
6814/**
6815 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6816 *
6817 * @returns New off.
6818 * @param   pReNative           The native recompile state.
6819 * @param   off                 The current code buffer offset.
6820 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6821 * bit 4 to REX.X. The two bits are part of the
6822 * REG sub-field, which isn't needed in this
6823 * function.
6824 * @param uSibAndRspOffset Two parts:
6825 * - The first 8 bits make up the SIB byte.
6826 * - The next 8 bits are the fixed RSP/ESP offset
6827 * in case of a pop [xSP].
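 *                              This is presumably the number of bytes popped,
 *                              so the address is calculated against the updated xSP.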
6828 * @param u32Disp The displacement byte/word/dword, if any.
6829 * @param cbInstr The size of the fully decoded instruction. Used
6830 * for RIP relative addressing.
6831 * @param idxVarRet The result variable number.
6832 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6833 * when calculating the address.
6834 *
6835 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6836 */
6837DECL_INLINE_THROW(uint32_t)
6838iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6839 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6840{
6841 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6842
6843 /*
6844 * Special case the rip + disp32 form first.
6845 */
6846 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6847 {
6848 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6849 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6850 kIemNativeGstRegUse_ReadOnly);
6851 if (f64Bit)
6852 {
6853#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6854 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6855#else
6856 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6857#endif
6858#ifdef RT_ARCH_AMD64
6859 if ((int32_t)offFinalDisp == offFinalDisp)
6860 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6861 else
6862 {
6863 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6864 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6865 }
6866#else
6867 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6868#endif
6869 }
6870 else
6871 {
6872# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6873 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6874# else
6875 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6876# endif
6877 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6878 }
6879 iemNativeRegFreeTmp(pReNative, idxRegPc);
6880 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6881 return off;
6882 }
6883
6884    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
6885 int64_t i64EffAddr = 0;
6886 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6887 {
6888 case 0: break;
6889 case 1: i64EffAddr = (int8_t)u32Disp; break;
6890 case 2: i64EffAddr = (int32_t)u32Disp; break;
6891 default: AssertFailed();
6892 }
6893
6894 /* Get the register (or SIB) value. */
6895 uint8_t idxGstRegBase = UINT8_MAX;
6896 uint8_t idxGstRegIndex = UINT8_MAX;
6897 uint8_t cShiftIndex = 0;
6898 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6899 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6900 else /* SIB: */
6901 {
6902        /* index w/ scaling. */
6903 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6904 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6905 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6906 if (idxGstRegIndex == 4)
6907 {
6908 /* no index */
6909 cShiftIndex = 0;
6910 idxGstRegIndex = UINT8_MAX;
6911 }
6912
6913 /* base */
6914 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6915 if (idxGstRegBase == 4)
6916 {
6917 /* pop [rsp] hack */
6918 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6919 }
6920 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6921 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6922 {
6923 /* mod=0 and base=5 -> disp32, no base reg. */
6924 Assert(i64EffAddr == 0);
6925 i64EffAddr = (int32_t)u32Disp;
6926 idxGstRegBase = UINT8_MAX;
6927 }
6928 }
6929
6930 /*
6931 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6932 * the start of the function.
6933 */
6934 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6935 {
6936 if (f64Bit)
6937 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6938 else
6939 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6940 return off;
6941 }
6942
6943 /*
6944 * Now emit code that calculates:
6945 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6946 * or if !f64Bit:
6947 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6948 */
6949 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6950 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6951 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6952 kIemNativeGstRegUse_ReadOnly);
6953 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6954 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6955 kIemNativeGstRegUse_ReadOnly);
6956
6957 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6958 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6959 {
6960 idxRegBase = idxRegIndex;
6961 idxRegIndex = UINT8_MAX;
6962 }
6963
6964#ifdef RT_ARCH_AMD64
6965 uint8_t bFinalAdj;
6966 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6967 bFinalAdj = 0; /* likely */
6968 else
6969 {
6970 /* pop [rsp] with a problematic disp32 value. Split out the
6971 RSP offset and add it separately afterwards (bFinalAdj). */
6972 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6973 Assert(idxGstRegBase == X86_GREG_xSP);
6974 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6975 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6976 Assert(bFinalAdj != 0);
6977 i64EffAddr -= bFinalAdj;
6978 Assert((int32_t)i64EffAddr == i64EffAddr);
6979 }
6980 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6981//pReNative->pInstrBuf[off++] = 0xcc;
6982
6983 if (idxRegIndex == UINT8_MAX)
6984 {
6985 if (u32EffAddr == 0)
6986 {
6987 /* mov ret, base */
6988 if (f64Bit)
6989 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6990 else
6991 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6992 }
6993 else
6994 {
6995 /* lea ret, [base + disp32] */
6996 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6997 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6998 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6999 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7000 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7001 | (f64Bit ? X86_OP_REX_W : 0);
7002 pbCodeBuf[off++] = 0x8d;
7003 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7004 if (idxRegBase != X86_GREG_x12 /*SIB*/)
7005 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
7006 else
7007 {
7008 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7009 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
7010 }
7011 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7012 if (bMod == X86_MOD_MEM4)
7013 {
7014 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7015 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7016 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7017 }
7018 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7019 }
7020 }
7021 else
7022 {
7023 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
7024 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7025 if (idxRegBase == UINT8_MAX)
7026 {
7027 /* lea ret, [(index64 << cShiftIndex) + disp32] */
7028 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
7029 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7030 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7031 | (f64Bit ? X86_OP_REX_W : 0);
7032 pbCodeBuf[off++] = 0x8d;
7033 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
7034 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7035 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7036 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7037 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7038 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7039 }
7040 else
7041 {
7042 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
7043 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7044 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7045 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7046 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7047 | (f64Bit ? X86_OP_REX_W : 0);
7048 pbCodeBuf[off++] = 0x8d;
7049 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
7050 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7051 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7052 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
7053 if (bMod != X86_MOD_MEM0)
7054 {
7055 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7056 if (bMod == X86_MOD_MEM4)
7057 {
7058 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7059 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7060 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7061 }
7062 }
7063 }
7064 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7065 }
7066
7067 if (!bFinalAdj)
7068 { /* likely */ }
7069 else
7070 {
7071 Assert(f64Bit);
7072 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
7073 }
7074
7075#elif defined(RT_ARCH_ARM64)
7076 if (i64EffAddr == 0)
7077 {
7078 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7079 if (idxRegIndex == UINT8_MAX)
7080 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
7081 else if (idxRegBase != UINT8_MAX)
7082 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7083 f64Bit, false /*fSetFlags*/, cShiftIndex);
7084 else
7085 {
7086 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
7087 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
7088 }
7089 }
7090 else
7091 {
7092 if (f64Bit)
7093 { /* likely */ }
7094 else
7095 i64EffAddr = (int32_t)i64EffAddr;
7096
7097 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
7098 {
7099 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7100 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
7101 }
7102 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
7103 {
7104 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7105 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7106 }
7107 else
7108 {
7109 if (f64Bit)
7110 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7111 else
7112 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7113 if (idxRegBase != UINT8_MAX)
7114 {
7115 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7116 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7117 }
7118 }
7119 if (idxRegIndex != UINT8_MAX)
7120 {
7121 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7122 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7123 f64Bit, false /*fSetFlags*/, cShiftIndex);
7124 }
7125 }
7126
7127#else
7128# error "port me"
7129#endif
7130
7131 if (idxRegIndex != UINT8_MAX)
7132 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7133 if (idxRegBase != UINT8_MAX)
7134 iemNativeRegFreeTmp(pReNative, idxRegBase);
7135 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7136 return off;
7137}
7138
7139
7140/*********************************************************************************************************************************
7141* Memory fetches and stores common *
7142*********************************************************************************************************************************/
7143
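/** Memory access operation kind for iemNativeEmitMemFetchStoreDataCommon: a plain
 *  store, a plain fetch, or a fetch with zero or sign extension to the given width. */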
7144typedef enum IEMNATIVEMITMEMOP
7145{
7146 kIemNativeEmitMemOp_Store = 0,
7147 kIemNativeEmitMemOp_Fetch,
7148 kIemNativeEmitMemOp_Fetch_Zx_U16,
7149 kIemNativeEmitMemOp_Fetch_Zx_U32,
7150 kIemNativeEmitMemOp_Fetch_Zx_U64,
7151 kIemNativeEmitMemOp_Fetch_Sx_U16,
7152 kIemNativeEmitMemOp_Fetch_Sx_U32,
7153 kIemNativeEmitMemOp_Fetch_Sx_U64
7154} IEMNATIVEMITMEMOP;
7155
7156/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7157 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7158 * (with iSegReg = UINT8_MAX). */
7159template<uint8_t const a_cbMem, uint32_t const a_fAlignMaskAndCtl, IEMNATIVEMITMEMOP const a_enmOp, bool a_fFlat = false>
7160DECL_INLINE_THROW(uint32_t)
7161iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7162 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7163{
7164 /*
7165 * Assert sanity.
7166 */
7167 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7168 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7169 Assert( a_enmOp != kIemNativeEmitMemOp_Store
7170 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7171 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7172 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7173 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7174 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7175 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7176 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7177 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
7178#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7179 AssertCompile( a_cbMem == 1 || a_cbMem == 2 || a_cbMem == 4 || a_cbMem == 8
7180 || a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U));
7181#else
7182 AssertCompile(a_cbMem == 1 || a_cbMem == 2 || a_cbMem == 4 || a_cbMem == 8);
7183#endif
7184 AssertCompile(!(a_fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7185 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7186#ifdef VBOX_STRICT
7187 if (iSegReg == UINT8_MAX)
7188 {
7189 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7190 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7191 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7192 switch (a_cbMem)
7193 {
7194 case 1:
7195 Assert( pfnFunction
7196 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7197 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7198 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7199 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7200 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7201 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7202 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7203 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7204 : UINT64_C(0xc000b000a0009000) ));
7205 Assert(!a_fAlignMaskAndCtl);
7206 break;
7207 case 2:
7208 Assert( pfnFunction
7209 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7210 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7211 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7212 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7213 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7214 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7215 : UINT64_C(0xc000b000a0009000) ));
7216 Assert(a_fAlignMaskAndCtl <= 1);
7217 break;
7218 case 4:
7219 Assert( pfnFunction
7220 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7221 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7222 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7223 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7224 : UINT64_C(0xc000b000a0009000) ));
7225 Assert(a_fAlignMaskAndCtl <= 3);
7226 break;
7227 case 8:
7228 Assert( pfnFunction
7229 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7230 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7231 : UINT64_C(0xc000b000a0009000) ));
7232 Assert(a_fAlignMaskAndCtl <= 7);
7233 break;
7234#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7235 case sizeof(RTUINT128U):
7236 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7237 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7238 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7239 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7240 || ( a_enmOp == kIemNativeEmitMemOp_Store
7241 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7242 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7243 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7244 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7245 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7246 : a_fAlignMaskAndCtl <= 15U);
7247 break;
7248 case sizeof(RTUINT256U):
7249 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7250 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7251 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7252 || ( a_enmOp == kIemNativeEmitMemOp_Store
7253 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7254 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7255 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7256 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7257 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7258 : a_fAlignMaskAndCtl <= 31);
7259 break;
7260#endif
7261 }
7262 }
7263 else
7264 {
7265 Assert(iSegReg < 6);
7266 switch (a_cbMem)
7267 {
7268 case 1:
7269 Assert( pfnFunction
7270 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7271 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7272 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7273 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7274 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7275 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7276 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7277 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7278 : UINT64_C(0xc000b000a0009000) ));
7279 Assert(!a_fAlignMaskAndCtl);
7280 break;
7281 case 2:
7282 Assert( pfnFunction
7283 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7284 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7285 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7286 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7287 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7288 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7289 : UINT64_C(0xc000b000a0009000) ));
7290 Assert(a_fAlignMaskAndCtl <= 1);
7291 break;
7292 case 4:
7293 Assert( pfnFunction
7294 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7295 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7296 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7297 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7298 : UINT64_C(0xc000b000a0009000) ));
7299 Assert(a_fAlignMaskAndCtl <= 3);
7300 break;
7301 case 8:
7302 Assert( pfnFunction
7303 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7304 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7305 : UINT64_C(0xc000b000a0009000) ));
7306 Assert(a_fAlignMaskAndCtl <= 7);
7307 break;
7308#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7309 case sizeof(RTUINT128U):
7310 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7311 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7312 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7313 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7314 || ( a_enmOp == kIemNativeEmitMemOp_Store
7315 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7316 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7317 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7318 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7319 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7320 : a_fAlignMaskAndCtl <= 15);
7321 break;
7322 case sizeof(RTUINT256U):
7323 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7324 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7325 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7326 || ( a_enmOp == kIemNativeEmitMemOp_Store
7327 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7328 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7329 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7330 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7331 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7332 : a_fAlignMaskAndCtl <= 31);
7333 break;
7334#endif
7335 }
7336 }
7337#endif
7338
7339#ifdef VBOX_STRICT
7340 /*
7341 * Check that the fExec flags we've got make sense.
7342 */
7343 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7344#endif
7345
7346 /*
7347 * To keep things simple we have to commit any pending writes first as we
7348 * may end up making calls.
7349 */
7350 /** @todo we could postpone this till we make the call and reload the
7351 * registers after returning from the call. Not sure if that's sensible or
7352 * not, though. */
7353#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7354 off = iemNativeRegFlushPendingWrites(pReNative, off);
7355#else
7356 /* The program counter is treated differently for now. */
7357 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7358#endif
7359
7360#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7361 /*
7362 * Move/spill/flush stuff out of call-volatile registers.
7363 * This is the easy way out. We could contain this to the tlb-miss branch
7364 * by saving and restoring active stuff here.
7365 */
7366 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7367#endif
7368
7369 /*
7370 * Define labels and allocate the result register (trying for the return
7371 * register if we can).
7372 */
7373 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7374#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7375 uint8_t idxRegValueFetch;
7376 if RT_CONSTEXPR_IF(a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7377 idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7378 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
7379 else
7380 idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7381 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7382 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7383 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7384#else
7385 uint8_t const idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7386 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7387 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7388 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7389#endif
7390 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_cbMem, offDisp);
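    /* When TlbState.fSkip is set, no inline TLB lookup is emitted and the access always goes through the TlbMiss helper call below. */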
7391
7392#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7393 uint8_t idxRegValueStore = UINT8_MAX;
7394
7395 if RT_CONSTEXPR_IF(a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7396 idxRegValueStore = a_enmOp == kIemNativeEmitMemOp_Store
7397 && !TlbState.fSkip
7398 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7399 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7400 : UINT8_MAX;
7401 else
7402 idxRegValueStore = a_enmOp == kIemNativeEmitMemOp_Store
7403 && !TlbState.fSkip
7404 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7405 ? iemNativeVarRegisterAcquireInited(pReNative, idxVarValue, &off)
7406 : UINT8_MAX;
7407
7408#else
7409 uint8_t const idxRegValueStore = a_enmOp == kIemNativeEmitMemOp_Store
7410 && !TlbState.fSkip
7411 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7412 ? iemNativeVarRegisterAcquireInited(pReNative, idxVarValue, &off)
7413 : UINT8_MAX;
7414#endif
7415 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7416 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7417 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7418 : UINT32_MAX;
7419
7420 /*
7421 * Jump to the TLB lookup code.
7422 */
7423 if (!TlbState.fSkip)
7424 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7425
7426 /*
7427 * TlbMiss:
7428 *
7429 * Call helper to do the fetching.
7430 * We flush all guest register shadow copies here.
7431 */
7432 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7433
7434#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7435 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7436#else
7437 RT_NOREF(idxInstr);
7438#endif
7439
7440#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7441 if (pReNative->Core.offPc)
7442 {
7443 /*
7444 * Update the program counter but restore it at the end of the TlbMiss branch.
7445 * This should allow delaying more program counter updates for the TlbLookup and hit paths
7446 * which are hopefully much more frequent, reducing the amount of memory accesses.
7447 */
7448 /* Allocate a temporary PC register. */
7449/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7450 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7451 kIemNativeGstRegUse_ForUpdate);
7452
7453 /* Perform the addition and store the result. */
7454 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7455 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7456# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7457 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7458# endif
7459
7460 /* Free and flush the PC register. */
7461 iemNativeRegFreeTmp(pReNative, idxPcReg);
7462 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7463 }
7464#endif
7465
7466#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7467 /* Save variables in volatile registers. */
7468 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7469 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
7470 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
7471 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7472#endif
7473
7474 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7475 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7476#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7477 if RT_CONSTEXPR_IF(a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7478 {
7479 /*
7480 * For SIMD-based variables we pass the reference on the stack for both fetches and stores.
7481 *
7482 * Note! A host register was assigned to the variable for the TlbLookup case above; it
7483 * must not be freed here, or the value loaded into that register won't get synced to the
7484 * stack slot further down the road because the variable no longer knows it has a register assigned.
7485 *
7486 * Note! For loads it is not required to sync what is in the assigned register with the stack slot
7487 * as it will be overwritten anyway.
7488 */
7489 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7490 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7491 a_enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7492 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7493 }
7494 else
7495#endif
7496 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store)
7497 {
7498 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7499 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
7500#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7501 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7502#else
7503 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7504 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7505#endif
7506 }
7507
7508 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7509 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
7510#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7511 fVolGregMask);
7512#else
7513 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7514#endif
7515
7516 if RT_CONSTEXPR_IF(!a_fFlat)
7517 {
7518 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7519 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7520 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7521 }
7522
7523#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
7524 /* Do delayed EFLAGS calculations. */
7525 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store || a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7526 {
7527 if RT_CONSTEXPR_IF(a_fFlat)
7528 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7529 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7530 fHstRegsNotToSave);
7531 else
7532 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7533 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
7534 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
7535 fHstRegsNotToSave);
7536 }
7537 else if RT_CONSTEXPR_IF(a_fFlat)
7538 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState,
7539 fHstRegsNotToSave);
7540 else
7541 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7542 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7543 fHstRegsNotToSave);
7544#endif
7545
7546 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7547 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7548
7549 /* Done setting up parameters, make the call. */
7550 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
7551
7552 /*
7553 * Put the result in the right register if this is a fetch.
7554 */
7555 if RT_CONSTEXPR_IF(a_enmOp != kIemNativeEmitMemOp_Store)
7556 {
7557#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7558 if RT_CONSTEXPR_IF(a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7559 {
7560 Assert(a_enmOp == kIemNativeEmitMemOp_Fetch);
7561
7562 /* Sync the value on the stack with the host register assigned to the variable. */
7563 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7564 }
7565 else
7566#endif
7567 {
7568 Assert(idxRegValueFetch == pVarValue->idxReg);
7569 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7570 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7571 }
7572 }
7573
7574#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7575 /* Restore variables and guest shadow registers to volatile registers. */
7576 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7577 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7578#endif
7579
7580#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7581 if (pReNative->Core.offPc)
7582 {
7583 /*
7584 * Time to restore the program counter to its original value.
7585 */
7586 /* Allocate a temporary PC register. */
7587 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7588 kIemNativeGstRegUse_ForUpdate);
7589
7590 /* Restore the original value. */
7591 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7592 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7593
7594 /* Free and flush the PC register. */
7595 iemNativeRegFreeTmp(pReNative, idxPcReg);
7596 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7597 }
7598#endif
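
/*
 * Illustrative sketch of the delayed-PC handling in this TlbMiss path (not emitted
 * verbatim; 'rPc' stands for whatever host register the allocator picked): with
 * pReNative->Core.offPc == 5 the helper call gets bracketed roughly like
 *
 *      add     rPc, 5                      ; iemNativeEmitAddGprImm
 *      mov     [pVCpu + GstCtx.rip], rPc   ; iemNativeEmitStoreGprToVCpuU64
 *      ... argument setup and call to pfnFunction ...
 *      sub     rPc, 5                      ; iemNativeEmitSubGprImm
 *      mov     [pVCpu + GstCtx.rip], rPc
 *
 * so the helper sees an up-to-date RIP while the TLB-hit path keeps accumulating
 * the update for later.
 */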
7599
7600#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7601 if (!TlbState.fSkip)
7602 {
7603 /* end of TlbMiss - Jump to the done label. */
7604 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7605 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7606
7607 /*
7608 * TlbLookup:
7609 */
7610 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, a_cbMem, a_fAlignMaskAndCtl,
7611 a_enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
7612 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
7613
7614 /*
7615 * Emit code to do the actual storing / fetching.
7616 */
7617 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7618# ifdef IEM_WITH_TLB_STATISTICS
7619 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7620 a_enmOp == kIemNativeEmitMemOp_Store
7621 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch)
7622 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore));
7623# endif
7624 switch (a_enmOp)
7625 {
7626 case kIemNativeEmitMemOp_Store:
7627 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7628 {
7629 switch (a_cbMem)
7630 {
7631 case 1:
7632 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7633 break;
7634 case 2:
7635 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7636 break;
7637 case 4:
7638 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7639 break;
7640 case 8:
7641 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7642 break;
7643#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7644 case sizeof(RTUINT128U):
7645 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7646 break;
7647 case sizeof(RTUINT256U):
7648 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7649 break;
7650#endif
7651 default:
7652 AssertFailed();
7653 }
7654 }
7655 else
7656 {
7657 switch (a_cbMem)
7658 {
7659 case 1:
7660 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7661 idxRegMemResult, TlbState.idxReg1);
7662 break;
7663 case 2:
7664 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7665 idxRegMemResult, TlbState.idxReg1);
7666 break;
7667 case 4:
7668 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7669 idxRegMemResult, TlbState.idxReg1);
7670 break;
7671 case 8:
7672 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7673 idxRegMemResult, TlbState.idxReg1);
7674 break;
7675 default:
7676 AssertFailed();
7677 }
7678 }
7679 break;
7680
7681 case kIemNativeEmitMemOp_Fetch:
7682 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7683 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7684 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7685 switch (a_cbMem)
7686 {
7687 case 1:
7688 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7689 break;
7690 case 2:
7691 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7692 break;
7693 case 4:
7694 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7695 break;
7696 case 8:
7697 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7698 break;
7699#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7700 case sizeof(RTUINT128U):
7701 /*
7702 * No need to sync back the register with the stack, this is done by the generic variable handling
7703 * code if there is a register assigned to a variable and the stack must be accessed.
7704 */
7705 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7706 break;
7707 case sizeof(RTUINT256U):
7708 /*
7709 * No need to sync back the register with the stack, this is done by the generic variable handling
7710 * code if there is a register assigned to a variable and the stack must be accessed.
7711 */
7712 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7713 break;
7714#endif
7715 default:
7716 AssertFailed();
7717 }
7718 break;
7719
7720 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7721 Assert(a_cbMem == 1);
7722 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7723 break;
7724
7725 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7726 Assert(a_cbMem == 1 || a_cbMem == 2);
7727 if (a_cbMem == 1)
7728 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7729 else
7730 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7731 break;
7732
7733 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7734 switch (a_cbMem)
7735 {
7736 case 1:
7737 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7738 break;
7739 case 2:
7740 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7741 break;
7742 case 4:
7743 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7744 break;
7745 default:
7746 AssertFailed();
7747 }
7748 break;
7749
7750 default:
7751 AssertFailed();
7752 }
7753
7754 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7755
7756 /*
7757 * TlbDone:
7758 */
7759 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7760
7761 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7762
7763# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7764 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7765 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7766# endif
7767 }
7768#else
7769 RT_NOREF(idxLabelTlbMiss);
7770#endif
7771
7772 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7773 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7774 return off;
7775}
7776
7777
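/*
 * Illustrative layout of the code generated by iemNativeEmitMemFetchStoreDataCommon
 * above when the TLB lookup isn't skipped (a sketch only; labels and registers are
 * placeholders for the ones picked at recompile time):
 *
 *          jmp     TlbLookup                   ; iemNativeEmitJmpToLabel
 *      TlbMiss:
 *          ; save volatile regs, load pVCpu / GCPtrMem / value into the call registers
 *          call    pfnFunction                 ; e.g. iemNativeHlpMemFetchDataU32
 *          ; restore regs; for fetches copy IEMNATIVE_CALL_RET_GREG into the value register
 *          jmp     TlbDone
 *      TlbLookup:
 *          ; inline TLB probe from iemNativeEmitTlbLookup, branches to TlbMiss on a miss,
 *          ; then the inline load/store using idxRegMemResult as the address
 *      TlbDone:
 */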
7778
7779/*********************************************************************************************************************************
7780* Memory fetches (IEM_MEM_FETCH_XXX). *
7781*********************************************************************************************************************************/
7782
7783/* 8-bit segmented: */
7784#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7785 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch>( \
7786 pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7787
7788#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7789 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16>( \
7790 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7791
7792#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7793 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32>( \
7794 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7795
7796#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7797 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64>( \
7798 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7799
7800#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7801 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16>(\
7802 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7803
7804#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7805 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7806 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7807
7808#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7809 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7810 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7811
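/*
 * Usage sketch (hypothetical, simplified MC fragment): the instruction bodies in
 * IEMAllInst*.cpp.h expand wrappers like the ones above inside an MC block, along
 * the lines of
 *
 *      IEM_MC_BEGIN(...);
 *      IEM_MC_LOCAL(uint8_t, u8Value);
 *      ...
 *      IEM_MC_FETCH_MEM_U8(u8Value, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
 *      ...
 *      IEM_MC_END();
 *
 * When recompiling natively, that IEM_MC_FETCH_MEM_U8 line becomes the
 * iemNativeEmitMemFetchStoreDataCommon call from the #define above, with
 * iemNativeHlpMemFetchDataU8 as the TLB-miss fallback helper.
 */
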
7812/* 16-bit segmented: */
7813#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7814 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7815 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7816
7817#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7818 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7819 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7820
7821#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7822 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32>(\
7823 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7824
7825#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7826 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7827 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7828
7829#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7830 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7831 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7832
7833#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7834 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7835 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7836
7837
7838/* 32-bit segmented: */
7839#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7840 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7841 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7842
7843#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7844 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7845 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7846
7847#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7848 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7849 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7850
7851#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7852 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7853 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7854
7855#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7856 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7857 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7858
7859#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7860 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7861 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, \
7862 a_offDisp)
7863
7864#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7865 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7866 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7867
7868#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7869 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7870 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7871
7872#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7873 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7874 pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7875
7876AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7877#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7878 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch>(\
7879 pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7880
7881
7882/* 64-bit segmented: */
7883#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7884 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7885 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7886
7887AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7888#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7889 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch>(\
7890 pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7891
7892
7893/* 8-bit flat: */
7894#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7895 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, true>(\
7896 pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7897
7898#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7899 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, true>(\
7900 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7901
7902#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7903 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7904 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7905
7906#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7907 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7908 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7909
7910#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7911 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, true>(\
7912 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7913
7914#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7915 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7916 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7917
7918#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7919 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7920 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7921
7922
7923/* 16-bit flat: */
7924#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7925 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7926 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7927
7928#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7929 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7930 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7931
7932#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7933 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7934 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7935
7936#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7937 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7938 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7939
7940#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7941 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7942 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7943
7944#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7945 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7946 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7947
7948/* 32-bit flat: */
7949#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7950 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7951 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7952
7953#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7954 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7955 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7956
7957#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7958 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7959 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7960
7961#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7962 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7963 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7964
7965#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7966 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7967 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7968
7969#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7970 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7971 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7972
7973#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7974 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7975 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7976
7977#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7978 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7979 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7980
7981#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7982 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7983 pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7984
7985#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7986 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7987 pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7988
7989
7990/* 64-bit flat: */
7991#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7992 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7993 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7994
7995#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7996 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7997 pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7998
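/*
 * Note on the FLAT variants above (informal summary of the common worker): they pass
 * UINT8_MAX for iSegReg and set the a_fFlat template argument, so on a TLB miss the
 * value of a store travels in IEMNATIVE_CALL_ARG2_GREG rather than ARG3 and no
 * segment register number is loaded at all, matching the flat helpers
 * (iemNativeHlpMemFlatFetchDataXxx / iemNativeHlpMemFlatStoreDataXxx) which take one
 * parameter less than their segmented counterparts.
 */
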
7999#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8000/* 128-bit segmented: */
8001#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
8002 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
8003 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
8004
8005#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
8006 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8007 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8008 kIemNativeEmitMemOp_Fetch>(\
8009 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
8010
8011AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
8012#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
8013 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
8014 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8015 kIemNativeEmitMemOp_Fetch>(\
8016 pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
8017
8018#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
8019 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
8020 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
8021
8022#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
8023 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
8024 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
8025
8026
8027/* 128-bit flat: */
8028#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
8029 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8030 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
8031
8032#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
8033 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8034 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8035 kIemNativeEmitMemOp_Fetch, true>(\
8036 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
8037
8038#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
8039 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
8040 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8041 kIemNativeEmitMemOp_Fetch, true>(\
8042 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
8043
8044#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
8045 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8046 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
8047
8048#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
8049 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8050 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
8051
8052/* 256-bit segmented: */
8053#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
8054 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
8055 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8056
8057#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8058 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
8059 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8060
8061#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
8062 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8063 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8064 kIemNativeEmitMemOp_Fetch>(\
8065 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8066
8067#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8068 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
8069 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8070
8071
8072/* 256-bit flat: */
8073#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8074 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8075 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8076
8077#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8078 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8079 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8080
8081#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8082 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8083 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8084 kIemNativeEmitMemOp_Fetch, true>(\
8085 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8086
8087#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8088 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8089 pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8090
8091#endif
8092
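/*
 * Note on the 128/256-bit variants above: as the worker's own comments explain, SIMD
 * values are not passed to the TLB-miss helpers by value; instead
 * iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess puts the address of the
 * variable's stack slot into the argument register, syncing the assigned SIMD host
 * register to that slot first for stores, while fetches sync the other way
 * afterwards via iemNativeEmitSimdVarSyncStackToRegister.
 */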
8093
8094/*********************************************************************************************************************************
8095* Memory stores (IEM_MEM_STORE_XXX). *
8096*********************************************************************************************************************************/
8097
8098#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8099 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store>(\
8100 pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8101
8102#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8103 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store>(\
8104 pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8105
8106#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8107 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store>(\
8108 pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8109
8110#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8111 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store>(\
8112 pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8113
8114
8115#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8116 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, true>(\
8117 pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8118
8119#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8120 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8121 pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8122
8123#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8124 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8125 pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8126
8127#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8128 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8129 pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8130
8131
8132#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8133 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t)>(\
8134 pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8135
8136#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8137 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t)>(\
8138 pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8139
8140#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8141 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t)>(\
8142 pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8143
8144#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8145 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t)>(\
8146 pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8147
8148
8149#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8150 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t), true>(\
8151 pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8152
8153#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8154 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t), true>(\
8155 pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8156
8157#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8158 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t), true>(\
8159 pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8160
8161#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8162 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t), true>(\
8163 pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8164
8165/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8166 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8167template<uint8_t const a_cbMem, bool a_fFlat = false>
8168DECL_INLINE_THROW(uint32_t)
8169iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8170 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
8171{
8172 /*
8173 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8174 * to do the grunt work.
8175 */
8176 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, a_cbMem, uValueConst);
8177 off = iemNativeEmitMemFetchStoreDataCommon<a_cbMem, a_cbMem - 1,
8178 kIemNativeEmitMemOp_Store,
8179 a_fFlat>(pReNative, off, idxVarConstValue, iSegReg,
8180 idxVarGCPtrMem, pfnFunction, idxInstr);
8181 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8182 return off;
8183}
8184
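/*
 * Informal reading of the helper above: iemNativeVarAllocConst hands the temporary
 * variable over as an immediate, so on the TLB-hit path the common worker never
 * acquires a value register and instead uses the iemNativeEmitStoreImmNNByGprEx
 * forms.  Roughly, IEM_MC_STORE_MEM_U16_CONST with a constant of 0x1234 ends up
 * emitting the equivalent of
 *
 *      mov     word [idxRegMemResult], 0x1234
 *
 * on a hit, while the miss path passes 0x1234 by value to iemNativeHlpMemStoreDataU16.
 */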
8185
8186#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8187# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8188 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8189 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8190 kIemNativeEmitMemOp_Store>(\
8191 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8192
8193# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8194 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store>(\
8195 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8196
8197# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8198 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store>(\
8199 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8200
8201# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8202 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8203 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8204 kIemNativeEmitMemOp_Store>(\
8205 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8206
8207
8208# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8209 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8210 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8211 kIemNativeEmitMemOp_Store, true>(\
8212 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, \
8213 pCallEntry->idxInstr)
8214
8215# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8216 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, true>(\
8217 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8218
8219# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8220 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, true>(\
8221 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8222
8223# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8224 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8225 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8226 true>(\
8227 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8228#endif
8229
8230
8231
8232/*********************************************************************************************************************************
8233* Stack Accesses. *
8234*********************************************************************************************************************************/
8235#define IEM_MC_PUSH_U16(a_u16Value) \
8236 off = iemNativeEmitStackPush<16, 0, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8237#define IEM_MC_PUSH_U32(a_u32Value) \
8238 off = iemNativeEmitStackPush<32, 0, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8239#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8240 off = iemNativeEmitStackPush<32, 0, 1>(pReNative, off, a_uSegVal, (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8241#define IEM_MC_PUSH_U64(a_u64Value) \
8242 off = iemNativeEmitStackPush<64, 0, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8243
8244#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8245 off = iemNativeEmitStackPush<16, 32, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8246#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8247 off = iemNativeEmitStackPush<32, 32, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8248#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8249 off = iemNativeEmitStackPush<32, 32, 1>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8250
8251#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8252 off = iemNativeEmitStackPush<16, 64, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8253#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8254 off = iemNativeEmitStackPush<64, 64, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8255
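/*
 * Mapping note for the wrappers above (illustrative): the template arguments are
 * <a_cBitsVar, a_cBitsFlat, a_fIsSegReg>, so e.g. IEM_MC_FLAT64_PUSH_U64 becomes
 * iemNativeEmitStackPush<64, 64, 0> using the flat 64-bit store helper, while a
 * plain IEM_MC_PUSH_U16 uses <16, 0, 0> and goes through the segmented SS-relative
 * path with the 16/32-bit stack pointer selection below.
 */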
8256
8257/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8258template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat, bool a_fIsSegReg = false>
8259DECL_INLINE_THROW(uint32_t)
8260iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uintptr_t pfnFunction, uint8_t idxInstr)
8261{
8262 /*
8263 * Assert sanity.
8264 */
8265 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8266 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8267#ifdef VBOX_STRICT
8268 uint32_t const cTmplArgs = RT_MAKE_U32_FROM_U8(a_cBitsVar, a_cBitsFlat, a_fIsSegReg, 0);
8269 if (a_cBitsFlat != 0)
8270 {
8271 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8272 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8273 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8274 Assert( pfnFunction
8275 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8276 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8277 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8278 : cTmplArgs == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8279 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8280 : UINT64_C(0xc000b000a0009000) ));
8281 }
8282 else
8283 Assert( pfnFunction
8284 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8285 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8286 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8287 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8288 : UINT64_C(0xc000b000a0009000) ));
8289#endif
8290
8291#ifdef VBOX_STRICT
8292 /*
8293 * Check that the fExec flags we've got make sense.
8294 */
8295 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8296#endif
8297
8298 /*
8299 * To keep things simple we have to commit any pending writes first as we
8300 * may end up making calls.
8301 */
8302 /** @todo we could postpone this till we make the call and reload the
8303 * registers after returning from the call. Not sure if that's sensible or
8304 * not, though. */
8305 off = iemNativeRegFlushPendingWrites(pReNative, off);
8306
8307 /*
8308 * First we calculate the new RSP and the effective stack pointer value.
8309 * For 64-bit mode and flat 32-bit these two are the same.
8310 * (Code structure is very similar to that of PUSH)
8311 */
8312 uint8_t const cbMem = a_cBitsVar / 8;
8313 bool const fIsIntelSeg = a_fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8314 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8315 ? cbMem : sizeof(uint16_t);
8316 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8317 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8318 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8319 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8320 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8321 {
8322 Assert(idxRegEffSp == idxRegRsp);
8323 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8324 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8325 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8326 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8327 else
8328 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8329 }
8330 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8331 {
8332 Assert(idxRegEffSp != idxRegRsp);
8333 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8334 kIemNativeGstRegUse_ReadOnly);
8335#ifdef RT_ARCH_AMD64
8336 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8337#else
8338 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8339#endif
8340 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8341 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8342 offFixupJumpToUseOtherBitSp = off;
8343 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8344 {
8345 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8346 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8347 }
8348 else
8349 {
8350 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8351 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8352 }
8353 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8354 }
8355 /* SpUpdateEnd: */
8356 uint32_t const offLabelSpUpdateEnd = off;
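
/*
 * Worked example for the width split above (standard x86 stack semantics, informal):
 * with a 16-bit SS (SS.attr D bit clear), iemNativeEmitStackPushUse16Sp only
 * decrements and wraps the low 16 bits, so pushing a word at SP=0x0000 yields
 * SP=0xFFFE and an effective address of SS.base + 0xFFFE while the upper RSP bits
 * stay untouched; with the D bit set, the 32-bit variant simply does ESP -= cbMem
 * and uses that as the effective stack pointer.
 */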
8357
8358 /*
8359 * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or directly
8360 * to TlbMiss if we're skipping the lookup).
8361 */
8362 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8363 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8364 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8365 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8366 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8367 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8368 : UINT32_MAX;
8369 uint8_t const idxRegValue = !TlbState.fSkip
8370 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8371 ? iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarValue, &off,
8372 IEMNATIVE_CALL_ARG2_GREG)
8373 : UINT8_MAX;
8374 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8375
8376
8377 if (!TlbState.fSkip)
8378 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8379 else
8380 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8381
8382 /*
8383 * Use16BitSp:
8384 */
8385 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8386 {
8387#ifdef RT_ARCH_AMD64
8388 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8389#else
8390 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8391#endif
8392 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8393 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8394 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8395 else
8396 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8397 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8398 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8399 }
8400
8401 /*
8402 * TlbMiss:
8403 *
8404 * Call helper to do the pushing.
8405 */
8406 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8407
8408#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8409 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8410#else
8411 RT_NOREF(idxInstr);
8412#endif
8413
8414 /* Save variables in volatile registers. */
8415 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8416 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8417 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8418 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8419 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8420
8421 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8422 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8423 {
8424 /* Swap them using ARG0 as temp register: */
8425 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8426 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8427 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8428 }
8429 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8430 {
8431 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8432 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8433 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8434
8435 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8436 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8437 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8438 }
8439 else
8440 {
8441 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8442 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8443
8444 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8445 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8446 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8447 }
8448
8449#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8450 /* Do delayed EFLAGS calculations. */
8451 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
8452 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8453#endif
8454
8455 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8456 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8457
8458 /* Done setting up parameters, make the call. */
8459 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8460
8461 /* Restore variables and guest shadow registers to volatile registers. */
8462 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8463 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8464
8465#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8466 if (!TlbState.fSkip)
8467 {
8468 /* end of TlbMiss - Jump to the done label. */
8469 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8470 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8471
8472 /*
8473 * TlbLookup:
8474 */
8475 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
8476 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
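        /* On a TLB hit idxRegMemResult now holds the host address to store to; on a miss the lookup code
           branches to the TlbMiss code emitted above. */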
8477
8478 /*
8479 * Emit code to do the actual storing / fetching.
8480 */
8481 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8482# ifdef IEM_WITH_TLB_STATISTICS
8483 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8484 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8485# endif
8486 if (idxRegValue != UINT8_MAX)
8487 {
8488 switch (cbMemAccess)
8489 {
8490 case 2:
8491 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8492 break;
8493 case 4:
8494 if (!fIsIntelSeg)
8495 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8496 else
8497 {
                        /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
                           PUSH FS in real mode, so we have to try to emulate that here.
8500 We borrow the now unused idxReg1 from the TLB lookup code here. */
8501 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8502 kIemNativeGstReg_EFlags);
8503 if (idxRegEfl != UINT8_MAX)
8504 {
#ifdef RT_ARCH_AMD64
8506 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8507 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8508 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8509#else
8510 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8511 off, TlbState.idxReg1, idxRegEfl,
8512 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8513#endif
8514 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8515 }
8516 else
8517 {
8518 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
8519 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8520 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8521 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8522 }
8523 /* ASSUMES the upper half of idxRegValue is ZERO. */
8524 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8525 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8526 }
8527 break;
8528 case 8:
8529 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8530 break;
8531 default:
8532 AssertFailed();
8533 }
8534 }
8535 else
8536 {
8537 switch (cbMemAccess)
8538 {
8539 case 2:
8540 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8541 idxRegMemResult, TlbState.idxReg1);
8542 break;
8543 case 4:
8544 Assert(!a_fIsSegReg);
8545 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8546 idxRegMemResult, TlbState.idxReg1);
8547 break;
8548 case 8:
8549 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8550 break;
8551 default:
8552 AssertFailed();
8553 }
8554 }
8555
8556 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8557 TlbState.freeRegsAndReleaseVars(pReNative);
8558
8559 /*
8560 * TlbDone:
8561 *
8562 * Commit the new RSP value.
8563 */
8564 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8565 }
8566#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8567
8568#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8569 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
8570#endif
8571 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8572 if (idxRegEffSp != idxRegRsp)
8573 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8574
    /* The value variable is implicitly flushed. */
8576 if (idxRegValue != UINT8_MAX)
8577 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8578 iemNativeVarFreeLocal(pReNative, idxVarValue);
8579
8580 return off;
8581}
8582
8583
8584
8585#define IEM_MC_POP_GREG_U16(a_iGReg) \
8586 off = iemNativeEmitStackPopGReg<16, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8587#define IEM_MC_POP_GREG_U32(a_iGReg) \
8588 off = iemNativeEmitStackPopGReg<32, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8589#define IEM_MC_POP_GREG_U64(a_iGReg) \
8590 off = iemNativeEmitStackPopGReg<64, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8591
8592#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8593 off = iemNativeEmitStackPopGReg<16, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8594#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8595 off = iemNativeEmitStackPopGReg<32, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8596
8597#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8598 off = iemNativeEmitStackPopGReg<16, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8599#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8600 off = iemNativeEmitStackPopGReg<64, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
8601
8602
8603DECL_FORCE_INLINE_THROW(uint32_t)
8604iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8605 uint8_t idxRegTmp)
8606{
8607 /* Use16BitSp: */
8608#ifdef RT_ARCH_AMD64
8609 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8610 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8611 RT_NOREF(idxRegTmp);
8612#else
8613 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8614 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8615 /* add tmp, regrsp, #cbMem */
8616 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8617 /* and tmp, tmp, #0xffff */
8618 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8619 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8621 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
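    /* Net effect: idxRegEffSp = old SP zero-extended, and SP (bits 15:0 of idxRegRsp) becomes (SP + cbMem) & 0xffff. */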
8622#endif
8623 return off;
8624}
8625
8626
8627DECL_FORCE_INLINE(uint32_t)
8628iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8629{
8630 /* Use32BitSp: */
8631 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8632 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
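    /* Note: the 32-bit add clears bits 63:32 of idxRegRsp on both host architectures; presumably harmless here
       since the upper RSP bits are not used with a 32-bit stack. */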
8633 return off;
8634}
8635
8636
8637/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8638template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
8639DECL_INLINE_THROW(uint32_t)
8640iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg, uintptr_t pfnFunction, uint8_t idxInstr)
8641{
8642 /*
8643 * Assert sanity.
8644 */
8645 Assert(idxGReg < 16);
8646#ifdef VBOX_STRICT
8647 if (a_cBitsFlat != 0)
8648 {
8649 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8650 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8651 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8652 Assert( pfnFunction
8653 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8654 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8655 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8656 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8657 : UINT64_C(0xc000b000a0009000) ));
8658 }
8659 else
8660 Assert( pfnFunction
8661 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8662 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8663 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8664 : UINT64_C(0xc000b000a0009000) ));
8665#endif
8666
8667#ifdef VBOX_STRICT
8668 /*
8669 * Check that the fExec flags we've got make sense.
8670 */
8671 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8672#endif
8673
8674 /*
8675 * To keep things simple we have to commit any pending writes first as we
8676 * may end up making calls.
8677 */
8678 off = iemNativeRegFlushPendingWrites(pReNative, off);
8679
8680 /*
8681 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
8682 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8683 * directly as the effective stack pointer.
8684 * (Code structure is very similar to that of PUSH)
8685 */
8686 uint8_t const cbMem = a_cBitsVar / 8;
8687 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8688 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8689 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8690 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8691 * will be the resulting register value. */
8692 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8693
8694 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8695 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8696 {
8697 Assert(idxRegEffSp == idxRegRsp);
8698 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8699 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8700 }
8701 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8702 {
8703 Assert(idxRegEffSp != idxRegRsp);
8704 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8705 kIemNativeGstRegUse_ReadOnly);
8706#ifdef RT_ARCH_AMD64
8707 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8708#else
8709 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8710#endif
8711 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8712 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8713 offFixupJumpToUseOtherBitSp = off;
8714 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8715 {
8716/** @todo can skip idxRegRsp updating when popping ESP. */
8717 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8718 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8719 }
8720 else
8721 {
8722 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8723 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8724 }
8725 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8726 }
8727 /* SpUpdateEnd: */
8728 uint32_t const offLabelSpUpdateEnd = off;
8729
8730 /*
8731 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8732 * we're skipping lookup).
8733 */
8734 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8735 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8736 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8737 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8738 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8739 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8740 : UINT32_MAX;
8741
8742 if (!TlbState.fSkip)
8743 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8744 else
8745 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8746
8747 /*
     * Use16BitSp / Use32BitSp (whichever SP width the fall-through above did not handle):
8749 */
8750 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8751 {
8752#ifdef RT_ARCH_AMD64
8753 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8754#else
8755 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8756#endif
8757 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8758 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8759 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8760 else
8761 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8762 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8763 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8764 }
8765
8766 /*
8767 * TlbMiss:
8768 *
     * Call helper to do the popping.
8770 */
8771 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8772
8773#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8774 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8775#else
8776 RT_NOREF(idxInstr);
8777#endif
8778
8779 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8780 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8781 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8782 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8783
8784
8785 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8786 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8787 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8788
8789#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8790 /* Do delayed EFLAGS calculations. */
8791 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8792#endif
8793
8794 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8795 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8796
8797 /* Done setting up parameters, make the call. */
8798 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8799
8800 /* Move the return register content to idxRegMemResult. */
8801 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8802 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8803
8804 /* Restore variables and guest shadow registers to volatile registers. */
8805 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8806 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8807
8808#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8809 if (!TlbState.fSkip)
8810 {
8811 /* end of TlbMiss - Jump to the done label. */
8812 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8813 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8814
8815 /*
8816 * TlbLookup:
8817 */
8818 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8819 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8820
8821 /*
         * Emit code to load the value (the host address in idxRegMemResult is replaced by the value read from it).
8823 */
8824 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8825# ifdef IEM_WITH_TLB_STATISTICS
8826 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8827 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8828# endif
8829 switch (cbMem)
8830 {
8831 case 2:
8832 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8833 break;
8834 case 4:
8835 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8836 break;
8837 case 8:
8838 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8839 break;
8840 default:
8841 AssertFailed();
8842 }
8843
8844 TlbState.freeRegsAndReleaseVars(pReNative);
8845
8846 /*
8847 * TlbDone:
8848 *
         * Set the new RSP value (FLAT accesses need to calculate it first) and
8850 * commit the popped register value.
8851 */
8852 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8853 }
8854#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8855
8856 if (idxGReg != X86_GREG_xSP)
8857 {
8858 /* Set the register. */
8859 if (cbMem >= sizeof(uint32_t))
8860 {
8861#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8862 AssertMsg( pReNative->idxCurCall == 0
8863 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8864 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8865 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8866#endif
8867 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8868#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8869 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8870#endif
8871#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8872 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8873 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8874#endif
8875 }
8876 else
8877 {
8878 Assert(cbMem == sizeof(uint16_t));
8879 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8880 kIemNativeGstRegUse_ForUpdate);
8881 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
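            /* Note: the merge writes only bits 15:0 of idxRegDst, preserving bits 63:16, which is the required
               16-bit POP semantic. */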
8882#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8883 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8884#endif
8885 iemNativeRegFreeTmp(pReNative, idxRegDst);
8886 }
8887
8888 /* Complete RSP calculation for FLAT mode. */
8889 if (idxRegEffSp == idxRegRsp)
8890 {
8891 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8892 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8893 else
8894 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8895 }
8896 }
8897 else
8898 {
        /* We're popping RSP, ESP or SP. Only the 16-bit (SP) case needs a bit of extra work, of course. */
8900 if (cbMem == sizeof(uint64_t))
8901 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8902 else if (cbMem == sizeof(uint32_t))
8903 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8904 else
8905 {
8906 if (idxRegEffSp == idxRegRsp)
8907 {
8908 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8909 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8910 else
8911 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8912 }
8913 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8914 }
8915 }
8916
8917#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8918 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8919#endif
8920
8921 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8922 if (idxRegEffSp != idxRegRsp)
8923 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8924 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8925
8926 return off;
8927}
8928
8929
8930
8931/*********************************************************************************************************************************
8932* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8933*********************************************************************************************************************************/
8934
8935#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8936 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/>(\
8937 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8938
8939#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8940 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/>(\
8941 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8942
8943#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8944 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/>(\
8945 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8946
8947#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8948 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/>(\
8949 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8950
8951
8952#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8953 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8954 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8955
8956#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8957 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8958 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8959
8960#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8961 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8962 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8963
8964#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8965 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8966 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8967
8968#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8969 off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8970 pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8971
8972
8973#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8974 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8975 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8976
8977#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8978 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8979 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8980
8981#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8982 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8983 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8984
8985#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8986 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8987 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8988
8989#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8990 off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8991 pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8992
8993
8994#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8995 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8996 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8997
8998#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8999 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
        pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)

#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9002 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9003 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9004
9005#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9006 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9007 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
9008
9009#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9010 off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9011 pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9012
9013
9014#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9015 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9016 pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
9017
9018#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9019 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
9020 sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */>(\
9021 pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
9022
9023
9024#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9025 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
9026 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
9027
9028#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9029 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
9030 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
9031
9032#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9033 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
9034 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
9035
9036#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9037 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
9038 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
9039
9040
9041
9042#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9043 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, true>(\
9044 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
9045
9046#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9047 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, true>(\
9048 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
9049
9050#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9051 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, true>(\
9052 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9053
9054#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9055 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, true>(\
9056 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9057
9058
9059#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9060 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9061 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9062
9063#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9064 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9065 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9066
9067#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9068 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9069 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9070
9071#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9072 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9073 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9074
9075#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9076 off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9077 pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9078
9079
9080#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9081 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9082 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9083
9084#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9085 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9086 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9087
9088#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9089 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9090 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9091
9092#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9093 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9094 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9095
9096#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9097 off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9098 pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9099
9100
9101#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9102 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9103 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9104
9105#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9106 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9107 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9108
9109#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9110 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9111 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9112
9113#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9114 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9115 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9116
9117#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9118 off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9119 pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9120
9121
9122#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9123 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9124 pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9125
9126#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9127 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
9128 sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */, true>(\
9129 pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9130
9131
9132#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9133 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9134 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9135
9136#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9137 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9138 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9139
9140#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9141 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9142 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9143
9144#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9145 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9146 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9147
9148
9149template<uint8_t const a_cbMem, uint32_t const a_fAccess, uint32_t const a_fAlignMaskAndCtl, bool a_fFlat = false>
9150DECL_INLINE_THROW(uint32_t)
9151iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9152 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
9153{
9154 /*
9155 * Assert sanity.
9156 */
9157 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9158 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9159 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9160 && pVarMem->cbVar == sizeof(void *),
9161 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9162
9163 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9164 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9165 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9166 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9167 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9168
9169 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9170 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9171 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9172 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9173 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9174
9175 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
9176
9177 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
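    /* The mapping helpers take pVCpu, &bUnmapInfo, GCPtrMem and (for the segmented variants) iSegReg,
       hence the need for four argument registers. */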
9178
9179#ifdef VBOX_STRICT
9180# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9181 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9182 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9183 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9184 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9185# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9186 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9187 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9188 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
9189
9190 if RT_CONSTEXPR_IF(a_fFlat)
9191 {
9192 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9193 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9194 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9195 switch (a_cbMem)
9196 {
9197 case 1:
9198 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU8));
9199 Assert(!a_fAlignMaskAndCtl);
9200 break;
9201 case 2:
9202 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU16));
9203 Assert(a_fAlignMaskAndCtl < 2);
9204 break;
9205 case 4:
9206 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU32));
9207 Assert(a_fAlignMaskAndCtl < 4);
9208 break;
9209 case 8:
9210 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU64));
9211 Assert(a_fAlignMaskAndCtl < 8);
9212 break;
9213 case 10:
9214 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9215 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9216 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9217 Assert(a_fAlignMaskAndCtl < 8);
9218 break;
9219 case 16:
9220 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU128));
9221 Assert(a_fAlignMaskAndCtl < 16);
9222 break;
9223# if 0
9224 case 32:
9225 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU256));
9226 Assert(a_fAlignMaskAndCtl < 32);
9227 break;
9228 case 64:
9229 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU512));
9230 Assert(a_fAlignMaskAndCtl < 64);
9231 break;
9232# endif
9233 default: AssertFailed(); break;
9234 }
9235 }
9236 else
9237 {
9238 Assert(iSegReg < 6);
9239 switch (a_cbMem)
9240 {
9241 case 1:
9242 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU8));
9243 Assert(!a_fAlignMaskAndCtl);
9244 break;
9245 case 2:
9246 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU16));
9247 Assert(a_fAlignMaskAndCtl < 2);
9248 break;
9249 case 4:
9250 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU32));
9251 Assert(a_fAlignMaskAndCtl < 4);
9252 break;
9253 case 8:
9254 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU64));
9255 Assert(a_fAlignMaskAndCtl < 8);
9256 break;
9257 case 10:
9258 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9259 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9260 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9261 Assert(a_fAlignMaskAndCtl < 8);
9262 break;
9263 case 16:
9264 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU128));
9265 Assert(a_fAlignMaskAndCtl < 16);
9266 break;
9267# if 0
9268 case 32:
9269 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU256));
9270 Assert(a_fAlignMaskAndCtl < 32);
9271 break;
9272 case 64:
9273 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU512));
9274 Assert(a_fAlignMaskAndCtl < 64);
9275 break;
9276# endif
9277 default: AssertFailed(); break;
9278 }
9279 }
9280# undef IEM_MAP_HLP_FN
9281# undef IEM_MAP_HLP_FN_NO_AT
9282#endif
9283
9284#ifdef VBOX_STRICT
9285 /*
9286 * Check that the fExec flags we've got make sense.
9287 */
9288 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9289#endif
9290
9291 /*
9292 * To keep things simple we have to commit any pending writes first as we
9293 * may end up making calls.
9294 */
9295 off = iemNativeRegFlushPendingWrites(pReNative, off);
9296
9297#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9298 /*
9299 * Move/spill/flush stuff out of call-volatile registers.
9300 * This is the easy way out. We could contain this to the tlb-miss branch
9301 * by saving and restoring active stuff here.
9302 */
9303 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9304 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9305#endif
9306
9307 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9308 while the tlb-miss codepath will temporarily put it on the stack.
       Set the type to stack here so we don't need to do it twice below. */
9310 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9311 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9312 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9313 * lookup is done. */
9314
9315 /*
9316 * Define labels and allocate the result register (trying for the return
9317 * register if we can).
9318 */
9319 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9320 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9321 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9322 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
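    /* Note: preferring the call-return register lets the TLB-miss path below skip the extra move of the
       helper's return value. */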
9323 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_cbMem);
9324 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9325 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9326 : UINT32_MAX;
9327
9328 /*
9329 * Jump to the TLB lookup code.
9330 */
9331 if (!TlbState.fSkip)
9332 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9333
9334 /*
9335 * TlbMiss:
9336 *
9337 * Call helper to do the fetching.
9338 * We flush all guest register shadow copies here.
9339 */
9340 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9341
9342#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9343 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9344#else
9345 RT_NOREF(idxInstr);
9346#endif
9347
9348#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9349 /* Save variables in volatile registers. */
9350 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9351 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9352#endif
9353
9354 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9355 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
9356#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9357 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9358#else
9359 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9360#endif
9361
9362 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9363 if RT_CONSTEXPR_IF(!a_fFlat)
9364 {
9365 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9366 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9367 }
9368
9369#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9370 /* Do delayed EFLAGS calculations. */
9371 if RT_CONSTEXPR_IF(a_fFlat)
9372 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
9373 fHstRegsNotToSave);
9374 else
9375 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
9376 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
9377 fHstRegsNotToSave);
9378#endif
9379
9380 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9381 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9382 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
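    /* The helper writes the unmap info byte into this stack slot; it is read back into idxRegUnmapInfo after
       the call (see below). */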
9383
9384 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9385 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9386
9387 /* Done setting up parameters, make the call. */
9388 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9389
9390 /*
9391 * Put the output in the right registers.
9392 */
9393 Assert(idxRegMemResult == pVarMem->idxReg);
9394 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9395 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9396
9397#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9398 /* Restore variables and guest shadow registers to volatile registers. */
9399 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9400 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9401#endif
9402
9403 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9404 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9405
9406#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9407 if (!TlbState.fSkip)
9408 {
        /* end of TlbMiss - Jump to the done label. */
9410 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9411 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9412
9413 /*
9414 * TlbLookup:
9415 */
9416 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, a_cbMem, a_fAlignMaskAndCtl, a_fAccess,
9417 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
9418# ifdef IEM_WITH_TLB_STATISTICS
9419 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9420 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9421# endif
9422
9423 /* [idxVarUnmapInfo] = 0; */
9424 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
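        /* A zero bUnmapInfo tells the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX code that no unmap helper call is
           needed for this TLB-hit mapping. */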
9425
9426 /*
9427 * TlbDone:
9428 */
9429 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9430
9431 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9432
9433# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9434 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9435 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9436# endif
9437 }
9438#else
9439 RT_NOREF(idxLabelTlbMiss);
9440#endif
9441
9442 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9443 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9444
9445 return off;
9446}
9447
9448
9449#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9450 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, \
9451 pCallEntry->idxInstr, IEM_ACCESS_DATA_ATOMIC)
9452
9453#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9454 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, \
9455 pCallEntry->idxInstr, IEM_ACCESS_DATA_RW)
9456
9457#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9458 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, \
9459 pCallEntry->idxInstr, IEM_ACCESS_DATA_W)
9460
9461#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9462 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, \
9463 pCallEntry->idxInstr, IEM_ACCESS_DATA_R)
9464
9465DECL_INLINE_THROW(uint32_t)
9466iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9467 uintptr_t pfnFunction, uint8_t idxInstr, uint32_t fAccess)
9468{
9469 /*
9470 * Assert sanity.
9471 */
9472 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9473#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9474 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9475#endif
9476 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9477 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9478 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9479#ifdef VBOX_STRICT
9480 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9481 {
9482 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9483 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9484 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9485 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9486 case IEM_ACCESS_TYPE_WRITE:
9487 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9488 case IEM_ACCESS_TYPE_READ:
9489 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9490 default: AssertFailed();
9491 }
9492#else
9493 RT_NOREF(fAccess);
9494#endif
9495
9496 /*
9497 * To keep things simple we have to commit any pending writes first as we
9498 * may end up making calls (there shouldn't be any at this point, so this
9499 * is just for consistency).
9500 */
9501 /** @todo we could postpone this till we make the call and reload the
9502 * registers after returning from the call. Not sure if that's sensible or
9503 * not, though. */
9504 off = iemNativeRegFlushPendingWrites(pReNative, off);
9505
9506 /*
9507 * Move/spill/flush stuff out of call-volatile registers.
9508 *
     * We exclude any register holding the bUnmapInfo variable, as we'll be
     * checking it below before deciding whether to make the call, and will free
     * it afterwards.
9511 */
9512 /** @todo save+restore active registers and maybe guest shadows in miss
9513 * scenario. */
9514 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9515 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9516
9517 /*
9518 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9519 * to call the unmap helper function.
9520 *
     * The likelihood of it being zero is higher than for the TLB hit when doing
     * the mapping, as a TLB miss for a well-aligned and unproblematic memory
9523 * access should also end up with a mapping that won't need special unmapping.
9524 */
9525 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9526 * should speed up things for the pure interpreter as well when TLBs
9527 * are enabled. */
9528#ifdef RT_ARCH_AMD64
9529 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9530 {
9531 /* test byte [rbp - xxx], 0ffh */
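        /* 0xF6 /0 ib = TEST r/m8, imm8; iemNativeEmitGprByBpDisp emits the ModRM/SIB/displacement bytes
           addressing the variable's stack slot off the frame pointer. */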
9532 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9533 pbCodeBuf[off++] = 0xf6;
9534 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9535 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9536 pbCodeBuf[off++] = 0xff;
9537 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9538 }
9539 else
9540#endif
9541 {
9542 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarUnmapInfo, &off,
9543 IEMNATIVE_CALL_ARG1_GREG);
9544 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9545 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9546 }
9547 uint32_t const offJmpFixup = off;
9548 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
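    /* The jz above is taken when bUnmapInfo is zero, skipping the unmap helper call for TLB-hit mappings
       that need no committing. */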
9549
9550 /*
9551 * Call the unmap helper function.
9552 */
9553#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9554 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9555#else
9556 RT_NOREF(idxInstr);
9557#endif
9558
9559 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9560 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9561 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9562
9563 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9564 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9565
9566 /* Done setting up parameters, make the call.
9567 Note! Since we can only end up here if we took a TLB miss, any postponed EFLAGS
       calculations have been done there already. Thus, a_fSkipEflChecks = true. */
9569 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9570
    /* The bUnmapInfo variable is implicitly freed by these MCs. */
9572 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9573
9574 /*
9575 * Done, just fixup the jump for the non-call case.
9576 */
9577 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9578
9579 return off;
9580}
9581
9582
9583
9584/*********************************************************************************************************************************
9585* State and Exceptions *
9586*********************************************************************************************************************************/
9587
9588#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9589#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9590
9591#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9592#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9593#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9594
9595#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9596#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9597#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9598
9599
9600DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9601{
9602#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9603 RT_NOREF(pReNative, fForChange);
9604#else
9605 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9606 && fForChange)
9607 {
9608# ifdef RT_ARCH_AMD64
9609
9610 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9611 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9612 {
9613 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9614
9615 /* stmxcsr */
9616 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9617 pbCodeBuf[off++] = X86_OP_REX_B;
9618 pbCodeBuf[off++] = 0x0f;
9619 pbCodeBuf[off++] = 0xae;
9620 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9621 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9622 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9623 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9624 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9625 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9626
9627 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9628 }
9629
9630 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9631 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9632 kIemNativeGstRegUse_ReadOnly);
9633
9634        /*
9635         * Mask all exceptions, clear the exception status flags and load the result
9636         * into MXCSR, taking a detour through memory here because ldmxcsr/stmxcsr
9637         * don't support a register source/target (sigh).
9638         */
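        /* A plain C sketch of what the emitted code below computes (illustrative
           variable names only, nothing here is emitted or compiled):
               uMxcsrTmp = (uGuestMxcsr | X86_MXCSR_XCPT_MASK) & ~X86_MXCSR_XCPT_FLAGS;
           i.e. all SIMD exception types masked and all pending exception flags cleared. */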
9639 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9640 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9641 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9642 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9643
9644 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9645
9646 /* ldmxcsr */
9647 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9648 pbCodeBuf[off++] = X86_OP_REX_B;
9649 pbCodeBuf[off++] = 0x0f;
9650 pbCodeBuf[off++] = 0xae;
9651 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9652 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9653 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9654 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9655 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9656 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9657
9658 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9659 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9660
9661# elif defined(RT_ARCH_ARM64)
9662 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9663
9664 /* Need to save the host floating point control register the first time, clear FPSR. */
9665 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9666 {
9667 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9668 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9669 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9670 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9671 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9672 }
9673
9674        /*
9675         * Translate MXCSR to FPCR.
9676         *
9677         * Unfortunately we can't emulate the exact behavior of MXCSR, as we can't take
9678         * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9679         * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH set.
9680         * We can only use FPCR.FZ, which flushes both input _and_ output denormals to zero.
9681         */
9682 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9683 * and implement alternate handling if FEAT_AFP is present. */
9684 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9685 kIemNativeGstRegUse_ReadOnly);
9686
9687 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9688
9689        /* First make sure that nothing is set in the upper 16 bits (X86_MXCSR_MM, which we don't emulate right now). */
9690 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9691
9692 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9693 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9694 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9695 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9696 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9697
9698 /*
9699 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
9700 *
9701 * Value MXCSR FPCR
9702 * 0 RN RN
9703 * 1 R- R+
9704 * 2 R+ R-
9705 * 3 RZ RZ
9706 *
9707         * Conversion can be achieved by swapping the two bit positions.
9708 */
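        /* A plain C sketch of the conversion (illustrative names, not emitted code):
               uRc        = (uGuestMxcsr >> X86_MXCSR_RC_SHIFT) & 3;
               uFpcrRMode = ((uRc & 1) << 1) | (uRc >> 1);    // swap the two bits
           The two LSR+BFI pairs below perform this swap one bit at a time. */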
9709 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9710 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9711 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9712 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
9713
9714 /* Write the value to FPCR. */
9715 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9716
9717 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9718 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9719 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9720# else
9721# error "Port me"
9722# endif
9723 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9724 }
9725#endif
9726 return off;
9727}
9728
9729
9730
9731/*********************************************************************************************************************************
9732* Emitters for FPU related operations. *
9733*********************************************************************************************************************************/
9734
9735#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9736 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9737
9738/** Emits code for IEM_MC_FETCH_FCW. */
9739DECL_INLINE_THROW(uint32_t)
9740iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9741{
9742 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9743 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9744
9745 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9746
9747 /* Allocate a temporary FCW register. */
9748 /** @todo eliminate extra register */
9749 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9750 kIemNativeGstRegUse_ReadOnly);
9751
9752 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9753
9754 /* Free but don't flush the FCW register. */
9755 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9756 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9757
9758 return off;
9759}
9760
9761
9762#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9763 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9764
9765/** Emits code for IEM_MC_FETCH_FSW. */
9766DECL_INLINE_THROW(uint32_t)
9767iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9768{
9769 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9770 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9771
9772 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9773 /* Allocate a temporary FSW register. */
9774 /** @todo eliminate extra register */
9775 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9776 kIemNativeGstRegUse_ReadOnly);
9777
9778 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9779
9780 /* Free but don't flush the FSW register. */
9781 iemNativeRegFreeTmp(pReNative, idxFswReg);
9782 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9783
9784 return off;
9785}
9786
9787
9788
9789#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9790
9791
9792/*********************************************************************************************************************************
9793* Emitters for SSE/AVX specific operations. *
9794*********************************************************************************************************************************/
9795
9796#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9797 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9798
9799/** Emits code for IEM_MC_COPY_XREG_U128. */
9800DECL_INLINE_THROW(uint32_t)
9801iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9802{
9803    /* This is a nop if the source and destination registers are the same. */
9804 if (iXRegDst != iXRegSrc)
9805 {
9806 /* Allocate destination and source register. */
9807 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9808 kIemNativeGstSimdRegLdStSz_Low128,
9809 kIemNativeGstRegUse_ForFullWrite);
9810 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9811 kIemNativeGstSimdRegLdStSz_Low128,
9812 kIemNativeGstRegUse_ReadOnly);
9813
9814 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9815
9816 /* Free but don't flush the source and destination register. */
9817 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9818 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9819 }
9820
9821 return off;
9822}
9823
9824
9825#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9826 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9827
9828/** Emits code for IEM_MC_FETCH_XREG_U128. */
9829DECL_INLINE_THROW(uint32_t)
9830iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9831{
9832 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9833 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9834
9835 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9836 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9837
9838 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9839
9840 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9841
9842 /* Free but don't flush the source register. */
9843 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9844 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9845
9846 return off;
9847}
9848
9849
9850#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9851 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9852
9853#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9854 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9855
9856/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9857DECL_INLINE_THROW(uint32_t)
9858iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9859{
9860 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9861 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9862
9863 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9864 kIemNativeGstSimdRegLdStSz_Low128,
9865 kIemNativeGstRegUse_ReadOnly);
9866
9867 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9868 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9869
9870 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9871
9872 /* Free but don't flush the source register. */
9873 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9874 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9875
9876 return off;
9877}
9878
9879
9880#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9881 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9882
9883#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9884 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9885
9886/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9887DECL_INLINE_THROW(uint32_t)
9888iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9889{
9890 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9891 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9892
9893 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9894 kIemNativeGstSimdRegLdStSz_Low128,
9895 kIemNativeGstRegUse_ReadOnly);
9896
9897 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9898 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9899
9900 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9901
9902 /* Free but don't flush the source register. */
9903 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9904 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9905
9906 return off;
9907}
9908
9909
9910#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9911 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9912
9913/** Emits code for IEM_MC_FETCH_XREG_U16. */
9914DECL_INLINE_THROW(uint32_t)
9915iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9916{
9917 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9918 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9919
9920 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9921 kIemNativeGstSimdRegLdStSz_Low128,
9922 kIemNativeGstRegUse_ReadOnly);
9923
9924 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9925 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9926
9927 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9928
9929 /* Free but don't flush the source register. */
9930 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9931 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9932
9933 return off;
9934}
9935
9936
9937#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9938 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9939
9940/** Emits code for IEM_MC_FETCH_XREG_U8. */
9941DECL_INLINE_THROW(uint32_t)
9942iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9943{
9944 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9945 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9946
9947 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9948 kIemNativeGstSimdRegLdStSz_Low128,
9949 kIemNativeGstRegUse_ReadOnly);
9950
9951 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9952 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9953
9954 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9955
9956 /* Free but don't flush the source register. */
9957 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9958 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9959
9960 return off;
9961}
9962
9963
9964#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9965 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9966
9967AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9968#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9969 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9970
9971
9972/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9973DECL_INLINE_THROW(uint32_t)
9974iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9975{
9976 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9977 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9978
9979 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9980 kIemNativeGstSimdRegLdStSz_Low128,
9981 kIemNativeGstRegUse_ForFullWrite);
9982 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9983
9984 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9985
9986 /* Free but don't flush the source register. */
9987 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9988 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9989
9990 return off;
9991}
9992
9993
9994#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9995 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9996
9997#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9998 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9999
10000#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
10001 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
10002
10003#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
10004 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
10005
10006#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
10007 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
10008
10009#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
10010 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
10011
10012/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 and the R32/R64 variants. */
10013DECL_INLINE_THROW(uint32_t)
10014iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
10015 uint8_t cbLocal, uint8_t iElem)
10016{
10017 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10018 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
10019
10020#ifdef VBOX_STRICT
10021 switch (cbLocal)
10022 {
10023 case sizeof(uint64_t): Assert(iElem < 2); break;
10024 case sizeof(uint32_t): Assert(iElem < 4); break;
10025 case sizeof(uint16_t): Assert(iElem < 8); break;
10026 case sizeof(uint8_t): Assert(iElem < 16); break;
10027 default: AssertFailed();
10028 }
10029#endif
10030
10031 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10032 kIemNativeGstSimdRegLdStSz_Low128,
10033 kIemNativeGstRegUse_ForUpdate);
10034 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10035
10036 switch (cbLocal)
10037 {
10038 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10039 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10040 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10041 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10042 default: AssertFailed();
10043 }
10044
10045 /* Free but don't flush the source register. */
10046 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10047 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10048
10049 return off;
10050}
10051
10052
10053#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
10054 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
10055
10056/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10057DECL_INLINE_THROW(uint32_t)
10058iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10059{
10060 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10061 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10062
10063 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10064 kIemNativeGstSimdRegLdStSz_Low128,
10065 kIemNativeGstRegUse_ForUpdate);
10066 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10067
10068    /* Zero the vector register first, then store the 64-bit value into the lower 64 bits. */
10069 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10070 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10071
10072 /* Free but don't flush the source register. */
10073 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10074 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10075
10076 return off;
10077}
10078
10079
10080#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10081 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10082
10083/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10084DECL_INLINE_THROW(uint32_t)
10085iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10086{
10087 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10088 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10089
10090 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10091 kIemNativeGstSimdRegLdStSz_Low128,
10092 kIemNativeGstRegUse_ForUpdate);
10093 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10094
10095 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10096 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10097 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10098
10099 /* Free but don't flush the source register. */
10100 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10101 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10102
10103 return off;
10104}
10105
10106
10107#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10108 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10109
10110/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10111DECL_INLINE_THROW(uint32_t)
10112iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10113 uint8_t idxSrcVar, uint8_t iDwSrc)
10114{
10115 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10116 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10117
10118 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10119 kIemNativeGstSimdRegLdStSz_Low128,
10120 kIemNativeGstRegUse_ForUpdate);
10121 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10122
10123 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10124 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10125
10126 /* Free but don't flush the destination register. */
10127 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10128 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10129
10130 return off;
10131}
10132
10133
10134#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10135 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10136
10137/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10138DECL_INLINE_THROW(uint32_t)
10139iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10140{
10141    /*
10142     * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10143     * if iYRegDst gets allocated first for the full write it won't load the
10144     * actual value from CPUMCTX. When iYRegSrc is allocated afterwards it would be
10145     * duplicated from the already allocated host register for iYRegDst, which
10146     * contains garbage. This would be caught by the guest register value checking
10147     * in debug builds.
10148     */
10149 if (iYRegDst != iYRegSrc)
10150 {
10151 /* Allocate destination and source register. */
10152 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10153 kIemNativeGstSimdRegLdStSz_256,
10154 kIemNativeGstRegUse_ForFullWrite);
10155 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10156 kIemNativeGstSimdRegLdStSz_Low128,
10157 kIemNativeGstRegUse_ReadOnly);
10158
10159 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10160 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10161
10162 /* Free but don't flush the source and destination register. */
10163 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10164 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10165 }
10166 else
10167 {
10168        /* This effectively only clears the upper 128 bits of the register. */
10169 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10170 kIemNativeGstSimdRegLdStSz_High128,
10171 kIemNativeGstRegUse_ForFullWrite);
10172
10173 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10174
10175 /* Free but don't flush the destination register. */
10176 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10177 }
10178
10179 return off;
10180}
10181
10182
10183#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10184 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10185
10186/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10187DECL_INLINE_THROW(uint32_t)
10188iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10189{
10190    /*
10191     * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10192     * if iYRegDst gets allocated first for the full write it won't load the
10193     * actual value from CPUMCTX. When iYRegSrc is allocated afterwards it would be
10194     * duplicated from the already allocated host register for iYRegDst, which contains
10195     * garbage. This would be caught by the guest register value checking in debug
10196     * builds. Besides, iYRegSrc == iYRegDst would effectively only clear the upper
10197     * 256 bits of a ZMM register, which we don't support yet, so this is just a nop.
10198     */
10199 if (iYRegDst != iYRegSrc)
10200 {
10201 /* Allocate destination and source register. */
10202 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10203 kIemNativeGstSimdRegLdStSz_256,
10204 kIemNativeGstRegUse_ReadOnly);
10205 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10206 kIemNativeGstSimdRegLdStSz_256,
10207 kIemNativeGstRegUse_ForFullWrite);
10208
10209 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10210
10211 /* Free but don't flush the source and destination register. */
10212 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10213 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10214 }
10215
10216 return off;
10217}
10218
10219
10220#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10221 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10222
10223/** Emits code for IEM_MC_FETCH_YREG_U128. */
10224DECL_INLINE_THROW(uint32_t)
10225iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10226{
10227 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10228 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10229
10230 Assert(iDQWord <= 1);
10231 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10232 iDQWord == 1
10233 ? kIemNativeGstSimdRegLdStSz_High128
10234 : kIemNativeGstSimdRegLdStSz_Low128,
10235 kIemNativeGstRegUse_ReadOnly);
10236
10237 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10238 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10239
10240 if (iDQWord == 1)
10241 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10242 else
10243 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10244
10245 /* Free but don't flush the source register. */
10246 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10247 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10248
10249 return off;
10250}
10251
10252
10253#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10254 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10255
10256/** Emits code for IEM_MC_FETCH_YREG_U64. */
10257DECL_INLINE_THROW(uint32_t)
10258iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10259{
10260 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10261 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10262
10263 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10264 iQWord >= 2
10265 ? kIemNativeGstSimdRegLdStSz_High128
10266 : kIemNativeGstSimdRegLdStSz_Low128,
10267 kIemNativeGstRegUse_ReadOnly);
10268
10269 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10270 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10271
10272 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10273
10274 /* Free but don't flush the source register. */
10275 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10276 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10277
10278 return off;
10279}
10280
10281
10282#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10283 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10284
10285/** Emits code for IEM_MC_FETCH_YREG_U32. */
10286DECL_INLINE_THROW(uint32_t)
10287iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10288{
10289 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10290 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10291
10292 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10293 iDWord >= 4
10294 ? kIemNativeGstSimdRegLdStSz_High128
10295 : kIemNativeGstSimdRegLdStSz_Low128,
10296 kIemNativeGstRegUse_ReadOnly);
10297
10298 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10299 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10300
10301 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10302
10303 /* Free but don't flush the source register. */
10304 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10305 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10306
10307 return off;
10308}
10309
10310
10311#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10312 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10313
10314/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10315DECL_INLINE_THROW(uint32_t)
10316iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10317{
10318 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10319 kIemNativeGstSimdRegLdStSz_High128,
10320 kIemNativeGstRegUse_ForFullWrite);
10321
10322 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10323
10324 /* Free but don't flush the register. */
10325 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10326
10327 return off;
10328}
10329
10330
10331#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10332 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10333
10334/** Emits code for IEM_MC_STORE_YREG_U128. */
10335DECL_INLINE_THROW(uint32_t)
10336iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10337{
10338 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10339 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10340
10341 Assert(iDQword <= 1);
10342 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10343 iDQword == 0
10344 ? kIemNativeGstSimdRegLdStSz_Low128
10345 : kIemNativeGstSimdRegLdStSz_High128,
10346 kIemNativeGstRegUse_ForFullWrite);
10347
10348 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10349
10350 if (iDQword == 0)
10351 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10352 else
10353 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10354
10355 /* Free but don't flush the source register. */
10356 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10357 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10358
10359 return off;
10360}
10361
10362
10363#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10364 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10365
10366/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10367DECL_INLINE_THROW(uint32_t)
10368iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10369{
10370 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10371 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10372
10373 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10374 kIemNativeGstSimdRegLdStSz_256,
10375 kIemNativeGstRegUse_ForFullWrite);
10376
10377 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10378
10379 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10380 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10381
10382 /* Free but don't flush the source register. */
10383 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10384 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10385
10386 return off;
10387}
10388
10389
10390#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10391 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10392
10393/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10394DECL_INLINE_THROW(uint32_t)
10395iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10396{
10397 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10398 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10399
10400 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10401 kIemNativeGstSimdRegLdStSz_256,
10402 kIemNativeGstRegUse_ForFullWrite);
10403
10404 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10405
10406 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10407 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10408
10409 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10410 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10411
10412 return off;
10413}
10414
10415
10416#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10417 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10418
10419/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10420DECL_INLINE_THROW(uint32_t)
10421iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10422{
10423 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10424 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10425
10426 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10427 kIemNativeGstSimdRegLdStSz_256,
10428 kIemNativeGstRegUse_ForFullWrite);
10429
10430 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10431
10432 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10433 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10434
10435 /* Free but don't flush the source register. */
10436 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10437 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10438
10439 return off;
10440}
10441
10442
10443#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10444 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10445
10446/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10447DECL_INLINE_THROW(uint32_t)
10448iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10449{
10450 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10451 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10452
10453 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10454 kIemNativeGstSimdRegLdStSz_256,
10455 kIemNativeGstRegUse_ForFullWrite);
10456
10457 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10458
10459 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10460 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10461
10462 /* Free but don't flush the source register. */
10463 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10464 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10465
10466 return off;
10467}
10468
10469
10470#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10471 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10472
10473/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10474DECL_INLINE_THROW(uint32_t)
10475iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10476{
10477 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10478 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10479
10480 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10481 kIemNativeGstSimdRegLdStSz_256,
10482 kIemNativeGstRegUse_ForFullWrite);
10483
10484 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10485
10486 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10487 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10488
10489 /* Free but don't flush the source register. */
10490 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10491 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10492
10493 return off;
10494}
10495
10496
10497#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10498 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10499
10500/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10501DECL_INLINE_THROW(uint32_t)
10502iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10503{
10504 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10505 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10506
10507 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10508 kIemNativeGstSimdRegLdStSz_256,
10509 kIemNativeGstRegUse_ForFullWrite);
10510
10511 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10512
10513 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10514
10515 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10516 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10517
10518 return off;
10519}
10520
10521
10522#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10523 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10524
10525/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10526DECL_INLINE_THROW(uint32_t)
10527iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10528{
10529 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10530 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10531
10532 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10533 kIemNativeGstSimdRegLdStSz_256,
10534 kIemNativeGstRegUse_ForFullWrite);
10535
10536 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10537
10538 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10539
10540 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10541 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10542
10543 return off;
10544}
10545
10546
10547#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10548 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10549
10550/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10551DECL_INLINE_THROW(uint32_t)
10552iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10553{
10554 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10555 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10556
10557 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10558 kIemNativeGstSimdRegLdStSz_256,
10559 kIemNativeGstRegUse_ForFullWrite);
10560
10561 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10562
10563 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10564
10565 /* Free but don't flush the source register. */
10566 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10567 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10568
10569 return off;
10570}
10571
10572
10573#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10574 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10575
10576/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10577DECL_INLINE_THROW(uint32_t)
10578iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10579{
10580 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10581 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10582
10583 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10584 kIemNativeGstSimdRegLdStSz_256,
10585 kIemNativeGstRegUse_ForFullWrite);
10586
10587 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10588
10589 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10590
10591 /* Free but don't flush the source register. */
10592 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10593 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10594
10595 return off;
10596}
10597
10598
10599#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10600 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10601
10602/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10603DECL_INLINE_THROW(uint32_t)
10604iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10605{
10606 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10607 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10608
10609 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10610 kIemNativeGstSimdRegLdStSz_256,
10611 kIemNativeGstRegUse_ForFullWrite);
10612
10613 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10614
10615 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10616
10617 /* Free but don't flush the source register. */
10618 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10619 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10620
10621 return off;
10622}
10623
10624
10625#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10626 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10627
10628/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10629DECL_INLINE_THROW(uint32_t)
10630iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10631{
10632 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10633 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10634
10635 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10636 kIemNativeGstSimdRegLdStSz_256,
10637 kIemNativeGstRegUse_ForFullWrite);
10638
10639 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10640
10641 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10642 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10643
10644 /* Free but don't flush the source register. */
10645 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10646 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10647
10648 return off;
10649}
10650
10651
10652#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10653 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10654
10655/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10656DECL_INLINE_THROW(uint32_t)
10657iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10658{
10659 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10660 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10661
10662 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10663 kIemNativeGstSimdRegLdStSz_256,
10664 kIemNativeGstRegUse_ForFullWrite);
10665
10666 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10667
10668 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10669 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10670
10671 /* Free but don't flush the source register. */
10672 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10673 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10674
10675 return off;
10676}
10677
10678
10679#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10680 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10681
10682/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10683DECL_INLINE_THROW(uint32_t)
10684iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10685{
10686 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10687 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10688
10689 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10690 kIemNativeGstSimdRegLdStSz_256,
10691 kIemNativeGstRegUse_ForFullWrite);
10692 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10693 kIemNativeGstSimdRegLdStSz_Low128,
10694 kIemNativeGstRegUse_ReadOnly);
10695 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10696
10697 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10698 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10699 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10700
10701 /* Free but don't flush the source and destination registers. */
10702 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10703 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10704 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10705
10706 return off;
10707}
10708
10709
10710#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10711 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10712
10713/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10714DECL_INLINE_THROW(uint32_t)
10715iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10716{
10717 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10718 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10719
10720 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10721 kIemNativeGstSimdRegLdStSz_256,
10722 kIemNativeGstRegUse_ForFullWrite);
10723 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10724 kIemNativeGstSimdRegLdStSz_Low128,
10725 kIemNativeGstRegUse_ReadOnly);
10726 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10727
10728 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10729 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10730 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10731
10732 /* Free but don't flush the source and destination registers. */
10733 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10734 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10735 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10736
10737 return off;
10738}
10739
10740
10741#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10742 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10743
10744
10745/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10746DECL_INLINE_THROW(uint32_t)
10747iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10748{
10749 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10750 kIemNativeGstSimdRegLdStSz_Low128,
10751 kIemNativeGstRegUse_ForUpdate);
10752
10753 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
10754 if (bImm8Mask & RT_BIT(0))
10755 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10756 if (bImm8Mask & RT_BIT(1))
10757 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10758 if (bImm8Mask & RT_BIT(2))
10759 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10760 if (bImm8Mask & RT_BIT(3))
10761 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
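    /* As an illustration of the todo above: a mask of 0xf clears the whole low 128 bits
       and could presumably be folded into a single iemNativeEmitSimdZeroVecRegLowU128
       call instead of four element stores. */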
10762
10763 /* Free but don't flush the destination register. */
10764 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10765
10766 return off;
10767}
10768
10769
10770#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10771 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10772
10773#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10774 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10775
10776/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10777DECL_INLINE_THROW(uint32_t)
10778iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10779{
10780 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10781 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10782
10783 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10784 kIemNativeGstSimdRegLdStSz_256,
10785 kIemNativeGstRegUse_ReadOnly);
10786 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10787
10788 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10789
10790 /* Free but don't flush the source register. */
10791 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10792 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10793
10794 return off;
10795}
10796
10797
10798#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10799 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10800
10801#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10802 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10803
10804/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10805DECL_INLINE_THROW(uint32_t)
10806iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10807{
10808 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10809 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10810
10811 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10812 kIemNativeGstSimdRegLdStSz_256,
10813 kIemNativeGstRegUse_ForFullWrite);
10814    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10815
10816 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10817
10818 /* Free but don't flush the source register. */
10819 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10820 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10821
10822 return off;
10823}
10824
10825
10826#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10827 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10828
10829
10830/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10831DECL_INLINE_THROW(uint32_t)
10832iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10833 uint8_t idxSrcVar, uint8_t iDwSrc)
10834{
10835 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10836 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10837
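    /* Only the 128-bit half containing the destination dword is loaded and
     * marked for update; the other half of the YMM register is left untouched. */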
10838 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10839 iDwDst < 4
10840 ? kIemNativeGstSimdRegLdStSz_Low128
10841 : kIemNativeGstSimdRegLdStSz_High128,
10842 kIemNativeGstRegUse_ForUpdate);
10843 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10844 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10845
10846 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10847 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10848
10849 /* Free but don't flush the destination register. */
10850 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10851 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10852 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10853
10854 return off;
10855}
10856
10857
10858#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10859 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10860
10861
10862/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10863DECL_INLINE_THROW(uint32_t)
10864iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10865 uint8_t idxSrcVar, uint8_t iQwSrc)
10866{
10867 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10868 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10869
10870 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10871 iQwDst < 2
10872 ? kIemNativeGstSimdRegLdStSz_Low128
10873 : kIemNativeGstSimdRegLdStSz_High128,
10874 kIemNativeGstRegUse_ForUpdate);
10875 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10876 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10877
10878 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10879 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10880
10881 /* Free but don't flush the destination register. */
10882 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10883 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10884 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10885
10886 return off;
10887}
10888
10889
10890#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10891 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10892
10893
10894/** Emits code for IEM_MC_STORE_YREG_U64. */
10895DECL_INLINE_THROW(uint32_t)
10896iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10897{
10898 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10899 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10900
10901 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10902 iQwDst < 2
10903 ? kIemNativeGstSimdRegLdStSz_Low128
10904 : kIemNativeGstSimdRegLdStSz_High128,
10905 kIemNativeGstRegUse_ForUpdate);
10906
10907 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10908
10909 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10910
10911 /* Free but don't flush the destination register. */
10912 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10913 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10914
10915 return off;
10916}
10917
10918
10919#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10920 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10921
10922/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10923DECL_INLINE_THROW(uint32_t)
10924iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10925{
10926 RT_NOREF(pReNative, iYReg);
10927 /** @todo Needs to be implemented when support for AVX-512 is added. */
10928 return off;
10929}
10930
10931
10932
10933/*********************************************************************************************************************************
10934* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10935*********************************************************************************************************************************/
10936
10937/**
10938 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
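 *
 * The assembly helper receives the current MXCSR value (with the exception
 * flag bits cleared) as its first, hidden argument and returns the updated
 * MXCSR.  The new exception flags are merged back into the guest MXCSR and,
 * if any unmasked exception is pending, the TB is exited so the corresponding
 * fault can be raised.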
10939 */
10940DECL_INLINE_THROW(uint32_t)
10941iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10942{
10943 /* Grab the MXCSR register; it must not be call-volatile or we would end up freeing it when setting up the call below. */
10944 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10945 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10946 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10947
10948#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10949 /*
10950 * Need to do the FPU preparation.
10951 */
10952 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10953#endif
10954
10955 /*
10956 * Do all the call setup and cleanup.
10957 */
10958 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10959 false /*fFlushPendingWrites*/);
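    /* Note: pending guest register writes are not flushed as part of the call
     * setup (fFlushPendingWrites = false); any remaining writes are flushed
     * after the call, right before the exception check below. */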
10960
10961 /*
10962 * Load the MXCSR register into the first argument and mask out the current exception flags.
10963 */
10964 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10965 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
10966
10967 /*
10968 * Make the call.
10969 */
10970 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
10971
10972 /*
10973 * The updated MXCSR is in the return register; update the exception status flags.
10974 *
10975 * The return register is marked as an allocated temporary because it is required for the
10976 * exception generation check below.
10977 */
10978 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10979 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10980 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10981
10982#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10983 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10984 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10985#endif
10986
10987 /*
10988 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
10989 * raise an \#UD or \#XF and all guest register must be up to date in CPUMCTX.
10990 */
10991 off = iemNativeRegFlushPendingWrites(pReNative, off);
10992
10993#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10994 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10995#else
10996 RT_NOREF(idxInstr);
10997#endif
10998
10999 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
11000 * want to assume the existence of this instruction at the moment. */
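    /* The following isolates the unmasked exception flags, i.e. effectively computes
     *      mxcsr & ~((mxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT)
     * and exits the TB to raise the SSE/AVX floating-point related exception if any
     * of the X86_MXCSR_XCPT_FLAGS bits remain set. */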
11001 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
11002
11003 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
11004 /* tmp &= X86_MXCSR_XCPT_MASK */
11005 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
11006 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
11007 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
11008 /* tmp = ~tmp */
11009 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
11010 /* tmp &= mxcsr */
11011 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
11012 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseSseAvxFpRelated>(pReNative, off, idxRegTmp,
11013 X86_MXCSR_XCPT_FLAGS);
11014
11015 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
11016 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11017 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
11018
11019 return off;
11020}
11021
11022
11023#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
11024 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11025
11026/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
11027DECL_INLINE_THROW(uint32_t)
11028iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11029{
11030 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11031 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11032 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11033}
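
/* Illustrative sketch only (not a verbatim excerpt from the decoder sources): an
 * instruction like ADDPS reaches this emitter through an MC block that references
 * the destination and source XMM registers as argument variables and then invokes
 * something along the lines of
 *     IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_addps_u128, puDst, puSrc);
 * i.e. two visible arguments plus the hidden MXCSR argument
 * (IEM_SSE_AIMPL_HIDDEN_ARGS). */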
11034
11035
11036#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11037 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11038
11039/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
11040DECL_INLINE_THROW(uint32_t)
11041iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11042 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11043{
11044 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11045 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11046 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11047 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11048}
11049
11050
11051/*********************************************************************************************************************************
11052* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
11053*********************************************************************************************************************************/
11054
11055#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
11056 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11057
11058/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
11059DECL_INLINE_THROW(uint32_t)
11060iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11061{
11062 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11063 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11064 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11065}
11066
11067
11068#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11069 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11070
11071/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
11072DECL_INLINE_THROW(uint32_t)
11073iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11074 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11075{
11076 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11077 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11078 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11079 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11080}
11081
11082
11083#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
11084
11085
11086/*********************************************************************************************************************************
11087* Include instruction emitters. *
11088*********************************************************************************************************************************/
11089#include "target-x86/IEMAllN8veEmit-x86.h"
11090