VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@ 106078

Last change on this file since 106078 was 106078, checked in by vboxsync, 2 months ago

VMM/IEM: Liveness work for bugref:10720. bugref:10372

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 540.7 KB
1/* $Id: IEMAllN8veRecompFuncs.h 106078 2024-09-17 19:41:52Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused ones.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
82# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
83#endif
84
85
86/*********************************************************************************************************************************
87* Code emitters for flushing pending guest register writes and sanity checks *
88*********************************************************************************************************************************/
89
90#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
91
92# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
93/**
94 * Updates IEMCPU::uPcUpdatingDebug.
95 */
96DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
97{
98# ifdef RT_ARCH_AMD64
99 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
100 {
101 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
102 if ((int32_t)offDisp == offDisp || cBits != 64)
103 {
104 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
105 if (cBits == 64)
106 pCodeBuf[off++] = X86_OP_REX_W;
107 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
108 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
109 if ((int8_t)offDisp == offDisp)
110 pCodeBuf[off++] = (int8_t)offDisp;
111 else
112 {
113 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
114 off += sizeof(int32_t);
115 }
116 }
117 else
118 {
119 /* mov tmp0, imm64 */
120 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
121
122 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
123 if (cBits == 64)
124 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
125 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
126 pCodeBuf[off++] = X86_OP_REX_R;
127 pCodeBuf[off++] = 0x01;
128 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
129 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
130 }
131 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
132 return off;
133 }
134# endif
135
136 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
137 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
138
139 if (pReNative->Core.fDebugPcInitialized)
140 {
141 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
142 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
143 }
144 else
145 {
146 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
147 pReNative->Core.fDebugPcInitialized = true;
148 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
149 }
150
151 if (cBits == 64)
152 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
153 else
154 {
155 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
156 if (cBits == 16)
157 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
158 }
159
160 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
161 IEMNATIVE_REG_FIXED_TMP0);
162
163 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
164 iemNativeRegFreeTmp(pReNative, idxTmpReg);
165 return off;
166}
167
168
169# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
170DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
171{
172 /* Compare the shadow with the context value; they should match. */
173 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
174 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
175 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
176 return off;
177}
178# endif
179
180#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
181
182/**
183 * Flushes delayed write of a specific guest register.
184 *
185 * This must be called prior to calling CImpl functions and any helpers that use
186 * the guest state (like raising exceptions) and such.
187 *
188 * This optimization has not yet been implemented. The first target would be
189 * RIP updates, since these are the most common ones.
190 */
191DECL_INLINE_THROW(uint32_t)
192iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
193{
194#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
195 /* If for whatever reason it is possible to reference the PC register at some point, we need to do the writeback here first. */
196#endif
197
198#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
199#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
200 if ( enmClass == kIemNativeGstRegRef_EFlags
201 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
202 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
203#else
204 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
205#endif
206
207 if ( enmClass == kIemNativeGstRegRef_Gpr
208 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
209 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
210#endif
211
212#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
213 if ( enmClass == kIemNativeGstRegRef_XReg
214 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
215 {
216 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
217 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
218 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
219
220 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
221 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
222 }
223#endif
224 RT_NOREF(pReNative, enmClass, idxReg);
225 return off;
226}
227
228
229
230/*********************************************************************************************************************************
231* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
232*********************************************************************************************************************************/
233
234#undef IEM_MC_BEGIN /* unused */
235#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
236 { \
237 Assert(pReNative->Core.bmVars == 0); \
238 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
239 Assert(pReNative->Core.bmStack == 0); \
240 pReNative->fMc = (a_fMcFlags); \
241 pReNative->fCImpl = (a_fCImplFlags); \
242 pReNative->cArgsX = (a_cArgsIncludingHidden)
243
244/** We have to get to the end in recompilation mode, as otherwise we won't
245 * generate code for all the IEM_MC_IF_XXX branches. */
246#define IEM_MC_END() \
247 iemNativeVarFreeAll(pReNative); \
248 } return off
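
/* Illustrative sketch of how the two macros above bracket a generated emitter body
   (the MC flags and statements are placeholders, not taken from this file):

       IEM_MC_BEGIN_EX(IEM_MC_F_64BIT, 0, 0);
           ... IEM_MC_XXX statements, typically expanding to off = iemNativeEmitXxx(pReNative, off, ...); ...
       IEM_MC_END();

   which after preprocessing amounts to roughly:

       {
           Assert(pReNative->Core.bmVars == 0);
           Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
           Assert(pReNative->Core.bmStack == 0);
           pReNative->fMc    = IEM_MC_F_64BIT;
           pReNative->fCImpl = 0;
           pReNative->cArgsX = 0;
           ... emitter calls advancing 'off' ...
           iemNativeVarFreeAll(pReNative);
       } return off;
*/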
249
250
251
252/*********************************************************************************************************************************
253* Liveness Stubs *
254*********************************************************************************************************************************/
255
256#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
257#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
258#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
259
260#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
261#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
262#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
263
264#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
265#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
266#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
267
268
269
270/*********************************************************************************************************************************
271* Native Emitter Support. *
272*********************************************************************************************************************************/
273
274#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
275
276#define IEM_MC_NATIVE_ELSE() } else {
277
278#define IEM_MC_NATIVE_ENDIF() } ((void)0)
279
280
281#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
282 off = a_fnEmitter(pReNative, off)
283
284#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
285 off = a_fnEmitter(pReNative, off, (a0))
286
287#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
288 off = a_fnEmitter(pReNative, off, (a0), (a1))
289
290#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
291 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
292
293#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
294 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
295
296#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
297 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
298
299#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
300 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
301
302#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
303 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
304
305#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
306 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
307
308#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
309 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
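
/* Rough usage sketch for the IEM_MC_NATIVE_IF/ELSE/ENDIF and IEM_MC_NATIVE_EMIT_N
   macros (the emitter and fallback names are made up, not defined in this file):
   an instruction body selects the native emitter when the host architecture is
   supported and falls back to the assembly/C helper otherwise.

       IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
           IEM_MC_NATIVE_EMIT_2(iemNativeEmit_some_binop_r_r, idxVarDst, idxVarSrc);
       IEM_MC_NATIVE_ELSE()
           IEM_MC_CALL_VOID_AIMPL_2(pfnSomeFallback, puDst, uSrc);
       IEM_MC_NATIVE_ENDIF();
*/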
310
311
312#ifndef RT_ARCH_AMD64
313# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
314#else
315/** @note This is a naive approach that ASSUMES that the register isn't
316 * allocated, so it only works safely for the first allocation(s) in
317 * a MC block. */
318# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
319 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
320
321DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
322 uint32_t off, bool fAllocated);
323
324DECL_INLINE_THROW(uint32_t)
325iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
326{
327 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
328 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
329 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
330
331# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
332 /* Must flush the register if it holds pending writes. */
333 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
334 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
335 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
336# endif
337
338 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
339 return off;
340}
341
342#endif /* RT_ARCH_AMD64 */
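
/* Hypothetical usage sketch for IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL (the
   local name is a placeholder): pin a freshly declared local to a specific AMD64
   host register because a following native emitter needs the value there.

       IEM_MC_LOCAL(uint64_t, uResult);
       IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(uResult, X86_GREG_xAX);
*/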
343
344
345
346/*********************************************************************************************************************************
347* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
348*********************************************************************************************************************************/
349
350#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
351 pReNative->fMc = 0; \
352 pReNative->fCImpl = (a_fFlags); \
353 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
354 a_cbInstr) /** @todo not used ... */
355
356
357#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
358 pReNative->fMc = 0; \
359 pReNative->fCImpl = (a_fFlags); \
360 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
361
362DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
363 uint8_t idxInstr, uint64_t a_fGstShwFlush,
364 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
365{
366 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
367}
368
369
370#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
371 pReNative->fMc = 0; \
372 pReNative->fCImpl = (a_fFlags); \
373 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
374 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
375
376DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
377 uint8_t idxInstr, uint64_t a_fGstShwFlush,
378 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
379{
380 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
381}
382
383
384#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
385 pReNative->fMc = 0; \
386 pReNative->fCImpl = (a_fFlags); \
387 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
388 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
389
390DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
391 uint8_t idxInstr, uint64_t a_fGstShwFlush,
392 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
393 uint64_t uArg2)
394{
395 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
396}
397
398
399
400/*********************************************************************************************************************************
401* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
402*********************************************************************************************************************************/
403
404/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
405 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
406DECL_INLINE_THROW(uint32_t)
407iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
408{
409 /*
410 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
411 * return with a special status code and make the execution loop deal with
412 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
413 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
414 * could continue w/o interruption, it probably will drop into the
415 * debugger, so it's not worth the effort of trying to service it here; we
416 * just lump it in with the handling of the others.
417 *
418 * To simplify the code and the register state management even more (wrt
419 * the immediate in the AND operation), we always update the flags and omit
420 * the conditional jump an extra check would require.
421 */
422 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
423 <= UINT32_MAX);
424#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
425 AssertMsg( pReNative->idxCurCall == 0
426 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
427 IEMLIVENESSBIT_IDX_EFL_OTHER)),
428 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
429 IEMLIVENESSBIT_IDX_EFL_OTHER)));
430#endif
431
432 /*
433 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
434 * any pending register writes must be flushed.
435 */
436 off = iemNativeRegFlushPendingWrites(pReNative, off);
437
438 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
439 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
440 true /*fSkipLivenessAssert*/);
441 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
442 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
443 kIemNativeLabelType_ReturnWithFlags);
444 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
445 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
446
447 /* Free but don't flush the EFLAGS register. */
448 iemNativeRegFreeTmp(pReNative, idxEflReg);
449
450 return off;
451}
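
/* What the emitter above produces, expressed as plain C for clarity (a sketch of
   the generated code, not a definition from this file):

       if (eflags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
           goto ReturnWithFlags;                              <- TB exit
       eflags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
       cpum.GstCtx.eflags = eflags;                           <- unconditional update
*/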
452
453
454/** Helper for iemNativeEmitFinishInstructionWithStatus. */
455DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
456{
457 unsigned const offOpcodes = pCallEntry->offOpcode;
458 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
459 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
460 {
461 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
462 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
463 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
464 }
465 AssertFailedReturn(NIL_RTGCPHYS);
466}
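
/* Worked example for the lookup above (all numbers are made up): with
   offOpcode = 0x23 and aRanges[0] covering offOpcodes 0x00..0x2f at physical page
   offset 0x410, offRange is 0x23 and the function returns
   iemTbGetRangePhysPageAddr(pTb, 0) + 0x23 + 0x410. */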
467
468
469/** The VINF_SUCCESS dummy. */
470template<int const a_rcNormal, bool const a_fIsJump>
471DECL_FORCE_INLINE_THROW(uint32_t)
472iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
473 int32_t const offJump)
474{
475 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
476 if (a_rcNormal != VINF_SUCCESS)
477 {
478#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
479 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
480#else
481 RT_NOREF_PV(pCallEntry);
482#endif
483
484 /* As this code returns from the TB, any pending register writes must be flushed. */
485 off = iemNativeRegFlushPendingWrites(pReNative, off);
486
487 /*
488 * If we're in a conditional, mark the current branch as exiting so we
489 * can disregard its state when we hit the IEM_MC_ENDIF.
490 */
491 iemNativeMarkCurCondBranchAsExiting(pReNative);
492
493 /*
494 * Use the lookup table for getting to the next TB quickly.
495 * Note! In this code path there can only be one entry at present.
496 */
497 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
498 PCIEMTB const pTbOrg = pReNative->pTbOrg;
499 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
500 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
501
502#if 0
503 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
504 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
505 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
506 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
507 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
508
509 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
510
511#else
512 /* Load the index as argument #1 for the helper call at the given label. */
513 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
514
515 /*
516 * Figure out the physical address of the current instruction and see
517 * whether the next instruction we're about to execute is in the same
518 * page so we can optimistically skip TLB loading.
519 *
520 * - This is safe for all cases in FLAT mode.
521 * - In segmented modes it is complicated, given that a negative
522 * jump may underflow EIP and a forward jump may overflow or run into
523 * CS.LIM and trigger a #GP. The only thing we can get away with
524 * now at compile time is forward jumps w/o CS.LIM checks, since the
525 * lack of CS.LIM checks means we're good for the entire physical page
526 * we're executing on and another 15 bytes before we run into CS.LIM.
527 */
528 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
529# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
530 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
531# endif
532 )
533 {
534 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
535 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
536 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
537 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
538
539 {
540 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
541 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
542
543 /* Load the key lookup flags into the 2nd argument for the helper call.
544 - This is safe wrt CS limit checking since we're only here for FLAT modes.
545 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
546 interrupt shadow.
547 - The NMI inhibiting is more questionable, though... */
548 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
549 * Should we copy it into fExec to simplify this? OTOH, it's just a
550 * couple of extra instructions if EFLAGS are already in a register. */
551 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
552 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
553
554 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
555 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
556 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
557 }
558 }
559 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
560 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
561 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
562#endif
563 }
564 return off;
565}
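
/* Worked example for the same-page test above (addresses are made up): a
   fall-through at GCPhysPcCurrent = 0x1000ffd with cbOpcode = 3 yields
   GCPhysPcNext = 0x1001000, which is on the next page, so the TLB-less exit is not
   used.  The second condition additionally excludes instructions whose opcode
   bytes straddle the page boundary (the '0xfff: je -56h' case noted above). */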
566
567
568#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
569 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
570 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
571
572#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
573 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
574 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
575 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
576
577/** Same as iemRegAddToRip64AndFinishingNoFlags. */
578DECL_INLINE_THROW(uint32_t)
579iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
580{
581#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
582# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
583 if (!pReNative->Core.offPc)
584 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
585# endif
586
587 /* Allocate a temporary PC register. */
588 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
589
590 /* Perform the addition and store the result. */
591 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
592 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
593
594 /* Free but don't flush the PC register. */
595 iemNativeRegFreeTmp(pReNative, idxPcReg);
596#endif
597
598#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
599 pReNative->Core.offPc += cbInstr;
600 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
601# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
602 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
603 off = iemNativeEmitPcDebugCheck(pReNative, off);
604# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
605 off = iemNativePcAdjustCheck(pReNative, off);
606# endif
607 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
608#endif
609
610 return off;
611}
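
/* Sketch of the delayed PC updating used above and in the EIP/IP variants below:
   with IEMNATIVE_WITH_DELAYED_PC_UPDATING defined, no code is emitted here at all;
   the instruction length is merely accumulated in pReNative->Core.offPc at
   recompile time and written back to cpum.GstCtx.rip later (the pending-write
   flushing and the branch emitters further down reset offPc when they store the
   PC).  Without that option every instruction advance emits the equivalent of

       cpum.GstCtx.rip += cbInstr;    (done via a temporary host register)
*/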
612
613
614#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
615 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
616 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
617
618#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
619 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
620 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
621 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
622
623/** Same as iemRegAddToEip32AndFinishingNoFlags. */
624DECL_INLINE_THROW(uint32_t)
625iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
626{
627#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
628# ifdef IEMNATIVE_REG_FIXED_PC_DBG
629 if (!pReNative->Core.offPc)
630 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
631# endif
632
633 /* Allocate a temporary PC register. */
634 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
635
636 /* Perform the addition and store the result. */
637 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
638 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
639
640 /* Free but don't flush the PC register. */
641 iemNativeRegFreeTmp(pReNative, idxPcReg);
642#endif
643
644#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
645 pReNative->Core.offPc += cbInstr;
646 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
647# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
648 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
649 off = iemNativeEmitPcDebugCheck(pReNative, off);
650# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
651 off = iemNativePcAdjustCheck(pReNative, off);
652# endif
653 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
654#endif
655
656 return off;
657}
658
659
660#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
661 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
662 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
663
664#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
665 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
667 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
668
669/** Same as iemRegAddToIp16AndFinishingNoFlags. */
670DECL_INLINE_THROW(uint32_t)
671iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
672{
673#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
674# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
675 if (!pReNative->Core.offPc)
676 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
677# endif
678
679 /* Allocate a temporary PC register. */
680 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
681
682 /* Perform the addition and store the result. */
683 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
684 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
685 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
686
687 /* Free but don't flush the PC register. */
688 iemNativeRegFreeTmp(pReNative, idxPcReg);
689#endif
690
691#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
692 pReNative->Core.offPc += cbInstr;
693 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
694# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
695 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
696 off = iemNativeEmitPcDebugCheck(pReNative, off);
697# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
698 off = iemNativePcAdjustCheck(pReNative, off);
699# endif
700 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
701#endif
702
703 return off;
704}
705
706
707/*********************************************************************************************************************************
708* Common code for changing PC/RIP/EIP/IP. *
709*********************************************************************************************************************************/
710
711/**
712 * Emits code to check if the content of @a idxAddrReg is a canonical address,
713 * raising a \#GP(0) if it isn't.
714 *
715 * @returns New code buffer offset, UINT32_MAX on failure.
716 * @param pReNative The native recompile state.
717 * @param off The code buffer offset.
718 * @param idxAddrReg The host register with the address to check.
719 * @param idxInstr The current instruction.
720 */
721DECL_FORCE_INLINE_THROW(uint32_t)
722iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
723{
724 /*
725 * Make sure we don't have any outstanding guest register writes as we may
726 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
727 */
728 off = iemNativeRegFlushPendingWrites(pReNative, off);
729
730#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
731 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
732#else
733 RT_NOREF(idxInstr);
734#endif
735
736#ifdef RT_ARCH_AMD64
737 /*
738 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
739 * return raisexcpt();
740 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
741 */
742 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
743
744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
745 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
746 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
747 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
748 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
749
750 iemNativeRegFreeTmp(pReNative, iTmpReg);
751
752#elif defined(RT_ARCH_ARM64)
753 /*
754 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
755 * return raisexcpt();
756 * ----
757 * mov x1, 0x800000000000
758 * add x1, x0, x1
759 * cmp xzr, x1, lsr 48
760 * b.ne .Lraisexcpt
761 */
762 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
763
764 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
765 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
766 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
767 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
768
769 iemNativeRegFreeTmp(pReNative, iTmpReg);
770
771#else
772# error "Port me"
773#endif
774 return off;
775}
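
/* Worked example for the canonical-address trick above (bits 63:47 of a canonical
   address are all copies of bit 47):
       0x00007fffffffffff -> high32 0x00007fff, +0x8000 = 0x0000ffff, >>16 = 0  (ok)
       0xffff800000000000 -> high32 0xffff8000, +0x8000 = 0x00000000, >>16 = 0  (ok)
       0x0000800000000000 -> high32 0x00008000, +0x8000 = 0x00010000, >>16 = 1  (raise #GP(0))
   The ARM64 variant is the same idea done in 64 bits: (uAddr + 0x800000000000) >> 48
   is zero exactly for canonical addresses. */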
776
777
778/**
779 * Emits code to check if the content of @a idxAddrReg is a canonical address,
780 * raising a \#GP(0) if it isn't.
781 *
782 * Caller makes sure everything is flushed, except maybe PC.
783 *
784 * @returns New code buffer offset, UINT32_MAX on failure.
785 * @param pReNative The native recompile state.
786 * @param off The code buffer offset.
787 * @param idxAddrReg The host register with the address to check.
788 * @param offDisp The relative displacement that has already been
789 * added to idxAddrReg and must be subtracted if
790 * raising a \#GP(0).
791 * @param idxInstr The current instruction.
792 */
793DECL_FORCE_INLINE_THROW(uint32_t)
794iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
795 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
796{
797#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
798 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
799#endif
800
801#ifdef RT_ARCH_AMD64
802 /*
803 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
804 * return raisexcpt();
805 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
806 */
807 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
808
809 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
810 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
811 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
812 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
813
814#elif defined(RT_ARCH_ARM64)
815 /*
816 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
817 * return raisexcpt();
818 * ----
819 * mov x1, 0x800000000000
820 * add x1, x0, x1
821 * cmp xzr, x1, lsr 48
822 * b.ne .Lraisexcpt
823 */
824 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
825
826 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
827 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
828 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
829#else
830# error "Port me"
831#endif
832
833 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
834 uint32_t const offFixup1 = off;
835 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
836
837 /* jump .Lnoexcept; Skip the #GP code. */
838 uint32_t const offFixup2 = off;
839 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
840
841 /* .Lraisexcpt: */
842 iemNativeFixupFixedJump(pReNative, offFixup1, off);
843#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
844 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
845#else
846 RT_NOREF(idxInstr);
847#endif
848
849 /* Undo the PC adjustment and store the old PC value. */
850 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
851 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxAddrReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
852
853 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
854
855 /* .Lnoexcept: */
856 iemNativeFixupFixedJump(pReNative, offFixup2, off);
857
858 iemNativeRegFreeTmp(pReNative, iTmpReg);
859 return off;
860}
861
862
863/**
864 * Emits code to check if the content of @a idxAddrReg is a canonical address,
865 * raising a \#GP(0) if it isn't.
866 *
867 * Caller makes sure everything is flushed, except maybe PC.
868 *
869 * @returns New code buffer offset, UINT32_MAX on failure.
870 * @param pReNative The native recompile state.
871 * @param off The code buffer offset.
872 * @param idxAddrReg The host register with the address to check.
873 * @param idxOldPcReg Register holding the old PC that offPc is relative
874 * to if available, otherwise UINT8_MAX.
875 * @param idxInstr The current instruction.
876 */
877DECL_FORCE_INLINE_THROW(uint32_t)
878iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
879 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
880{
881#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
882 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
883#endif
884
885#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
886# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
887 if (!pReNative->Core.offPc)
888# endif
889 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
890#else
891 RT_NOREF(idxInstr);
892#endif
893
894#ifdef RT_ARCH_AMD64
895 /*
896 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
897 * return raisexcpt();
898 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
899 */
900 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
901
902 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
903 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
904 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
905 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
906
907#elif defined(RT_ARCH_ARM64)
908 /*
909 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
910 * return raisexcpt();
911 * ----
912 * mov x1, 0x800000000000
913 * add x1, x0, x1
914 * cmp xzr, x1, lsr 48
915 * b.ne .Lraisexcpt
916 */
917 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
918
919 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
920 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
921 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
922#else
923# error "Port me"
924#endif
925
926#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
927 if (pReNative->Core.offPc)
928 {
929 /** @todo On x86, it is said that conditional jumps forward are statically
930 * predicted as not taken, so this isn't a very good construct.
931 * Investigate whether it makes sense to invert it and add another
932 * jump. Also, find out wtf the static predictor does here on arm! */
933 uint32_t const offFixup = off;
934 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
935
936 /* .Lraisexcpt: */
937# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
938 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
939# endif
940 /* We need to update cpum.GstCtx.rip. */
941 if (idxOldPcReg == UINT8_MAX)
942 {
943 idxOldPcReg = iTmpReg;
944 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
945 }
946 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
947 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
948
949 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
950 iemNativeFixupFixedJump(pReNative, offFixup, off);
951 }
952 else
953#endif
954 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
955
956 iemNativeRegFreeTmp(pReNative, iTmpReg);
957
958 return off;
959}
960
961
962/**
963 * Emits code to check if that the content of @a idxAddrReg is within the limit
964 * of CS, raising a \#GP(0) if it isn't.
965 *
966 * @returns New code buffer offset; throws VBox status code on error.
967 * @param pReNative The native recompile state.
968 * @param off The code buffer offset.
969 * @param idxAddrReg The host register (32-bit) with the address to
970 * check.
971 * @param idxInstr The current instruction.
972 */
973DECL_FORCE_INLINE_THROW(uint32_t)
974iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
975 uint8_t idxAddrReg, uint8_t idxInstr)
976{
977 /*
978 * Make sure we don't have any outstanding guest register writes as we may
979 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
980 */
981 off = iemNativeRegFlushPendingWrites(pReNative, off);
982
983#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
984 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
985#else
986 RT_NOREF(idxInstr);
987#endif
988
989 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
990 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
991 kIemNativeGstRegUse_ReadOnly);
992
993 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
994 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
995
996 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
997 return off;
998}
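
/* Worked example for the check above: with CS.LIM = 0x0000ffff, an address of
   0x00010000 in idxAddrReg takes the RaiseGp0 exit (unsigned 'above' compare),
   while 0x0000ffff is still within the limit and falls through. */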
999
1000
1001
1002
1003/**
1004 * Emits code to check that the content of @a idxAddrReg is within the limit
1005 * of CS, raising a \#GP(0) if it isn't.
1006 *
1007 * Caller makes sure everything is flushed, except maybe PC.
1008 *
1009 * @returns New code buffer offset; throws VBox status code on error.
1010 * @param pReNative The native recompile state.
1011 * @param off The code buffer offset.
1012 * @param idxAddrReg The host register (32-bit) with the address to
1013 * check.
1014 * @param idxOldPcReg Register holding the old PC that offPc is relative
1015 * to if available, otherwise UINT8_MAX.
1016 * @param idxInstr The current instruction.
1017 */
1018DECL_FORCE_INLINE_THROW(uint32_t)
1019iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1020 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1021{
1022#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1023 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1024#endif
1025
1026#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1027# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1028 if (!pReNative->Core.offPc)
1029# endif
1030 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1031#else
1032 RT_NOREF(idxInstr);
1033#endif
1034
1035 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1036 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1037 kIemNativeGstRegUse_ReadOnly);
1038
1039 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1040#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1041 if (pReNative->Core.offPc)
1042 {
1043 uint32_t const offFixup = off;
1044 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1045
1046 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1047 if (idxOldPcReg == UINT8_MAX)
1048 {
1049 idxOldPcReg = idxAddrReg;
1050 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1051 }
1052 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1053 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1054# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1055 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1056# endif
1057 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
1058 iemNativeFixupFixedJump(pReNative, offFixup, off);
1059 }
1060 else
1061#endif
1062 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
1063
1064 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1065 return off;
1066}
1067
1068
1069/*********************************************************************************************************************************
1070* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1071*********************************************************************************************************************************/
1072
1073#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1074 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1075 (a_enmEffOpSize), pCallEntry->idxInstr); \
1076 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1077
1078#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1079 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1080 (a_enmEffOpSize), pCallEntry->idxInstr); \
1081 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1082 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1083
1084#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1085 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1086 IEMMODE_16BIT, pCallEntry->idxInstr); \
1087 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1088
1089#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1090 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1091 IEMMODE_16BIT, pCallEntry->idxInstr); \
1092 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1093 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1094
1095#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1096 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1097 IEMMODE_64BIT, pCallEntry->idxInstr); \
1098 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1099
1100#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1101 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1102 IEMMODE_64BIT, pCallEntry->idxInstr); \
1103 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1104 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1105
1106
1107#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1108 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1109 (a_enmEffOpSize), pCallEntry->idxInstr); \
1110 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1111
1112#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1113 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1114 (a_enmEffOpSize), pCallEntry->idxInstr); \
1115 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1116 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1117
1118#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1119 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1120 IEMMODE_16BIT, pCallEntry->idxInstr); \
1121 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1122
1123#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1124 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1125 IEMMODE_16BIT, pCallEntry->idxInstr); \
1126 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1127 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1128
1129#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1130 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1131 IEMMODE_64BIT, pCallEntry->idxInstr); \
1132 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1133
1134#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1135 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1136 IEMMODE_64BIT, pCallEntry->idxInstr); \
1137 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1138 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1139
1140/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1141 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1142 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1143template<bool const a_fWithinPage>
1144DECL_INLINE_THROW(uint32_t)
1145iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1146 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1147{
1148 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1149#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1150 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1151 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1152 {
1153 /* No #GP checking required, just update offPc and get on with it. */
1154 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1155# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1156 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1157# endif
1158 }
1159 else
1160#endif
1161 {
1162 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1163 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1164 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1165
1166 /* Allocate a temporary PC register. */
1167 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1168 kIemNativeGstRegUse_ForUpdate);
1169
1170 /* Perform the addition. */
1171 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1172
1173 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1174 {
1175 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1176 We can skip this if the target is within the same page. */
1177 if (!a_fWithinPage)
1178 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1179 (int64_t)offDisp + cbInstr, idxInstr);
1180 }
1181 else
1182 {
1183 /* Just truncate the result to 16-bit IP. */
1184 Assert(enmEffOpSize == IEMMODE_16BIT);
1185 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1186 }
1187
1188#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1189# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1190 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1191 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1192# endif
1193 /* Since we've already got the new PC value in idxPcReg, we can just as
1194 well write it out and reset offPc to zero. Otherwise, we'd need to use
1195 a copy of the shadow PC, which will cost another move instruction here. */
1196# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1197 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1198 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1199 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1200 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1201 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1202 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1203# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1204 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1205 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1206# endif
1207# endif
1208 pReNative->Core.offPc = 0;
1209#endif
1210
1211 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1212
1213 /* Free but don't flush the PC register. */
1214 iemNativeRegFreeTmp(pReNative, idxPcReg);
1215 }
1216 return off;
1217}
1218
1219
1220#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1221 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1222 (a_enmEffOpSize), pCallEntry->idxInstr); \
1223 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1224
1225#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1226 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1227 (a_enmEffOpSize), pCallEntry->idxInstr); \
1228 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1229 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1230
1231#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1232 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1233 IEMMODE_16BIT, pCallEntry->idxInstr); \
1234 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1235
1236#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1237 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1238 IEMMODE_16BIT, pCallEntry->idxInstr); \
1239 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1240 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1241
1242#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1243 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1244 IEMMODE_32BIT, pCallEntry->idxInstr); \
1245 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1246
1247#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1248 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1249 IEMMODE_32BIT, pCallEntry->idxInstr); \
1250 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1251 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1252
1253
1254#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1255 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1256 (a_enmEffOpSize), pCallEntry->idxInstr); \
1257 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1258
1259#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1260 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1261 (a_enmEffOpSize), pCallEntry->idxInstr); \
1262 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1263 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1264
1265#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1266 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1267 IEMMODE_16BIT, pCallEntry->idxInstr); \
1268 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1269
1270#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1271 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1272 IEMMODE_16BIT, pCallEntry->idxInstr); \
1273 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1274 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1275
1276#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1277 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1278 IEMMODE_32BIT, pCallEntry->idxInstr); \
1279 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1280
1281#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1282 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1283 IEMMODE_32BIT, pCallEntry->idxInstr); \
1284 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1285 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1286
1287/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1288 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1289 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1290template<bool const a_fFlat>
1291DECL_INLINE_THROW(uint32_t)
1292iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1293 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1294{
1295 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1296#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1297 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1298#endif
1299
1300 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1301 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1302 {
1303 off = iemNativeRegFlushPendingWrites(pReNative, off);
1304#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1305 Assert(pReNative->Core.offPc == 0);
1306#endif
1307 }
1308
1309 /* Allocate a temporary PC register. */
1310 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1311
1312 /* Perform the addition. */
1313 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1318
1319 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1320 if (enmEffOpSize == IEMMODE_16BIT)
1321 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1322
1323 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
1324 if (!a_fFlat)
1325 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1326
1327 /* Commit it. */
1328#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1329 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1330 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1331#endif
1332
1333 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1334#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1335 pReNative->Core.offPc = 0;
1336#endif
1337
1338 /* Free but don't flush the PC register. */
1339 iemNativeRegFreeTmp(pReNative, idxPcReg);
1340
1341 return off;
1342}
1343
1344
1345#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1346 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1347 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1348
1349#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1350 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1351 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1352 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1353
1354#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1355 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1356 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1357
1358#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1359 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1360 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1361 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1362
1363#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1364 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1365 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1366
1367#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1368 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1369 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1370 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1371
1372/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1373DECL_INLINE_THROW(uint32_t)
1374iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1375 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1376{
1377 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1378 off = iemNativeRegFlushPendingWrites(pReNative, off);
1379
1380#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1381 Assert(pReNative->Core.offPc == 0);
1382 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1383#endif
1384
1385 /* Allocate a temporary PC register. */
1386 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1387
1388 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1389 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1390 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1391 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1392#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1393 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1394 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1395#endif
1396 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1397
1398 /* Free but don't flush the PC register. */
1399 iemNativeRegFreeTmp(pReNative, idxPcReg);
1400
1401 return off;
1402}
1403
1404
1405
1406/*********************************************************************************************************************************
1407* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                 *
1408*********************************************************************************************************************************/
1409
1410/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1411#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1412 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1413
1414/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1415#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1416 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1417
1418/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1419#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1420 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1421
1422/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1423 * clears flags. */
1424#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1425 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1426 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1427
1428/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1429 * clears flags. */
1430#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1431 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1432 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1433
1434/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1435 * clears flags. */
1436#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1437 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1438 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1439
1440#undef IEM_MC_SET_RIP_U16_AND_FINISH
1441
1442
1443/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1444#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1445 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1446
1447/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1448#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1449 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1450
1451/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1452 * clears flags. */
1453#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1454 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1455 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1456
1457/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1458 * and clears flags. */
1459#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1460 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1461 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1462
1463#undef IEM_MC_SET_RIP_U32_AND_FINISH
1464
1465
1466/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1467#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1468 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1469
1470/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1471 * and clears flags. */
1472#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1473 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1474 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1475
1476#undef IEM_MC_SET_RIP_U64_AND_FINISH
1477
1478
1479/** Same as iemRegRipJumpU16AndFinishNoFlags,
1480 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1481DECL_INLINE_THROW(uint32_t)
1482iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1483 uint8_t idxInstr, uint8_t cbVar)
1484{
1485 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1486 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1487
1488 /* If we can't rule out a #GP(0) below, flush all dirty registers except for
1489 the PC, which will be handled specially by the two workers below if they raise a #GP(0). */
1490 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1491 uint8_t const idxOldPcReg = fMayRaiseGp0
1492 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1493 : UINT8_MAX;
1494 if (fMayRaiseGp0)
1495 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1496
1497 /* Get a register with the new PC loaded from idxVarPc.
1498 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1499 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1500
1501 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1502 if (fMayRaiseGp0)
1503 {
1504 if (f64Bit)
1505 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1506 else
1507 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1508 }
1509
1510 /* Store the result. */
1511 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1512
1513#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1514 pReNative->Core.offPc = 0;
1515 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1516# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1517 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1518 pReNative->Core.fDebugPcInitialized = true;
1519 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1520# endif
1521#endif
1522
1523 if (idxOldPcReg != UINT8_MAX)
1524 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1525 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1526 /** @todo implicitly free the variable? */
1527
1528 return off;
1529}
1530
1531
1532
1533/*********************************************************************************************************************************
1534* Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters).      *
1535*********************************************************************************************************************************/
1536
1537/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
1538 * them below the stack emitters, but then they would not be close to the rest of the PC/RIP handling...). */
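/** Emits code for the 16-bit stack pointer variant of a push: computes
 *  idxRegEffSp = (idxRegRsp - cbMem) & 0xffff and merges the new value back
 *  into bits 15:0 of idxRegRsp, leaving bits 63:16 untouched. */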
1539DECL_FORCE_INLINE_THROW(uint32_t)
1540iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1541{
1542 /* Use16BitSp: */
1543#ifdef RT_ARCH_AMD64
1544 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1545 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1546#else
1547 /* sub regeff, regrsp, #cbMem */
1548 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1549 /* and regeff, regeff, #0xffff */
1550 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1551 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1552 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
1553 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1554#endif
1555 return off;
1556}
1557
1558
1559DECL_FORCE_INLINE(uint32_t)
1560iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1561{
1562 /* Use32BitSp: */
1563 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1564 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1565 return off;
1566}
1567
1568
1569DECL_INLINE_THROW(uint32_t)
1570iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1571 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
1572{
1573 /*
1574 * Assert sanity.
1575 */
1576#ifdef VBOX_STRICT
1577 if (RT_BYTE2(cBitsVarAndFlat) != 0)
1578 {
1579 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1580 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1581 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1582 Assert( pfnFunction
1583 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1584 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1585 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1586 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1587 : UINT64_C(0xc000b000a0009000) ));
1588 }
1589 else
1590 Assert( pfnFunction
1591 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1592 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1593 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1594 : UINT64_C(0xc000b000a0009000) ));
1595#endif
1596
1597#ifdef VBOX_STRICT
1598 /*
1599 * Check that the fExec flags we've got make sense.
1600 */
1601 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1602#endif
1603
1604 /*
1605 * To keep things simple we have to commit any pending writes first as we
1606 * may end up making calls.
1607 */
1608 /** @todo we could postpone this till we make the call and reload the
1609 * registers after returning from the call. Not sure if that's sensible or
1610 * not, though. */
1611 off = iemNativeRegFlushPendingWrites(pReNative, off);
1612
1613 /*
1614 * First we calculate the new RSP and the effective stack pointer value.
1615 * For 64-bit mode and flat 32-bit these two are the same.
1616 * (Code structure is very similar to that of PUSH)
1617 */
1618 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1619 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1620 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1621 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1622 ? cbMem : sizeof(uint16_t);
1623 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1624 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1625 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1626 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1627 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1628 if (cBitsFlat != 0)
1629 {
1630 Assert(idxRegEffSp == idxRegRsp);
1631 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1632 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1633 if (cBitsFlat == 64)
1634 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1635 else
1636 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1637 }
1638 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1639 {
1640 Assert(idxRegEffSp != idxRegRsp);
1641 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1642 kIemNativeGstRegUse_ReadOnly);
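 /* The D/B bit of the SS descriptor attributes decides whether SP (16-bit) or
 ESP (32-bit) is used as the stack pointer. The test below keeps the width
 matching the current CPU mode inline and branches (via the fixup recorded in
 offFixupJumpToUseOtherBitSp) to the out-of-line code for the other width. */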
1643#ifdef RT_ARCH_AMD64
1644 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1645#else
1646 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1647#endif
1648 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1649 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1650 offFixupJumpToUseOtherBitSp = off;
1651 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1652 {
1653 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1654 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1655 }
1656 else
1657 {
1658 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1659 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1660 }
1661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1662 }
1663 /* SpUpdateEnd: */
1664 uint32_t const offLabelSpUpdateEnd = off;
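 /* Note: the out-of-line stack pointer code emitted below (after the TLB lookup/miss
 jumps) branches back to this offset once it has done its SP update. */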
1665
1666 /*
1667 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1668 * we're skipping lookup).
1669 */
1670 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1671 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1672 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1673 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1674 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1675 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1676 : UINT32_MAX;
1677 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
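 /* idxRegMemResult will receive the host address from the TLB lookup and is used as
 the store target in the TlbLookup code further down. */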
1678
1679
1680 if (!TlbState.fSkip)
1681 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1682 else
1683 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1684
1685 /*
1686 * Use16BitSp / Use32BitSp (whichever SP width wasn't handled inline above):
1687 */
1688 if (cBitsFlat == 0)
1689 {
1690#ifdef RT_ARCH_AMD64
1691 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1692#else
1693 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1694#endif
1695 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1696 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1697 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1698 else
1699 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1700 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1701 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1702 }
1703
1704 /*
1705 * TlbMiss:
1706 *
1707 * Call helper to do the pushing.
1708 */
1709 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1710
1711#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1712 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1713#else
1714 RT_NOREF(idxInstr);
1715#endif
1716
1717 /* Save variables in volatile registers. */
1718 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1719 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1720 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1721 | (RT_BIT_32(idxRegPc));
1722 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1723
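 /* Move the PC and effective SP values into the ARG2/ARG1 call registers, taking care
 not to overwrite a source register before it has been read (hence the swap via ARG0
 when the two assignments would clobber each other). */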
1724 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1725 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1726 {
1727 /* Swap them using ARG0 as temp register: */
1728 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1729 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1730 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1731 }
1732 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1733 {
1734 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1735 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1736
1737 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1738 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1739 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1740 }
1741 else
1742 {
1743 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1745
1746 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1747 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1748 }
1749
1750 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1751 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1752
1753 /* Done setting up parameters, make the call. */
1754 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1755
1756 /* Restore variables and guest shadow registers to volatile registers. */
1757 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1758 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1759
1760#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1761 if (!TlbState.fSkip)
1762 {
1763 /* end of TlbMiss - Jump to the done label. */
1764 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1765 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1766
1767 /*
1768 * TlbLookup:
1769 */
1770 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1771 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1772
1773 /*
1774 * Emit code to do the actual storing / fetching.
1775 */
1776 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1777# ifdef IEM_WITH_TLB_STATISTICS
1778 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1779 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1780# endif
1781 switch (cbMemAccess)
1782 {
1783 case 2:
1784 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1785 break;
1786 case 4:
1787 if (!fIsIntelSeg)
1788 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1789 else
1790 {
1791 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
1792 PUSH FS in real mode, so we have to try to emulate that here.
1793 We borrow the now unused idxReg1 from the TLB lookup code here. */
1794 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1795 kIemNativeGstReg_EFlags);
1796 if (idxRegEfl != UINT8_MAX)
1797 {
1798#ifdef RT_ARCH_AMD64
1799 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1800 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1801 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1802#else
1803 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1804 off, TlbState.idxReg1, idxRegEfl,
1805 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1806#endif
1807 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1808 }
1809 else
1810 {
1811 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1812 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1813 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1814 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1815 }
1816 /* ASSUMES the upper half of idxRegPc is ZERO. */
1817 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1818 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1819 }
1820 break;
1821 case 8:
1822 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1823 break;
1824 default:
1825 AssertFailed();
1826 }
1827
1828 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1829 TlbState.freeRegsAndReleaseVars(pReNative);
1830
1831 /*
1832 * TlbDone:
1833 *
1834 * Commit the new RSP value.
1835 */
1836 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1837 }
1838#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1839
1840#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1841 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1842#endif
1843 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1844 if (idxRegEffSp != idxRegRsp)
1845 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1846
1847 return off;
1848}
1849
1850
1851/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1852#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1853 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1854
1855/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1856 * clears flags. */
1857#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1858 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1859 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1860
1861/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1862#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1863 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1864
1865/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1866 * clears flags. */
1867#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1868 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1869 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1870
1871#undef IEM_MC_IND_CALL_U16_AND_FINISH
1872
1873
1874/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1875#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1876 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1877
1878/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1879 * clears flags. */
1880#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1881 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1882 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1883
1884#undef IEM_MC_IND_CALL_U32_AND_FINISH
1885
1886
1887/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1888 * an extra parameter, for use in 64-bit code. */
1889#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1890 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1891
1892
1893/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1894 * an extra parameter, for use in 64-bit code and we need to check and clear
1895 * flags. */
1896#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1897 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1898 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1899
1900#undef IEM_MC_IND_CALL_U64_AND_FINISH
1901
1902/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1903 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1904DECL_INLINE_THROW(uint32_t)
1905iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1906 uint8_t idxInstr, uint8_t cbVar)
1907{
1908 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1909 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1910
1911 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1912 off = iemNativeRegFlushPendingWrites(pReNative, off);
1913
1914#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1915 Assert(pReNative->Core.offPc == 0);
1916 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1917#endif
1918
1919 /* Get a register with the new PC loaded from idxVarPc.
1920 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1921 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1922
1923 /* Check limit (may #GP(0) + exit TB). */
1924 if (!f64Bit)
1925/** @todo we can skip this test in FLAT 32-bit mode. */
1926 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1927 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1928 else if (cbVar > sizeof(uint32_t))
1929 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1930
1931#if 1
1932 /* Allocate a temporary PC register, we don't want it shadowed. */
1933 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1934 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1935#else
1936 /* Allocate a temporary PC register. */
1937 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1938 true /*fNoVolatileRegs*/);
1939#endif
1940
1941 /* Perform the addition and push the variable to the guest stack. */
1942 /** @todo Flat variants for PC32 variants. */
1943 switch (cbVar)
1944 {
1945 case sizeof(uint16_t):
1946 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1947 /* Truncate the result to 16-bit IP. */
1948 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1949 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1950 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1951 break;
1952 case sizeof(uint32_t):
1953 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1954 /** @todo In FLAT mode we can use the flat variant. */
1955 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1956 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1957 break;
1958 case sizeof(uint64_t):
1959 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1960 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1961 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1962 break;
1963 default:
1964 AssertFailed();
1965 }
1966
1967 /* RSP got changed, so do this again. */
1968 off = iemNativeRegFlushPendingWrites(pReNative, off);
1969
1970 /* Store the result. */
1971 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1972#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1973 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1974 pReNative->Core.fDebugPcInitialized = true;
1975 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1976#endif
1977
1978#if 1
1979 /* Need to transfer the shadow information to the new RIP register. */
1980 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1981#else
1982 /* Sync the new PC. */
1983 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1984#endif
1985 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1986 iemNativeRegFreeTmp(pReNative, idxPcReg);
1987 /** @todo implicitly free the variable? */
1988
1989 return off;
1990}
1991
1992
1993/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1994 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1995#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1996 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1997
1998/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1999 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
2000 * flags. */
2001#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
2002 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
2003 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2004
2005/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2006 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2007#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
2008 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2009
2010/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2011 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2012 * flags. */
2013#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
2014 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
2015 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2016
2017/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2018 * an extra parameter, for use in 64-bit code. */
2019#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
2020 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2021
2022/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2023 * an extra parameter, for use in 64-bit code and we need to check and clear
2024 * flags. */
2025#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
2026 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
2027 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2028
2029#undef IEM_MC_REL_CALL_S16_AND_FINISH
2030
2031/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags. */
2033DECL_INLINE_THROW(uint32_t)
2034iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2035 uint8_t idxInstr)
2036{
2037 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2038 off = iemNativeRegFlushPendingWrites(pReNative, off);
2039
2040#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2041 Assert(pReNative->Core.offPc == 0);
2042 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2043#endif
2044
2045 /* Allocate a temporary PC register. */
2046 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2047 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2048 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2049
2050 /* Calculate the return address (in idxPcRegOld) and the new RIP (in idxPcRegNew). */
2051 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2052 /* Truncate the return address to 16-bit IP. */
2053 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2054 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2055 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2056
2057 /* Truncate the new IP to 16 bits. */
2058 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2059
2060 /* Check limit (may #GP(0) + exit TB). */
2061 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2062
2063 /* Perform the addition and push the variable to the guest stack. */
2064 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
2065 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2066
2067 /* RSP got changed, so flush again. */
2068 off = iemNativeRegFlushPendingWrites(pReNative, off);
2069
2070 /* Store the result. */
2071 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2072#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2073 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2074 pReNative->Core.fDebugPcInitialized = true;
2075 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2076#endif
2077
2078 /* Need to transfer the shadow information to the new RIP register. */
2079 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2080 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2081 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2082
2083 return off;
2084}
2085
2086
2087/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2088 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2089#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2090 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2091
2092/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2093 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2094 * flags. */
2095#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2096 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2097 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2098
2099#undef IEM_MC_REL_CALL_S32_AND_FINISH
2100
2101/** Same as iemRegEip32RelativeCallS32AndFinishNoFlags. */
2103DECL_INLINE_THROW(uint32_t)
2104iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2105 uint8_t idxInstr)
2106{
2107 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2108 off = iemNativeRegFlushPendingWrites(pReNative, off);
2109
2110#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2111 Assert(pReNative->Core.offPc == 0);
2112 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2113#endif
2114
2115 /* Allocate a temporary PC register. */
2116 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2117 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2118 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2119
2120 /* Update the EIP to get the return address. */
2121 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2122
2123 /* Load address, add the displacement and check it against the CS segment limit, raising #GP(0) + exit TB if it's outside. */
2124 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2125 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2126 /** @todo we can skip this test in FLAT 32-bit mode. */
2127 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2128
2129 /* Push the return address onto the guest stack. */
2130 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2131 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
2132 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2133
2134 /* RSP got changed, so do this again. */
2135 off = iemNativeRegFlushPendingWrites(pReNative, off);
2136
2137 /* Store the result. */
2138 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2139#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2140 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2141 pReNative->Core.fDebugPcInitialized = true;
2142 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2143#endif
2144
2145 /* Need to transfer the shadow information to the new RIP register. */
2146 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2147 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2148 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2149
2150 return off;
2151}
2152
2153
2154/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2155 * an extra parameter, for use in 64-bit code. */
2156#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2157 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2158
2159/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2160 * an extra parameter, for use in 64-bit code and we need to check and clear
2161 * flags. */
2162#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2163 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2164 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2165
2166#undef IEM_MC_REL_CALL_S64_AND_FINISH
2167
2168/** Same as iemRegRip64RelativeCallS64AndFinishNoFlags. */
2170DECL_INLINE_THROW(uint32_t)
2171iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2172 uint8_t idxInstr)
2173{
2174 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2175 off = iemNativeRegFlushPendingWrites(pReNative, off);
2176
2177#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2178 Assert(pReNative->Core.offPc == 0);
2179 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2180#endif
2181
2182 /* Allocate a temporary PC register. */
2183 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2184 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2185 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2186
2187 /* Update the RIP to get the return address. */
2188 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2189
2190 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2191 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2192 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2193 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2194
2195 /* Push the return address onto the guest stack. */
2196 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
2197 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2198
2199 /* RSP got changed, so do this again. */
2200 off = iemNativeRegFlushPendingWrites(pReNative, off);
2201
2202 /* Store the result. */
2203 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2204#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2205 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2206 pReNative->Core.fDebugPcInitialized = true;
2207 Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%ld\n", off, offDisp));
2208#endif
2209
2210 /* Need to transfer the shadow information to the new RIP register. */
2211 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2212 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2213 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2214
2215 return off;
2216}
2217
2218
2219/*********************************************************************************************************************************
2220* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters).       *
2221*********************************************************************************************************************************/
2222
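/** Emits code for the 16-bit stack pointer variant of a near-return pop:
 *  idxRegEffSp receives the current 16-bit SP (the address to read the return
 *  address from), while bits 15:0 of idxRegRsp are advanced by cbMem plus the
 *  RETN immediate (cbPopAdd), leaving bits 63:16 untouched. */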
2223DECL_FORCE_INLINE_THROW(uint32_t)
2224iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2225 uint16_t cbPopAdd, uint8_t idxRegTmp)
2226{
2227 /* Use16BitSp: */
2228#ifdef RT_ARCH_AMD64
2229 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2230 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2231 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2232 RT_NOREF(idxRegTmp);
2233
2234#elif defined(RT_ARCH_ARM64)
2235 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2236 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2237 /* add tmp, regrsp, #(cbMem + cbPopAdd) */
2238 uint16_t const cbCombined = cbMem + cbPopAdd;
2239 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2240 if (cbCombined >= RT_BIT_32(12))
2241 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2242 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2243 /* and tmp, tmp, #0xffff */
2244 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2245 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2246 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2247 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2248
2249#else
2250# error "Port me"
2251#endif
2252 return off;
2253}
2254
2255
2256DECL_FORCE_INLINE_THROW(uint32_t)
2257iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2258 uint16_t cbPopAdd)
2259{
2260 /* Use32BitSp: */
2261 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2262 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2263 return off;
2264}
2265
2266
2267/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2268#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
2269 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
2270
2271/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2272#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2273 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2274
2275/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2276#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2277 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2278
2279/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2280 * clears flags. */
2281#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
2282 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
2283 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2284
2285/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2286 * clears flags. */
2287#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2288 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2289 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2290
2291/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2292 * clears flags. */
2293#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2294 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2295 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2296
2297/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2298DECL_INLINE_THROW(uint32_t)
2299iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
2300 IEMMODE enmEffOpSize, uint8_t idxInstr)
2301{
2302 RT_NOREF(cbInstr);
2303
2304#ifdef VBOX_STRICT
2305 /*
2306 * Check that the fExec flags we've got make sense.
2307 */
2308 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2309#endif
2310
2311 /*
2312 * To keep things simple we have to commit any pending writes first as we
2313 * may end up making calls.
2314 */
2315 off = iemNativeRegFlushPendingWrites(pReNative, off);
2316
2317 /*
2318 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
2319 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2320 * directly as the effective stack pointer.
2321 * (Code structure is very similar to that of PUSH)
2322 *
2323 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2324 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2325 * aren't commonly used (or useful) and thus not in need of optimizing.
2326 *
2327 * Note! For non-flat modes the guest RSP is allocated for calculation rather than for update,
2328 * as the shadowed register would otherwise remain modified even if the return address throws
2329 * a \#GP(0) for being outside the CS limit, leaving a wrong stack pointer value in the guest
2330 * (see the near return testcase in bs3-cpu-basic-2). If no exception is thrown, the shadowing
2331 * is transferred to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
2332 */
2333 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
2334 ? sizeof(uint64_t)
2335 : enmEffOpSize == IEMMODE_32BIT
2336 ? sizeof(uint32_t)
2337 : sizeof(uint16_t);
2338 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
2339 uintptr_t const pfnFunction = fFlat
2340 ? enmEffOpSize == IEMMODE_64BIT
2341 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2342 : (uintptr_t)iemNativeHlpStackFlatFetchU32
2343 : enmEffOpSize == IEMMODE_32BIT
2344 ? (uintptr_t)iemNativeHlpStackFetchU32
2345 : (uintptr_t)iemNativeHlpStackFetchU16;
2346 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2347 fFlat ? kIemNativeGstRegUse_ForUpdate
2348 : kIemNativeGstRegUse_Calculation,
2349 true /*fNoVolatileRegs*/);
2350 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2351 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2352 * will be the resulting register value. */
2353 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2354
2355 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2356 if (fFlat)
2357 Assert(idxRegEffSp == idxRegRsp);
2358 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2359 {
2360 Assert(idxRegEffSp != idxRegRsp);
2361 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2362 kIemNativeGstRegUse_ReadOnly);
2363#ifdef RT_ARCH_AMD64
2364 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2365#else
2366 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2367#endif
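        /* SS.ATTR.D selects the stack size: set means a 32-bit stack (ESP), clear means a 16-bit stack (SP). */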
2368 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2369 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2370 offFixupJumpToUseOtherBitSp = off;
2371 if (enmEffOpSize == IEMMODE_32BIT)
2372 {
2373 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2374 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2375 }
2376 else
2377 {
2378 Assert(enmEffOpSize == IEMMODE_16BIT);
2379 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2380 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2381 idxRegMemResult);
2382 }
2383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2384 }
2385 /* SpUpdateEnd: */
2386 uint32_t const offLabelSpUpdateEnd = off;
2387
2388 /*
2389 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
2390 * we're skipping lookup).
2391 */
2392 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2393 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2394 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2395 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2396 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2397 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2398 : UINT32_MAX;
2399
2400 if (!TlbState.fSkip)
2401 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2402 else
2403 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2404
2405 /*
2406     * UseOtherBitSp (i.e. Use16BitSp or Use32BitSp, whichever wasn't emitted inline above):
2407 */
2408 if (!fFlat)
2409 {
2410#ifdef RT_ARCH_AMD64
2411 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2412#else
2413 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2414#endif
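        /* Resolve the conditional jump from the SS.ATTR.D test above so the other stack-size path lands here. */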
2415 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2416 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2417 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2418 idxRegMemResult);
2419 else
2420 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2421 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2422 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2423 }
2424
2425 /*
2426 * TlbMiss:
2427 *
2428     * Call helper to do the stack fetch (pop).
2429 */
2430 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2431
2432#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2433 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2434#else
2435 RT_NOREF(idxInstr);
2436#endif
2437
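    /* Exclude the TLB state registers, the memory result register and the separate effective-SP register
       from the volatile register save/restore around the helper call. */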
2438 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2439 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2440 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2441 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2442
2443
2444 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2445 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2446 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2447
2448 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2449 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2450
2451 /* Done setting up parameters, make the call. */
2452 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
2453
2454 /* Move the return register content to idxRegMemResult. */
2455 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2456 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2457
2458 /* Restore variables and guest shadow registers to volatile registers. */
2459 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2460 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2461
2462#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2463 if (!TlbState.fSkip)
2464 {
2465 /* end of TlbMiss - Jump to the done label. */
2466 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2467 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2468
2469 /*
2470 * TlbLookup:
2471 */
2472 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
2473 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
2474
2475 /*
2476         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
2477 */
2478 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2479# ifdef IEM_WITH_TLB_STATISTICS
2480 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2481 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2482# endif
2483 switch (cbMem)
2484 {
2485 case 2:
2486 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2487 break;
2488 case 4:
2489 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2490 break;
2491 case 8:
2492 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2493 break;
2494 default:
2495 AssertFailed();
2496 }
2497
2498 TlbState.freeRegsAndReleaseVars(pReNative);
2499
2500 /*
2501 * TlbDone:
2502 *
2503     * Set the new RSP value (FLAT accesses need to calculate it first) and
2504 * commit the popped register value.
2505 */
2506 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2507 }
2508#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2509
2510 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2511 if (!f64Bit)
2512/** @todo we can skip this test in FLAT 32-bit mode. */
2513 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2514 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2515 else if (enmEffOpSize == IEMMODE_64BIT)
2516 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2517
2518 /* Complete RSP calculation for FLAT mode. */
2519 if (idxRegEffSp == idxRegRsp)
2520 {
2521 if (enmEffOpSize == IEMMODE_64BIT)
2522 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
2523 else
2524 {
2525 Assert(enmEffOpSize == IEMMODE_32BIT);
2526 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
2527 }
2528 }
2529
2530 /* Commit the result and clear any current guest shadows for RIP. */
2531 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2532 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2533 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2534#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2535 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2536 pReNative->Core.fDebugPcInitialized = true;
2537 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2538#endif
2539
2540 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2541 if (!fFlat)
2542 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2543
2544 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2545 if (idxRegEffSp != idxRegRsp)
2546 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2547 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2548 return off;
2549}
2550
2551
2552/*********************************************************************************************************************************
2553* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2554*********************************************************************************************************************************/
2555
2556#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2557 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2558
2559/**
2560 * Emits code to check if a \#NM exception should be raised.
2561 *
2562 * @returns New code buffer offset, UINT32_MAX on failure.
2563 * @param pReNative The native recompile state.
2564 * @param off The code buffer offset.
2565 * @param idxInstr The current instruction.
2566 */
2567DECL_INLINE_THROW(uint32_t)
2568iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2569{
2570#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2571 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2572
2573 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2574 {
2575#endif
2576 /*
2577 * Make sure we don't have any outstanding guest register writes as we may
2578     * raise an #NM and all guest registers must be up to date in CPUMCTX.
2579 */
2580 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2581 off = iemNativeRegFlushPendingWrites(pReNative, off);
2582
2583#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2584 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2585#else
2586 RT_NOREF(idxInstr);
2587#endif
2588
2589 /* Allocate a temporary CR0 register. */
2590 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2591 kIemNativeGstRegUse_ReadOnly);
2592
2593 /*
2594     * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2595 * return raisexcpt();
2596 */
2597 /* Test and jump. */
2598 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
2599 kIemNativeLabelType_RaiseNm);
2600
2601 /* Free but don't flush the CR0 register. */
2602 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2603
2604#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2605 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2606 }
2607 else
2608 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2609#endif
2610
2611 return off;
2612}
2613
2614
2615#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2616 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2617
2618/**
2619 * Emits code to check if a \#NM exception should be raised.
2620 *
2621 * @returns New code buffer offset, UINT32_MAX on failure.
2622 * @param pReNative The native recompile state.
2623 * @param off The code buffer offset.
2624 * @param idxInstr The current instruction.
2625 */
2626DECL_INLINE_THROW(uint32_t)
2627iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2628{
2629#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2630 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2631
2632 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2633 {
2634#endif
2635 /*
2636 * Make sure we don't have any outstanding guest register writes as we may
2637     * raise an #NM and all guest registers must be up to date in CPUMCTX.
2638 */
2639 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2640 off = iemNativeRegFlushPendingWrites(pReNative, off);
2641
2642#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2643 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2644#else
2645 RT_NOREF(idxInstr);
2646#endif
2647
2648 /* Allocate a temporary CR0 register. */
2649 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2650 kIemNativeGstRegUse_Calculation);
2651
2652 /*
2653     * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2654 * return raisexcpt();
2655 */
2656 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2657 /* Test and jump. */
2658 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2659 kIemNativeLabelType_RaiseNm);
2660
2661 /* Free the CR0 register. */
2662 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2663
2664#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2665 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2666 }
2667 else
2668 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2669#endif
2670
2671 return off;
2672}
2673
2674
2675#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2676 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2677
2678/**
2679 * Emits code to check if a \#MF exception should be raised.
2680 *
2681 * @returns New code buffer offset, UINT32_MAX on failure.
2682 * @param pReNative The native recompile state.
2683 * @param off The code buffer offset.
2684 * @param idxInstr The current instruction.
2685 */
2686DECL_INLINE_THROW(uint32_t)
2687iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2688{
2689 /*
2690 * Make sure we don't have any outstanding guest register writes as we may
2691     * raise an #MF and all guest registers must be up to date in CPUMCTX.
2692 */
2693 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2694 off = iemNativeRegFlushPendingWrites(pReNative, off);
2695
2696#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2697 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2698#else
2699 RT_NOREF(idxInstr);
2700#endif
2701
2702 /* Allocate a temporary FSW register. */
2703 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2704 kIemNativeGstRegUse_ReadOnly);
2705
2706 /*
2707     * if ((FSW & X86_FSW_ES) != 0)
2708 * return raisexcpt();
2709 */
2710 /* Test and jump. */
2711 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
2712
2713 /* Free but don't flush the FSW register. */
2714 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2715
2716 return off;
2717}
2718
2719
2720#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2721 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2722
2723/**
2724 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2725 *
2726 * @returns New code buffer offset, UINT32_MAX on failure.
2727 * @param pReNative The native recompile state.
2728 * @param off The code buffer offset.
2729 * @param idxInstr The current instruction.
2730 */
2731DECL_INLINE_THROW(uint32_t)
2732iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2733{
2734#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2735 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2736
2737 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2738 {
2739#endif
2740 /*
2741 * Make sure we don't have any outstanding guest register writes as we may
2742     * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2743 */
2744 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2745 off = iemNativeRegFlushPendingWrites(pReNative, off);
2746
2747#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2748 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2749#else
2750 RT_NOREF(idxInstr);
2751#endif
2752
2753 /* Allocate a temporary CR0 and CR4 register. */
2754 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2755 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2756 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2757
2758 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2759#ifdef RT_ARCH_AMD64
2760 /*
2761 * We do a modified test here:
2762 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2763 * else { goto RaiseSseRelated; }
2764 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2765     * all targets except the 386, which doesn't support SSE, so this should
2766 * be a safe assumption.
2767 */
2768 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2769 //pCodeBuf[off++] = 0xcc;
2770 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2771 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2772 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2773 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2774 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2775 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2776
2777#elif defined(RT_ARCH_ARM64)
2778 /*
2779 * We do a modified test here:
2780 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2781 * else { goto RaiseSseRelated; }
2782 */
2783 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2784 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2785 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2786 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2787 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2788 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2789 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2790 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2791 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2792 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2793 kIemNativeLabelType_RaiseSseRelated);
2794
2795#else
2796# error "Port me!"
2797#endif
2798
2799 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2800 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2801 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2802 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2803
2804#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2805 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2806 }
2807 else
2808 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2809#endif
2810
2811 return off;
2812}
2813
2814
2815#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2816 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2817
2818/**
2819 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2820 *
2821 * @returns New code buffer offset, UINT32_MAX on failure.
2822 * @param pReNative The native recompile state.
2823 * @param off The code buffer offset.
2824 * @param idxInstr The current instruction.
2825 */
2826DECL_INLINE_THROW(uint32_t)
2827iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2828{
2829#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2830 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2831
2832 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2833 {
2834#endif
2835 /*
2836 * Make sure we don't have any outstanding guest register writes as we may
2837     * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2838 */
2839 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2840 off = iemNativeRegFlushPendingWrites(pReNative, off);
2841
2842#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2843 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2844#else
2845 RT_NOREF(idxInstr);
2846#endif
2847
2848 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2849 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2850 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2851 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2852 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2853
2854 /*
2855 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2856 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2857 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2858 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2859 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2860 * { likely }
2861 * else { goto RaiseAvxRelated; }
2862 */
2863#ifdef RT_ARCH_AMD64
2864 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2865 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2866 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2867 ^ 0x1a) ) { likely }
2868 else { goto RaiseAvxRelated; } */
2869 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2870 //pCodeBuf[off++] = 0xcc;
2871 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2872 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2873 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2874 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2875 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2876 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2877 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2878 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2879 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2880 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2881 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2882
2883#elif defined(RT_ARCH_ARM64)
2884 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2885 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2886 else { goto RaiseAvxRelated; } */
2887 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2888 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2889 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2890 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2891 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2892 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2893 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2894 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2895 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2896 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2897 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2898 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2899 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2900 kIemNativeLabelType_RaiseAvxRelated);
2901
2902#else
2903# error "Port me!"
2904#endif
2905
2906 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2907 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2908 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2909 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2910#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2911 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2912 }
2913 else
2914 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2915#endif
2916
2917 return off;
2918}
2919
2920
2921#define IEM_MC_RAISE_DIVIDE_ERROR() \
2922 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2923
2924/**
2925 * Emits code to raise a \#DE.
2926 *
2927 * @returns New code buffer offset, UINT32_MAX on failure.
2928 * @param pReNative The native recompile state.
2929 * @param off The code buffer offset.
2930 * @param idxInstr The current instruction.
2931 */
2932DECL_INLINE_THROW(uint32_t)
2933iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2934{
2935 /*
2936     * Make sure we don't have any outstanding guest register writes as we may raise a #DE and all guest registers must be up to date in CPUMCTX.
2937 */
2938 off = iemNativeRegFlushPendingWrites(pReNative, off);
2939
2940#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2941 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2942#else
2943 RT_NOREF(idxInstr);
2944#endif
2945
2946 /* raise \#DE exception unconditionally. */
2947 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2948}
2949
2950
2951#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2952 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2953
2954/**
2955 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2956 *
2957 * @returns New code buffer offset, UINT32_MAX on failure.
2958 * @param pReNative The native recompile state.
2959 * @param off The code buffer offset.
2960 * @param idxInstr The current instruction.
2961 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2962 * @param cbAlign The alignment in bytes to check against.
2963 */
2964DECL_INLINE_THROW(uint32_t)
2965iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2966 uint8_t idxVarEffAddr, uint8_t cbAlign)
2967{
2968 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2969 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2970
2971 /*
2972 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2973 */
2974 off = iemNativeRegFlushPendingWrites(pReNative, off);
2975
2976#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2977 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2978#else
2979 RT_NOREF(idxInstr);
2980#endif
2981
2982 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2983
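    /* The address is misaligned exactly when any of the low (cbAlign - 1) mask bits are set. */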
2984 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2985 kIemNativeLabelType_RaiseGp0);
2986
2987 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2988 return off;
2989}
2990
2991
2992/*********************************************************************************************************************************
2993* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2994*********************************************************************************************************************************/
2995
2996/**
2997 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2998 *
2999 * @returns Pointer to the condition stack entry on success, NULL on failure
3000 * (too many nestings)
3001 */
3002DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
3003{
3004 uint32_t const idxStack = pReNative->cCondDepth;
3005 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
3006
3007 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
3008 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
3009
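    /* Create the else and endif labels up front; they are defined later by the IEM_MC_ELSE/IEM_MC_ENDIF emitters. */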
3010 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
3011 pEntry->fInElse = false;
3012 pEntry->fIfExitTb = false;
3013 pEntry->fElseExitTb = false;
3014 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
3015 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
3016
3017 return pEntry;
3018}
3019
3020
3021/**
3022 * Start of the if-block, snapshotting the register and variable state.
3023 */
3024DECL_INLINE_THROW(void)
3025iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3026{
3027 Assert(offIfBlock != UINT32_MAX);
3028 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3029 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3030 Assert(!pEntry->fInElse);
3031
3032     /* Define the start of the IF block if requested or for disassembly purposes. */
3033 if (idxLabelIf != UINT32_MAX)
3034 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3035#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3036 else
3037 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3038#else
3039 RT_NOREF(offIfBlock);
3040#endif
3041
3042 /* Copy the initial state so we can restore it in the 'else' block. */
3043 pEntry->InitialState = pReNative->Core;
3044}
3045
3046
3047#define IEM_MC_ELSE() } while (0); \
3048 off = iemNativeEmitElse(pReNative, off); \
3049 do {
3050
3051/** Emits code related to IEM_MC_ELSE. */
3052DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3053{
3054 /* Check sanity and get the conditional stack entry. */
3055 Assert(off != UINT32_MAX);
3056 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3057 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3058 Assert(!pEntry->fInElse);
3059
3060     /* We can skip the dirty register flushing and the jump to the endif if
3061        the if-branch already jumped to a TB exit. */
3062 if (!pEntry->fIfExitTb)
3063 {
3064#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3065 /* Writeback any dirty shadow registers. */
3066 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3067 * in one of the branches and leave guest registers already dirty before the start of the if
3068 * block alone. */
3069 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3070#endif
3071
3072 /* Jump to the endif. */
3073 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3074 }
3075# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3076 else
3077 Assert(pReNative->Core.offPc == 0);
3078# endif
3079
3080 /* Define the else label and enter the else part of the condition. */
3081 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3082 pEntry->fInElse = true;
3083
3084 /* Snapshot the core state so we can do a merge at the endif and restore
3085 the snapshot we took at the start of the if-block. */
3086 pEntry->IfFinalState = pReNative->Core;
3087 pReNative->Core = pEntry->InitialState;
3088
3089 return off;
3090}
3091
3092
3093#define IEM_MC_ENDIF() } while (0); \
3094 off = iemNativeEmitEndIf(pReNative, off)
3095
3096/** Emits code related to IEM_MC_ENDIF. */
3097DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3098{
3099 /* Check sanity and get the conditional stack entry. */
3100 Assert(off != UINT32_MAX);
3101 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3102 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3103
3104#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3105 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3106#endif
3107
3108 /*
3109 * If either of the branches exited the TB, we can take the state from the
3110 * other branch and skip all the merging headache.
3111 */
3112 bool fDefinedLabels = false;
3113 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3114 {
3115#ifdef VBOX_STRICT
3116 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3117         Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3118 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3119 ? &pEntry->IfFinalState : &pReNative->Core;
3120# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3121 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3122# endif
3123# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3124 Assert(pExitCoreState->offPc == 0);
3125# endif
3126 RT_NOREF(pExitCoreState);
3127#endif
3128
3129 if (!pEntry->fIfExitTb)
3130 {
3131 Assert(pEntry->fInElse);
3132 pReNative->Core = pEntry->IfFinalState;
3133 }
3134 }
3135 else
3136 {
3137 /*
3138         * Now we have to find common ground with the core state at the end of the
3139         * if-block (IfFinalState). Use the smallest common denominator and just drop anything
3140 * that isn't the same in both states.
3141 */
3142 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3143 * which is why we're doing this at the end of the else-block.
3144         *        But we'd need more info about the future for that to be worth the effort. */
3145 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3146#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3147 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3148 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3149 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3150#endif
3151
3152 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3153 {
3154#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3155 /*
3156 * If the branch has differences in dirty shadow registers, we will flush
3157 * the register only dirty in the current branch and dirty any that's only
3158 * dirty in the other one.
3159 */
3160 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3161 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3162 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3163 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3164 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
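            /* Head = dirty only in the current branch (flushed inline below);
               Tail = dirty only in the other branch (flushed in the jumped-over code emitted further down). */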
3165 if (!fGstRegDirtyDiff)
3166 { /* likely */ }
3167 else
3168 {
3169 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3170 if (fGstRegDirtyHead)
3171 {
3172 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3173 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3174 }
3175 }
3176#endif
3177
3178 /*
3179 * Shadowed guest registers.
3180 *
3181 * We drop any shadows where the two states disagree about where
3182             * We drop any shadows where the two states disagree about where
3183             * things are kept. We may end up flushing more dirty registers
3184 */
3185 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3186 if (fGstRegs)
3187 {
3188 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3189 do
3190 {
3191 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3192 fGstRegs &= ~RT_BIT_64(idxGstReg);
3193
3194 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3195 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3196 if ( idxCurHstReg != idxOtherHstReg
3197 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3198 {
3199#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3200 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3201 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3202 idxOtherHstReg, pOther->bmGstRegShadows));
3203#else
3204 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3205 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3206 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3207 idxOtherHstReg, pOther->bmGstRegShadows,
3208 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3209 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3210 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3211 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3212 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3213#endif
3214 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3215 }
3216 } while (fGstRegs);
3217 }
3218 else
3219 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3220
3221#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3222 /*
3223 * Generate jumpy code for flushing dirty registers from the other
3224 * branch that aren't dirty in the current one.
3225 */
3226 if (!fGstRegDirtyTail)
3227 { /* likely */ }
3228 else
3229 {
3230 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3231 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3232
3233 /* First the current branch has to jump over the dirty flushing from the other branch. */
3234 uint32_t const offFixup1 = off;
3235 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3236
3237 /* Put the endif and maybe else label here so the other branch ends up here. */
3238 if (!pEntry->fInElse)
3239 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3240 else
3241 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3242 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3243 fDefinedLabels = true;
3244
3245 /* Flush the dirty guest registers from the other branch. */
3246 while (fGstRegDirtyTail)
3247 {
3248 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3249 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3250 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3251 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3252 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3253
3254 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3255
3256 /* Mismatching shadowing should've been dropped in the previous step already. */
3257 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3258 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3259 }
3260
3261 /* Here is the actual endif label, fixup the above jump to land here. */
3262 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3263 }
3264#endif
3265
3266 /*
3267 * Check variables next. For now we must require them to be identical
3268 * or stuff we can recreate. (No code is emitted here.)
3269 */
3270 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3271#ifdef VBOX_STRICT
3272 uint32_t const offAssert = off;
3273#endif
3274 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3275 if (fVars)
3276 {
3277 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3278 do
3279 {
3280 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3281 fVars &= ~RT_BIT_32(idxVar);
3282
3283 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3284 {
3285 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3286 continue;
3287 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3288 {
3289 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3290 if (idxHstReg != UINT8_MAX)
3291 {
3292 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3293 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3294 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3295 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3296 }
3297 continue;
3298 }
3299 }
3300 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3301 continue;
3302
3303 /* Irreconcilable, so drop it. */
3304 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3305 if (idxHstReg != UINT8_MAX)
3306 {
3307 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3308 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3309 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3310 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3311 }
3312 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3313 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3314 } while (fVars);
3315 }
3316 Assert(off == offAssert);
3317
3318 /*
3319 * Finally, check that the host register allocations matches.
3320 */
3321 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3322 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3323 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3324 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3325 }
3326 }
3327
3328 /*
3329 * Define the endif label and maybe the else one if we're still in the 'if' part.
3330 */
3331 if (!fDefinedLabels)
3332 {
3333 if (!pEntry->fInElse)
3334 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3335 else
3336 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3337 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3338 }
3339
3340     /* Pop the condition stack. */
3341 pReNative->cCondDepth -= 1;
3342
3343 return off;
3344}
3345
3346
3347#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3348 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
3349 do {
3350
3351/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3352DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
3353{
3354 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3355 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3356
3357 /* Get the eflags. */
3358 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3359 kIemNativeGstRegUse_ReadOnly);
3360
3361 /* Test and jump. */
3362 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3363
3364 /* Free but don't flush the EFlags register. */
3365 iemNativeRegFreeTmp(pReNative, idxEflReg);
3366
3367 /* Make a copy of the core state now as we start the if-block. */
3368 iemNativeCondStartIfBlock(pReNative, off);
3369
3370 return off;
3371}
3372
3373
3374#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3375 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
3376 do {
3377
3378/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3379DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
3380{
3381 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3382 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3383
3384 /* Get the eflags. */
3385 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3386 kIemNativeGstRegUse_ReadOnly);
3387
3388 /* Test and jump. */
3389 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3390
3391 /* Free but don't flush the EFlags register. */
3392 iemNativeRegFreeTmp(pReNative, idxEflReg);
3393
3394 /* Make a copy of the core state now as we start the if-block. */
3395 iemNativeCondStartIfBlock(pReNative, off);
3396
3397 return off;
3398}
3399
3400
3401#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3402 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
3403 do {
3404
3405/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3406DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
3407{
3408 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3409 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3410
3411 /* Get the eflags. */
3412 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3413 kIemNativeGstRegUse_ReadOnly);
3414
3415 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3416 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3417
3418 /* Test and jump. */
3419 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3420
3421 /* Free but don't flush the EFlags register. */
3422 iemNativeRegFreeTmp(pReNative, idxEflReg);
3423
3424 /* Make a copy of the core state now as we start the if-block. */
3425 iemNativeCondStartIfBlock(pReNative, off);
3426
3427 return off;
3428}
3429
3430
3431#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3432 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
3433 do {
3434
3435/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3436DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
3437{
3438 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3439 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3440
3441 /* Get the eflags. */
3442 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3443 kIemNativeGstRegUse_ReadOnly);
3444
3445 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3446 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3447
3448 /* Test and jump. */
3449 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3450
3451 /* Free but don't flush the EFlags register. */
3452 iemNativeRegFreeTmp(pReNative, idxEflReg);
3453
3454 /* Make a copy of the core state now as we start the if-block. */
3455 iemNativeCondStartIfBlock(pReNative, off);
3456
3457 return off;
3458}
3459
3460
3461#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3462 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
3463 do {
3464
3465#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3466 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
3467 do {
3468
3469/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3470DECL_INLINE_THROW(uint32_t)
3471iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3472 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
3473{
3474 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
3475 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3476
3477 /* Get the eflags. */
3478 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3479 kIemNativeGstRegUse_ReadOnly);
3480
3481 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
3482 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
3483
3484 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
3485 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
3486 Assert(iBitNo1 != iBitNo2);
3487
3488#ifdef RT_ARCH_AMD64
3489 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
3490
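    /* Isolate bit #1 in the temp register, shift it to bit #2's position and XOR with EFLAGS,
       so bit #2 of the result is set exactly when the two flags differ. */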
3491 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3492 if (iBitNo1 > iBitNo2)
3493 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3494 else
3495 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3496 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3497
3498#elif defined(RT_ARCH_ARM64)
3499 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3500 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3501
3502 /* and tmpreg, eflreg, #1<<iBitNo1 */
3503 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3504
3505 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3506 if (iBitNo1 > iBitNo2)
3507 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3508 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3509 else
3510 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3511 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3512
3513 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3514
3515#else
3516# error "Port me"
3517#endif
3518
3519 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3520 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3521 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3522
3523 /* Free but don't flush the EFlags and tmp registers. */
3524 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3525 iemNativeRegFreeTmp(pReNative, idxEflReg);
3526
3527 /* Make a copy of the core state now as we start the if-block. */
3528 iemNativeCondStartIfBlock(pReNative, off);
3529
3530 return off;
3531}
3532
3533
3534#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3535 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
3536 do {
3537
3538#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3539 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
3540 do {
3541
3542/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3543 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3544DECL_INLINE_THROW(uint32_t)
3545iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
3546 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
3547{
3548 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
3549 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3550
3551     /* We need an if-block label for the inverted variant. */
3552 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3553 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3554
3555 /* Get the eflags. */
3556 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3557 kIemNativeGstRegUse_ReadOnly);
3558
3559 /* Translate the flag masks to bit numbers. */
3560 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3561 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3562
3563 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
3564 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
3565 Assert(iBitNo1 != iBitNo);
3566
3567 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
3568 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
3569 Assert(iBitNo2 != iBitNo);
3570 Assert(iBitNo2 != iBitNo1);
3571
3572#ifdef RT_ARCH_AMD64
3573 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
3574#elif defined(RT_ARCH_ARM64)
3575 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3576#endif
3577
3578 /* Check for the lone bit first. */
3579 if (!fInverted)
3580 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3581 else
3582 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3583
3584 /* Then extract and compare the other two bits. */
3585#ifdef RT_ARCH_AMD64
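    /* Same trick as in iemNativeEmitIfEflagsTwoBitsEqual: move bit #1 to bit #2's position and XOR,
       so bit #2 is set iff the two flags differ. */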
3586 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3587 if (iBitNo1 > iBitNo2)
3588 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3589 else
3590 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3591 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3592
3593#elif defined(RT_ARCH_ARM64)
3594 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3595
3596 /* and tmpreg, eflreg, #1<<iBitNo1 */
3597 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3598
3599 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3600 if (iBitNo1 > iBitNo2)
3601 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3602 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3603 else
3604 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3605 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3606
3607 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3608
3609#else
3610# error "Port me"
3611#endif
3612
3613 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3614 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3615 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3616
3617 /* Free but don't flush the EFlags and tmp registers. */
3618 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3619 iemNativeRegFreeTmp(pReNative, idxEflReg);
3620
3621 /* Make a copy of the core state now as we start the if-block. */
3622 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3623
3624 return off;
3625}
3626
3627
3628#define IEM_MC_IF_CX_IS_NZ() \
3629 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3630 do {
3631
3632/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3633DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3634{
3635 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3636
3637 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3638 kIemNativeGstRegUse_ReadOnly);
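    /* Only the low 16 bits (CX) matter, hence the UINT16_MAX mask. */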
3639 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3640 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3641
3642 iemNativeCondStartIfBlock(pReNative, off);
3643 return off;
3644}
3645
3646
3647#define IEM_MC_IF_ECX_IS_NZ() \
3648 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3649 do {
3650
3651#define IEM_MC_IF_RCX_IS_NZ() \
3652 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3653 do {
3654
3655/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3656DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3657{
3658 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3659
3660 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3661 kIemNativeGstRegUse_ReadOnly);
3662 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3663 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3664
3665 iemNativeCondStartIfBlock(pReNative, off);
3666 return off;
3667}
3668
3669
3670#define IEM_MC_IF_CX_IS_NOT_ONE() \
3671 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3672 do {
3673
3674/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3675DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3676{
3677 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3678
3679 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3680 kIemNativeGstRegUse_ReadOnly);
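    /* The 16-bit equals-immediate helper needs a scratch register on non-AMD64 hosts. */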
3681#ifdef RT_ARCH_AMD64
3682 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3683#else
3684 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3685 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3686 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3687#endif
3688 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3689
3690 iemNativeCondStartIfBlock(pReNative, off);
3691 return off;
3692}
3693
3694
3695#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3696 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3697 do {
3698
3699#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3700 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3701 do {
3702
3703/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3704DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3705{
3706 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3707
3708 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3709 kIemNativeGstRegUse_ReadOnly);
3710 if (f64Bit)
3711 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3712 else
3713 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3714 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3715
3716 iemNativeCondStartIfBlock(pReNative, off);
3717 return off;
3718}
3719
3720
3721#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3722 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3723 do {
3724
3725#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3726 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3727 do {
3728
3729/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3730 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3731DECL_INLINE_THROW(uint32_t)
3732iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3733{
3734 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3735 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3736
3737 /* We have to load both RCX and EFLAGS before we can start branching,
3738 otherwise we'll end up in the else-block with an inconsistent
3739 register allocator state.
3740 Doing EFLAGS first as it's more likely to be loaded, right? */
3741 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3742 kIemNativeGstRegUse_ReadOnly);
3743 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3744 kIemNativeGstRegUse_ReadOnly);
3745
3746 /** @todo we could reduce this to a single branch instruction by spending a
3747 * temporary register and some setnz stuff. Not sure if loops are
3748 * worth it. */
3749 /* Check CX. */
3750#ifdef RT_ARCH_AMD64
3751 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3752#else
3753 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3754 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3755 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3756#endif
3757
3758 /* Check the EFlags bit. */
3759 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3760 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3761 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3762 !fCheckIfSet /*fJmpIfSet*/);
3763
3764 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3765 iemNativeRegFreeTmp(pReNative, idxEflReg);
3766
3767 iemNativeCondStartIfBlock(pReNative, off);
3768 return off;
3769}
3770
3771
3772#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3773 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3774 do {
3775
3776#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3777 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3778 do {
3779
3780#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3781 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3782 do {
3783
3784#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3785 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3786 do {
3787
3788/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3789 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3790 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3791 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3792DECL_INLINE_THROW(uint32_t)
3793iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3794 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3795{
3796 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3797 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3798
3799 /* We have to load both RCX and EFLAGS before we can start branching,
3800 otherwise we'll end up in the else-block with an inconsistent
3801 register allocator state.
3802 Doing EFLAGS first as it's more likely to be loaded, right? */
3803 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3804 kIemNativeGstRegUse_ReadOnly);
3805 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3806 kIemNativeGstRegUse_ReadOnly);
3807
3808 /** @todo we could reduce this to a single branch instruction by spending a
3809 * temporary register and some setnz stuff. Not sure if loops are
3810 * worth it. */
3811 /* Check RCX/ECX. */
3812 if (f64Bit)
3813 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3814 else
3815 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3816
3817 /* Check the EFlags bit. */
3818 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3819 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3820 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3821 !fCheckIfSet /*fJmpIfSet*/);
3822
3823 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3824 iemNativeRegFreeTmp(pReNative, idxEflReg);
3825
3826 iemNativeCondStartIfBlock(pReNative, off);
3827 return off;
3828}
3829
3830
3831#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3832 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3833 do {
3834
3835/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3836DECL_INLINE_THROW(uint32_t)
3837iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3838{
3839 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3840
3841 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3842 PIEMNATIVEVAR const pVarLocal = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3843 AssertStmt(pVarLocal->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3844 AssertStmt(pVarLocal->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3845
3846 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3847
3848 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3849
3850 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3851
3852 iemNativeCondStartIfBlock(pReNative, off);
3853 return off;
3854}
3855
3856
3857#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3858 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3859 do {
3860
3861/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3862DECL_INLINE_THROW(uint32_t)
3863iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3864{
3865 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3866 Assert(iGReg < 16);
3867
3868 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3869 kIemNativeGstRegUse_ReadOnly);
3870
3871 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3872
3873 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3874
3875 iemNativeCondStartIfBlock(pReNative, off);
3876 return off;
3877}
3878
3879
3880
3881/*********************************************************************************************************************************
3882* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3883*********************************************************************************************************************************/
3884
3885#define IEM_MC_NOREF(a_Name) \
3886 RT_NOREF_PV(a_Name)
3887
3888#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3889 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3890
3891#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3892 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3893
3894#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3895 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3896
3897#define IEM_MC_LOCAL(a_Type, a_Name) \
3898 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3899
3900#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3901 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3902
3903#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3904 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3905
3906
3907/**
3908 * Sets the host register for @a idxVar to @a idxReg.
3909 *
3910 * Any guest register shadowing will be implicitly dropped by this call.
3911 *
3912 * The variable must not have any register associated with it (causes
3913 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3914 * implied.
3915 *
3916 * @returns idxReg
3917 * @param pReNative The recompiler state.
3918 * @param idxVar The variable.
3919 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3920 * @param off For recording in debug info.
3921 * @param fAllocated Set if the register is already allocated, false if not.
3922 *
3923 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
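 *
 * Typical use is binding the call return register to a result variable, e.g. as
 * done by iemNativeEmitCallAImplCommon():
 * @code
 *      iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false);
 * @endcode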
3924 */
3925DECL_INLINE_THROW(uint8_t)
3926iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
3927{
3928 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3929 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3930 Assert(!pVar->fRegAcquired);
3931 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3932 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3933 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
3934 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3935
3936 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3937 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3938
3939 iemNativeVarSetKindToStack(pReNative, idxVar);
3940 pVar->idxReg = idxReg;
3941
3942 return idxReg;
3943}
3944
3945
3946/**
3947 * Variant of iemNativeVarRegisterSet() that also marks the host register as acquired for the variable.
3948 */
3949DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3950 uint8_t idxReg, uint32_t *poff)
3951{
3952 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
3953 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3954 return idxReg;
3955}
3956
3957
3958/**
3959 * This is called by IEM_MC_END() to clean up all variables.
3960 */
3961DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3962{
3963 uint32_t const bmVars = pReNative->Core.bmVars;
3964 if (bmVars != 0)
3965 iemNativeVarFreeAllSlow(pReNative, bmVars);
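    /* All argument variables and stack slots must have been freed by now. */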
3966 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3967 Assert(pReNative->Core.bmStack == 0);
3968}
3969
3970
3971#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3972
3973/**
3974 * This is called by IEM_MC_FREE_LOCAL.
3975 */
3976DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3977{
3978 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3979 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3980 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3981}
3982
3983
3984#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3985
3986/**
3987 * This is called by IEM_MC_FREE_ARG.
3988 */
3989DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3990{
3991 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3992 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3993 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3994}
3995
3996
3997#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3998
3999/**
4000 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4001 */
4002DECL_INLINE_THROW(uint32_t)
4003iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4004{
4005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4006 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4007 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4008 Assert( pVarDst->cbVar == sizeof(uint16_t)
4009 || pVarDst->cbVar == sizeof(uint32_t));
4010
4011 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4012 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4013 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4014 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4015 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4016
4017 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4018
4019 /*
4020 * Special case for immediates.
4021 */
4022 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4023 {
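        /* Just truncate the constant to the destination width; no instructions need to be emitted. */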
4024 switch (pVarDst->cbVar)
4025 {
4026 case sizeof(uint16_t):
4027 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4028 break;
4029 case sizeof(uint32_t):
4030 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4031 break;
4032 default: AssertFailed(); break;
4033 }
4034 }
4035 else
4036 {
4037 /*
4038 * The generic solution for now.
4039 */
4040 /** @todo optimize this by having the python script make sure the source
4041 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4042 * statement. Then we could just transfer the register assignments. */
4043 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4044 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4045 switch (pVarDst->cbVar)
4046 {
4047 case sizeof(uint16_t):
4048 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4049 break;
4050 case sizeof(uint32_t):
4051 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4052 break;
4053 default: AssertFailed(); break;
4054 }
4055 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4056 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4057 }
4058 return off;
4059}
4060
4061
4062
4063/*********************************************************************************************************************************
4064* Emitters for IEM_MC_CALL_CIMPL_XXX *
4065*********************************************************************************************************************************/
4066
4067/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4068DECL_INLINE_THROW(uint32_t)
4069iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4070 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4071
4072{
4073 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4074
4075#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4076 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4077 when a call clobbers any of the relevant control registers. */
4078# if 1
4079 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4080 {
4081 /* Likely as long as call+ret are done via cimpl. */
4082 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4083 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4084 }
4085 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4086 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4087 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4088 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4089 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4090 else
4091 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4092 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4093 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4094
4095# else
4096 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4097 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4098 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4099 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4100 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4101 || pfnCImpl == (uintptr_t)iemCImpl_callf
4102 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4103 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4104 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4105 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4106 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4107# endif
4108
4109# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4110 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4111 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4112 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4113# endif
4114#endif
4115
4116 /*
4117 * Do all the call setup and cleanup.
4118 */
4119 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4120
4121 /*
4122 * Load the two or three hidden arguments.
4123 */
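    /* Note: in the strict Windows/AMD64 configuration the VBOXSTRICTRC return value comes
       back via a hidden pointer to a stack slot (reloaded after the call below), hence
       the extra first argument. */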
4124#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4125 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4126 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4127 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4128#else
4129 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4130 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4131#endif
4132
4133 /*
4134 * Make the call and check the return code.
4135 *
4136 * Shadow PC copies are always flushed here; what else gets flushed depends on the flags.
4137 * Segment and general purpose registers are explicitly flushed via the
4138 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4139 * macros.
4140 */
4141 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4142#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4143 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4144#endif
4145 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4146 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4147 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4148 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4149
4150#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4151 pReNative->Core.fDebugPcInitialized = false;
4152 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4153#endif
4154
4155 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4156}
4157
4158
4159#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4160 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4161
4162/** Emits code for IEM_MC_CALL_CIMPL_1. */
4163DECL_INLINE_THROW(uint32_t)
4164iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4165 uintptr_t pfnCImpl, uint8_t idxArg0)
4166{
4167 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4168 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4169}
4170
4171
4172#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4173 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4174
4175/** Emits code for IEM_MC_CALL_CIMPL_2. */
4176DECL_INLINE_THROW(uint32_t)
4177iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4178 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4179{
4180 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4181 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4182 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4183}
4184
4185
4186#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4187 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4188 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4189
4190/** Emits code for IEM_MC_CALL_CIMPL_3. */
4191DECL_INLINE_THROW(uint32_t)
4192iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4193 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4194{
4195 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4196 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4197 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4198 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4199}
4200
4201
4202#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4203 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4204 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4205
4206/** Emits code for IEM_MC_CALL_CIMPL_4. */
4207DECL_INLINE_THROW(uint32_t)
4208iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4209 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4210{
4211 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4212 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4213 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4214 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4215 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4216}
4217
4218
4219#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4220 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4221 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4222
4223/** Emits code for IEM_MC_CALL_CIMPL_5. */
4224DECL_INLINE_THROW(uint32_t)
4225iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4226 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4227{
4228 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4229 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4230 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4231 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4232 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4233 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4234}
4235
4236
4237/** Recompiler debugging: Flush guest register shadow copies. */
4238#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4239
4240
4241
4242/*********************************************************************************************************************************
4243* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4244*********************************************************************************************************************************/
4245
4246/**
4247 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4248 */
4249DECL_INLINE_THROW(uint32_t)
4250iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4251 uintptr_t pfnAImpl, uint8_t cArgs)
4252{
4253 if (idxVarRc != UINT8_MAX)
4254 {
4255 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4256 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4257 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4258 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4259 }
4260
4261 /*
4262 * Do all the call setup and cleanup.
4263 *
4264 * Only pending guest register writes held in call volatile registers need to be flushed here,
4265 * as assembly helpers can't throw and don't access anything living in CPUMCTX; they only
4266 * access their parameters. Call volatile registers are always flushed by iemNativeEmitCallCommon()
4267 * regardless of the fFlushPendingWrites parameter.
4268 */
4269 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4270
4271 /*
4272 * Make the call and update the return code variable if we've got one.
4273 */
4274 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
4275 if (idxVarRc != UINT8_MAX)
4276 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4277
4278 return off;
4279}
4280
4281
4282
4283#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4284 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4285
4286#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4287 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4288
4289/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4290DECL_INLINE_THROW(uint32_t)
4291iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4292{
4293 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4294}
4295
4296
4297#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4298 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4299
4300#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4301 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4302
4303/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4304DECL_INLINE_THROW(uint32_t)
4305iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4306{
4307 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4308 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4309}
4310
4311
4312#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4313 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4314
4315#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4316 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4317
4318/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4319DECL_INLINE_THROW(uint32_t)
4320iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4321 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4322{
4323 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4324 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4325 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4326}
4327
4328
4329#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4330 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4331
4332#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4333 IEM_MC_LOCAL(a_rcType, a_rc); \
4334 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4335
4336/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4337DECL_INLINE_THROW(uint32_t)
4338iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4339 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4340{
4341 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4342 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4343 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4344 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4345}
4346
4347
4348#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4349 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4350
4351#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4352 IEM_MC_LOCAL(a_rcType, a_rc); \
4353 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4354
4355/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4356DECL_INLINE_THROW(uint32_t)
4357iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4358 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4359{
4360 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4361 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4362 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4363 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4364 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4365}
4366
4367
4368
4369/*********************************************************************************************************************************
4370* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4371*********************************************************************************************************************************/
4372
4373#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4374 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4375
4376#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4377 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4378
4379#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4380 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4381
4382#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4383 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4384
4385
4386/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4387 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4388DECL_INLINE_THROW(uint32_t)
4389iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4390{
4391 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4392 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4393 Assert(iGRegEx < 20);
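    /* iGRegEx values 16 thru 19 refer to AH, CH, DH and BH, i.e. bits 15:8 of xAX, xCX, xDX and xBX. */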
4394
4395 /* Same discussion as in iemNativeEmitFetchGregU16 */
4396 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4397 kIemNativeGstRegUse_ReadOnly);
4398
4399 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4400 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4401
4402 /* The value is zero-extended to the full 64-bit host register width. */
4403 if (iGRegEx < 16)
4404 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4405 else
4406 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4407
4408 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4409 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4410 return off;
4411}
4412
4413
4414#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4415 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4416
4417#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4418 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4419
4420#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4421 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4422
4423/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4424DECL_INLINE_THROW(uint32_t)
4425iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4426{
4427 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4428 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4429 Assert(iGRegEx < 20);
4430
4431 /* Same discussion as in iemNativeEmitFetchGregU16 */
4432 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4433 kIemNativeGstRegUse_ReadOnly);
4434
4435 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4436 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4437
4438 if (iGRegEx < 16)
4439 {
4440 switch (cbSignExtended)
4441 {
4442 case sizeof(uint16_t):
4443 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4444 break;
4445 case sizeof(uint32_t):
4446 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4447 break;
4448 case sizeof(uint64_t):
4449 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4450 break;
4451 default: AssertFailed(); break;
4452 }
4453 }
4454 else
4455 {
4456 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4457 switch (cbSignExtended)
4458 {
4459 case sizeof(uint16_t):
4460 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4461 break;
4462 case sizeof(uint32_t):
4463 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4464 break;
4465 case sizeof(uint64_t):
4466 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4467 break;
4468 default: AssertFailed(); break;
4469 }
4470 }
4471
4472 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4473 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4474 return off;
4475}
4476
4477
4478
4479#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4480 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4481
4482#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4483 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4484
4485#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4486 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4487
4488/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4489DECL_INLINE_THROW(uint32_t)
4490iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4491{
4492 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4493 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4494 Assert(iGReg < 16);
4495
4496 /*
4497 * We can either just load the low 16 bits of the GPR into a host register
4498 * for the variable, or we can do so via a shadow copy host register. The
4499 * latter will avoid having to reload it if it's being stored later, but
4500 * will waste a host register if it isn't touched again. Since we don't
4501 * know what's going to happen, we choose the latter for now.
4502 */
4503 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4504 kIemNativeGstRegUse_ReadOnly);
4505
4506 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4507 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4508 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4509 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4510
4511 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4512 return off;
4513}
4514
4515#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4516 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4517
4518/** Emits code for IEM_MC_FETCH_GREG_I16. */
4519DECL_INLINE_THROW(uint32_t)
4520iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4521{
4522 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4523 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4524 Assert(iGReg < 16);
4525
4526 /*
4527 * We can either just load the low 16 bits of the GPR into a host register
4528 * for the variable, or we can do so via a shadow copy host register. The
4529 * latter will avoid having to reload it if it's being stored later, but
4530 * will waste a host register if it isn't touched again. Since we don't
4531 * know what's going to happen, we choose the latter for now.
4532 */
4533 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4534 kIemNativeGstRegUse_ReadOnly);
4535
4536 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4537 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4538#ifdef RT_ARCH_AMD64
4539 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4540#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM; we emulate them via 32-bit registers, which requires sign extension here. */
4541 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4542#endif
4543 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4544
4545 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4546 return off;
4547}
4548
4549
4550#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4551 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4552
4553#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4554 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4555
4556/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4557DECL_INLINE_THROW(uint32_t)
4558iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4559{
4560 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4561 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4562 Assert(iGReg < 16);
4563
4564 /*
4565 * We can either just load the low 16 bits of the GPR into a host register
4566 * for the variable, or we can do so via a shadow copy host register. The
4567 * latter will avoid having to reload it if it's being stored later, but
4568 * will waste a host register if it isn't touched again. Since we don't
4569 * know what's going to happen, we choose the latter for now.
4570 */
4571 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4572 kIemNativeGstRegUse_ReadOnly);
4573
4574 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4575 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4576 if (cbSignExtended == sizeof(uint32_t))
4577 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4578 else
4579 {
4580 Assert(cbSignExtended == sizeof(uint64_t));
4581 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4582 }
4583 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4584
4585 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4586 return off;
4587}
4588
4589
4590#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4591 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4592
4593#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4594 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4595
4596#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4597 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4598
4599/** Emits code for IEM_MC_FETCH_GREG_I32, IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4600DECL_INLINE_THROW(uint32_t)
4601iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4602{
4603 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4604 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4605 Assert(iGReg < 16);
4606
4607 /*
4608 * We can either just load the low 32 bits of the GPR into a host register
4609 * for the variable, or we can do so via a shadow copy host register. The
4610 * latter will avoid having to reload it if it's being stored later, but
4611 * will waste a host register if it isn't touched again. Since we don't
4612 * know what's going to happen, we choose the latter for now.
4613 */
4614 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4615 kIemNativeGstRegUse_ReadOnly);
4616
4617 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4618 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4619 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4620 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4621
4622 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4623 return off;
4624}
4625
4626
4627#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4628 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4629
4630/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4631DECL_INLINE_THROW(uint32_t)
4632iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4633{
4634 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4635 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4636 Assert(iGReg < 16);
4637
4638 /*
4639 * We can either just load the low 32 bits of the GPR into a host register
4640 * for the variable, or we can do so via a shadow copy host register. The
4641 * latter will avoid having to reload it if it's being stored later, but
4642 * will waste a host register if it isn't touched again. Since we don't
4643 * know what's going to happen, we choose the latter for now.
4644 */
4645 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4646 kIemNativeGstRegUse_ReadOnly);
4647
4648 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4649 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4650 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4651 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4652
4653 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4654 return off;
4655}
4656
4657
4658#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4659 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4660
4661#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4662 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4663
4664/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4665 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4666DECL_INLINE_THROW(uint32_t)
4667iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4668{
4669 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4670 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4671 Assert(iGReg < 16);
4672
4673 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4674 kIemNativeGstRegUse_ReadOnly);
4675
4676 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4677 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4678 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4679 /** @todo name the register a shadow one already? */
4680 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4681
4682 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4683 return off;
4684}
4685
4686
4687#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4688#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4689 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4690
4691/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4692DECL_INLINE_THROW(uint32_t)
4693iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4694{
4695 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4696 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4697 Assert(iGRegLo < 16 && iGRegHi < 16);
4698
4699 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4700 kIemNativeGstRegUse_ReadOnly);
4701 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4702 kIemNativeGstRegUse_ReadOnly);
4703
4704 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4705 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
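    /* Copy the two GPRs into the low and high 64-bit halves of the 128-bit variable. */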
4706 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4707 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4708
4709 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4710 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4711 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4712 return off;
4713}
4714#endif
4715
4716
4717/*********************************************************************************************************************************
4718* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4719*********************************************************************************************************************************/
4720
4721#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4722 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4723
4724/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4725DECL_INLINE_THROW(uint32_t)
4726iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4727{
4728 Assert(iGRegEx < 20);
4729 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4730 kIemNativeGstRegUse_ForUpdate);
4731#ifdef RT_ARCH_AMD64
4732 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4733
4734 /* To the lowest byte of the register: mov r8, imm8 */
4735 if (iGRegEx < 16)
4736 {
4737 if (idxGstTmpReg >= 8)
4738 pbCodeBuf[off++] = X86_OP_REX_B;
4739 else if (idxGstTmpReg >= 4)
4740 pbCodeBuf[off++] = X86_OP_REX;
4741 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4742 pbCodeBuf[off++] = u8Value;
4743 }
4744 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; if not, we rotate. */
4745 else if (idxGstTmpReg < 4)
4746 {
4747 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4748 pbCodeBuf[off++] = u8Value;
4749 }
4750 else
4751 {
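        /* The target is bits 15:8 of a host register without an AH/CH/DH/BH style byte
           encoding, so rotate those bits down to 7:0, store the byte there and rotate back. */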
4752 /* ror reg64, 8 */
4753 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4754 pbCodeBuf[off++] = 0xc1;
4755 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4756 pbCodeBuf[off++] = 8;
4757
4758 /* mov reg8, imm8 */
4759 if (idxGstTmpReg >= 8)
4760 pbCodeBuf[off++] = X86_OP_REX_B;
4761 else if (idxGstTmpReg >= 4)
4762 pbCodeBuf[off++] = X86_OP_REX;
4763 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4764 pbCodeBuf[off++] = u8Value;
4765
4766 /* rol reg64, 8 */
4767 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4768 pbCodeBuf[off++] = 0xc1;
4769 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4770 pbCodeBuf[off++] = 8;
4771 }
4772
4773#elif defined(RT_ARCH_ARM64)
4774 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4775 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4776 if (iGRegEx < 16)
4777 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4778 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4779 else
4780 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4781 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4782 iemNativeRegFreeTmp(pReNative, idxImmReg);
4783
4784#else
4785# error "Port me!"
4786#endif
4787
4788 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4789
4790#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4791 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4792#endif
4793
4794 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4795 return off;
4796}
4797
4798
4799#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4800 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4801
4802/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4803DECL_INLINE_THROW(uint32_t)
4804iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4805{
4806 Assert(iGRegEx < 20);
4807 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4808
4809 /*
4810 * If it's a constant value (unlikely) we treat this as an
4811 * IEM_MC_STORE_GREG_U8_CONST statement.
4812 */
4813 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4814 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4815 { /* likely */ }
4816 else
4817 {
4818 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4819 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4820 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4821 }
4822
4823 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4824 kIemNativeGstRegUse_ForUpdate);
4825 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4826
4827#ifdef RT_ARCH_AMD64
4828 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4829 if (iGRegEx < 16)
4830 {
4831 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4832 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4833 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4834 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4835 pbCodeBuf[off++] = X86_OP_REX;
4836 pbCodeBuf[off++] = 0x8a;
4837 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4838 }
4839 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; if not, we rotate. */
4840 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4841 {
4842 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4843 pbCodeBuf[off++] = 0x8a;
4844 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4845 }
4846 else
4847 {
4848 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4849
4850 /* ror reg64, 8 */
4851 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4852 pbCodeBuf[off++] = 0xc1;
4853 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4854 pbCodeBuf[off++] = 8;
4855
4856 /* mov reg8, reg8(r/m) */
4857 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4858 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4859 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4860 pbCodeBuf[off++] = X86_OP_REX;
4861 pbCodeBuf[off++] = 0x8a;
4862 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4863
4864 /* rol reg64, 8 */
4865 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4866 pbCodeBuf[off++] = 0xc1;
4867 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4868 pbCodeBuf[off++] = 8;
4869 }
4870
4871#elif defined(RT_ARCH_ARM64)
4872 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4873 or
4874 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4876 if (iGRegEx < 16)
4877 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4878 else
4879 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4880
4881#else
4882# error "Port me!"
4883#endif
4884 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4885
4886 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4887
4888#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4889 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4890#endif
4891 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4892 return off;
4893}
4894
4895
4896
4897#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4898 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4899
4900/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4901DECL_INLINE_THROW(uint32_t)
4902iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4903{
4904 Assert(iGReg < 16);
4905 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4906 kIemNativeGstRegUse_ForUpdate);
4907#ifdef RT_ARCH_AMD64
4908 /* mov reg16, imm16 */
4909 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4910 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4911 if (idxGstTmpReg >= 8)
4912 pbCodeBuf[off++] = X86_OP_REX_B;
4913 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4914 pbCodeBuf[off++] = RT_BYTE1(uValue);
4915 pbCodeBuf[off++] = RT_BYTE2(uValue);
4916
4917#elif defined(RT_ARCH_ARM64)
4918 /* movk xdst, #uValue, lsl #0 */
4919 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4920 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
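    /* Note: MOVK replaces only bits 15:0 and keeps bits 63:16 of the guest register,
       which is exactly the semantics of a 16-bit GPR store. */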
4921
4922#else
4923# error "Port me!"
4924#endif
4925
4926 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4927
4928#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4929 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4930#endif
4931 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4932 return off;
4933}
4934
4935
4936#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4937 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4938
4939/** Emits code for IEM_MC_STORE_GREG_U16. */
4940DECL_INLINE_THROW(uint32_t)
4941iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4942{
4943 Assert(iGReg < 16);
4944 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4945
4946 /*
4947     * If it's a constant value (unlikely) we treat this as an
4948 * IEM_MC_STORE_GREG_U16_CONST statement.
4949 */
4950 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4951 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4952 { /* likely */ }
4953 else
4954 {
4955 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4956 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4957 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4958 }
4959
4960 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4961 kIemNativeGstRegUse_ForUpdate);
4962
4963#ifdef RT_ARCH_AMD64
4964 /* mov reg16, reg16 or [mem16] */
4965 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4966 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4967 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4968 {
4969 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4970 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4971 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4972 pbCodeBuf[off++] = 0x8b;
4973 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4974 }
4975 else
4976 {
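        /* The value variable currently lives only in its stack slot, so read the
           16 bits straight from the frame (RBP relative) instead of first forcing
           the variable into a host register. */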
4977 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4978 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4979 if (idxGstTmpReg >= 8)
4980 pbCodeBuf[off++] = X86_OP_REX_R;
4981 pbCodeBuf[off++] = 0x8b;
4982 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4983 }
4984
4985#elif defined(RT_ARCH_ARM64)
4986 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4987 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4988 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4989 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4990 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4991
4992#else
4993# error "Port me!"
4994#endif
4995
4996 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4997
4998#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4999 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5000#endif
5001 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5002 return off;
5003}
5004
5005
5006#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5007 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5008
5009/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5010DECL_INLINE_THROW(uint32_t)
5011iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5012{
5013 Assert(iGReg < 16);
5014 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5015 kIemNativeGstRegUse_ForFullWrite);
5016 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5017#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5018 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5019#endif
5020 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5021 return off;
5022}
5023
5024
5025#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5026 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5027
5028#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5029 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5030
5031/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5032DECL_INLINE_THROW(uint32_t)
5033iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5034{
5035 Assert(iGReg < 16);
5036 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5037
5038 /*
5039     * If it's a constant value (unlikely) we treat this as an
5040 * IEM_MC_STORE_GREG_U32_CONST statement.
5041 */
5042 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5043 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5044 { /* likely */ }
5045 else
5046 {
5047 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5048 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5049 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5050 }
5051
5052 /*
5053     * For the rest we allocate a guest register for the variable and write
5054 * it to the CPUMCTX structure.
5055 */
5056 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5057#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5058 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5059#else
5060 RT_NOREF(idxVarReg);
5061#endif
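    /* Strict builds verify that the host register really is zero extended, since a
       32-bit guest register store must leave bits 63:32 cleared. */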
5062#ifdef VBOX_STRICT
5063 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5064#endif
5065 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5066 return off;
5067}
5068
5069
5070#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5071 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5072
5073/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5074DECL_INLINE_THROW(uint32_t)
5075iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5076{
5077 Assert(iGReg < 16);
5078 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5079 kIemNativeGstRegUse_ForFullWrite);
5080 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5081#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5082 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5083#endif
5084 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5085 return off;
5086}
5087
5088
5089#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5090 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5091
5092#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5093 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5094
5095/** Emits code for IEM_MC_STORE_GREG_U64/IEM_MC_STORE_GREG_I64. */
5096DECL_INLINE_THROW(uint32_t)
5097iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5098{
5099 Assert(iGReg < 16);
5100 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5101
5102 /*
5103     * If it's a constant value (unlikely) we treat this as an
5104 * IEM_MC_STORE_GREG_U64_CONST statement.
5105 */
5106 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5107 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5108 { /* likely */ }
5109 else
5110 {
5111 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5112 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5113 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5114 }
5115
5116 /*
5117     * For the rest we allocate a guest register for the variable and write
5118 * it to the CPUMCTX structure.
5119 */
5120 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5121#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5122 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5123#else
5124 RT_NOREF(idxVarReg);
5125#endif
5126 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5127 return off;
5128}
5129
5130
5131#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5132 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5133
5134/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5135DECL_INLINE_THROW(uint32_t)
5136iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5137{
5138 Assert(iGReg < 16);
5139 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5140 kIemNativeGstRegUse_ForUpdate);
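    /* A 32-bit move of the register onto itself zero extends, clearing bits 63:32
       of the guest register copy. */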
5141 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
5142#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5143 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5144#endif
5145 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5146 return off;
5147}
5148
5149
5150#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5151#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5152 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5153
5154/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5155DECL_INLINE_THROW(uint32_t)
5156iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5157{
5158 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5159 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5160 Assert(iGRegLo < 16 && iGRegHi < 16);
5161
5162 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5163 kIemNativeGstRegUse_ForFullWrite);
5164 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5165 kIemNativeGstRegUse_ForFullWrite);
5166
5167 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5168 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5169 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5170 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
5171
5172 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5173 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5174 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5175 return off;
5176}
5177#endif
5178
5179
5180/*********************************************************************************************************************************
5181* General purpose register manipulation (add, sub). *
5182*********************************************************************************************************************************/
5183
5184#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
5185    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
5186
5187/** Emits code for IEM_MC_ADD_GREG_U16. */
5188DECL_INLINE_THROW(uint32_t)
5189iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5190{
5191 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5192 kIemNativeGstRegUse_ForUpdate);
5193
5194#ifdef RT_ARCH_AMD64
5195 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5196 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5197 if (idxGstTmpReg >= 8)
5198 pbCodeBuf[off++] = X86_OP_REX_B;
5199 if (uAddend == 1)
5200 {
5201 pbCodeBuf[off++] = 0xff; /* inc */
5202 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5203 }
5204 else
5205 {
5206 pbCodeBuf[off++] = 0x81;
5207 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5208 pbCodeBuf[off++] = uAddend;
5209 pbCodeBuf[off++] = 0;
5210 }
5211
5212#else
5213 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5214 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5215
5216    /* add tmp, gstgrp, uAddend */
5217 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5218
5219    /* bfi gstreg, tmp, 0, 16 - copies bits 15:0 of the sum back into the guest register, leaving bits 63:16 untouched. */
5220 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5221
5222 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5223#endif
5224
5225 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5226
5227#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5228 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5229#endif
5230
5231 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5232 return off;
5233}
5234
5235
5236#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5237 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5238
5239#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5240 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5241
5242/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5243DECL_INLINE_THROW(uint32_t)
5244iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5245{
5246 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5247 kIemNativeGstRegUse_ForUpdate);
5248
5249#ifdef RT_ARCH_AMD64
5250 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5251 if (f64Bit)
5252 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5253 else if (idxGstTmpReg >= 8)
5254 pbCodeBuf[off++] = X86_OP_REX_B;
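    /* Pick the shortest encoding: INC for an addend of one, ADD r/m,imm8 (0x83 /0,
       sign-extended) for addends below 128, otherwise ADD r/m,imm32 (0x81 /0). */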
5255 if (uAddend == 1)
5256 {
5257 pbCodeBuf[off++] = 0xff; /* inc */
5258 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5259 }
5260 else if (uAddend < 128)
5261 {
5262 pbCodeBuf[off++] = 0x83; /* add */
5263 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5264 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5265 }
5266 else
5267 {
5268 pbCodeBuf[off++] = 0x81; /* add */
5269 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5270 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5271 pbCodeBuf[off++] = 0;
5272 pbCodeBuf[off++] = 0;
5273 pbCodeBuf[off++] = 0;
5274 }
5275
5276#else
5277    /* add gstgrp, gstgrp, uAddend */
5278 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5279 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5280
5281#endif
5282
5283 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5284
5285#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5286 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5287#endif
5288
5289 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5290 return off;
5291}
5292
5293
5294
5295#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5296 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5297
5298/** Emits code for IEM_MC_SUB_GREG_U16. */
5299DECL_INLINE_THROW(uint32_t)
5300iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5301{
5302 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5303 kIemNativeGstRegUse_ForUpdate);
5304
5305#ifdef RT_ARCH_AMD64
5306 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5307 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5308 if (idxGstTmpReg >= 8)
5309 pbCodeBuf[off++] = X86_OP_REX_B;
5310 if (uSubtrahend == 1)
5311 {
5312 pbCodeBuf[off++] = 0xff; /* dec */
5313 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5314 }
5315 else
5316 {
5317 pbCodeBuf[off++] = 0x81;
5318 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5319 pbCodeBuf[off++] = uSubtrahend;
5320 pbCodeBuf[off++] = 0;
5321 }
5322
5323#else
5324 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5325 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5326
5327 /* sub tmp, gstgrp, uSubtrahend */
5328 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5329
5330    /* bfi gstreg, tmp, 0, 16 - copies bits 15:0 of the difference back into the guest register, leaving bits 63:16 untouched. */
5331 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5332
5333 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5334#endif
5335
5336 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5337
5338#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5339 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5340#endif
5341
5342 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5343 return off;
5344}
5345
5346
5347#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5348 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5349
5350#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5351 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5352
5353/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5354DECL_INLINE_THROW(uint32_t)
5355iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5356{
5357 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5358 kIemNativeGstRegUse_ForUpdate);
5359
5360#ifdef RT_ARCH_AMD64
5361 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5362 if (f64Bit)
5363 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5364 else if (idxGstTmpReg >= 8)
5365 pbCodeBuf[off++] = X86_OP_REX_B;
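    /* Pick the shortest encoding: DEC for a subtrahend of one, SUB r/m,imm8 (0x83 /5,
       sign-extended) for subtrahends below 128, otherwise SUB r/m,imm32 (0x81 /5). */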
5366 if (uSubtrahend == 1)
5367 {
5368 pbCodeBuf[off++] = 0xff; /* dec */
5369 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5370 }
5371 else if (uSubtrahend < 128)
5372 {
5373 pbCodeBuf[off++] = 0x83; /* sub */
5374 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5375 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5376 }
5377 else
5378 {
5379 pbCodeBuf[off++] = 0x81; /* sub */
5380 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5381 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5382 pbCodeBuf[off++] = 0;
5383 pbCodeBuf[off++] = 0;
5384 pbCodeBuf[off++] = 0;
5385 }
5386
5387#else
5388    /* sub gstgrp, gstgrp, uSubtrahend */
5389 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5390 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5391
5392#endif
5393
5394 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5395
5396#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5397 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5398#endif
5399
5400 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5401 return off;
5402}
5403
5404
5405#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5406 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5407
5408#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5409 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5410
5411#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5412 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5413
5414#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5415 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5416
5417/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5418DECL_INLINE_THROW(uint32_t)
5419iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5420{
5421#ifdef VBOX_STRICT
5422 switch (cbMask)
5423 {
5424 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5425 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5426 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5427 case sizeof(uint64_t): break;
5428 default: AssertFailedBreak();
5429 }
5430#endif
5431
5432 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5433 kIemNativeGstRegUse_ForUpdate);
5434
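    /* The 8-bit and 16-bit variants widen the mask with all-ones so the 64-bit AND
       below only affects the low byte/word of the guest register. */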
5435 switch (cbMask)
5436 {
5437 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5438 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5439 break;
5440 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5441 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5442 break;
5443 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5444 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5445 break;
5446 case sizeof(uint64_t):
5447 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5448 break;
5449 default: AssertFailedBreak();
5450 }
5451
5452 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5453
5454#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5455 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5456#endif
5457
5458 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5459 return off;
5460}
5461
5462
5463#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5464 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5465
5466#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5467 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5468
5469#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5470 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5471
5472#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5473 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5474
5475/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5476DECL_INLINE_THROW(uint32_t)
5477iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5478{
5479#ifdef VBOX_STRICT
5480 switch (cbMask)
5481 {
5482 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5483 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5484 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5485 case sizeof(uint64_t): break;
5486 default: AssertFailedBreak();
5487 }
5488#endif
5489
5490 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5491 kIemNativeGstRegUse_ForUpdate);
5492
5493 switch (cbMask)
5494 {
5495 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5496 case sizeof(uint16_t):
5497 case sizeof(uint64_t):
5498 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5499 break;
5500 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5501 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5502 break;
5503 default: AssertFailedBreak();
5504 }
5505
5506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5507
5508#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5509 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5510#endif
5511
5512 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5513 return off;
5514}
5515
5516
5517/*********************************************************************************************************************************
5518* Local/Argument variable manipulation (add, sub, and, or). *
5519*********************************************************************************************************************************/
5520
5521#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5522 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5523
5524#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5525 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5526
5527#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5528 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5529
5530#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5531 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5532
5533
5534#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5535 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5536
5537#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5538 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5539
5540#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5541 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5542
5543/** Emits code for AND'ing a local and a constant value. */
5544DECL_INLINE_THROW(uint32_t)
5545iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5546{
5547#ifdef VBOX_STRICT
5548 switch (cbMask)
5549 {
5550 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5551 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5552 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5553 case sizeof(uint64_t): break;
5554 default: AssertFailedBreak();
5555 }
5556#endif
5557
5558 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5559 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5560
5561 if (cbMask <= sizeof(uint32_t))
5562 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5563 else
5564 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5565
5566 iemNativeVarRegisterRelease(pReNative, idxVar);
5567 return off;
5568}
5569
5570
5571#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5572 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5573
5574#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5575 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5576
5577#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5578 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5579
5580#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5581 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5582
5583/** Emits code for OR'ing a local and a constant value. */
5584DECL_INLINE_THROW(uint32_t)
5585iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5586{
5587#ifdef VBOX_STRICT
5588 switch (cbMask)
5589 {
5590 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5591 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5592 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5593 case sizeof(uint64_t): break;
5594 default: AssertFailedBreak();
5595 }
5596#endif
5597
5598 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5599 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5600
5601 if (cbMask <= sizeof(uint32_t))
5602 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5603 else
5604 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5605
5606 iemNativeVarRegisterRelease(pReNative, idxVar);
5607 return off;
5608}
5609
5610
5611#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5612 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5613
5614#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5615 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5616
5617#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5618 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5619
5620/** Emits code for reversing the byte order in a local value. */
5621DECL_INLINE_THROW(uint32_t)
5622iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5623{
5624 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5625 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5626
5627 switch (cbLocal)
5628 {
5629 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5630 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5631 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5632 default: AssertFailedBreak();
5633 }
5634
5635 iemNativeVarRegisterRelease(pReNative, idxVar);
5636 return off;
5637}
5638
5639
5640#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5641 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5642
5643#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5644 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5645
5646#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5647 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5648
5649/** Emits code for shifting a local value left. */
5650DECL_INLINE_THROW(uint32_t)
5651iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5652{
5653#ifdef VBOX_STRICT
5654 switch (cbLocal)
5655 {
5656 case sizeof(uint8_t): Assert(cShift < 8); break;
5657 case sizeof(uint16_t): Assert(cShift < 16); break;
5658 case sizeof(uint32_t): Assert(cShift < 32); break;
5659 case sizeof(uint64_t): Assert(cShift < 64); break;
5660 default: AssertFailedBreak();
5661 }
5662#endif
5663
5664 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5665 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5666
5667 if (cbLocal <= sizeof(uint32_t))
5668 {
5669 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
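        /* The host shift operates on the full 32-bit register, so mask the result
           back down to the declared 8/16-bit width of the local. */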
5670 if (cbLocal < sizeof(uint32_t))
5671 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5672 cbLocal == sizeof(uint16_t)
5673 ? UINT32_C(0xffff)
5674 : UINT32_C(0xff));
5675 }
5676 else
5677 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5678
5679 iemNativeVarRegisterRelease(pReNative, idxVar);
5680 return off;
5681}
5682
5683
5684#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5685 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5686
5687#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5688 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5689
5690#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5691 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5692
5693/** Emits code for arithmetically shifting a local value right (SAR). */
5694DECL_INLINE_THROW(uint32_t)
5695iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5696{
5697#ifdef VBOX_STRICT
5698 switch (cbLocal)
5699 {
5700 case sizeof(int8_t): Assert(cShift < 8); break;
5701 case sizeof(int16_t): Assert(cShift < 16); break;
5702 case sizeof(int32_t): Assert(cShift < 32); break;
5703 case sizeof(int64_t): Assert(cShift < 64); break;
5704 default: AssertFailedBreak();
5705 }
5706#endif
5707
5708 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5709 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5710
5711 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5712 if (cbLocal == sizeof(uint8_t))
5713 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5714 else if (cbLocal == sizeof(uint16_t))
5715 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5716
5717 if (cbLocal <= sizeof(uint32_t))
5718 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5719 else
5720 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5721
5722 iemNativeVarRegisterRelease(pReNative, idxVar);
5723 return off;
5724}
5725
5726
5727#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5728 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5729
5730#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5731 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5732
5733#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5734 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5735
5736/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5737DECL_INLINE_THROW(uint32_t)
5738iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5739{
5740 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5741 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5742 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5743 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5744
5745 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5746 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
5747
5748 /* Need to sign extend the value. */
5749 if (cbLocal <= sizeof(uint32_t))
5750 {
5751/** @todo ARM64: In case of boredom, the extended add instruction can do the
5752 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5753 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5754
5755 switch (cbLocal)
5756 {
5757 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5758 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5759 default: AssertFailed();
5760 }
5761
5762 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5763 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5764 }
5765 else
5766 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5767
5768 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5769 iemNativeVarRegisterRelease(pReNative, idxVar);
5770 return off;
5771}
5772
5773
5774
5775/*********************************************************************************************************************************
5776* EFLAGS *
5777*********************************************************************************************************************************/
5778
5779#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5780# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5781#else
5782# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5783 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5784
5785DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5786{
5787 if (fEflOutput)
5788 {
5789 PVMCPUCC const pVCpu = pReNative->pVCpu;
5790# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5791 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5792 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5793 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5794# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5795 if (fEflOutput & (a_fEfl)) \
5796 { \
5797 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5798 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5799 else \
5800 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5801 } else do { } while (0)
5802# else
5803 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5804 IEMLIVENESSBIT const LivenessClobbered =
5805 {
5806 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5807 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_POTENTIAL_CALL].bm64
5808 | pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5809 | pLivenessEntry->aBits[IEMLIVENESS_BIT_CALL].bm64)
5810 };
5811 IEMLIVENESSBIT const LivenessDelayable =
5812 {
5813 pLivenessEntry->aBits[IEMLIVENESS_BIT_POTENTIAL_CALL].bm64
5814 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5815 | pLivenessEntry->aBits[IEMLIVENESS_BIT_CALL].bm64)
5816 };
5817# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5818 if (fEflOutput & (a_fEfl)) \
5819 { \
5820 if (LivenessClobbered.a_fLivenessMember) \
5821 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5822 else if (LivenessDelayable.a_fLivenessMember) \
5823 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5824 else \
5825 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5826 } else do { } while (0)
5827# endif
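        /* Tally, for each status flag, whether the liveness info says its value is
           required, skippable or (extended layout only) delayable. */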
5828 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5829 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5830 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5831 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5832 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5833 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5834 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5835# undef CHECK_FLAG_AND_UPDATE_STATS
5836 }
5837 RT_NOREF(fEflInput);
5838}
5839#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5840
5841#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5842#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5843 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5844
5845/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5846DECL_INLINE_THROW(uint32_t)
5847iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5848 uint32_t fEflInput, uint32_t fEflOutput)
5849{
5850 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5851 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5852 RT_NOREF(fEflInput, fEflOutput);
5853
5854#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5855# ifdef VBOX_STRICT
5856 if ( pReNative->idxCurCall != 0
5857 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5858 {
5859 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5860 uint32_t const fBoth = fEflInput | fEflOutput;
5861# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5862 AssertMsg( !(fBoth & (a_fElfConst)) \
5863 || (!(fEflInput & (a_fElfConst)) \
5864 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5865 : !(fEflOutput & (a_fElfConst)) \
5866 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5867 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5868 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5869 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5870 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5871 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5872 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5873 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5874 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5875 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5876# undef ASSERT_ONE_EFL
5877 }
5878# endif
5879#endif
5880
5881 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5882
5883 /** @todo This could be prettier...*/
5884 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5885     *        problematic, but I'll try to tackle that soon (@bugref{10720}). */
5886 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5887 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5888 Assert(pVar->idxReg == UINT8_MAX);
5889 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5890 {
5891    /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fEflOutput is
5892 * zero, but since iemNativeVarRegisterSet clears the shadowing,
5893     * that's counterproductive... */
5894 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5895 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
5896 true /** @todo EFlags shadowing+liveness weirdness (@bugref{10720}). */);
5897 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5898 }
5899 else
5900 {
5901 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5902 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5903 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
5904 if (idxGstReg != UINT8_MAX)
5905 {
5906 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5907 iemNativeRegFreeTmp(pReNative, idxGstReg);
5908 }
5909 else
5910 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxVarReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5911 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5912 }
5913 return off;
5914}
5915
5916
5917
5918/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5919 * start using it with custom native code emission (inlining assembly
5920 * instruction helpers). */
5921#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5922#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5923 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5924 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5925
5926#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5927#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5928 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5929 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5930
5931/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5932DECL_INLINE_THROW(uint32_t)
5933iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5934 bool fUpdateSkipping)
5935{
5936 RT_NOREF(fEflOutput);
5937 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5938 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5939
5940#ifdef VBOX_STRICT
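    /* Sanity check the incoming EFLAGS value: break if the reserved always-one bit
       (bit 1) is clear or if any reserved always-zero bit is set. */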
5941 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5942 uint32_t offFixup = off;
5943 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5944 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5945 iemNativeFixupFixedJump(pReNative, offFixup, off);
5946
5947 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5948 offFixup = off;
5949 off = iemNativeEmitJzToFixed(pReNative, off, off);
5950 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5951 iemNativeFixupFixedJump(pReNative, offFixup, off);
5952
5953    /** @todo validate that only bits in the fEflOutput mask changed. */
5954#endif
5955
5956#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5957 if (fUpdateSkipping)
5958 {
5959 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5960 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5961 else
5962 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5963 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5964 }
5965#else
5966 RT_NOREF_PV(fUpdateSkipping);
5967#endif
5968
5969 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5970 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5971 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5972 return off;
5973}
5974
5975
5976typedef enum IEMNATIVEMITEFLOP
5977{
5978 kIemNativeEmitEflOp_Set,
5979 kIemNativeEmitEflOp_Clear,
5980 kIemNativeEmitEflOp_Flip
5981} IEMNATIVEMITEFLOP;
5982
5983#define IEM_MC_SET_EFL_BIT(a_fBit) \
5984 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set>(pReNative, off, a_fBit)
5985
5986#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5987 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear>(pReNative, off, a_fBit)
5988
5989#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5990 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip>(pReNative, off, a_fBit)
5991
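/* Used by MC blocks that set, clear or flip a single EFLAGS bit (the CLC/STC/CMC
   style of instruction); the template parameter selects the operation at compile
   time so each instantiation emits just the one OR/AND/XOR it needs. */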
5992/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5993template<IEMNATIVEMITEFLOP const a_enmOp>
5994DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit)
5995{
5996 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5997 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
5998 true /*fSkipLivenessAssert*/); /** @todo proper liveness / eflags fix */
5999
6000 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6001 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6002 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
6003 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6004 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
6005 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6006 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
6007 else
6008 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6009 || a_enmOp == kIemNativeEmitEflOp_Clear
6010 || a_enmOp == kIemNativeEmitEflOp_Flip);
6011
6012 /** @todo No delayed writeback for EFLAGS right now. */
6013 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6014
6015 /* Free but don't flush the EFLAGS register. */
6016 iemNativeRegFreeTmp(pReNative, idxEflReg);
6017
6018 return off;
6019}
6020
6021
6022/*********************************************************************************************************************************
6023* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6024*********************************************************************************************************************************/
6025
6026#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6027 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6028
6029#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6030 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6031
6032#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6033 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6034
6035
6036/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6037 * IEM_MC_FETCH_SREG_ZX_U64. */
6038DECL_INLINE_THROW(uint32_t)
6039iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6040{
6041 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6042 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6043 Assert(iSReg < X86_SREG_COUNT);
6044
6045 /*
6046     * For now, we will not create a shadow copy of a selector. The rationale
6047     * is that since we do not recompile the popping and loading of segment
6048     * registers, and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
6049     * and moving to registers, there is only a small chance that the shadow
6050     * copy will be accessed again before the register is reloaded. One
6051     * scenario would be nested calls in 16-bit code, but I doubt it's worth
6052     * the extra register pressure atm.
6053     *
6054     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6055     * and iemNativeVarRegisterAcquire for a load scenario. We only have the
6056     * store scenario covered at present (r160730).
6057 */
6058 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6059 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6060 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6061 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6062 return off;
6063}
6064
6065
6066
6067/*********************************************************************************************************************************
6068* Register references. *
6069*********************************************************************************************************************************/
6070
6071#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6072 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6073
6074#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6075 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6076
6077/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6078DECL_INLINE_THROW(uint32_t)
6079iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6080{
6081 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6082 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6083 Assert(iGRegEx < 20);
6084
6085 if (iGRegEx < 16)
6086 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6087 else
6088 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6089
6090 /* If we've delayed writing back the register value, flush it now. */
6091 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6092
6093 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6094 if (!fConst)
6095 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6096
6097 return off;
6098}
6099
6100#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6101 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6102
6103#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6104 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6105
6106#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6107 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6108
6109#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6110 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6111
6112#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6113 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6114
6115#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6116 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6117
6118#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6119 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6120
6121#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6122 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6123
6124#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6125 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6126
6127#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6128 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6129
6130/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6131DECL_INLINE_THROW(uint32_t)
6132iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6133{
6134 Assert(iGReg < 16);
6135 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6136 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6137
6138 /* If we've delayed writing back the register value, flush it now. */
6139 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
6140
6141 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6142 if (!fConst)
6143 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6144
6145 return off;
6146}
6147
6148
6149#undef IEM_MC_REF_EFLAGS /* should not be used. */
6150#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6151 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6152 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
6153
6154/** Handles IEM_MC_REF_EFLAGS. */
6155DECL_INLINE_THROW(uint32_t)
6156iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
6157{
6158 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6159 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6160
6161#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6162 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6163
6164 /* Updating the skipping according to the outputs is a little early, but
6165 we don't have any other hooks for references atm. */
6166 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6167 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6168 else if (fEflOutput & X86_EFL_STATUS_BITS)
6169 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
6170 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6171#else
6172 RT_NOREF(fEflInput, fEflOutput);
6173#endif
6174
6175 /* If we've delayed writing back the register value, flush it now. */
6176 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
6177
6178 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6179 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6180
6181 return off;
6182}
6183
6184
6185/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6186 * different code from the threaded recompiler, maybe it would be helpful. For now
6187 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6188#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6189
6190
6191#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6192 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6193
6194#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6195 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6196
6197#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6198 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6199
6200#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6201 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6202
6203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6204/* Just being paranoid here. */
6205# ifndef _MSC_VER /* MSC can't compile this; it doesn't like the [0]. A reduced version follows below. */
6206AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6207AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6208AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6209AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6210# endif
6211AssertCompileMemberOffset(X86XMMREG, au64, 0);
6212AssertCompileMemberOffset(X86XMMREG, au32, 0);
6213AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6214AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6215
6216# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6217 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6218# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6219 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6220# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6221 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6222# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6223 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6224#endif
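
/* Note: the AssertCompile checks above are what make the U32/U64/R32/R64
   const variants work with the same code path - all the X86XMMREG union
   views (au64/au32/ar64/ar32) alias offset 0, so a single
   kIemNativeGstRegRef_XReg reference covers every element width without any
   extra offset bookkeeping. */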
6225
6226/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6227DECL_INLINE_THROW(uint32_t)
6228iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6229{
6230 Assert(iXReg < 16);
6231 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6232 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6233
6234 /* If we've delayed writing back the register value, flush it now. */
6235 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
6236
6237#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6238 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6239 if (!fConst)
6240 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6241#else
6242 RT_NOREF(fConst);
6243#endif
6244
6245 return off;
6246}
6247
6248
6249
6250/*********************************************************************************************************************************
6251* Effective Address Calculation *
6252*********************************************************************************************************************************/
6253#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6254 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6255
6256/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6257 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6258DECL_INLINE_THROW(uint32_t)
6259iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6260 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6261{
6262 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6263
6264 /*
6265 * Handle the disp16 form with no registers first.
6266 *
6267 * Convert to an immediate value, as that'll delay the register allocation
6268 * and assignment till the memory access / call / whatever and we can use
6269 * a more appropriate register (or none at all).
6270 */
6271 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6272 {
6273 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6274 return off;
6275 }
6276
6277 /* Determine the displacement. */
6278 uint16_t u16EffAddr;
6279 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6280 {
6281 case 0: u16EffAddr = 0; break;
6282 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6283 case 2: u16EffAddr = u16Disp; break;
6284 default: AssertFailedStmt(u16EffAddr = 0);
6285 }
6286
6287 /* Determine the registers involved. */
6288 uint8_t idxGstRegBase;
6289 uint8_t idxGstRegIndex;
6290 switch (bRm & X86_MODRM_RM_MASK)
6291 {
6292 case 0:
6293 idxGstRegBase = X86_GREG_xBX;
6294 idxGstRegIndex = X86_GREG_xSI;
6295 break;
6296 case 1:
6297 idxGstRegBase = X86_GREG_xBX;
6298 idxGstRegIndex = X86_GREG_xDI;
6299 break;
6300 case 2:
6301 idxGstRegBase = X86_GREG_xBP;
6302 idxGstRegIndex = X86_GREG_xSI;
6303 break;
6304 case 3:
6305 idxGstRegBase = X86_GREG_xBP;
6306 idxGstRegIndex = X86_GREG_xDI;
6307 break;
6308 case 4:
6309 idxGstRegBase = X86_GREG_xSI;
6310 idxGstRegIndex = UINT8_MAX;
6311 break;
6312 case 5:
6313 idxGstRegBase = X86_GREG_xDI;
6314 idxGstRegIndex = UINT8_MAX;
6315 break;
6316 case 6:
6317 idxGstRegBase = X86_GREG_xBP;
6318 idxGstRegIndex = UINT8_MAX;
6319 break;
6320#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6321 default:
6322#endif
6323 case 7:
6324 idxGstRegBase = X86_GREG_xBX;
6325 idxGstRegIndex = UINT8_MAX;
6326 break;
6327 }
6328
6329 /*
6330 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6331 */
6332 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6333 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6334 kIemNativeGstRegUse_ReadOnly);
6335 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6336 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6337 kIemNativeGstRegUse_ReadOnly)
6338 : UINT8_MAX;
6339#ifdef RT_ARCH_AMD64
6340 if (idxRegIndex == UINT8_MAX)
6341 {
6342 if (u16EffAddr == 0)
6343 {
6344 /* movzx ret, base */
6345 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6346 }
6347 else
6348 {
6349 /* lea ret32, [base64 + disp32] */
6350 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6351 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6352 if (idxRegRet >= 8 || idxRegBase >= 8)
6353 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6354 pbCodeBuf[off++] = 0x8d;
6355 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6356 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6357 else
6358 {
6359 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6360 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6361 }
6362 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6363 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6364 pbCodeBuf[off++] = 0;
6365 pbCodeBuf[off++] = 0;
6366 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6367
6368 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6369 }
6370 }
6371 else
6372 {
6373 /* lea ret32, [index64 + base64 (+ disp32)] */
6374 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6375 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6376 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6377 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6378 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6379 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6380 pbCodeBuf[off++] = 0x8d;
6381 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6382 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6383 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6384 if (bMod == X86_MOD_MEM4)
6385 {
6386 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6387 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6388 pbCodeBuf[off++] = 0;
6389 pbCodeBuf[off++] = 0;
6390 }
6391 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6392 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6393 }
6394
6395#elif defined(RT_ARCH_ARM64)
6396 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6397 if (u16EffAddr == 0)
6398 {
6399 if (idxRegIndex == UINT8_MAX)
6400 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6401 else
6402 {
6403 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6404 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6405 }
6406 }
6407 else
6408 {
6409 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6410 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6411 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6412 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6413 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6414 else
6415 {
6416 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6417 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6418 }
6419 if (idxRegIndex != UINT8_MAX)
6420 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6421 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6422 }
6423
6424#else
6425# error "port me"
6426#endif
6427
6428 if (idxRegIndex != UINT8_MAX)
6429 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6430 iemNativeRegFreeTmp(pReNative, idxRegBase);
6431 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6432 return off;
6433}
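
/* Worked example (illustration only): for '[bx+si+12h]' the caller passes
   bRm with mod=1/rm=0 and u16Disp=0x12, so u16EffAddr ends up as 0x0012 with
   idxGstRegBase=xBX and idxGstRegIndex=xSI, and the code above emits roughly
   'lea ret32, [base + index + 12h]' followed by clearing bits 16 and up on
   AMD64 (or add+add+uxth on ARM64), using whatever host registers were
   allocated for BX and SI. */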
6434
6435
6436#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6437 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6438
6439/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6440 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6441DECL_INLINE_THROW(uint32_t)
6442iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6443 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6444{
6445 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6446
6447 /*
6448 * Handle the disp32 form with no registers first.
6449 *
6450 * Convert to an immediate value, as that'll delay the register allocation
6451 * and assignment till the memory access / call / whatever and we can use
6452 * a more appropriate register (or none at all).
6453 */
6454 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6455 {
6456 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6457 return off;
6458 }
6459
6460 /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
6461 uint32_t u32EffAddr = 0;
6462 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6463 {
6464 case 0: break;
6465 case 1: u32EffAddr = (int8_t)u32Disp; break;
6466 case 2: u32EffAddr = u32Disp; break;
6467 default: AssertFailed();
6468 }
6469
6470 /* Get the register (or SIB) value. */
6471 uint8_t idxGstRegBase = UINT8_MAX;
6472 uint8_t idxGstRegIndex = UINT8_MAX;
6473 uint8_t cShiftIndex = 0;
6474 switch (bRm & X86_MODRM_RM_MASK)
6475 {
6476 case 0: idxGstRegBase = X86_GREG_xAX; break;
6477 case 1: idxGstRegBase = X86_GREG_xCX; break;
6478 case 2: idxGstRegBase = X86_GREG_xDX; break;
6479 case 3: idxGstRegBase = X86_GREG_xBX; break;
6480 case 4: /* SIB */
6481 {
6482 /* index w/ scaling. */
6483 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6484 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6485 {
6486 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6487 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6488 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6489 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6490 case 4: cShiftIndex = 0; /*no index*/ break;
6491 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6492 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6493 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6494 }
6495
6496 /* base */
6497 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6498 {
6499 case 0: idxGstRegBase = X86_GREG_xAX; break;
6500 case 1: idxGstRegBase = X86_GREG_xCX; break;
6501 case 2: idxGstRegBase = X86_GREG_xDX; break;
6502 case 3: idxGstRegBase = X86_GREG_xBX; break;
6503 case 4:
6504 idxGstRegBase = X86_GREG_xSP;
6505 u32EffAddr += uSibAndRspOffset >> 8;
6506 break;
6507 case 5:
6508 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6509 idxGstRegBase = X86_GREG_xBP;
6510 else
6511 {
6512 Assert(u32EffAddr == 0);
6513 u32EffAddr = u32Disp;
6514 }
6515 break;
6516 case 6: idxGstRegBase = X86_GREG_xSI; break;
6517 case 7: idxGstRegBase = X86_GREG_xDI; break;
6518 }
6519 break;
6520 }
6521 case 5: idxGstRegBase = X86_GREG_xBP; break;
6522 case 6: idxGstRegBase = X86_GREG_xSI; break;
6523 case 7: idxGstRegBase = X86_GREG_xDI; break;
6524 }
6525
6526 /*
6527 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6528 * the start of the function.
6529 */
6530 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6531 {
6532 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6533 return off;
6534 }
6535
6536 /*
6537 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6538 */
6539 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6540 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6541 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6542 kIemNativeGstRegUse_ReadOnly);
6543 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6544 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6545 kIemNativeGstRegUse_ReadOnly);
6546
6547 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6548 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6549 {
6550 idxRegBase = idxRegIndex;
6551 idxRegIndex = UINT8_MAX;
6552 }
6553
6554#ifdef RT_ARCH_AMD64
6555 if (idxRegIndex == UINT8_MAX)
6556 {
6557 if (u32EffAddr == 0)
6558 {
6559 /* mov ret, base */
6560 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6561 }
6562 else
6563 {
6564 /* lea ret32, [base64 + disp32] */
6565 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6566 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6567 if (idxRegRet >= 8 || idxRegBase >= 8)
6568 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6569 pbCodeBuf[off++] = 0x8d;
6570 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6571 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6572 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6573 else
6574 {
6575 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6576 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6577 }
6578 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6579 if (bMod == X86_MOD_MEM4)
6580 {
6581 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6582 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6583 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6584 }
6585 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6586 }
6587 }
6588 else
6589 {
6590 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6591 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6592 if (idxRegBase == UINT8_MAX)
6593 {
6594 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6595 if (idxRegRet >= 8 || idxRegIndex >= 8)
6596 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6597 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6598 pbCodeBuf[off++] = 0x8d;
6599 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6600 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6601 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6602 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6603 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6604 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6605 }
6606 else
6607 {
6608 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6609 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6610 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6611 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6612 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6613 pbCodeBuf[off++] = 0x8d;
6614 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6615 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6616 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6617 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6618 if (bMod != X86_MOD_MEM0)
6619 {
6620 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6621 if (bMod == X86_MOD_MEM4)
6622 {
6623 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6624 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6625 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6626 }
6627 }
6628 }
6629 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6630 }
6631
6632#elif defined(RT_ARCH_ARM64)
6633 if (u32EffAddr == 0)
6634 {
6635 if (idxRegIndex == UINT8_MAX)
6636 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6637 else if (idxRegBase == UINT8_MAX)
6638 {
6639 if (cShiftIndex == 0)
6640 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6641 else
6642 {
6643 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6644 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6645 }
6646 }
6647 else
6648 {
6649 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6650 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6651 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6652 }
6653 }
6654 else
6655 {
6656 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6657 {
6658 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6659 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6660 }
6661 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6662 {
6663 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6664 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6665 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6666 }
6667 else
6668 {
6669 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6670 if (idxRegBase != UINT8_MAX)
6671 {
6672 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6673 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6674 }
6675 }
6676 if (idxRegIndex != UINT8_MAX)
6677 {
6678 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6679 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6680 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6681 }
6682 }
6683
6684#else
6685# error "port me"
6686#endif
6687
6688 if (idxRegIndex != UINT8_MAX)
6689 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6690 if (idxRegBase != UINT8_MAX)
6691 iemNativeRegFreeTmp(pReNative, idxRegBase);
6692 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6693 return off;
6694}
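
/* Worked example (illustration only): for '[ebx+edx*4+disp32]' we get mod=2,
   rm=4 (SIB) with SIB.scale=2/SIB.index=edx/SIB.base=ebx, so the code above
   sets u32EffAddr=u32Disp, idxGstRegBase=xBX, idxGstRegIndex=xDX and
   cShiftIndex=2 and emits a single 'lea ret32, [base + index*4 + disp32]'
   style instruction (or the ARM64 add/shift equivalent).  The SIB.base=4
   (xSP) case additionally folds in the fixed offset from
   uSibAndRspOffset >> 8, covering the 'pop [esp]' special case. */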
6695
6696
6697#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6698 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6699 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6700
6701#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6702 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6703 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6704
6705#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6706 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6707 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6708
6709/**
6710 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6711 *
6712 * @returns New off.
6713 * @param pReNative The native recompiler state.
6714 * @param off The current code buffer offset.
6715 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6716 * bit 4 to REX.X. The two bits are part of the
6717 * REG sub-field, which isn't needed in this
6718 * function.
6719 * @param uSibAndRspOffset Two parts:
6720 * - The first 8 bits make up the SIB byte.
6721 * - The next 8 bits are the fixed RSP/ESP offset
6722 * in case of a pop [xSP].
6723 * @param u32Disp The displacement byte/word/dword, if any.
6724 * @param cbInstr The size of the fully decoded instruction. Used
6725 * for RIP relative addressing.
6726 * @param idxVarRet The result variable number.
6727 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6728 * when calculating the address.
6729 *
6730 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6731 */
6732DECL_INLINE_THROW(uint32_t)
6733iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6734 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6735{
6736 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6737
6738 /*
6739 * Special case the rip + disp32 form first.
6740 */
6741 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6742 {
6743 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6744 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6745 kIemNativeGstRegUse_ReadOnly);
6746 if (f64Bit)
6747 {
6748#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6749 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6750#else
6751 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6752#endif
6753#ifdef RT_ARCH_AMD64
6754 if ((int32_t)offFinalDisp == offFinalDisp)
6755 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6756 else
6757 {
6758 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6759 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6760 }
6761#else
6762 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6763#endif
6764 }
6765 else
6766 {
6767# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6768 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6769# else
6770 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6771# endif
6772 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6773 }
6774 iemNativeRegFreeTmp(pReNative, idxRegPc);
6775 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6776 return off;
6777 }
6778
6779 /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
6780 int64_t i64EffAddr = 0;
6781 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6782 {
6783 case 0: break;
6784 case 1: i64EffAddr = (int8_t)u32Disp; break;
6785 case 2: i64EffAddr = (int32_t)u32Disp; break;
6786 default: AssertFailed();
6787 }
6788
6789 /* Get the register (or SIB) value. */
6790 uint8_t idxGstRegBase = UINT8_MAX;
6791 uint8_t idxGstRegIndex = UINT8_MAX;
6792 uint8_t cShiftIndex = 0;
6793 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6794 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6795 else /* SIB: */
6796 {
6797 /* index w/ scaling. */
6798 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6799 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6800 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6801 if (idxGstRegIndex == 4)
6802 {
6803 /* no index */
6804 cShiftIndex = 0;
6805 idxGstRegIndex = UINT8_MAX;
6806 }
6807
6808 /* base */
6809 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6810 if (idxGstRegBase == 4)
6811 {
6812 /* pop [rsp] hack */
6813 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6814 }
6815 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6816 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6817 {
6818 /* mod=0 and base=5 -> disp32, no base reg. */
6819 Assert(i64EffAddr == 0);
6820 i64EffAddr = (int32_t)u32Disp;
6821 idxGstRegBase = UINT8_MAX;
6822 }
6823 }
6824
6825 /*
6826 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6827 * the start of the function.
6828 */
6829 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6830 {
6831 if (f64Bit)
6832 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6833 else
6834 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6835 return off;
6836 }
6837
6838 /*
6839 * Now emit code that calculates:
6840 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6841 * or if !f64Bit:
6842 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6843 */
6844 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6845 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6846 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6847 kIemNativeGstRegUse_ReadOnly);
6848 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6849 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6850 kIemNativeGstRegUse_ReadOnly);
6851
6852 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6853 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6854 {
6855 idxRegBase = idxRegIndex;
6856 idxRegIndex = UINT8_MAX;
6857 }
6858
6859#ifdef RT_ARCH_AMD64
6860 uint8_t bFinalAdj;
6861 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6862 bFinalAdj = 0; /* likely */
6863 else
6864 {
6865 /* pop [rsp] with a problematic disp32 value. Split out the
6866 RSP offset and add it separately afterwards (bFinalAdj). */
6867 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6868 Assert(idxGstRegBase == X86_GREG_xSP);
6869 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6870 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6871 Assert(bFinalAdj != 0);
6872 i64EffAddr -= bFinalAdj;
6873 Assert((int32_t)i64EffAddr == i64EffAddr);
6874 }
6875 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6876//pReNative->pInstrBuf[off++] = 0xcc;
6877
6878 if (idxRegIndex == UINT8_MAX)
6879 {
6880 if (u32EffAddr == 0)
6881 {
6882 /* mov ret, base */
6883 if (f64Bit)
6884 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6885 else
6886 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6887 }
6888 else
6889 {
6890 /* lea ret, [base + disp32] */
6891 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6892 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6893 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6894 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6895 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6896 | (f64Bit ? X86_OP_REX_W : 0);
6897 pbCodeBuf[off++] = 0x8d;
6898 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6899 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6900 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6901 else
6902 {
6903 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6904 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6905 }
6906 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6907 if (bMod == X86_MOD_MEM4)
6908 {
6909 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6910 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6911 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6912 }
6913 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6914 }
6915 }
6916 else
6917 {
6918 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6919 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6920 if (idxRegBase == UINT8_MAX)
6921 {
6922 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6923 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6924 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6925 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6926 | (f64Bit ? X86_OP_REX_W : 0);
6927 pbCodeBuf[off++] = 0x8d;
6928 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6929 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6930 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6931 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6932 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6933 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6934 }
6935 else
6936 {
6937 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6938 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6939 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6940 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6941 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6942 | (f64Bit ? X86_OP_REX_W : 0);
6943 pbCodeBuf[off++] = 0x8d;
6944 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6945 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6946 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6947 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6948 if (bMod != X86_MOD_MEM0)
6949 {
6950 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6951 if (bMod == X86_MOD_MEM4)
6952 {
6953 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6954 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6955 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6956 }
6957 }
6958 }
6959 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6960 }
6961
6962 if (!bFinalAdj)
6963 { /* likely */ }
6964 else
6965 {
6966 Assert(f64Bit);
6967 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6968 }
6969
6970#elif defined(RT_ARCH_ARM64)
6971 if (i64EffAddr == 0)
6972 {
6973 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6974 if (idxRegIndex == UINT8_MAX)
6975 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6976 else if (idxRegBase != UINT8_MAX)
6977 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6978 f64Bit, false /*fSetFlags*/, cShiftIndex);
6979 else
6980 {
6981 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6982 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6983 }
6984 }
6985 else
6986 {
6987 if (f64Bit)
6988 { /* likely */ }
6989 else
6990 i64EffAddr = (int32_t)i64EffAddr;
6991
6992 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6993 {
6994 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6995 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6996 }
6997 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6998 {
6999 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7000 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7001 }
7002 else
7003 {
7004 if (f64Bit)
7005 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7006 else
7007 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7008 if (idxRegBase != UINT8_MAX)
7009 {
7010 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7011 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7012 }
7013 }
7014 if (idxRegIndex != UINT8_MAX)
7015 {
7016 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7017 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7018 f64Bit, false /*fSetFlags*/, cShiftIndex);
7019 }
7020 }
7021
7022#else
7023# error "port me"
7024#endif
7025
7026 if (idxRegIndex != UINT8_MAX)
7027 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7028 if (idxRegBase != UINT8_MAX)
7029 iemNativeRegFreeTmp(pReNative, idxRegBase);
7030 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7031 return off;
7032}
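
/* Worked example (illustration only): for 'mov rax, [r12+rbx*8+10h]' the
   caller passes bRmEx=0x4c (ModRM 0x44 with bit 3 = REX.B), uSibAndRspOffset
   with the SIB byte 0xdc in the low 8 bits (scale=3, index=rbx, base=r12 & 7)
   and u32Disp=0x10.  The code above then resolves idxGstRegBase=12 (r12),
   idxGstRegIndex=3 (rbx), cShiftIndex=3 and i64EffAddr=0x10, and emits a
   single 'lea ret, [base + index*8 + 10h]' (or the ARM64 equivalent). */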
7033
7034
7035/*********************************************************************************************************************************
7036* Memory fetches and stores common *
7037*********************************************************************************************************************************/
7038
7039typedef enum IEMNATIVEMITMEMOP
7040{
7041 kIemNativeEmitMemOp_Store = 0,
7042 kIemNativeEmitMemOp_Fetch,
7043 kIemNativeEmitMemOp_Fetch_Zx_U16,
7044 kIemNativeEmitMemOp_Fetch_Zx_U32,
7045 kIemNativeEmitMemOp_Fetch_Zx_U64,
7046 kIemNativeEmitMemOp_Fetch_Sx_U16,
7047 kIemNativeEmitMemOp_Fetch_Sx_U32,
7048 kIemNativeEmitMemOp_Fetch_Sx_U64
7049} IEMNATIVEMITMEMOP;
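
/* The Fetch_Zx_* / Fetch_Sx_* variants denote fetches where the loaded value
   is zero- respectively sign-extended to the wider destination register
   (e.g. Fetch_Sx_U64 with cbMem=2 loads 16 bits and sign-extends them to 64
   bits), whereas plain Fetch just loads cbMem bytes. */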
7050
7051/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7052 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7053 * (with iSegReg = UINT8_MAX). */
7054DECL_INLINE_THROW(uint32_t)
7055iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7056 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
7057 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7058{
7059 /*
7060 * Assert sanity.
7061 */
7062 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7063 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7064 Assert( enmOp != kIemNativeEmitMemOp_Store
7065 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7066 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7067 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7068 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7069 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7070 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7071 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7072 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7073#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7074 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
7075 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
7076#else
7077 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
7078#endif
7079 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7080 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7081#ifdef VBOX_STRICT
7082 if (iSegReg == UINT8_MAX)
7083 {
7084 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7085 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7086 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7087 switch (cbMem)
7088 {
7089 case 1:
7090 Assert( pfnFunction
7091 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7092 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7093 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7094 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7095 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7096 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7097 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7098 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7099 : UINT64_C(0xc000b000a0009000) ));
7100 Assert(!fAlignMaskAndCtl);
7101 break;
7102 case 2:
7103 Assert( pfnFunction
7104 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7105 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7106 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7107 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7108 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7109 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7110 : UINT64_C(0xc000b000a0009000) ));
7111 Assert(fAlignMaskAndCtl <= 1);
7112 break;
7113 case 4:
7114 Assert( pfnFunction
7115 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7116 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7117 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7118 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7119 : UINT64_C(0xc000b000a0009000) ));
7120 Assert(fAlignMaskAndCtl <= 3);
7121 break;
7122 case 8:
7123 Assert( pfnFunction
7124 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7125 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7126 : UINT64_C(0xc000b000a0009000) ));
7127 Assert(fAlignMaskAndCtl <= 7);
7128 break;
7129#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7130 case sizeof(RTUINT128U):
7131 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7132 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7133 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7134 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7135 || ( enmOp == kIemNativeEmitMemOp_Store
7136 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7137 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7138 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7139 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7140 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7141 : fAlignMaskAndCtl <= 15);
7142 break;
7143 case sizeof(RTUINT256U):
7144 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7145 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7146 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7147 || ( enmOp == kIemNativeEmitMemOp_Store
7148 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7149 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7150 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7151 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7152 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7153 : fAlignMaskAndCtl <= 31);
7154 break;
7155#endif
7156 }
7157 }
7158 else
7159 {
7160 Assert(iSegReg < 6);
7161 switch (cbMem)
7162 {
7163 case 1:
7164 Assert( pfnFunction
7165 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7166 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7167 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7168 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7169 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7170 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7171 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7172 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7173 : UINT64_C(0xc000b000a0009000) ));
7174 Assert(!fAlignMaskAndCtl);
7175 break;
7176 case 2:
7177 Assert( pfnFunction
7178 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7179 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7180 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7181 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7182 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7183 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7184 : UINT64_C(0xc000b000a0009000) ));
7185 Assert(fAlignMaskAndCtl <= 1);
7186 break;
7187 case 4:
7188 Assert( pfnFunction
7189 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7190 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7191 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7192 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7193 : UINT64_C(0xc000b000a0009000) ));
7194 Assert(fAlignMaskAndCtl <= 3);
7195 break;
7196 case 8:
7197 Assert( pfnFunction
7198 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7199 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7200 : UINT64_C(0xc000b000a0009000) ));
7201 Assert(fAlignMaskAndCtl <= 7);
7202 break;
7203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7204 case sizeof(RTUINT128U):
7205 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7206 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7207 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7208 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7209 || ( enmOp == kIemNativeEmitMemOp_Store
7210 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7211 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7212 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7213 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7214 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7215 : fAlignMaskAndCtl <= 15);
7216 break;
7217 case sizeof(RTUINT256U):
7218 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7219 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7220 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7221 || ( enmOp == kIemNativeEmitMemOp_Store
7222 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7223 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7224 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7225 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7226 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7227 : fAlignMaskAndCtl <= 31);
7228 break;
7229#endif
7230 }
7231 }
7232#endif
7233
7234#ifdef VBOX_STRICT
7235 /*
7236 * Check that the fExec flags we've got make sense.
7237 */
7238 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7239#endif
7240
7241 /*
7242 * To keep things simple we have to commit any pending writes first as we
7243 * may end up making calls.
7244 */
7245 /** @todo we could postpone this till we make the call and reload the
7246 * registers after returning from the call. Not sure if that's sensible or
7247 * not, though. */
7248#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7249 off = iemNativeRegFlushPendingWrites(pReNative, off);
7250#else
7251 /* The program counter is treated differently for now. */
7252 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7253#endif
7254
7255#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7256 /*
7257 * Move/spill/flush stuff out of call-volatile registers.
7258 * This is the easy way out. We could contain this to the tlb-miss branch
7259 * by saving and restoring active stuff here.
7260 */
7261 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7262#endif
7263
7264 /*
7265 * Define labels and allocate the result register (trying for the return
7266 * register if we can).
7267 */
7268 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7269#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7270 uint8_t idxRegValueFetch = UINT8_MAX;
7271
7272 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7273 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7274 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
7275 else
7276 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7277 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7278 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7279 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7280#else
7281 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7282 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7283 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7284 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7285#endif
7286 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
7287
7288#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7289 uint8_t idxRegValueStore = UINT8_MAX;
7290
7291 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7292 idxRegValueStore = !TlbState.fSkip
7293 && enmOp == kIemNativeEmitMemOp_Store
7294 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7295 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7296 : UINT8_MAX;
7297 else
7298 idxRegValueStore = !TlbState.fSkip
7299 && enmOp == kIemNativeEmitMemOp_Store
7300 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7301 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7302 : UINT8_MAX;
7303
7304#else
7305 uint8_t const idxRegValueStore = !TlbState.fSkip
7306 && enmOp == kIemNativeEmitMemOp_Store
7307 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7308 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7309 : UINT8_MAX;
7310#endif
7311 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7312 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7313 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7314 : UINT32_MAX;
7315
7316 /*
7317 * Jump to the TLB lookup code.
7318 */
7319 if (!TlbState.fSkip)
7320 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
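    /* For reference, the emitted layout below is roughly (when the lookup
       isn't skipped):
             jmp   TlbLookup              ; emitted just above
         TlbMiss:
             ...full argument setup and helper call...
             jmp   TlbDone
         TlbLookup:
             ...inline TLB probe, branching to TlbMiss on a miss...
             ...inline store/fetch via the translated address (idxRegMemResult)...
         TlbDone:
             ...continue... */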
7321
7322 /*
7323 * TlbMiss:
7324 *
7325 * Call helper to do the fetching or storing.
7326 * We flush all guest register shadow copies here.
7327 */
7328 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7329
7330#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7331 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7332#else
7333 RT_NOREF(idxInstr);
7334#endif
7335
7336#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7337 if (pReNative->Core.offPc)
7338 {
7339 /*
7340 * Update the program counter but restore it at the end of the TlbMiss branch.
7341 * This should allow delaying more program counter updates for the TlbLookup and hit paths,
7342 * which are hopefully much more frequent, reducing the number of memory accesses.
7343 */
7344 /* Allocate a temporary PC register. */
7345/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7346 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7347 kIemNativeGstRegUse_ForUpdate);
7348
7349 /* Perform the addition and store the result. */
7350 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7351 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7352# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7353 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7354# endif
7355
7356 /* Free and flush the PC register. */
7357 iemNativeRegFreeTmp(pReNative, idxPcReg);
7358 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7359 }
7360#endif
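
    /* Note: the matching adjustment that subtracts Core.offPc again after the
       helper call is emitted further down, so the delayed PC value stays
       consistent for the TlbLookup / TLB hit path. */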
7361
7362#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7363 /* Save variables in volatile registers. */
7364 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7365 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
7366 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
7367 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7368#endif
7369
7370 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7371 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7372#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7373 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7374 {
7375 /*
7376 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7377 *
7378 * @note A host register was assigned to the variable for the TlbLookup case above and it must not
7379 * be freed here, or the value loaded into the register will not be synced back further down
7380 * the road because the variable would no longer know it had a register assigned.
7381 *
7382 * @note For loads it is not required to sync what is in the assigned register with the stack slot
7383 * as it will be overwritten anyway.
7384 */
7385 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7386 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7387 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7388 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7389 }
7390 else
7391#endif
7392 if (enmOp == kIemNativeEmitMemOp_Store)
7393 {
7394 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7395 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
7396#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7397 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7398#else
7399 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7400 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7401#endif
7402 }
7403
7404 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7405 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
7406#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7407 fVolGregMask);
7408#else
7409 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7410#endif
7411
7412 if (iSegReg != UINT8_MAX)
7413 {
7414 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7415 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7416 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7417 }
7418
7419 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7420 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7421
7422 /* Done setting up parameters, make the call. */
7423 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7424
7425 /*
7426 * Put the result in the right register if this is a fetch.
7427 */
7428 if (enmOp != kIemNativeEmitMemOp_Store)
7429 {
7430#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7431 if ( cbMem == sizeof(RTUINT128U)
7432 || cbMem == sizeof(RTUINT256U))
7433 {
7434 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
7435
7436 /* Sync the value on the stack with the host register assigned to the variable. */
7437 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7438 }
7439 else
7440#endif
7441 {
7442 Assert(idxRegValueFetch == pVarValue->idxReg);
7443 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7444 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7445 }
7446 }
7447
7448#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7449 /* Restore variables and guest shadow registers to volatile registers. */
7450 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7451 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7452#endif
7453
7454#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7455 if (pReNative->Core.offPc)
7456 {
7457 /*
7458 * Time to restore the program counter to its original value.
7459 */
7460 /* Allocate a temporary PC register. */
7461 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7462 kIemNativeGstRegUse_ForUpdate);
7463
7464 /* Restore the original value. */
7465 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7466 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7467
7468 /* Free and flush the PC register. */
7469 iemNativeRegFreeTmp(pReNative, idxPcReg);
7470 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7471 }
7472#endif
7473
7474#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7475 if (!TlbState.fSkip)
7476 {
7477 /* end of TlbMiss - Jump to the done label. */
7478 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7479 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7480
7481 /*
7482 * TlbLookup:
7483 */
7484 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
7485 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
7486 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
7487
7488 /*
7489 * Emit code to do the actual storing / fetching.
7490 */
7491 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7492# ifdef IEM_WITH_TLB_STATISTICS
7493 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7494 enmOp == kIemNativeEmitMemOp_Store
7495 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
7496 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
7497# endif
7498 switch (enmOp)
7499 {
7500 case kIemNativeEmitMemOp_Store:
7501 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7502 {
7503 switch (cbMem)
7504 {
7505 case 1:
7506 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7507 break;
7508 case 2:
7509 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7510 break;
7511 case 4:
7512 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7513 break;
7514 case 8:
7515 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7516 break;
7517#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7518 case sizeof(RTUINT128U):
7519 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7520 break;
7521 case sizeof(RTUINT256U):
7522 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7523 break;
7524#endif
7525 default:
7526 AssertFailed();
7527 }
7528 }
7529 else
7530 {
7531 switch (cbMem)
7532 {
7533 case 1:
7534 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7535 idxRegMemResult, TlbState.idxReg1);
7536 break;
7537 case 2:
7538 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7539 idxRegMemResult, TlbState.idxReg1);
7540 break;
7541 case 4:
7542 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7543 idxRegMemResult, TlbState.idxReg1);
7544 break;
7545 case 8:
7546 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7547 idxRegMemResult, TlbState.idxReg1);
7548 break;
7549 default:
7550 AssertFailed();
7551 }
7552 }
7553 break;
7554
7555 case kIemNativeEmitMemOp_Fetch:
7556 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7557 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7558 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7559 switch (cbMem)
7560 {
7561 case 1:
7562 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7563 break;
7564 case 2:
7565 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7566 break;
7567 case 4:
7568 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7569 break;
7570 case 8:
7571 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7572 break;
7573#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7574 case sizeof(RTUINT128U):
7575 /*
7576 * No need to sync back the register with the stack, this is done by the generic variable handling
7577 * code if there is a register assigned to a variable and the stack must be accessed.
7578 */
7579 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7580 break;
7581 case sizeof(RTUINT256U):
7582 /*
7583 * No need to sync back the register with the stack, this is done by the generic variable handling
7584 * code if there is a register assigned to a variable and the stack must be accessed.
7585 */
7586 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7587 break;
7588#endif
7589 default:
7590 AssertFailed();
7591 }
7592 break;
7593
7594 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7595 Assert(cbMem == 1);
7596 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7597 break;
7598
7599 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7600 Assert(cbMem == 1 || cbMem == 2);
7601 if (cbMem == 1)
7602 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7603 else
7604 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7605 break;
7606
7607 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7608 switch (cbMem)
7609 {
7610 case 1:
7611 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7612 break;
7613 case 2:
7614 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7615 break;
7616 case 4:
7617 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7618 break;
7619 default:
7620 AssertFailed();
7621 }
7622 break;
7623
7624 default:
7625 AssertFailed();
7626 }
7627
7628 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7629
7630 /*
7631 * TlbDone:
7632 */
7633 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7634
7635 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7636
7637# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7638 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7639 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7640# endif
7641 }
7642#else
7643 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
7644#endif
7645
7646 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7647 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7648 return off;
7649}
7650
7651
7652
7653/*********************************************************************************************************************************
7654* Memory fetches (IEM_MEM_FETCH_XXX). *
7655*********************************************************************************************************************************/
7656
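/*
 * Note on the macros below: every IEM_MC_FETCH_MEM_XXX variant maps onto
 * iemNativeEmitMemFetchStoreDataCommon with (in order) the destination variable, the
 * segment register (UINT8_MAX for the FLAT variants), the guest address variable, the
 * access size, the alignment mask plus optional IEM_MEMMAP_F_ALIGN_XXX control flags,
 * the operation kind (plain fetch or fetch with zero/sign extension), the helper to call
 * on the TLB-miss path, the current instruction index, and - for the _DISP variants - a
 * constant displacement.
 */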
7657/* 8-bit segmented: */
7658#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7659 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
7660 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7661 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7662
7663#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7664 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7665 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7666 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7667
7668#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7669 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7670 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7671 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7672
7673#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7674 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7675 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7676 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7677
7678#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7679 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7680 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7681 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7682
7683#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7684 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7685 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7686 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7687
7688#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7689 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7690 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7691 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7692
7693/* 16-bit segmented: */
7694#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7695 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7696 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7697 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7698
7699#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7700 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7701 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7702 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7703
7704#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7705 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7706 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7707 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7708
7709#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7710 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7711 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7712 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7713
7714#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7715 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7716 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7717 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7718
7719#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7720 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7721 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7722 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7723
7724
7725/* 32-bit segmented: */
7726#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7727 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7728 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7729 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7730
7731#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7732 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7733 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7734 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7735
7736#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7737 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7738 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7739 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7740
7741#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7742 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7743 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7744 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7745
7746#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7747 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7748 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7749 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7750
7751#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7752 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7753 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7754 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7755
7756#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7757 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7758 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7759 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7760
7761#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7762 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7763 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7764 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7765
7766#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7767 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
7768 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7769 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7770
7771AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7772#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7773 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
7774 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7775 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7776
7777
7778/* 64-bit segmented: */
7779#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7780 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7781 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7782 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7783
7784AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7785#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7786 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7787 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7788 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7789
7790
7791/* 8-bit flat: */
7792#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7793 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7794 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7795 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7796
7797#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7798 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7799 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7800 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7801
7802#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7803 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7804 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7805 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7806
7807#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7808 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7809 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7810 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7811
7812#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7813 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7814 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7815 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7816
7817#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7818 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7819 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7820 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7821
7822#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7823 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7824 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7825 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7826
7827
7828/* 16-bit flat: */
7829#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7830 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7831 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7832 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7833
7834#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7835 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7836 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7837 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7838
7839#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7840 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7841 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7842 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7843
7844#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7845 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7846 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7847 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7848
7849#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7850 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7851 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7852 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7853
7854#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7855 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7856 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7857 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7858
7859/* 32-bit flat: */
7860#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7861 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7862 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7863 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7864
7865#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7866 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7867 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7868 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7869
7870#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7871 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7872 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7873 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7874
7875#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7876 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7877 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7878 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7879
7880#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7881 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7882 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7883 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7884
7885#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7886 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7887 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7888 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7889
7890#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7891 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7892 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7893 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7894
7895#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7896 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7897 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7898 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7899
7900#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7901 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7902 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7903 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7904
7905#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7906 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7907 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7908 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7909
7910
7911/* 64-bit flat: */
7912#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7913 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7914 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7915 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7916
7917#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7918 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7919 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7920 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7921
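/*
 * The 128-bit and 256-bit fetch variants below are only compiled in when the SIMD
 * register allocator is available. The _ALIGN_SSE / _ALIGN_AVX variants add the
 * IEM_MEMMAP_F_ALIGN_GP (and for SSE also IEM_MEMMAP_F_ALIGN_SSE) control flags on top
 * of the natural alignment mask and call the AlignedSse / AlignedAvx TLB-miss helpers.
 */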
7922#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7923/* 128-bit segmented: */
7924#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7925 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7926 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7927 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7928
7929#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7930 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7931 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7932 kIemNativeEmitMemOp_Fetch, \
7933 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7934
7935AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7936#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7937 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7938 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7939 kIemNativeEmitMemOp_Fetch, \
7940 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7941
7942#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7943 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7944 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7945 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7946
7947#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7948 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7949 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7950 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7951
7952
7953/* 128-bit flat: */
7954#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7955 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7956 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7957 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7958
7959#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7960 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7961 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7962 kIemNativeEmitMemOp_Fetch, \
7963 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7964
7965#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7966 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7967 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7968 kIemNativeEmitMemOp_Fetch, \
7969 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7970
7971#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7972 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7973 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7974 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7975
7976#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7977 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7978 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7979 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7980
7981/* 256-bit segmented: */
7982#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7983 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7984 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7985 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7986
7987#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7988 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7989 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7990 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7991
7992#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7993 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7994 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7995 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7996
7997#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7998 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7999 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8000 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8001
8002
8003/* 256-bit flat: */
8004#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8005 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
8006 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8007 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8008
8009#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8010 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
8011 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8012 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8013
8014#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8015 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8016 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
8017 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8018
8019#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8020 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
8021 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8022 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8023
8024#endif
8025
8026
8027/*********************************************************************************************************************************
8028* Memory stores (IEM_MEM_STORE_XXX). *
8029*********************************************************************************************************************************/
8030
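/*
 * The store macros reuse iemNativeEmitMemFetchStoreDataCommon with kIemNativeEmitMemOp_Store;
 * the value argument may be a variable or an immediate. The *_CONST variants further below
 * go through iemNativeEmitMemStoreConstDataCommon, which wraps the constant in a temporary
 * variable first.
 */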
8031#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8032 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
8033 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8034 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8035
8036#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8037 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
8038 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8039 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8040
8041#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8042 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
8043 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8044 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8045
8046#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8047 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
8048 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8049 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8050
8051
8052#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8053 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
8054 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8055 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8056
8057#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8058 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
8059 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8060 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8061
8062#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8063 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
8064 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8065 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8066
8067#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8068 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
8069 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8070 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8071
8072
8073#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8074 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8075 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8076
8077#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8078 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8079 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8080
8081#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8082 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8083 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8084
8085#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8086 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8087 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8088
8089
8090#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8091 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8092 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8093
8094#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8095 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8096 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8097
8098#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8099 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8100 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8101
8102#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8103 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8104 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8105
8106/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8107 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8108DECL_INLINE_THROW(uint32_t)
8109iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8110 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
8111{
8112 /*
8113 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8114 * to do the grunt work.
8115 */
8116 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
8117 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
8118 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
8119 pfnFunction, idxInstr);
8120 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8121 return off;
8122}
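/*
 * Illustration (not part of the build): IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, uConst)
 * expands to a call of this helper with cbMem = sizeof(uint16_t); the constant is wrapped in
 * a temporary const variable and handed to iemNativeEmitMemFetchStoreDataCommon as a
 * kIemNativeEmitMemOp_Store with the natural alignment mask (cbMem - 1).
 */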
8123
8124
8125#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8126# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8127 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8128 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8129 kIemNativeEmitMemOp_Store, \
8130 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8131
8132# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8133 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
8134 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8135 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8136
8137# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8138 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
8139 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8140 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8141
8142# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8143 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
8144 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8145 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8146
8147
8148# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8149 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8150 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8151 kIemNativeEmitMemOp_Store, \
8152 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
8153
8154# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8155 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
8156 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8157 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8158
8159# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8160 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
8161 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8162 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8163
8164# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8165 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8166 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8167 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8168#endif
8169
8170
8171
8172/*********************************************************************************************************************************
8173* Stack Accesses. *
8174*********************************************************************************************************************************/
8175/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
8176#define IEM_MC_PUSH_U16(a_u16Value) \
8177 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8178 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8179#define IEM_MC_PUSH_U32(a_u32Value) \
8180 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8181 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8182#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8183 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
8184 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8185#define IEM_MC_PUSH_U64(a_u64Value) \
8186 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8187 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8188
8189#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8190 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8191 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8192#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8193 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8194 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8195#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8196 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
8197 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8198
8199#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8200 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8201 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8202#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8203 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8204 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8205
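/*
 * The RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) packing used above is decoded by
 * iemNativeEmitStackPush below: RT_BYTE1 is the width of the pushed value in bits (cbMem =
 * bits / 8), RT_BYTE2 the flat stack-pointer width (0 = segmented, SS-relative), and
 * RT_BYTE3 whether a segment register is being pushed.
 */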
8206
8207/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8208DECL_INLINE_THROW(uint32_t)
8209iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
8210 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8211{
8212 /*
8213 * Assert sanity.
8214 */
8215 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8216 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8217#ifdef VBOX_STRICT
8218 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8219 {
8220 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8221 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8222 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8223 Assert( pfnFunction
8224 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8225 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8226 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8227 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8228 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8229 : UINT64_C(0xc000b000a0009000) ));
8230 }
8231 else
8232 Assert( pfnFunction
8233 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8234 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8235 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8236 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8237 : UINT64_C(0xc000b000a0009000) ));
8238#endif
8239
8240#ifdef VBOX_STRICT
8241 /*
8242 * Check that the fExec flags we've got make sense.
8243 */
8244 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8245#endif
8246
8247 /*
8248 * To keep things simple we have to commit any pending writes first as we
8249 * may end up making calls.
8250 */
8251 /** @todo we could postpone this till we make the call and reload the
8252 * registers after returning from the call. Not sure if that's sensible or
8253 * not, though. */
8254 off = iemNativeRegFlushPendingWrites(pReNative, off);
8255
8256 /*
8257 * First we calculate the new RSP and the effective stack pointer value.
8258 * For 64-bit mode and flat 32-bit these two are the same.
8259 * (Code structure is very similar to that of PUSH)
8260 */
8261 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8262 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
8263 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8264 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8265 ? cbMem : sizeof(uint16_t);
8266 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8267 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8268 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8269 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8270 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8271 if (cBitsFlat != 0)
8272 {
8273 Assert(idxRegEffSp == idxRegRsp);
8274 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8275 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8276 if (cBitsFlat == 64)
8277 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8278 else
8279 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8280 }
8281 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8282 {
8283 Assert(idxRegEffSp != idxRegRsp);
8284 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8285 kIemNativeGstRegUse_ReadOnly);
8286#ifdef RT_ARCH_AMD64
8287 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8288#else
8289 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8290#endif
8291 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8292 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8293 offFixupJumpToUseOtherBitSp = off;
8294 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8295 {
8296 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8297 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8298 }
8299 else
8300 {
8301 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8302 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8303 }
8304 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8305 }
8306 /* SpUpdateEnd: */
8307 uint32_t const offLabelSpUpdateEnd = off;
8308
8309 /*
8310 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8311 * we're skipping lookup).
8312 */
8313 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8314 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8315 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8316 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8317 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8318 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8319 : UINT32_MAX;
8320 uint8_t const idxRegValue = !TlbState.fSkip
8321 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8322 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
8323 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
8324 : UINT8_MAX;
8325 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8326
8327
8328 if (!TlbState.fSkip)
8329 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8330 else
8331 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8332
8333 /*
8334 * Use16BitSp:
8335 */
8336 if (cBitsFlat == 0)
8337 {
8338#ifdef RT_ARCH_AMD64
8339 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8340#else
8341 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8342#endif
8343 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8344 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8345 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8346 else
8347 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8348 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8349 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8350 }
8351
8352 /*
8353 * TlbMiss:
8354 *
8355 * Call helper to do the pushing.
8356 */
8357 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8358
8359#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8360 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8361#else
8362 RT_NOREF(idxInstr);
8363#endif
8364
8365 /* Save variables in volatile registers. */
8366 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8367 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8368 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8369 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8370 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8371
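 /* Load the helper arguments, taking care not to overwrite an argument register that still
    holds one of the source values (hence the swap via ARG0 below when the value sits in
    ARG1 and the effective SP in ARG2). */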
8372 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8373 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8374 {
8375 /* Swap them using ARG0 as temp register: */
8376 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8377 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8378 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8379 }
8380 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8381 {
8382 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8383 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8384 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8385
8386 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8387 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8388 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8389 }
8390 else
8391 {
8392 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8393 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8394
8395 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8396 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8397 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8398 }
8399
8400 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8401 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8402
8403 /* Done setting up parameters, make the call. */
8404 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8405
8406 /* Restore variables and guest shadow registers to volatile registers. */
8407 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8408 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8409
8410#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8411 if (!TlbState.fSkip)
8412 {
8413 /* end of TlbMiss - Jump to the done label. */
8414 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8415 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8416
8417 /*
8418 * TlbLookup:
8419 */
8420 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
8421 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8422
8423 /*
8424 * Emit code to do the actual storing / fetching.
8425 */
8426 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8427# ifdef IEM_WITH_TLB_STATISTICS
8428 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8429 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8430# endif
8431 if (idxRegValue != UINT8_MAX)
8432 {
8433 switch (cbMemAccess)
8434 {
8435 case 2:
8436 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8437 break;
8438 case 4:
8439 if (!fIsIntelSeg)
8440 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8441 else
8442 {
8443 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
8444 PUSH FS in real mode, so we have to try to emulate that here.
8445 We borrow the now unused idxReg1 from the TLB lookup code here. */
8446 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8447 kIemNativeGstReg_EFlags);
8448 if (idxRegEfl != UINT8_MAX)
8449 {
8450#ifdef RT_ARCH_AMD64
8451 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8452 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8453 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8454#else
8455 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8456 off, TlbState.idxReg1, idxRegEfl,
8457 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8458#endif
8459 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8460 }
8461 else
8462 {
8463 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
8464 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8465 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8466 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8467 }
8468 /* ASSUMES the upper half of idxRegValue is ZERO. */
8469 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8470 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8471 }
8472 break;
8473 case 8:
8474 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8475 break;
8476 default:
8477 AssertFailed();
8478 }
8479 }
8480 else
8481 {
8482 switch (cbMemAccess)
8483 {
8484 case 2:
8485 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8486 idxRegMemResult, TlbState.idxReg1);
8487 break;
8488 case 4:
8489 Assert(!fIsSegReg);
8490 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8491 idxRegMemResult, TlbState.idxReg1);
8492 break;
8493 case 8:
8494 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8495 break;
8496 default:
8497 AssertFailed();
8498 }
8499 }
8500
8501 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8502 TlbState.freeRegsAndReleaseVars(pReNative);
8503
8504 /*
8505 * TlbDone:
8506 *
8507 * Commit the new RSP value.
8508 */
8509 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8510 }
8511#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8512
8513#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8514 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
8515#endif
8516 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8517 if (idxRegEffSp != idxRegRsp)
8518 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8519
8520 /* The value variable is implicitly flushed. */
8521 if (idxRegValue != UINT8_MAX)
8522 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8523 iemNativeVarFreeLocal(pReNative, idxVarValue);
8524
8525 return off;
8526}
8527
8528
8529
8530/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
8531#define IEM_MC_POP_GREG_U16(a_iGReg) \
8532 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8533 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8534#define IEM_MC_POP_GREG_U32(a_iGReg) \
8535 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8536 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8537#define IEM_MC_POP_GREG_U64(a_iGReg) \
8538 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8539 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8540
8541#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8542 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8543 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8544#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8545 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8546 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8547
8548#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8549 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8550 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8551#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8552 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8553 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
8554
8555
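/** Emits the 16-bit stack pointer variant of the POP address calculation: loads the current
 * 16-bit SP into @a idxRegEffSp and advances the low 16 bits of @a idxRegRsp by @a cbMem,
 * leaving the upper RSP bits untouched. @a idxRegTmp is only used by the ARM64 code path. */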
8556DECL_FORCE_INLINE_THROW(uint32_t)
8557iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8558 uint8_t idxRegTmp)
8559{
8560 /* Use16BitSp: */
8561#ifdef RT_ARCH_AMD64
8562 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8563 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8564 RT_NOREF(idxRegTmp);
8565#else
8566 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8567 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8568 /* add tmp, regrsp, #cbMem */
8569 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8570 /* and tmp, tmp, #0xffff */
8571 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8572 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8573 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8574 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8575#endif
8576 return off;
8577}
8578
8579
8580DECL_FORCE_INLINE(uint32_t)
8581iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8582{
8583 /* Use32BitSp: */
8584 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8585 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8586 return off;
8587}
8588
8589
8590/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8591DECL_INLINE_THROW(uint32_t)
8592iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
8593 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8594{
8595 /*
8596 * Assert sanity.
8597 */
8598 Assert(idxGReg < 16);
8599#ifdef VBOX_STRICT
8600 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8601 {
8602 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8603 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8604 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8605 Assert( pfnFunction
8606 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8607 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8608 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8609 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8610 : UINT64_C(0xc000b000a0009000) ));
8611 }
8612 else
8613 Assert( pfnFunction
8614 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8615 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8616 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8617 : UINT64_C(0xc000b000a0009000) ));
8618#endif
8619
8620#ifdef VBOX_STRICT
8621 /*
8622 * Check that the fExec flags we've got make sense.
8623 */
8624 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8625#endif
8626
8627 /*
8628 * To keep things simple we have to commit any pending writes first as we
8629 * may end up making calls.
8630 */
8631 off = iemNativeRegFlushPendingWrites(pReNative, off);
8632
8633 /*
8634 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
8635 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8636 * directly as the effective stack pointer.
8637 * (Code structure is very similar to that of PUSH)
8638 */
8639 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8640 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8641 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8642 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8643 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8644 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8645 * will be the resulting register value. */
8646 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
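    /* Register roles from here on: idxRegRsp holds the guest RSP (updated in place),
       idxRegEffSp the effective address used for the read, and idxRegMemResult first the
       TLB-lookup result and then the popped value (the TLB-miss helper returns the value
       directly). */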
8647
8648 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8649 if (cBitsFlat != 0)
8650 {
8651 Assert(idxRegEffSp == idxRegRsp);
8652 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8653 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8654 }
8655 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8656 {
8657 Assert(idxRegEffSp != idxRegRsp);
8658 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8659 kIemNativeGstRegUse_ReadOnly);
8660#ifdef RT_ARCH_AMD64
8661 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8662#else
8663 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8664#endif
8665 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8666 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8667 offFixupJumpToUseOtherBitSp = off;
8668 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8669 {
8670/** @todo can skip idxRegRsp updating when popping ESP. */
8671 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8672 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8673 }
8674 else
8675 {
8676 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8677 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8678 }
8679 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8680 }
8681 /* SpUpdateEnd: */
8682 uint32_t const offLabelSpUpdateEnd = off;
8683
8684 /*
8685 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8686 * we're skipping lookup).
8687 */
8688 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8689 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8690 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8691 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8692 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8693 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8694 : UINT32_MAX;
8695
8696 if (!TlbState.fSkip)
8697 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8698 else
8699 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8700
8701 /*
8702     * UseOtherBitSp (whichever SP width wasn't handled inline above):
8703 */
8704 if (cBitsFlat == 0)
8705 {
8706#ifdef RT_ARCH_AMD64
8707 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8708#else
8709 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8710#endif
8711 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8712 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8713 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8714 else
8715 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8716 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8717 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8718 }
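    /* Note: the block above is only reached via the conditional jump emitted after the
       SS.ATTR.D test; it updates SP using the other operand size and jumps back to
       SpUpdateEnd, which in turn dispatches to the TLB lookup or miss code. */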
8719
8720 /*
8721 * TlbMiss:
8722 *
8723     * Call helper to do the fetching.
8724 */
8725 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8726
8727#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8728 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8729#else
8730 RT_NOREF(idxInstr);
8731#endif
8732
8733 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8734 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8735 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8736 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8737
8738
8739 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8740 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8741 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8742
8743 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8745
8746 /* Done setting up parameters, make the call. */
8747 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8748
8749 /* Move the return register content to idxRegMemResult. */
8750 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8751 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8752
8753 /* Restore variables and guest shadow registers to volatile registers. */
8754 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8755 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8756
8757#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8758 if (!TlbState.fSkip)
8759 {
8760 /* end of TlbMiss - Jump to the done label. */
8761 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8762 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8763
8764 /*
8765 * TlbLookup:
8766 */
8767 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8768 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8769
8770 /*
8771         * Emit code to load the value (address in idxRegMemResult, the loaded value replaces it).
8772 */
8773 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8774# ifdef IEM_WITH_TLB_STATISTICS
8775 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8776 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8777# endif
8778 switch (cbMem)
8779 {
8780 case 2:
8781 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8782 break;
8783 case 4:
8784 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8785 break;
8786 case 8:
8787 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8788 break;
8789 default:
8790 AssertFailed();
8791 }
8792
8793 TlbState.freeRegsAndReleaseVars(pReNative);
8794
8795 /*
8796 * TlbDone:
8797 *
8798         * Set the new RSP value (FLAT accesses need to calculate it first) and
8799 * commit the popped register value.
8800 */
8801 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8802 }
8803#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8804
8805 if (idxGReg != X86_GREG_xSP)
8806 {
8807 /* Set the register. */
8808 if (cbMem >= sizeof(uint32_t))
8809 {
8810#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8811 AssertMsg( pReNative->idxCurCall == 0
8812 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8813 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8814 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8815#endif
8816 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8817#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8818 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8819#endif
8820#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8821 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8822 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8823#endif
8824 }
8825 else
8826 {
8827 Assert(cbMem == sizeof(uint16_t));
8828 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8829 kIemNativeGstRegUse_ForUpdate);
8830 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8831#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8832 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8833#endif
8834 iemNativeRegFreeTmp(pReNative, idxRegDst);
8835 }
8836
8837 /* Complete RSP calculation for FLAT mode. */
8838 if (idxRegEffSp == idxRegRsp)
8839 {
8840 if (cBitsFlat == 64)
8841 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8842 else
8843 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8844 }
8845 }
8846 else
8847 {
8848        /* We're popping RSP, ESP or SP. Only the latter needs a bit of extra work, of course. */
8849 if (cbMem == sizeof(uint64_t))
8850 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8851 else if (cbMem == sizeof(uint32_t))
8852 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8853 else
8854 {
8855 if (idxRegEffSp == idxRegRsp)
8856 {
8857 if (cBitsFlat == 64)
8858 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8859 else
8860 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8861 }
8862 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8863 }
8864 }
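    /* Note: in the 16-bit case RSP was already advanced (either just above for FLAT or by
       the Use16BitSp/Use32BitSp path), so only the low 16 bits are replaced by the popped
       value via the merge above; for the 32/64-bit cases the popped value simply becomes
       the new stack pointer. */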
8865
8866#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8867 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8868#endif
8869
8870 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8871 if (idxRegEffSp != idxRegRsp)
8872 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8873 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8874
8875 return off;
8876}
8877
8878
8879
8880/*********************************************************************************************************************************
8881* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8882*********************************************************************************************************************************/
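/* Note: all the wrappers below follow the same pattern: fAlignMaskAndCtl is the natural
   alignment mask of the access (size - 1, 0 for byte accesses; the 80-bit types use an
   8-byte mask), and the helper suffix matches the access type, e.g. IEM_ACCESS_DATA_RW
   pairs with a ...Rw helper and IEM_ACCESS_DATA_ATOMIC with an ...Atomic one. */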
8883
8884#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8885 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8886 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8887 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8888
8889#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8890 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8891 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8892 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8893
8894#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8895 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8896 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8897 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8898
8899#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8900 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8901 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8902 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8903
8904
8905#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8906 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8907 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8908 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8909
8910#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8911 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8912 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8913 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8914
8915#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8916 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8917 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8918 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8919
8920#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8921 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8922 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8923 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8924
8925#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8926 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8927 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8928 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8929
8930
8931#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8932 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8933 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8934 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8935
8936#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8937 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8938 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8939 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8940
8941#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8942 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8943 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8944 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8945
8946#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8947 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8948 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8949 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8950
8951#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8952 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8953 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8954 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8955
8956
8957#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8958 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8959 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8960 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8961
8962#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8963 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8964 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8965 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8966#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8967 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8968 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8969 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8970
8971#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8972 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8973 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8974 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8975
8976#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8977 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8978 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8979 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8980
8981
8982#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8983 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8984 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8985 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8986
8987#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8988 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8989 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8990 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8991
8992
8993#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8994 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8995 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8996 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8997
8998#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8999 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9000 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9001 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
9002
9003#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9004 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9005 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9006 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
9007
9008#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9009 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9010 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9011 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
9012
9013
9014
9015#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9016 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9017 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
9018 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
9019
9020#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9021 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9022 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
9023 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
9024
9025#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9026 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9027 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
9028 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9029
9030#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9031 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9032 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
9033 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9034
9035
9036#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9037 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9038 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9039 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9040
9041#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9042 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9043 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9044 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9045
9046#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9047 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9048 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9049 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9050
9051#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9052 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9053 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9054 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9055
9056#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9057 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
9058 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9059 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9060
9061
9062#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9063 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9064 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9065 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9066
9067#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9068 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9069 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9070 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9071
9072#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9073 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9074 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9075 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9076
9077#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9078 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9079 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9080 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9081
9082#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9083 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
9084 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9085 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9086
9087
9088#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9089 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9090 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9091 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9092
9093#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9094 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9095 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9096 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9097
9098#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9099 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9100 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9101 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9102
9103#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9104 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9105 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9106 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9107
9108#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9109 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
9110 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9111 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9112
9113
9114#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9115 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9116 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9117 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9118
9119#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9120 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9121 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
9122 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9123
9124
9125#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9126 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9127 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9128 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9129
9130#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9131 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9132 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9133 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9134
9135#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9136 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9137 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9138 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9139
9140#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9141 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9142 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9143 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9144
9145
9146DECL_INLINE_THROW(uint32_t)
9147iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9148 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
9149 uintptr_t pfnFunction, uint8_t idxInstr)
9150{
9151 /*
9152 * Assert sanity.
9153 */
9154 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9155 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9156 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9157 && pVarMem->cbVar == sizeof(void *),
9158 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9159
9160 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9161 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9162 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9163 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9164 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9165
9166 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9167 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9168 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9169 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9170 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9171
9172 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9173
9174 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9175
9176#ifdef VBOX_STRICT
9177# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9178 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9179 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9180 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9181 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9182# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9183 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9184 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9185 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
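/* Example: IEM_MAP_HLP_FN(IEM_ACCESS_DATA_RW, iemNativeHlpMemFlatMapDataU32) resolves to
   (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, matching the IEM_MC_MEM_FLAT_MAP_U32_RW
   wrapper above. */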
9186
9187 if (iSegReg == UINT8_MAX)
9188 {
9189 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9190 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9191 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9192 switch (cbMem)
9193 {
9194 case 1:
9195 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
9196 Assert(!fAlignMaskAndCtl);
9197 break;
9198 case 2:
9199 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
9200 Assert(fAlignMaskAndCtl < 2);
9201 break;
9202 case 4:
9203 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
9204 Assert(fAlignMaskAndCtl < 4);
9205 break;
9206 case 8:
9207 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
9208 Assert(fAlignMaskAndCtl < 8);
9209 break;
9210 case 10:
9211 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9212 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9213 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9214 Assert(fAlignMaskAndCtl < 8);
9215 break;
9216 case 16:
9217 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
9218 Assert(fAlignMaskAndCtl < 16);
9219 break;
9220# if 0
9221 case 32:
9222 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
9223 Assert(fAlignMaskAndCtl < 32);
9224 break;
9225 case 64:
9226 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
9227 Assert(fAlignMaskAndCtl < 64);
9228 break;
9229# endif
9230 default: AssertFailed(); break;
9231 }
9232 }
9233 else
9234 {
9235 Assert(iSegReg < 6);
9236 switch (cbMem)
9237 {
9238 case 1:
9239 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
9240 Assert(!fAlignMaskAndCtl);
9241 break;
9242 case 2:
9243 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
9244 Assert(fAlignMaskAndCtl < 2);
9245 break;
9246 case 4:
9247 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
9248 Assert(fAlignMaskAndCtl < 4);
9249 break;
9250 case 8:
9251 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
9252 Assert(fAlignMaskAndCtl < 8);
9253 break;
9254 case 10:
9255 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9256 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9257 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9258 Assert(fAlignMaskAndCtl < 8);
9259 break;
9260 case 16:
9261 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
9262 Assert(fAlignMaskAndCtl < 16);
9263 break;
9264# if 0
9265 case 32:
9266 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
9267 Assert(fAlignMaskAndCtl < 32);
9268 break;
9269 case 64:
9270 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
9271 Assert(fAlignMaskAndCtl < 64);
9272 break;
9273# endif
9274 default: AssertFailed(); break;
9275 }
9276 }
9277# undef IEM_MAP_HLP_FN
9278# undef IEM_MAP_HLP_FN_NO_AT
9279#endif
9280
9281#ifdef VBOX_STRICT
9282 /*
9283 * Check that the fExec flags we've got make sense.
9284 */
9285 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9286#endif
9287
9288 /*
9289 * To keep things simple we have to commit any pending writes first as we
9290 * may end up making calls.
9291 */
9292 off = iemNativeRegFlushPendingWrites(pReNative, off);
9293
9294#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9295 /*
9296 * Move/spill/flush stuff out of call-volatile registers.
9297 * This is the easy way out. We could contain this to the tlb-miss branch
9298 * by saving and restoring active stuff here.
9299 */
9300 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9301 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9302#endif
9303
9304 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9305 while the tlb-miss codepath will temporarily put it on the stack.
9306       Set the type to stack here so we don't need to do it twice below. */
9307 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9308 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9309 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9310 * lookup is done. */
9311
9312 /*
9313 * Define labels and allocate the result register (trying for the return
9314 * register if we can).
9315 */
9316 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9317 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9318 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9319 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
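    /* Note: preferring IEMNATIVE_CALL_RET_GREG for the result lets the TLB-miss path skip
       the extra register move after the helper call (see the idxRegMemResult check below). */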
9320 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
9321 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9322 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9323 : UINT32_MAX;
9324
9325 /*
9326 * Jump to the TLB lookup code.
9327 */
9328 if (!TlbState.fSkip)
9329 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9330
9331 /*
9332 * TlbMiss:
9333 *
9334 * Call helper to do the fetching.
9335 * We flush all guest register shadow copies here.
9336 */
9337 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9338
9339#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9340 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9341#else
9342 RT_NOREF(idxInstr);
9343#endif
9344
9345#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9346 /* Save variables in volatile registers. */
9347 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9348 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9349#endif
9350
9351 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9352 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
9353#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9354 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9355#else
9356 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9357#endif
9358
9359 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9360 if (iSegReg != UINT8_MAX)
9361 {
9362 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9363 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9364 }
9365
9366 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9367 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9368 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9369
9370 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9371 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9372
9373 /* Done setting up parameters, make the call. */
9374 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9375
9376 /*
9377 * Put the output in the right registers.
9378 */
9379 Assert(idxRegMemResult == pVarMem->idxReg);
9380 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9381 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9382
9383#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9384 /* Restore variables and guest shadow registers to volatile registers. */
9385 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9386 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9387#endif
9388
9389 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9390 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9391
9392#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9393 if (!TlbState.fSkip)
9394 {
9395        /* end of TlbMiss - Jump to the done label. */
9396 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9397 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9398
9399 /*
9400 * TlbLookup:
9401 */
9402 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
9403 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
9404# ifdef IEM_WITH_TLB_STATISTICS
9405 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9406 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9407# endif
9408
9409 /* [idxVarUnmapInfo] = 0; */
9410 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9411
9412 /*
9413 * TlbDone:
9414 */
9415 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9416
9417 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9418
9419# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9420 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9421 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9422# endif
9423 }
9424#else
9425 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
9426#endif
9427
9428 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9429 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9430
9431 return off;
9432}
9433
9434
9435#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9436 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
9437 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
9438
9439#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9440 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
9441 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
9442
9443#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9444 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
9445 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
9446
9447#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9448 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
9449 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
9450
9451DECL_INLINE_THROW(uint32_t)
9452iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9453 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
9454{
9455 /*
9456 * Assert sanity.
9457 */
9458 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9459#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9460 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9461#endif
9462 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9463 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9464 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9465#ifdef VBOX_STRICT
9466 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9467 {
9468 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9469 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9470 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9471 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9472 case IEM_ACCESS_TYPE_WRITE:
9473 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9474 case IEM_ACCESS_TYPE_READ:
9475 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9476 default: AssertFailed();
9477 }
9478#else
9479 RT_NOREF(fAccess);
9480#endif
9481
9482 /*
9483 * To keep things simple we have to commit any pending writes first as we
9484 * may end up making calls (there shouldn't be any at this point, so this
9485 * is just for consistency).
9486 */
9487 /** @todo we could postpone this till we make the call and reload the
9488 * registers after returning from the call. Not sure if that's sensible or
9489 * not, though. */
9490 off = iemNativeRegFlushPendingWrites(pReNative, off);
9491
9492 /*
9493 * Move/spill/flush stuff out of call-volatile registers.
9494 *
9495 * We exclude any register holding the bUnmapInfo variable, as we'll be
9496 * checking it after returning from the call and will free it afterwards.
9497 */
9498 /** @todo save+restore active registers and maybe guest shadows in miss
9499 * scenario. */
9500 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9501 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9502
9503 /*
9504 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9505 * to call the unmap helper function.
9506 *
9507     * The likelihood of it being zero is higher than for the TLB hit when doing
9508     * the mapping, as a TLB miss for a well aligned and unproblematic memory
9509 * access should also end up with a mapping that won't need special unmapping.
9510 */
9511 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9512 * should speed up things for the pure interpreter as well when TLBs
9513 * are enabled. */
9514#ifdef RT_ARCH_AMD64
9515 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9516 {
9517 /* test byte [rbp - xxx], 0ffh */
9518 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9519 pbCodeBuf[off++] = 0xf6;
9520 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9521 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9522 pbCodeBuf[off++] = 0xff;
9523 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9524 }
9525 else
9526#endif
9527 {
9528 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
9529 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
9530 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9531 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9532 }
9533 uint32_t const offJmpFixup = off;
9534 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
9535
9536 /*
9537 * Call the unmap helper function.
9538 */
9539#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9540 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9541#else
9542 RT_NOREF(idxInstr);
9543#endif
9544
9545 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9546 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9547 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9548
9549 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9550 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9551
9552 /* Done setting up parameters, make the call. */
9553 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9554
9555    /* The bUnmapInfo variable is implicitly freed by these MCs. */
9556 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9557
9558 /*
9559 * Done, just fixup the jump for the non-call case.
9560 */
9561 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9562
9563 return off;
9564}
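/* Illustrative pairing (not taken from this file; variable names are made up): an MC block
 * maps the memory, operates on it and then commits, threading the same bUnmapInfo through:
 *     IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
 *     ... operate on *pu32Dst ...
 *     IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 */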
9565
9566
9567
9568/*********************************************************************************************************************************
9569* State and Exceptions *
9570*********************************************************************************************************************************/
9571
9572#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9573#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9574
9575#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9576#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9577#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9578
9579#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9580#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9581#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
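/* Note: all of the state actualization MCs above funnel into the same emitter below; it
   only emits code when the native SIMD FP emitters are enabled and the state is being
   prepared for modification (fForChange). */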
9582
9583
9584DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9585{
9586#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9587 RT_NOREF(pReNative, fForChange);
9588#else
9589 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9590 && fForChange)
9591 {
9592# ifdef RT_ARCH_AMD64
9593
9594 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9595 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9596 {
9597 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9598
9599 /* stmxcsr */
9600 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9601 pbCodeBuf[off++] = X86_OP_REX_B;
9602 pbCodeBuf[off++] = 0x0f;
9603 pbCodeBuf[off++] = 0xae;
9604 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9605 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9606 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9607 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9608 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9609 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9610
9611 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9612 }
9613
9614 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9615 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9616
9617 /*
9618         * Mask all exceptions, clear the exception status flags and load the result into MXCSR,
9619 * taking a detour through memory here because ldmxcsr/stmxcsr don't support
9620 * a register source/target (sigh).
9621 */
9622 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9623 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9624 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9625 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9626
9627 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9628
9629 /* ldmxcsr */
9630 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9631 pbCodeBuf[off++] = X86_OP_REX_B;
9632 pbCodeBuf[off++] = 0x0f;
9633 pbCodeBuf[off++] = 0xae;
9634 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9635 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9636 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9637 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9638 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9639 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9640
9641 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9642 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9643
9644# elif defined(RT_ARCH_ARM64)
9645 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9646
9647 /* Need to save the host floating point control register the first time, clear FPSR. */
9648 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9649 {
9650 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9651 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9652 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9653 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9654 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9655 }
9656
9657 /*
9658 * Translate MXCSR to FPCR.
9659 *
9660 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9661         * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9662 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9663         * We can only use FPCR.FZ, which flushes both input _and_ output de-normals to zero.
9664 */
9665 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9666 * and implement alternate handling if FEAT_AFP is present. */
9667 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9668
9669 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9670
9671 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
9672 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9673
9674 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9675 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9676 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9677 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9678 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9679
9680 /*
9681 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
9682 *
9683 * Value MXCSR FPCR
9684 * 0 RN RN
9685 * 1 R- R+
9686 * 2 R+ R-
9687 * 3 RZ RZ
9688 *
9689 * Conversion can be achieved by switching bit positions
9690 */
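        /* E.g. MXCSR.RC = 01b (round down) must become FPCR.RMode = 10b (round towards
           minus infinity), so swapping the two rounding-control bits converts either way. */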
9691 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9692 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9693 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9694 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
9695
9696 /* Write the value to FPCR. */
9697 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9698
9699 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9700 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9701 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9702# else
9703# error "Port me"
9704# endif
9705 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9706 }
9707#endif
9708 return off;
9709}
9710
9711
9712
9713/*********************************************************************************************************************************
9714* Emitters for FPU related operations. *
9715*********************************************************************************************************************************/
9716
9717#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9718 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9719
9720/** Emits code for IEM_MC_FETCH_FCW. */
9721DECL_INLINE_THROW(uint32_t)
9722iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9723{
9724 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9725 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9726
9727 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9728
9729 /* Allocate a temporary FCW register. */
9730 /** @todo eliminate extra register */
9731 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9732 kIemNativeGstRegUse_ReadOnly);
9733
9734 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9735
9736 /* Free but don't flush the FCW register. */
9737 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9738 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9739
9740 return off;
9741}
9742
9743
9744#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9745 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9746
9747/** Emits code for IEM_MC_FETCH_FSW. */
9748DECL_INLINE_THROW(uint32_t)
9749iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9750{
9751 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9752 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9753
9754 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
9755 /* Allocate a temporary FSW register. */
9756 /** @todo eliminate extra register */
9757 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9758 kIemNativeGstRegUse_ReadOnly);
9759
9760 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9761
9762 /* Free but don't flush the FSW register. */
9763 iemNativeRegFreeTmp(pReNative, idxFswReg);
9764 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9765
9766 return off;
9767}
9768
9769
9770
9771#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9772
9773
9774/*********************************************************************************************************************************
9775* Emitters for SSE/AVX specific operations. *
9776*********************************************************************************************************************************/
9777
9778#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9779 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9780
9781/** Emits code for IEM_MC_COPY_XREG_U128. */
9782DECL_INLINE_THROW(uint32_t)
9783iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9784{
9785 /* This is a nop if the source and destination registers are the same. */
9786 if (iXRegDst != iXRegSrc)
9787 {
9788 /* Allocate destination and source register. */
9789 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9790 kIemNativeGstSimdRegLdStSz_Low128,
9791 kIemNativeGstRegUse_ForFullWrite);
9792 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9793 kIemNativeGstSimdRegLdStSz_Low128,
9794 kIemNativeGstRegUse_ReadOnly);
9795
9796 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9797
9798 /* Free but don't flush the source and destination register. */
9799 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9800 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9801 }
9802
9803 return off;
9804}
9805
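/* Usage sketch, illustrative only: a register-to-register 128-bit move (movaps/movdqa
 * style) would typically be recompiled from
 *     IEM_MC_COPY_XREG_U128(IEM_GET_MODRM_REG(pVCpu, bRm), IEM_GET_MODRM_RM(pVCpu, bRm));
 * so the emitter above only needs to copy between two host SIMD registers, or emit
 * nothing at all when both ModR/M operands name the same XMM register. */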
9806
9807#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9808 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9809
9810/** Emits code for IEM_MC_FETCH_XREG_U128. */
9811DECL_INLINE_THROW(uint32_t)
9812iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9813{
9814 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9815 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9816
9817 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9818 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9819
9820 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9821
9822 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9823
9824 /* Free but don't flush the source register. */
9825 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9826 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9827
9828 return off;
9829}
9830
9831
9832#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9833 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9834
9835#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9836 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9837
9838/** Emits code for IEM_MC_FETCH_XREG_U64. */
9839DECL_INLINE_THROW(uint32_t)
9840iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9841{
9842 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9843 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9844
9845 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9846 kIemNativeGstSimdRegLdStSz_Low128,
9847 kIemNativeGstRegUse_ReadOnly);
9848
9849 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9850 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9851
9852 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9853
9854 /* Free but don't flush the source register. */
9855 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9856 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9857
9858 return off;
9859}
9860
9861
9862#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9863 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9864
9865#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9866 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9867
9868/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9869DECL_INLINE_THROW(uint32_t)
9870iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9871{
9872 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9873 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9874
9875 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9876 kIemNativeGstSimdRegLdStSz_Low128,
9877 kIemNativeGstRegUse_ReadOnly);
9878
9879 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9880 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9881
9882 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9883
9884 /* Free but don't flush the source register. */
9885 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9886 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9887
9888 return off;
9889}
9890
9891
9892#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9893 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9894
9895/** Emits code for IEM_MC_FETCH_XREG_U16. */
9896DECL_INLINE_THROW(uint32_t)
9897iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9898{
9899 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9900 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9901
9902 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9903 kIemNativeGstSimdRegLdStSz_Low128,
9904 kIemNativeGstRegUse_ReadOnly);
9905
9906 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9907 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9908
9909 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9910
9911 /* Free but don't flush the source register. */
9912 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9913 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9914
9915 return off;
9916}
9917
9918
9919#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9920 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9921
9922/** Emits code for IEM_MC_FETCH_XREG_U8. */
9923DECL_INLINE_THROW(uint32_t)
9924iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9925{
9926 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9927 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9928
9929 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9930 kIemNativeGstSimdRegLdStSz_Low128,
9931 kIemNativeGstRegUse_ReadOnly);
9932
9933 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9934 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9935
9936 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9937
9938 /* Free but don't flush the source register. */
9939 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9940 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9941
9942 return off;
9943}
9944
9945
9946#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9947 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9948
9949AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9950#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9951 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9952
9953
9954/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9955DECL_INLINE_THROW(uint32_t)
9956iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9957{
9958 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9959 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9960
9961 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9962 kIemNativeGstSimdRegLdStSz_Low128,
9963 kIemNativeGstRegUse_ForFullWrite);
9964 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9965
9966 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9967
9968 /* Free but don't flush the source register. */
9969 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9970 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9971
9972 return off;
9973}
9974
9975
9976#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9977 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9978
9979#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9980 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9981
9982#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9983 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9984
9985#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9986 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9987
9988#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9989 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9990
9991#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9992 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9993
9994 /** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 and the IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64 variants. */
9995DECL_INLINE_THROW(uint32_t)
9996iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9997 uint8_t cbLocal, uint8_t iElem)
9998{
9999 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10000 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
10001
10002#ifdef VBOX_STRICT
10003 switch (cbLocal)
10004 {
10005 case sizeof(uint64_t): Assert(iElem < 2); break;
10006 case sizeof(uint32_t): Assert(iElem < 4); break;
10007 case sizeof(uint16_t): Assert(iElem < 8); break;
10008 case sizeof(uint8_t): Assert(iElem < 16); break;
10009 default: AssertFailed();
10010 }
10011#endif
10012
10013 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10014 kIemNativeGstSimdRegLdStSz_Low128,
10015 kIemNativeGstRegUse_ForUpdate);
10016 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10017
10018 switch (cbLocal)
10019 {
10020 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10021 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10022 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10023 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10024 default: AssertFailed();
10025 }
10026
10027 /* Free but don't flush the source register. */
10028 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10029 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10030
10031 return off;
10032}
10033
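/* Element indexing note: iElem counts elements of cbLocal bytes from the low end of the
 * 128-bit register, so e.g. IEM_MC_STORE_XREG_U32(iXReg, 2, u32Value) overwrites bits
 * 95:64 only; the other elements survive because the guest register is allocated with
 * kIemNativeGstRegUse_ForUpdate rather than for a full write. */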
10034
10035#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
10036 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
10037
10038/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10039DECL_INLINE_THROW(uint32_t)
10040iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10041{
10042 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10043 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10044
10045 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10046 kIemNativeGstSimdRegLdStSz_Low128,
10047 kIemNativeGstRegUse_ForUpdate);
10048 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10049
10050 /* Zero the vector register first, then store the 64-bit value in the low 64 bits. */
10051 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10052 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10053
10054 /* Free but don't flush the source register. */
10055 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10056 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10057
10058 return off;
10059}
10060
10061
10062#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10063 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10064
10065/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10066DECL_INLINE_THROW(uint32_t)
10067iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10068{
10069 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10070 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10071
10072 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10073 kIemNativeGstSimdRegLdStSz_Low128,
10074 kIemNativeGstRegUse_ForUpdate);
10075 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10076
10077 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10078 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10079 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10080
10081 /* Free but don't flush the source register. */
10082 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10083 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10084
10085 return off;
10086}
10087
10088
10089#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10090 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10091
10092/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10093DECL_INLINE_THROW(uint32_t)
10094iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10095 uint8_t idxSrcVar, uint8_t iDwSrc)
10096{
10097 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10098 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10099
10100 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10101 kIemNativeGstSimdRegLdStSz_Low128,
10102 kIemNativeGstRegUse_ForUpdate);
10103 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10104
10105 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10106 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10107
10108 /* Free but don't flush the destination register. */
10109 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10110 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10111
10112 return off;
10113}
10114
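/* Example: with a_iDwDst=0 and a_iDwSrc=2 the third dword of a_u128Value ends up in
 * bits 31:0 of the destination XMM register, going via IEMNATIVE_REG_FIXED_TMP0; the
 * remaining dwords are preserved since the destination is allocated for update. */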
10115
10116#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10117 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10118
10119/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10120DECL_INLINE_THROW(uint32_t)
10121iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10122{
10123 /*
10124 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10125 * if iYRegDst gets allocated first for the full write it won't load the
10126 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10127 * duplicated from the already allocated host register for iYRegDst containing
10128 * garbage. This will be caught by the guest register value checking in debug
10129 * builds.
10130 */
10131 if (iYRegDst != iYRegSrc)
10132 {
10133 /* Allocate destination and source register. */
10134 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10135 kIemNativeGstSimdRegLdStSz_256,
10136 kIemNativeGstRegUse_ForFullWrite);
10137 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10138 kIemNativeGstSimdRegLdStSz_Low128,
10139 kIemNativeGstRegUse_ReadOnly);
10140
10141 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10142 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10143
10144 /* Free but don't flush the source and destination register. */
10145 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10146 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10147 }
10148 else
10149 {
10150 /* This effectively only clears the upper 128 bits of the register. */
10151 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10152 kIemNativeGstSimdRegLdStSz_High128,
10153 kIemNativeGstRegUse_ForFullWrite);
10154
10155 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10156
10157 /* Free but don't flush the destination register. */
10158 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10159 }
10160
10161 return off;
10162}
10163
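/* Usage sketch, illustrative only: VEX.128 encoded register moves (a vmovaps
 * xmm1, xmm2 style instruction) map to IEM_MC_COPY_YREG_U128_ZX_VLMAX, copying the low
 * 128 bits and zeroing bits 255:128 of the destination as AVX requires. */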
10164
10165#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10166 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10167
10168/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10169DECL_INLINE_THROW(uint32_t)
10170iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10171{
10172 /*
10173 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10174 * if iYRegDst gets allocated first for the full write it won't load the
10175 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10176 * duplicated from the already allocated host register for iYRegDst containing
10177 * garbage. This will be caught by the guest register value checking in debug
10178 * builds. The iYRegSrc == iYRegDst case would effectively only clear bits 511:256
10179 * of a ZMM register, which we don't support yet, so it is just a nop.
10180 */
10181 if (iYRegDst != iYRegSrc)
10182 {
10183 /* Allocate destination and source register. */
10184 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10185 kIemNativeGstSimdRegLdStSz_256,
10186 kIemNativeGstRegUse_ReadOnly);
10187 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10188 kIemNativeGstSimdRegLdStSz_256,
10189 kIemNativeGstRegUse_ForFullWrite);
10190
10191 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10192
10193 /* Free but don't flush the source and destination register. */
10194 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10195 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10196 }
10197
10198 return off;
10199}
10200
10201
10202#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10203 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10204
10205/** Emits code for IEM_MC_FETCH_YREG_U128. */
10206DECL_INLINE_THROW(uint32_t)
10207iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10208{
10209 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10210 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10211
10212 Assert(iDQWord <= 1);
10213 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10214 iDQWord == 1
10215 ? kIemNativeGstSimdRegLdStSz_High128
10216 : kIemNativeGstSimdRegLdStSz_Low128,
10217 kIemNativeGstRegUse_ReadOnly);
10218
10219 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10220 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10221
10222 if (iDQWord == 1)
10223 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10224 else
10225 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10226
10227 /* Free but don't flush the source register. */
10228 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10229 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10230
10231 return off;
10232}
10233
10234
10235#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10236 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10237
10238/** Emits code for IEM_MC_FETCH_YREG_U64. */
10239DECL_INLINE_THROW(uint32_t)
10240iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10241{
10242 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10243 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10244
10245 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10246 iQWord >= 2
10247 ? kIemNativeGstSimdRegLdStSz_High128
10248 : kIemNativeGstSimdRegLdStSz_Low128,
10249 kIemNativeGstRegUse_ReadOnly);
10250
10251 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10252 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10253
10254 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10255
10256 /* Free but don't flush the source register. */
10257 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10258 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10259
10260 return off;
10261}
10262
10263
10264#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10265 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10266
10267/** Emits code for IEM_MC_FETCH_YREG_U32. */
10268DECL_INLINE_THROW(uint32_t)
10269iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10270{
10271 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10272 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10273
10274 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10275 iDWord >= 4
10276 ? kIemNativeGstSimdRegLdStSz_High128
10277 : kIemNativeGstSimdRegLdStSz_Low128,
10278 kIemNativeGstRegUse_ReadOnly);
10279
10280 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10281 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10282
10283 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10284
10285 /* Free but don't flush the source register. */
10286 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10287 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10288
10289 return off;
10290}
10291
10292
10293#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10294 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10295
10296/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10297DECL_INLINE_THROW(uint32_t)
10298iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10299{
10300 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10301 kIemNativeGstSimdRegLdStSz_High128,
10302 kIemNativeGstRegUse_ForFullWrite);
10303
10304 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10305
10306 /* Free but don't flush the register. */
10307 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10308
10309 return off;
10310}
10311
10312
10313#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10314 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10315
10316/** Emits code for IEM_MC_STORE_YREG_U128. */
10317DECL_INLINE_THROW(uint32_t)
10318iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10319{
10320 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10321 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10322
10323 Assert(iDQword <= 1);
10324 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10325 iDQword == 0
10326 ? kIemNativeGstSimdRegLdStSz_Low128
10327 : kIemNativeGstSimdRegLdStSz_High128,
10328 kIemNativeGstRegUse_ForFullWrite);
10329
10330 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10331
10332 if (iDQword == 0)
10333 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10334 else
10335 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10336
10337 /* Free but don't flush the source register. */
10338 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10339 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10340
10341 return off;
10342}
10343
10344
10345#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10346 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10347
10348/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10349DECL_INLINE_THROW(uint32_t)
10350iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10351{
10352 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10353 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10354
10355 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10356 kIemNativeGstSimdRegLdStSz_256,
10357 kIemNativeGstRegUse_ForFullWrite);
10358
10359 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10360
10361 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10362 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10363
10364 /* Free but don't flush the source register. */
10365 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10366 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10367
10368 return off;
10369}
10370
10371
10372#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10373 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10374
10375/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10376DECL_INLINE_THROW(uint32_t)
10377iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10378{
10379 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10380 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10381
10382 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10383 kIemNativeGstSimdRegLdStSz_256,
10384 kIemNativeGstRegUse_ForFullWrite);
10385
10386 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10387
10388 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10389 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10390
10391 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10392 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10393
10394 return off;
10395}
10396
10397
10398#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10399 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10400
10401/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10402DECL_INLINE_THROW(uint32_t)
10403iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10404{
10405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10406 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10407
10408 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10409 kIemNativeGstSimdRegLdStSz_256,
10410 kIemNativeGstRegUse_ForFullWrite);
10411
10412 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10413
10414 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10415 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10416
10417 /* Free but don't flush the source register. */
10418 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10419 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10420
10421 return off;
10422}
10423
10424
10425#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10426 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10427
10428/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10429DECL_INLINE_THROW(uint32_t)
10430iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10431{
10432 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10433 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10434
10435 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10436 kIemNativeGstSimdRegLdStSz_256,
10437 kIemNativeGstRegUse_ForFullWrite);
10438
10439 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10440
10441 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10442 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10443
10444 /* Free but don't flush the source register. */
10445 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10446 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10447
10448 return off;
10449}
10450
10451
10452#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10453 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10454
10455/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10456DECL_INLINE_THROW(uint32_t)
10457iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10458{
10459 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10460 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10461
10462 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10463 kIemNativeGstSimdRegLdStSz_256,
10464 kIemNativeGstRegUse_ForFullWrite);
10465
10466 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10467
10468 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10469 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10470
10471 /* Free but don't flush the source register. */
10472 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10473 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10474
10475 return off;
10476}
10477
10478
10479#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10480 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10481
10482/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10483DECL_INLINE_THROW(uint32_t)
10484iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10485{
10486 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10487 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10488
10489 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10490 kIemNativeGstSimdRegLdStSz_256,
10491 kIemNativeGstRegUse_ForFullWrite);
10492
10493 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10494
10495 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10496
10497 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10498 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10499
10500 return off;
10501}
10502
10503
10504#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10505 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10506
10507/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10508DECL_INLINE_THROW(uint32_t)
10509iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10510{
10511 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10512 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10513
10514 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10515 kIemNativeGstSimdRegLdStSz_256,
10516 kIemNativeGstRegUse_ForFullWrite);
10517
10518 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10519
10520 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10521
10522 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10523 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10524
10525 return off;
10526}
10527
10528
10529#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10530 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10531
10532/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10533DECL_INLINE_THROW(uint32_t)
10534iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10535{
10536 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10537 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10538
10539 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10540 kIemNativeGstSimdRegLdStSz_256,
10541 kIemNativeGstRegUse_ForFullWrite);
10542
10543 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10544
10545 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10546
10547 /* Free but don't flush the source register. */
10548 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10549 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10550
10551 return off;
10552}
10553
10554
10555#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10556 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10557
10558/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10559DECL_INLINE_THROW(uint32_t)
10560iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10561{
10562 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10563 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10564
10565 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10566 kIemNativeGstSimdRegLdStSz_256,
10567 kIemNativeGstRegUse_ForFullWrite);
10568
10569 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10570
10571 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10572
10573 /* Free but don't flush the source register. */
10574 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10575 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10576
10577 return off;
10578}
10579
10580
10581#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10582 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10583
10584/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10585DECL_INLINE_THROW(uint32_t)
10586iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10587{
10588 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10589 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10590
10591 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10592 kIemNativeGstSimdRegLdStSz_256,
10593 kIemNativeGstRegUse_ForFullWrite);
10594
10595 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10596
10597 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10598
10599 /* Free but don't flush the source register. */
10600 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10601 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10602
10603 return off;
10604}
10605
10606
10607#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10608 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10609
10610/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10611DECL_INLINE_THROW(uint32_t)
10612iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10613{
10614 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10615 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10616
10617 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10618 kIemNativeGstSimdRegLdStSz_256,
10619 kIemNativeGstRegUse_ForFullWrite);
10620
10621 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10622
10623 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10624 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10625
10626 /* Free but don't flush the source register. */
10627 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10628 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10629
10630 return off;
10631}
10632
10633
10634#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10635 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10636
10637/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10638DECL_INLINE_THROW(uint32_t)
10639iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10640{
10641 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10642 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10643
10644 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10645 kIemNativeGstSimdRegLdStSz_256,
10646 kIemNativeGstRegUse_ForFullWrite);
10647
10648 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10649
10650 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10651 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10652
10653 /* Free but don't flush the source register. */
10654 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10655 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10656
10657 return off;
10658}
10659
10660
10661#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10662 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10663
10664/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10665DECL_INLINE_THROW(uint32_t)
10666iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10667{
10668 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10669 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10670
10671 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10672 kIemNativeGstSimdRegLdStSz_256,
10673 kIemNativeGstRegUse_ForFullWrite);
10674 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10675 kIemNativeGstSimdRegLdStSz_Low128,
10676 kIemNativeGstRegUse_ReadOnly);
10677 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10678
10679 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10680 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10681 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10682
10683 /* Free but don't flush the source and destination registers. */
10684 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10685 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10686 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10687
10688 return off;
10689}
10690
10691
10692#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10693 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10694
10695/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10696DECL_INLINE_THROW(uint32_t)
10697iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10698{
10699 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10700 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10701
10702 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10703 kIemNativeGstSimdRegLdStSz_256,
10704 kIemNativeGstRegUse_ForFullWrite);
10705 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10706 kIemNativeGstSimdRegLdStSz_Low128,
10707 kIemNativeGstRegUse_ReadOnly);
10708 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10709
10710 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10711 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10712 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10713
10714 /* Free but don't flush the source and destination registers. */
10715 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10716 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10717 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10718
10719 return off;
10720}
10721
10722
10723#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10724 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10725
10726
10727/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10728DECL_INLINE_THROW(uint32_t)
10729iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10730{
10731 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10732 kIemNativeGstSimdRegLdStSz_Low128,
10733 kIemNativeGstRegUse_ForUpdate);
10734
10735 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
10736 if (bImm8Mask & RT_BIT(0))
10737 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10738 if (bImm8Mask & RT_BIT(1))
10739 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10740 if (bImm8Mask & RT_BIT(2))
10741 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10742 if (bImm8Mask & RT_BIT(3))
10743 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10744
10745 /* Free but don't flush the destination register. */
10746 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10747
10748 return off;
10749}
10750
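/* Example: a_bMask = 0x5 zeroes dwords 0 and 2 (bits 31:0 and 95:64) and leaves dwords
 * 1 and 3 untouched; each of the four low mask bits selects one dword to clear. */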
10751
10752#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10753 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10754
10755#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10756 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10757
10758/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10759DECL_INLINE_THROW(uint32_t)
10760iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10761{
10762 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10763 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10764
10765 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10766 kIemNativeGstSimdRegLdStSz_256,
10767 kIemNativeGstRegUse_ReadOnly);
10768 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10769
10770 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10771
10772 /* Free but don't flush the source register. */
10773 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10774 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10775
10776 return off;
10777}
10778
10779
10780#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10781 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10782
10783#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10784 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10785
10786/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10787DECL_INLINE_THROW(uint32_t)
10788iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10789{
10790 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10791 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10792
10793 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10794 kIemNativeGstSimdRegLdStSz_256,
10795 kIemNativeGstRegUse_ForFullWrite);
10796 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10797
10798 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10799
10800 /* Free but don't flush the source register. */
10801 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10802 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10803
10804 return off;
10805}
10806
10807
10808#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10809 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10810
10811
10812/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10813DECL_INLINE_THROW(uint32_t)
10814iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10815 uint8_t idxSrcVar, uint8_t iDwSrc)
10816{
10817 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10818 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10819
10820 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10821 iDwDst < 4
10822 ? kIemNativeGstSimdRegLdStSz_Low128
10823 : kIemNativeGstSimdRegLdStSz_High128,
10824 kIemNativeGstRegUse_ForUpdate);
10825 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10826 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10827
10828 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10829 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10830
10831 /* Free but don't flush the destination and temporary registers. */
10832 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10833 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10834 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10835
10836 return off;
10837}
10838
10839
10840#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10841 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10842
10843
10844/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10845DECL_INLINE_THROW(uint32_t)
10846iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10847 uint8_t idxSrcVar, uint8_t iQwSrc)
10848{
10849 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10850 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10851
10852 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10853 iQwDst < 2
10854 ? kIemNativeGstSimdRegLdStSz_Low128
10855 : kIemNativeGstSimdRegLdStSz_High128,
10856 kIemNativeGstRegUse_ForUpdate);
10857 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10858 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10859
10860 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10861 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10862
10863 /* Free but don't flush the destination and temporary registers. */
10864 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10865 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10866 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10867
10868 return off;
10869}
10870
10871
10872#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10873 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10874
10875
10876/** Emits code for IEM_MC_STORE_YREG_U64. */
10877DECL_INLINE_THROW(uint32_t)
10878iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10879{
10880 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10881 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10882
10883 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10884 iQwDst < 2
10885 ? kIemNativeGstSimdRegLdStSz_Low128
10886 : kIemNativeGstSimdRegLdStSz_High128,
10887 kIemNativeGstRegUse_ForUpdate);
10888
10889 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10890
10891 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10892
10893 /* Free but don't flush the source register. */
10894 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10895 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10896
10897 return off;
10898}
10899
10900
10901#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10902 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10903
10904/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10905DECL_INLINE_THROW(uint32_t)
10906iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10907{
10908 RT_NOREF(pReNative, iYReg);
10909 /** @todo Needs to be implemented when support for AVX-512 is added. */
10910 return off;
10911}
10912
10913
10914
10915/*********************************************************************************************************************************
10916* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10917*********************************************************************************************************************************/
10918
10919/**
10920 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
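 *
 * Loads the guest MXCSR with the exception flags masked out into the first (hidden)
 * call argument, invokes the helper, ORs the returned MXCSR back into the guest
 * MXCSR value, and finally exits the TB to raise a SIMD floating-point exception if
 * any unmasked exception flag ended up set.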
10921 */
10922DECL_INLINE_THROW(uint32_t)
10923iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10924{
10925 /* Grab the MXCSR register; it must not be call-volatile or we would end up freeing it when setting up the call below. */
10926 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10927 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10928 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10929
10930#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10931 /*
10932 * Need to do the FPU preparation.
10933 */
10934 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10935#endif
10936
10937 /*
10938 * Do all the call setup and cleanup.
10939 */
10940 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10941 false /*fFlushPendingWrites*/);
10942
10943 /*
10944 * Load the MXCSR register into the first argument and mask out the current exception flags.
10945 */
10946 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10947 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
10948
10949 /*
10950 * Make the call.
10951 */
10952 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10953
10954 /*
10955 * The updated MXCSR is in the return register, update exception status flags.
10956 *
10957 * The return register is marked allocated as a temporary because it is required for the
10958 * exception generation check below.
10959 */
10960 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10961 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10962 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10963
10964#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10965 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10966 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10967#endif
10968
10969 /*
10970 * Make sure we don't have any outstanding guest register writes as we may
10971 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
10972 */
10973 off = iemNativeRegFlushPendingWrites(pReNative, off);
10974
10975#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10976 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10977#else
10978 RT_NOREF(idxInstr);
10979#endif
10980
10981 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
10982 * want to assume the existence of this instruction at the moment. */
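/* In C terms the sequence below computes (with uMxCsrNew being the helper return value)
 *     uMxCsrNew & X86_MXCSR_XCPT_FLAGS & ~((uMxCsrNew & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT)
 * i.e. the exception flags whose corresponding mask bits are clear; if any such bit is
 * set we exit to raise the SSE/AVX floating-point exception. */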
10983 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10984
10985 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
10986 /* tmp &= X86_MXCSR_XCPT_MASK */
10987 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
10988 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
10989 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
10990 /* tmp = ~tmp */
10991 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
10992 /* tmp &= mxcsr */
10993 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
10994 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
10995 kIemNativeLabelType_RaiseSseAvxFpRelated);
10996
10997 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
10998 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10999 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
11000
11001 return off;
11002}
11003
11004
11005#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
11006 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11007
11008/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
11009DECL_INLINE_THROW(uint32_t)
11010iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11011{
11012 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11013 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11014 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11015}
11016
11017
11018#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11019 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11020
11021/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
11022DECL_INLINE_THROW(uint32_t)
11023iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11024 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11025{
11026 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11027 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11028 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11029 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11030}


/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_AVX_AIMPL_XXX                                                                                       *
*********************************************************************************************************************************/

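/* Note (added commentary): like the SSE variants above, these wrappers only assert the argument
   variable indices (offset by IEM_AVX_AIMPL_HIDDEN_ARGS) and then defer to
   iemNativeEmitCallSseAvxAImplCommon, which is shared between the SSE and AVX paths; that common
   emitter reserves IEM_SSE_AIMPL_HIDDEN_ARGS hidden arguments, so the two constants are
   presumably expected to agree. */
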
#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
}


#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
                           uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
}


#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */


/*********************************************************************************************************************************
*   Include instruction emitters.                                                                                                *
*********************************************************************************************************************************/
#include "target-x86/IEMAllN8veEmit-x86.h"