VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@104797

Last change on this file: revision 104797, checked in by vboxsync, 7 months ago

VMM/IEM: Introduce special helpers for generating code to exit a TB in order to be able to experiment with different approaches more easily and convert the code emitters to make use of them, bugref:10677

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 481.8 KB
1/* $Id: IEMAllN8veRecompFuncs.h 104797 2024-05-28 05:50:30Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value; they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
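/** Starts an MC block: asserts that no variables, arguments or stack slots are
 * still live from a previous block, then records the MC flags, the CImpl flags
 * and the argument count (including hidden arguments) for the block. */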
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
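/** @note RT_ARCH_VAL is a compile-time constant identifying the host
 * architecture, so the compiler keeps only one of the two branches: the
 * native emitter path when the host is included in a_fSupportedHosts,
 * otherwise the generic IEM_MC fallback in the IEM_MC_NATIVE_ELSE() part. */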
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
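/* Expansion example, using a hypothetical emitter function 'iemNativeEmitFoo':
   IEM_MC_NATIVE_EMIT_2(iemNativeEmitFoo, idxVarDst, idxVarSrc)
   becomes:  off = iemNativeEmitFoo(pReNative, off, (idxVarDst), (idxVarSrc)); */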
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                            *
244*********************************************************************************************************************************/
245
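/* Each IEM_MC_DEFER_TO_CIMPL_N_RET_THREADED variant below abandons recompiling the
   instruction body and instead emits a call to the C implementation, forwarding N
   arguments together with the guest shadow flush mask (a_fGstShwFlush) through the
   iemNativeEmitCImplCall0/1/2/3 wrappers. */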
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
250
251
252#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
256
257DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
258 uint8_t idxInstr, uint64_t a_fGstShwFlush,
259 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
260{
261 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
262}
263
264
265#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
266 pReNative->fMc = 0; \
267 pReNative->fCImpl = (a_fFlags); \
268 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
269 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
270
271DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
272 uint8_t idxInstr, uint64_t a_fGstShwFlush,
273 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
274{
275 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
276}
277
278
279#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
280 pReNative->fMc = 0; \
281 pReNative->fCImpl = (a_fFlags); \
282 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
283 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
284
285DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
286 uint8_t idxInstr, uint64_t a_fGstShwFlush,
287 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
288 uint64_t uArg2)
289{
290 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
291}
292
293
294
295/*********************************************************************************************************************************
296* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
297*********************************************************************************************************************************/
298
299/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
300 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
301DECL_INLINE_THROW(uint32_t)
302iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
303{
304 /*
305 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
306 * return with a special status code and make the execution loop deal with
307 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
308 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
309 * could continue w/o interruption, it probably will drop into the
310 * debugger, so it's not worth the effort of trying to service it here and
311 * we just lump it in with the handling of the others.
312 *
313 * To simplify the code and the register state management even more (wrt
314 * the immediate in the AND operation), we always update the flags and skip
315 * the conditional jump associated with the extra check.
316 */
317 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
318 <= UINT32_MAX);
319#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
320 AssertMsg( pReNative->idxCurCall == 0
321 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
322 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
323#endif
324
325 /*
326 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
327 * any pending register writes must be flushed.
328 */
329 off = iemNativeRegFlushPendingWrites(pReNative, off);
330
331 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
332 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
333 true /*fSkipLivenessAssert*/);
334 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
335 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
336 kIemNativeExitReason_ReturnWithFlags);
337 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
338 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
339
340 /* Free but don't flush the EFLAGS register. */
341 iemNativeRegFreeTmp(pReNative, idxEflReg);
342
343 return off;
344}
345
346
347/** Helper for iemNativeEmitFinishInstructionWithStatus. */
348DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
349{
350 unsigned const offOpcodes = pCallEntry->offOpcode;
351 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
352 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
353 {
354 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
355 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
356 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
357 }
358 AssertFailedReturn(NIL_RTGCPHYS);
359}
360
361
362/** The VINF_SUCCESS dummy. */
363template<int const a_rcNormal, bool const a_fIsJump>
364DECL_FORCE_INLINE_THROW(uint32_t)
365iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
366 int32_t const offJump)
367{
368 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
369 if (a_rcNormal != VINF_SUCCESS)
370 {
371#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
372 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
373#else
374 RT_NOREF_PV(pCallEntry);
375#endif
376
377 /* As this code returns from the TB any pending register writes must be flushed. */
378 off = iemNativeRegFlushPendingWrites(pReNative, off);
379
380 /*
381 * Use the lookup table for getting to the next TB quickly.
382 * Note! In this code path there can only be one entry at present.
383 */
384 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
385 PCIEMTB const pTbOrg = pReNative->pTbOrg;
386 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
387 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
388
389#if 0
390 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
391 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
392 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
393 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
394 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
395
396 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreak);
397
398#else
399 /* Load the index as argument #1 for the helper call at the given label. */
400 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
401
402 /*
403 * Figure out the physical address of the current instruction and see
404 * whether the next instruction we're about to execute is in the same
405 * page, so we can optimistically skip TLB loading.
406 *
407 * - This is safe for all cases in FLAT mode.
408 * - In segmented modes it is complicated, given that a negative
409 * jump may underflow EIP and a forward jump may overflow or run into
410 * CS.LIM and trigger a #GP. The only thing we can get away with
411 * now at compile time is forward jumps w/o CS.LIM checks, since the
412 * lack of CS.LIM checks means we're good for the entire physical page
413 * we're executing on and another 15 bytes before we run into CS.LIM.
414 */
415 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
416 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS) )
417 {
418 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
419 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
420 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
421 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
422
423 {
424 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
425 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
426
427 /* Load the key lookup flags into the 2nd argument for the helper call.
428 - This is safe wrt CS limit checking since we're only here for FLAT modes.
429 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
430 interrupt shadow.
431 - The NMI inhibiting is more questionable, though... */
432 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
433 * Should we copy it into fExec to simplify this? OTOH, it's just a
434 * couple of extra instructions if EFLAGS are already in a register. */
435 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
436 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
437
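 /* Pick the exit: if this TB already contains an IRQ check call we can take the
    plain lookup exit, otherwise use the variant that also checks for IRQs. */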
438 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
439 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreakViaLookup);
440 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreakViaLookupWithIrq);
441 }
442 }
443 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
444 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreakViaLookupWithTlb);
445 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_ReturnBreakViaLookupWithTlbAndIrq);
446#endif
447 }
448 return off;
449}
450
451
452#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
453 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
454 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
455
456#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
457 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
458 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
459 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
460
461/** Same as iemRegAddToRip64AndFinishingNoFlags. */
462DECL_INLINE_THROW(uint32_t)
463iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
464{
465#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
466# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
467 if (!pReNative->Core.offPc)
468 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
469# endif
470
471 /* Allocate a temporary PC register. */
472 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
473
474 /* Perform the addition and store the result. */
475 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
476 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
477
478 /* Free but don't flush the PC register. */
479 iemNativeRegFreeTmp(pReNative, idxPcReg);
480#endif
481
482#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
483 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
484
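 /* Accumulate the instruction length in offPc instead of writing RIP back
    immediately; inside a conditional block we must write it back now, otherwise
    we just count the skipped update. */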
485 pReNative->Core.offPc += cbInstr;
486# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
487 off = iemNativePcAdjustCheck(pReNative, off);
488# endif
489 if (pReNative->cCondDepth)
490 off = iemNativeEmitPcWriteback(pReNative, off);
491 else
492 pReNative->Core.cInstrPcUpdateSkipped++;
493#endif
494
495 return off;
496}
497
498
499#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
500 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
501 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
502
503#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
504 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
505 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
506 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
507
508/** Same as iemRegAddToEip32AndFinishingNoFlags. */
509DECL_INLINE_THROW(uint32_t)
510iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
511{
512#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
513# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
514 if (!pReNative->Core.offPc)
515 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
516# endif
517
518 /* Allocate a temporary PC register. */
519 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
520
521 /* Perform the addition and store the result. */
522 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
523 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
524
525 /* Free but don't flush the PC register. */
526 iemNativeRegFreeTmp(pReNative, idxPcReg);
527#endif
528
529#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
530 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
531
532 pReNative->Core.offPc += cbInstr;
533# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
534 off = iemNativePcAdjustCheck(pReNative, off);
535# endif
536 if (pReNative->cCondDepth)
537 off = iemNativeEmitPcWriteback(pReNative, off);
538 else
539 pReNative->Core.cInstrPcUpdateSkipped++;
540#endif
541
542 return off;
543}
544
545
546#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
547 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
548 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
549
550#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
551 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
552 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
553 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
554
555/** Same as iemRegAddToIp16AndFinishingNoFlags. */
556DECL_INLINE_THROW(uint32_t)
557iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
558{
559#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
560# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
561 if (!pReNative->Core.offPc)
562 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
563# endif
564
565 /* Allocate a temporary PC register. */
566 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
567
568 /* Perform the addition and store the result. */
569 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
570 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
571 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
572
573 /* Free but don't flush the PC register. */
574 iemNativeRegFreeTmp(pReNative, idxPcReg);
575#endif
576
577#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
578 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
579
580 pReNative->Core.offPc += cbInstr;
581# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
582 off = iemNativePcAdjustCheck(pReNative, off);
583# endif
584 if (pReNative->cCondDepth)
585 off = iemNativeEmitPcWriteback(pReNative, off);
586 else
587 pReNative->Core.cInstrPcUpdateSkipped++;
588#endif
589
590 return off;
591}
592
593
594
595/*********************************************************************************************************************************
596* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
597*********************************************************************************************************************************/
598
599#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
600 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
601 (a_enmEffOpSize), pCallEntry->idxInstr); \
602 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
603
604#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
605 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
606 (a_enmEffOpSize), pCallEntry->idxInstr); \
607 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
608 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
609
610#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
611 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
612 IEMMODE_16BIT, pCallEntry->idxInstr); \
613 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
614
615#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
616 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
617 IEMMODE_16BIT, pCallEntry->idxInstr); \
618 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
619 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
620
621#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
623 IEMMODE_64BIT, pCallEntry->idxInstr); \
624 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
625
626#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
627 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
628 IEMMODE_64BIT, pCallEntry->idxInstr); \
629 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
630 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
631
632/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
633 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
634 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
635DECL_INLINE_THROW(uint32_t)
636iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
637 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
638{
639 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
640
641 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
642 off = iemNativeRegFlushPendingWrites(pReNative, off);
643
644#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
645 Assert(pReNative->Core.offPc == 0);
646
647 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
648#endif
649
650 /* Allocate a temporary PC register. */
651 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
652
653 /* Perform the addition. */
654 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
655
656 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
657 {
658 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
659 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
660 }
661 else
662 {
663 /* Just truncate the result to 16-bit IP. */
664 Assert(enmEffOpSize == IEMMODE_16BIT);
665 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
666 }
667 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
668
669 /* Free but don't flush the PC register. */
670 iemNativeRegFreeTmp(pReNative, idxPcReg);
671
672 return off;
673}
674
675
676#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
677 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
678 (a_enmEffOpSize), pCallEntry->idxInstr); \
679 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
680
681#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
682 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
683 (a_enmEffOpSize), pCallEntry->idxInstr); \
684 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
685 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
686
687#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
688 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
689 IEMMODE_16BIT, pCallEntry->idxInstr); \
690 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
691
692#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
693 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
694 IEMMODE_16BIT, pCallEntry->idxInstr); \
695 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
696 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
697
698#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
699 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
700 IEMMODE_32BIT, pCallEntry->idxInstr); \
701 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
702
703#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
704 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
705 IEMMODE_32BIT, pCallEntry->idxInstr); \
706 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
707 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
708
709/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
710 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
711 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
712DECL_INLINE_THROW(uint32_t)
713iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
714 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
715{
716 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
717
718 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
719 off = iemNativeRegFlushPendingWrites(pReNative, off);
720
721#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
722 Assert(pReNative->Core.offPc == 0);
723
724 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
725#endif
726
727 /* Allocate a temporary PC register. */
728 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
729
730 /* Perform the addition. */
731 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
732
733 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
734 if (enmEffOpSize == IEMMODE_16BIT)
735 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
736
737 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
738/** @todo we can skip this in 32-bit FLAT mode. */
739 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
740
741 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
742
743 /* Free but don't flush the PC register. */
744 iemNativeRegFreeTmp(pReNative, idxPcReg);
745
746 return off;
747}
748
749
750#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
751 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
752 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
753
754#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
755 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
756 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
757 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
758
759#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
760 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
761 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
762
763#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
764 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
765 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
766 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
767
768#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
769 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
770 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
771
772#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
773 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
774 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
775 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
776
777/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
778DECL_INLINE_THROW(uint32_t)
779iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
780 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
781{
782 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
783 off = iemNativeRegFlushPendingWrites(pReNative, off);
784
785#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
786 Assert(pReNative->Core.offPc == 0);
787
788 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
789#endif
790
791 /* Allocate a temporary PC register. */
792 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
793
794 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
795 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
796 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
797 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
798 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
799
800 /* Free but don't flush the PC register. */
801 iemNativeRegFreeTmp(pReNative, idxPcReg);
802
803 return off;
804}
805
806
807
808/*********************************************************************************************************************************
809* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                  *
810*********************************************************************************************************************************/
811
812/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
813#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
814 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
815
816/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
817#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
818 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
819
820/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
821#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
822 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
823
824/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
825 * clears flags. */
826#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
827 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
828 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
829
830/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
831 * clears flags. */
832#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
833 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
834 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
835
836/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
837 * clears flags. */
838#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
839 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
840 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
841
842#undef IEM_MC_SET_RIP_U16_AND_FINISH
843
844
845/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
846#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
847 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
848
849/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
850#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
851 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
852
853/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
854 * clears flags. */
855#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
856 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
857 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
858
859/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
860 * and clears flags. */
861#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
862 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
863 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
864
865#undef IEM_MC_SET_RIP_U32_AND_FINISH
866
867
868/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
869#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
870 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
871
872/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
873 * and clears flags. */
874#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
875 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
876 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
877
878#undef IEM_MC_SET_RIP_U64_AND_FINISH
879
880
881/** Same as iemRegRipJumpU16AndFinishNoFlags,
882 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
883DECL_INLINE_THROW(uint32_t)
884iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
885 uint8_t idxInstr, uint8_t cbVar)
886{
887 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
888 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
889
890 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
891 off = iemNativeRegFlushPendingWrites(pReNative, off);
892
893#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
894 Assert(pReNative->Core.offPc == 0);
895
896 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
897#endif
898
899 /* Get a register with the new PC loaded from idxVarPc.
900 Note! This ASSUMES that the high bits of the GPR are zeroed. */
901 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
902
903 /* Check limit (may #GP(0) + exit TB). */
904 if (!f64Bit)
905/** @todo we can skip this test in FLAT 32-bit mode. */
906 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
907 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
908 else if (cbVar > sizeof(uint32_t))
909 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
910
911 /* Store the result. */
912 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
913
914 iemNativeVarRegisterRelease(pReNative, idxVarPc);
915 /** @todo implicitly free the variable? */
916
917 return off;
918}
919
920
921
922/*********************************************************************************************************************************
923* Emitters for changing PC/RIP/EIP/IP with a relative call jump (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters).   *
924*********************************************************************************************************************************/
925
926/** @todo These helpers belong to the stack push API naturally but we already need them up here (we could of course move
927 * this below the stack emitters but then this is not close to the rest of the PC/RIP handling...). */
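/** Emits code for a 16-bit (SP) stack push adjustment: subtracts cbMem from
 * the low 16 bits of idxRegRsp (leaving the upper bits unchanged) and loads
 * the resulting 16-bit stack offset, zero extended, into idxRegEffSp. */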
928DECL_FORCE_INLINE_THROW(uint32_t)
929iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
930{
931 /* Use16BitSp: */
932#ifdef RT_ARCH_AMD64
933 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
934 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
935#else
936 /* sub regeff, regrsp, #cbMem */
937 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
938 /* and regeff, regeff, #0xffff */
939 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
940 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
941 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
942 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
943#endif
944 return off;
945}
946
947
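/** Emits code for a 32-bit (ESP) stack push adjustment: subtracts cbMem from
 * idxRegRsp using 32-bit arithmetic and copies the result to idxRegEffSp. */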
948DECL_FORCE_INLINE(uint32_t)
949iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
950{
951 /* Use32BitSp: */
952 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
953 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
954 return off;
955}
956
957
958DECL_INLINE_THROW(uint32_t)
959iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
960 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
961{
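 /* cBitsVarAndFlat is packed with RT_MAKE_U32_FROM_U8: the first byte is the
    width in bits of the value being pushed, the second is 32 or 64 when in a
    flat mode (0 when segmented), and a non-zero third byte marks a segment
    register push. */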
962 /*
963 * Assert sanity.
964 */
965#ifdef VBOX_STRICT
966 if (RT_BYTE2(cBitsVarAndFlat) != 0)
967 {
968 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
969 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
970 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
971 Assert( pfnFunction
972 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
973 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
974 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
975 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
976 : UINT64_C(0xc000b000a0009000) ));
977 }
978 else
979 Assert( pfnFunction
980 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
981 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
982 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
983 : UINT64_C(0xc000b000a0009000) ));
984#endif
985
986#ifdef VBOX_STRICT
987 /*
988 * Check that the fExec flags we've got make sense.
989 */
990 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
991#endif
992
993 /*
994 * To keep things simple we have to commit any pending writes first as we
995 * may end up making calls.
996 */
997 /** @todo we could postpone this till we make the call and reload the
998 * registers after returning from the call. Not sure if that's sensible or
999 * not, though. */
1000 off = iemNativeRegFlushPendingWrites(pReNative, off);
1001
1002 /*
1003 * First we calculate the new RSP and the effective stack pointer value.
1004 * For 64-bit mode and flat 32-bit these two are the same.
1005 * (Code structure is very similar to that of PUSH)
1006 */
1007 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1008 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1009 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1010 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1011 ? cbMem : sizeof(uint16_t);
1012 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1013 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1014 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1015 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1016 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1017 if (cBitsFlat != 0)
1018 {
1019 Assert(idxRegEffSp == idxRegRsp);
1020 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1021 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1022 if (cBitsFlat == 64)
1023 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1024 else
1025 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1026 }
1027 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1028 {
1029 Assert(idxRegEffSp != idxRegRsp);
1030 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1031 kIemNativeGstRegUse_ReadOnly);
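 /* Test SS.ATTR.D: a clear D bit means a 16-bit stack (SP), a set bit a 32-bit
    stack (ESP); branch away to whichever SP update variant is not the default
    for the current CPU mode. */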
1032#ifdef RT_ARCH_AMD64
1033 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1034#else
1035 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1036#endif
1037 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1038 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1039 offFixupJumpToUseOtherBitSp = off;
1040 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1041 {
1042 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1043 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1044 }
1045 else
1046 {
1047 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1048 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1049 }
1050 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1051 }
1052 /* SpUpdateEnd: */
1053 uint32_t const offLabelSpUpdateEnd = off;
1054
1055 /*
1056 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1057 * we're skipping lookup).
1058 */
1059 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1060 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1061 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1062 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1063 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1064 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1065 : UINT32_MAX;
1066 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1067
1068
1069 if (!TlbState.fSkip)
1070 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1071 else
1072 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1073
1074 /*
1075 * Use16BitSp:
1076 */
1077 if (cBitsFlat == 0)
1078 {
1079#ifdef RT_ARCH_AMD64
1080 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1081#else
1082 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1083#endif
1084 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1085 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1086 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1087 else
1088 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1089 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1090 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1091 }
1092
1093 /*
1094 * TlbMiss:
1095 *
1096 * Call helper to do the pushing.
1097 */
1098 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1099
1100#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1101 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1102#else
1103 RT_NOREF(idxInstr);
1104#endif
1105
1106 /* Save variables in volatile registers. */
1107 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1108 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1109 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1110 | (RT_BIT_32(idxRegPc));
1111 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1112
1113 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1114 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1115 {
1116 /* Swap them using ARG0 as temp register: */
1117 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1118 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1119 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1120 }
1121 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1122 {
1123 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1124 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1125
1126 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1127 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1128 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1129 }
1130 else
1131 {
1132 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1133 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1134
1135 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1136 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1137 }
1138
1139 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1140 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1141
1142 /* Done setting up parameters, make the call. */
1143 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1144
1145 /* Restore variables and guest shadow registers to volatile registers. */
1146 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1147 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1148
1149#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1150 if (!TlbState.fSkip)
1151 {
1152 /* end of TlbMiss - Jump to the done label. */
1153 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1154 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1155
1156 /*
1157 * TlbLookup:
1158 */
1159 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1160 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1161
1162 /*
1163 * Emit code to do the actual storing / fetching.
1164 */
1165 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1166# ifdef VBOX_WITH_STATISTICS
1167 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1168 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1169# endif
1170 switch (cbMemAccess)
1171 {
1172 case 2:
1173 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1174 break;
1175 case 4:
1176 if (!fIsIntelSeg)
1177 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1178 else
1179 {
1180 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
1181 PUSH FS in real mode, so we have to try to emulate that here.
1182 We borrow the now unused idxReg1 from the TLB lookup code here. */
1183 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1184 kIemNativeGstReg_EFlags);
1185 if (idxRegEfl != UINT8_MAX)
1186 {
1187#ifdef RT_ARCH_AMD64
1188 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1189 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1190 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1191#else
1192 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1193 off, TlbState.idxReg1, idxRegEfl,
1194 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1195#endif
1196 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1197 }
1198 else
1199 {
1200 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1201 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1202 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1203 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1204 }
1205 /* ASSUMES the upper half of idxRegPc is ZERO. */
1206 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1207 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1208 }
1209 break;
1210 case 8:
1211 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1212 break;
1213 default:
1214 AssertFailed();
1215 }
1216
1217 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1218 TlbState.freeRegsAndReleaseVars(pReNative);
1219
1220 /*
1221 * TlbDone:
1222 *
1223 * Commit the new RSP value.
1224 */
1225 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1226 }
1227#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1228
1229#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1230 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1231#endif
1232 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1233 if (idxRegEffSp != idxRegRsp)
1234 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1235
1236 return off;
1237}
1238
1239
1240/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1241#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1242 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1243
1244/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1245 * clears flags. */
1246#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1247 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1248 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1249
1250/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1251#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1252 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1253
1254/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1255 * clears flags. */
1256#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1257 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1258 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1259
1260#undef IEM_MC_IND_CALL_U16_AND_FINISH
1261
1262
1263/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1264#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1265 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1266
1267/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1268 * clears flags. */
1269#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1270 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1271 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1272
1273#undef IEM_MC_IND_CALL_U32_AND_FINISH
1274
1275
1276/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1277 * an extra parameter, for use in 64-bit code. */
1278#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1279 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1280
1281
1282/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1283 * an extra parameter, for use in 64-bit code and we need to check and clear
1284 * flags. */
1285#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1286 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1287 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1288
1289#undef IEM_MC_IND_CALL_U64_AND_FINISH
1290
1291/** Common worker for the IEM_MC_IND_CALL_U16/U32/U64_AND_FINISH_THREADED variants
1292 * above: pushes the return address and loads the new PC from the given variable. */
1293DECL_INLINE_THROW(uint32_t)
1294iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1295 uint8_t idxInstr, uint8_t cbVar)
1296{
1297 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1298 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1299
1300 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1301 off = iemNativeRegFlushPendingWrites(pReNative, off);
1302
1303#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1304 Assert(pReNative->Core.offPc == 0);
1305
1306 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1307#endif
1308
1309 /* Get a register with the new PC loaded from idxVarPc.
1310 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1311 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1312
1313 /* Check limit (may #GP(0) + exit TB). */
1314 if (!f64Bit)
1315/** @todo we can skip this test in FLAT 32-bit mode. */
1316 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1317 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1318 else if (cbVar > sizeof(uint32_t))
1319 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1320
1321#if 1
1322 /* Allocate a temporary PC register, we don't want it shadowed. */
1323 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1324 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1325#else
1326 /* Allocate a temporary PC register. */
1327 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1328 true /*fNoVolatileRegs*/);
1329#endif
1330
1331 /* Perform the addition and push the variable to the guest stack. */
1332 /** @todo Flat variants for PC32 variants. */
1333 switch (cbVar)
1334 {
1335 case sizeof(uint16_t):
1336 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1337 /* Truncate the result to 16-bit IP. */
1338 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1339 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1340 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1341 break;
1342 case sizeof(uint32_t):
1343 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1344 /** @todo In FLAT mode we can use the flat variant. */
1345 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1346 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1347 break;
1348 case sizeof(uint64_t):
1349 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1350 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1351 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1352 break;
1353 default:
1354 AssertFailed();
1355 }
1356
1357 /* RSP got changed, so do this again. */
1358 off = iemNativeRegFlushPendingWrites(pReNative, off);
1359
1360 /* Store the result. */
1361 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1362
1363#if 1
1364 /* Need to transfer the shadow information to the new RIP register. */
1365 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1366#else
1367 /* Sync the new PC. */
1368 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1369#endif
1370 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1371 iemNativeRegFreeTmp(pReNative, idxPcReg);
1372 /** @todo implicitly free the variable? */
1373
1374 return off;
1375}
1376
1377
1378/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1379 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1380#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1381 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1382
1383/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1384 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1385 * flags. */
1386#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1387 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1388 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1389
1390/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1391 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1392#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1393 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1394
1395/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1396 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1397 * flags. */
1398#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1399 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1400 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1401
1402/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1403 * an extra parameter, for use in 64-bit code. */
1404#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1405 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1406
1407/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1408 * an extra parameter, for use in 64-bit code and we need to check and clear
1409 * flags. */
1410#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1411 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1412 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1413
1414#undef IEM_MC_REL_CALL_S16_AND_FINISH
1415
1416/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags; common worker for the
1417 * IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PCxx variants above. */
1418DECL_INLINE_THROW(uint32_t)
1419iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
1420 uint8_t idxInstr)
1421{
1422 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1423 off = iemNativeRegFlushPendingWrites(pReNative, off);
1424
1425#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1426 Assert(pReNative->Core.offPc == 0);
1427
1428 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1429#endif
1430
1431 /* Allocate temporary registers for the old and the new PC. */
1432 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1433 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1434 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1435
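 /* Note: idxPcRegOld will end up holding the return address (IP + cbInstr) that gets
    pushed below, while idxPcRegNew receives the call target (return address + offDisp). */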
1436 /* Calculate the new RIP. */
1437 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1438 /* Truncate the result to 16-bit IP. */
1439 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
1440 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1441 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1442
1443 /* Truncate the result to 16-bit IP. */
1444 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
1445
1446 /* Check limit (may #GP(0) + exit TB). */
1447 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1448
1449 /* Push the return address (the old IP) onto the guest stack. */
1450 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1451 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1452
1453 /* RSP got changed, so flush again. */
1454 off = iemNativeRegFlushPendingWrites(pReNative, off);
1455
1456 /* Store the result. */
1457 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1458
1459 /* Need to transfer the shadow information to the new RIP register. */
1460 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1461 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1462 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1463
1464 return off;
1465}
1466
1467
1468/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1469 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1470#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
1471 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
1472
1473/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1474 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1475 * flags. */
1476#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
1477 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
1478 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1479
1480#undef IEM_MC_REL_CALL_S32_AND_FINISH
1481
1482/** Same as iemRegEip32RelativeCallS32AndFinishNoFlags; common worker for the
1483 * IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32 variants above. */
1484DECL_INLINE_THROW(uint32_t)
1485iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
1486 uint8_t idxInstr)
1487{
1488 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1489 off = iemNativeRegFlushPendingWrites(pReNative, off);
1490
1491#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1492 Assert(pReNative->Core.offPc == 0);
1493
1494 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1495#endif
1496
1497 /* Allocate temporary registers for the old and the new PC. */
1498 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1499 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1500 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1501
1502 /* Update the EIP to get the return address. */
1503 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1504
1505 /* Load address, add the displacement and check it against the CS segment limit, raising #GP(0) + exit TB if it's outside. */
1506 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1507 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1508 /** @todo we can skip this test in FLAT 32-bit mode. */
1509 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1510
1511 /* Push the return address onto the guest stack. */
1512 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
1513 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1514 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1515
1516 /* RSP got changed, so do this again. */
1517 off = iemNativeRegFlushPendingWrites(pReNative, off);
1518
1519 /* Store the result. */
1520 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1521
1522 /* Need to transfer the shadow information to the new RIP register. */
1523 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1524 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1525 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1526
1527 return off;
1528}
1529
1530
1531/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1532 * an extra parameter, for use in 64-bit code. */
1533#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
1534 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
1535
1536/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1537 * an extra parameter, for use in 64-bit code and we need to check and clear
1538 * flags. */
1539#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
1540 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
1541 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1542
1543#undef IEM_MC_REL_CALL_S64_AND_FINISH
1544
1545/** Same as iemRegRip64RelativeCallS64AndFinishNoFlags; common worker for the
1546 * IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64 variants above. */
1547DECL_INLINE_THROW(uint32_t)
1548iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
1549 uint8_t idxInstr)
1550{
1551 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1552 off = iemNativeRegFlushPendingWrites(pReNative, off);
1553
1554#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1555 Assert(pReNative->Core.offPc == 0);
1556
1557 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1558#endif
1559
1560 /* Allocate temporary registers for the old and the new PC. */
1561 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1562 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1563 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1564
1565 /* Update the RIP to get the return address. */
1566 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
1567
1568 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1569 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
1570 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
1571 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1572
1573 /* Push the return address onto the guest stack. */
1574 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1575 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1576
1577 /* RSP got changed, so do this again. */
1578 off = iemNativeRegFlushPendingWrites(pReNative, off);
1579
1580 /* Store the result. */
1581 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1582
1583 /* Need to transfer the shadow information to the new RIP register. */
1584 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1585 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1586 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1587
1588 return off;
1589}
1590
1591
1592/*********************************************************************************************************************************
1593* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
1594*********************************************************************************************************************************/
1595
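/**
 * Emits code for the 16-bit stack pointer variant of the RETN stack adjustment:
 * copies the current 16-bit SP into idxRegEffSp for the memory access and
 * advances the low 16 bits of idxRegRsp by cbMem + cbPopAdd, leaving the upper
 * RSP bits untouched.  idxRegTmp is only needed by the ARM64 code path.
 */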
1596DECL_FORCE_INLINE_THROW(uint32_t)
1597iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1598 uint16_t cbPopAdd, uint8_t idxRegTmp)
1599{
1600 /* Use16BitSp: */
1601#ifdef RT_ARCH_AMD64
1602 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1603 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1604 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
1605 RT_NOREF(idxRegTmp);
1606#elif defined(RT_ARCH_ARM64)
1607 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
1608 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
1609 /* add tmp, regrsp, #(cbMem + cbPopAdd) */
1610 uint16_t const cbCombined = cbMem + cbPopAdd;
1611 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
1612 if (cbCombined >= RT_BIT_32(12))
1613 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
1614 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1615 /* and tmp, tmp, #0xffff */
1616 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1617 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
1618 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
1619 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
1620#else
1621# error "Port me"
1622#endif
1623 return off;
1624}
1625
1626
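/**
 * Emits code for the 32-bit stack pointer variant of the RETN stack adjustment:
 * copies the current 32-bit SP into idxRegEffSp for the memory access and
 * advances idxRegRsp (as a 32-bit value) by cbMem + cbPopAdd.
 */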
1627DECL_FORCE_INLINE_THROW(uint32_t)
1628iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1629 uint16_t cbPopAdd)
1630{
1631 /* Use32BitSp: */
1632 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1633 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
1634 return off;
1635}
1636
1637
1638/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
1639#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
1640 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
1641
1642/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
1643#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1644 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1645
1646/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
1647#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1648 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1649
1650/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
1651 * clears flags. */
1652#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
1653 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
1654 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1655
1656/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
1657 * clears flags. */
1658#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1659 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1660 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1661
1662/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
1663 * clears flags. */
1664#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1665 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1667
1668/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
1669DECL_INLINE_THROW(uint32_t)
1670iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
1671 IEMMODE enmEffOpSize, uint8_t idxInstr)
1672{
1673 RT_NOREF(cbInstr);
1674
1675#ifdef VBOX_STRICT
1676 /*
1677 * Check that the fExec flags we've got make sense.
1678 */
1679 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1680#endif
1681
1682 /*
1683 * To keep things simple we have to commit any pending writes first as we
1684 * may end up making calls.
1685 */
1686 off = iemNativeRegFlushPendingWrites(pReNative, off);
1687
1688 /*
1689 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
1690 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
1691 * directly as the effective stack pointer.
1692 * (Code structure is very similar to that of PUSH)
1693 *
1694 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
1695 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
1696 * aren't commonly used (or useful) and thus not in need of optimizing.
1697 *
1698 * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation,
1699 * as the shadowed register would otherwise remain modified even if the return address
1700 * raises a \#GP(0) for being outside the CS limit, leaving a wrong stack pointer value
1701 * in the guest (see the near return testcase in bs3-cpu-basic-2). If no exception is
1702 * thrown, the shadowing is transferred to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
1703 */
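 /*
  * Rough shape of the code this emitter produces (sketch):
  *      effSp = fFlat ? RSP : (SS.ATTR.D ? ESP : SP);
  *      value = read cbMem bytes at SS:effSp (TLB lookup, or TlbMiss helper call);
  *      if (!f64Bit) check value against CS.limit, else (64-bit opsize) check it is canonical;  may #GP(0)
  *      RSP   = effSp + cbMem + cbPop;  (done up front for non-FLAT, after TlbDone for FLAT)
  *      RIP   = value;
  */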
1704 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
1705 ? sizeof(uint64_t)
1706 : enmEffOpSize == IEMMODE_32BIT
1707 ? sizeof(uint32_t)
1708 : sizeof(uint16_t);
1709 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
1710 uintptr_t const pfnFunction = fFlat
1711 ? enmEffOpSize == IEMMODE_64BIT
1712 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
1713 : (uintptr_t)iemNativeHlpStackFlatFetchU32
1714 : enmEffOpSize == IEMMODE_32BIT
1715 ? (uintptr_t)iemNativeHlpStackFetchU32
1716 : (uintptr_t)iemNativeHlpStackFetchU16;
1717 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1718 fFlat ? kIemNativeGstRegUse_ForUpdate : kIemNativeGstRegUse_Calculation,
1719 true /*fNoVolatileRegs*/);
1720 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1721 /** @todo can do a better job picking the register here. For cbMem >= 4 this
1722 * will be the resulting register value. */
1723 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
1724
1725 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1726 if (fFlat)
1727 Assert(idxRegEffSp == idxRegRsp);
1728 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1729 {
1730 Assert(idxRegEffSp != idxRegRsp);
1731 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1732 kIemNativeGstRegUse_ReadOnly);
1733#ifdef RT_ARCH_AMD64
1734 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1735#else
1736 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1737#endif
1738 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1739 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1740 offFixupJumpToUseOtherBitSp = off;
1741 if (enmEffOpSize == IEMMODE_32BIT)
1742 {
1743 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1744 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1745 }
1746 else
1747 {
1748 Assert(enmEffOpSize == IEMMODE_16BIT);
1749 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1750 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1751 idxRegMemResult);
1752 }
1753 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1754 }
1755 /* SpUpdateEnd: */
1756 uint32_t const offLabelSpUpdateEnd = off;
1757
1758 /*
1759 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1760 * we're skipping lookup).
1761 */
1762 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
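 /* Note: iSegReg = UINT8_MAX means no segment register, i.e. flat addressing for the TLB lookup below. */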
1763 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1764 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1765 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1766 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1767 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1768 : UINT32_MAX;
1769
1770 if (!TlbState.fSkip)
1771 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1772 else
1773 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1774
1775 /*
1776 * Use16BitSp:
1777 */
1778 if (!fFlat)
1779 {
1780#ifdef RT_ARCH_AMD64
1781 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1782#else
1783 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1784#endif
1785 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1786 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1787 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1788 idxRegMemResult);
1789 else
1790 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1791 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1792 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1793 }
1794
1795 /*
1796 * TlbMiss:
1797 *
1798 * Call helper to do the fetching.
1799 */
1800 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1801
1802#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1803 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1804#else
1805 RT_NOREF(idxInstr);
1806#endif
1807
1808 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1809 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1810 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
1811 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1812
1813
1814 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
1815 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1816 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1817
1818 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1819 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1820
1821 /* Done setting up parameters, make the call. */
1822 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1823
1824 /* Move the return register content to idxRegMemResult. */
1825 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
1826 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
1827
1828 /* Restore variables and guest shadow registers to volatile registers. */
1829 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1830 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1831
1832#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1833 if (!TlbState.fSkip)
1834 {
1835 /* end of TlbMiss - Jump to the done label. */
1836 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1837 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1838
1839 /*
1840 * TlbLookup:
1841 */
1842 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
1843 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1844
1845 /*
1846 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
1847 */
1848 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1849# ifdef VBOX_WITH_STATISTICS
1850 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1851 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1852# endif
1853 switch (cbMem)
1854 {
1855 case 2:
1856 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1857 break;
1858 case 4:
1859 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1860 break;
1861 case 8:
1862 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1863 break;
1864 default:
1865 AssertFailed();
1866 }
1867
1868 TlbState.freeRegsAndReleaseVars(pReNative);
1869
1870 /*
1871 * TlbDone:
1872 *
1873 * Set the new RSP value (FLAT accesses need to calculate it first) and
1874 * commit the popped register value.
1875 */
1876 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1877 }
1878#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1879
1880 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
1881 if (!f64Bit)
1882/** @todo we can skip this test in FLAT 32-bit mode. */
1883 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1884 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1885 else if (enmEffOpSize == IEMMODE_64BIT)
1886 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1887
1888 /* Complete RSP calculation for FLAT mode. */
1889 if (idxRegEffSp == idxRegRsp)
1890 {
1891 if (enmEffOpSize == IEMMODE_64BIT)
1892 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
1893 else
1894 {
1895 Assert(enmEffOpSize == IEMMODE_32BIT);
1896 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
1897 }
1898 }
1899
1900 /* Commit the result and clear any current guest shadows for RIP. */
1901 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
1902 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1903 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
1904
1905 /* Need to transfer the shadowing information to the host register containing the updated value now. */
1906 if (!fFlat)
1907 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
1908
1909 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1910 if (idxRegEffSp != idxRegRsp)
1911 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1912 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1913 return off;
1914}
1915
1916
1917/*********************************************************************************************************************************
1918* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
1919*********************************************************************************************************************************/
1920
1921#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
1922 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1923
1924/**
1925 * Emits code to check if a \#NM exception should be raised.
1926 *
1927 * @returns New code buffer offset, UINT32_MAX on failure.
1928 * @param pReNative The native recompile state.
1929 * @param off The code buffer offset.
1930 * @param idxInstr The current instruction.
1931 */
1932DECL_INLINE_THROW(uint32_t)
1933iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1934{
1935#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1936 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
1937
1938 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
1939 {
1940#endif
1941 /*
1942 * Make sure we don't have any outstanding guest register writes as we may
1943 * raise an #NM and all guest registers must be up to date in CPUMCTX.
1944 */
1945 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
1946 off = iemNativeRegFlushPendingWrites(pReNative, off);
1947
1948#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1949 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1950#else
1951 RT_NOREF(idxInstr);
1952#endif
1953
1954 /* Allocate a temporary CR0 register. */
1955 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
1956
1957 /*
1958 * if (cr0 & (X86_CR0_EM | X86_CR0_TS) != 0)
1959 * return raisexcpt();
1960 */
1961 /* Test and jump. */
1962 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, kIemNativeExitReason_RaiseNm);
1963
1964 /* Free but don't flush the CR0 register. */
1965 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1966
1967#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1968 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
1969 }
1970 else
1971 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
1972#endif
1973
1974 return off;
1975}
1976
1977
1978#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
1979 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1980
1981/**
1982 * Emits code to check if a \#NM exception should be raised.
1983 *
1984 * @returns New code buffer offset, UINT32_MAX on failure.
1985 * @param pReNative The native recompile state.
1986 * @param off The code buffer offset.
1987 * @param idxInstr The current instruction.
1988 */
1989DECL_INLINE_THROW(uint32_t)
1990iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1991{
1992#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1993 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
1994
1995 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
1996 {
1997#endif
1998 /*
1999 * Make sure we don't have any outstanding guest register writes as we may
2000 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2001 */
2002 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2003 off = iemNativeRegFlushPendingWrites(pReNative, off);
2004
2005#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2006 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2007#else
2008 RT_NOREF(idxInstr);
2009#endif
2010
2011 /* Allocate a temporary CR0 register. */
2012 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_Calculation);
2013
2014 /*
2015 * if (cr0 & (X86_CR0_MP | X86_CR0_TS) == (X86_CR0_MP | X86_CR0_TS))
2016 * return raisexcpt();
2017 */
2018 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2019 /* Test and jump. */
2020 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS, kIemNativeExitReason_RaiseNm);
2021
2022 /* Free the CR0 register. */
2023 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2024
2025#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2026 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2027 }
2028 else
2029 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2030#endif
2031
2032 return off;
2033}
2034
2035
2036#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2037 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2038
2039/**
2040 * Emits code to check if a \#MF exception should be raised.
2041 *
2042 * @returns New code buffer offset, UINT32_MAX on failure.
2043 * @param pReNative The native recompile state.
2044 * @param off The code buffer offset.
2045 * @param idxInstr The current instruction.
2046 */
2047DECL_INLINE_THROW(uint32_t)
2048iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2049{
2050 /*
2051 * Make sure we don't have any outstanding guest register writes as we may
2052 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2053 */
2054 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2055 off = iemNativeRegFlushPendingWrites(pReNative, off);
2056
2057#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2058 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2059#else
2060 RT_NOREF(idxInstr);
2061#endif
2062
2063 /* Allocate a temporary FSW register. */
2064 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
2065
2066 /*
2067 * if (FSW & X86_FSW_ES != 0)
2068 * return raisexcpt();
2069 */
2070 /* Test and jump. */
2071 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeExitReason_RaiseMf);
2072
2073 /* Free but don't flush the FSW register. */
2074 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2075
2076 return off;
2077}
2078
2079
2080#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2081 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2082
2083/**
2084 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2085 *
2086 * @returns New code buffer offset, UINT32_MAX on failure.
2087 * @param pReNative The native recompile state.
2088 * @param off The code buffer offset.
2089 * @param idxInstr The current instruction.
2090 */
2091DECL_INLINE_THROW(uint32_t)
2092iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2093{
2094#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2095 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2096
2097 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2098 {
2099#endif
2100 /*
2101 * Make sure we don't have any outstanding guest register writes as we may
2102 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2103 */
2104 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2105 off = iemNativeRegFlushPendingWrites(pReNative, off);
2106
2107#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2108 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2109#else
2110 RT_NOREF(idxInstr);
2111#endif
2112
2113 /* Allocate a temporary CR0 and CR4 register. */
2114 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2115 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2116 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2117
2118 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2119#ifdef RT_ARCH_AMD64
2120 /*
2121 * We do a modified test here:
2122 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2123 * else { goto RaiseSseRelated; }
2124 * This ASSUMES that CR0[bit 9] is always zero. Since this is the case on
2125 * all targets except the 386, which doesn't support SSE anyway, this
2126 * should be a safe assumption.
2127 */
2128 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2129 //pCodeBuf[off++] = 0xcc;
2130 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2131 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2132 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2133 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2134 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2135 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeExitReason_RaiseSseRelated, kIemNativeInstrCond_ne);
2136
2137#elif defined(RT_ARCH_ARM64)
2138 /*
2139 * We do a modified test here:
2140 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2141 * else { goto RaiseSseRelated; }
2142 */
2143 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2144 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2145 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2146 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2147 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2148 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2149 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2150 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2151 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2152 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2153 kIemNativeExitReason_RaiseSseRelated);
2154
2155#else
2156# error "Port me!"
2157#endif
2158
2159 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2160 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2161 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2162 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2163
2164#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2165 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2166 }
2167 else
2168 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2169#endif
2170
2171 return off;
2172}
2173
2174
2175#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2176 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2177
2178/**
2179 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2180 *
2181 * @returns New code buffer offset, UINT32_MAX on failure.
2182 * @param pReNative The native recompile state.
2183 * @param off The code buffer offset.
2184 * @param idxInstr The current instruction.
2185 */
2186DECL_INLINE_THROW(uint32_t)
2187iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2188{
2189#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2190 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2191
2192 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2193 {
2194#endif
2195 /*
2196 * Make sure we don't have any outstanding guest register writes as we may
2197 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2198 */
2199 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2200 off = iemNativeRegFlushPendingWrites(pReNative, off);
2201
2202#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2203 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2204#else
2205 RT_NOREF(idxInstr);
2206#endif
2207
2208 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2209 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2210 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2211 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2212 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2213
2214 /*
2215 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2216 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2217 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2218 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2219 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2220 * { likely }
2221 * else { goto RaiseAvxRelated; }
2222 */
2223#ifdef RT_ARCH_AMD64
2224 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2225 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2226 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2227 ^ 0x1a) ) { likely }
2228 else { goto RaiseAvxRelated; } */
2229 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2230 //pCodeBuf[off++] = 0xcc;
2231 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2232 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2233 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2234 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2235 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2236 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2237 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2238 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2239 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2240 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2241 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeExitReason_RaiseAvxRelated, kIemNativeInstrCond_ne);
2242
2243#elif defined(RT_ARCH_ARM64)
2244 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2245 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2246 else { goto RaiseAvxRelated; } */
2247 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2248 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2249 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2250 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2251 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2252 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2253 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2254 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2255 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2256 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2257 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2258 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2259 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2260 kIemNativeExitReason_RaiseAvxRelated);
2261
2262#else
2263# error "Port me!"
2264#endif
2265
2266 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2267 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2268 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2269 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2270#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2271 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2272 }
2273 else
2274 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2275#endif
2276
2277 return off;
2278}
2279
2280
2281#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2282#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
2283 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
2284
2285/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
2286DECL_INLINE_THROW(uint32_t)
2287iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2288{
2289 /*
2290 * Make sure we don't have any outstanding guest register writes as we may
2291 * raise an \#UD or \#XF and all guest register must be up to date in CPUMCTX.
2292 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
2293 off = iemNativeRegFlushPendingWrites(pReNative, off);
2294
2295#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2296 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2297#else
2298 RT_NOREF(idxInstr);
2299#endif
2300
2301 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
2302 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
2303
2304 /* mov tmp, varmxcsr */
2305 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2306 /* tmp &= X86_MXCSR_XCPT_MASK */
2307 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
2308 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
2309 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
2310 /* tmp = ~tmp */
2311 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
2312 /* tmp &= mxcsr */
2313 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2314 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
2315 kIemNativeExitReason_RaiseSseAvxFpRelated);
2316
2317 /* Free but don't flush the MXCSR register. */
2318 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
2319 iemNativeRegFreeTmp(pReNative, idxRegTmp);
2320
2321 return off;
2322}
2323#endif
2324
2325
2326#define IEM_MC_RAISE_DIVIDE_ERROR() \
2327 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2328
2329/**
2330 * Emits code to raise a \#DE.
2331 *
2332 * @returns New code buffer offset, UINT32_MAX on failure.
2333 * @param pReNative The native recompile state.
2334 * @param off The code buffer offset.
2335 * @param idxInstr The current instruction.
2336 */
2337DECL_INLINE_THROW(uint32_t)
2338iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2339{
2340 /*
2341 * Make sure we don't have any outstanding guest register writes as we may raise a \#DE
2342 * and all guest registers must be up to date in CPUMCTX. */
2343 off = iemNativeRegFlushPendingWrites(pReNative, off);
2344
2345#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2346 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2347#else
2348 RT_NOREF(idxInstr);
2349#endif
2350
2351 /* raise \#DE exception unconditionally. */
2352 return iemNativeEmitTbExit(pReNative, off, kIemNativeExitReason_RaiseDe);
2353}
2354
2355
2356#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2357 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2358
2359/**
2360 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2361 *
2362 * @returns New code buffer offset, UINT32_MAX on failure.
2363 * @param pReNative The native recompile state.
2364 * @param off The code buffer offset.
2365 * @param idxInstr The current instruction.
2366 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2367 * @param cbAlign The alignment in bytes to check against.
2368 */
2369DECL_INLINE_THROW(uint32_t)
2370iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint8_t idxVarEffAddr, uint8_t cbAlign)
2371{
2372 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2373 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2374
2375 /*
2376 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2377 */
2378 off = iemNativeRegFlushPendingWrites(pReNative, off);
2379
2380#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2381 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2382#else
2383 RT_NOREF(idxInstr);
2384#endif
2385
2386 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2387
2388 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2389 kIemNativeExitReason_RaiseGp0);
2390
2391 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2392 return off;
2393}
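
/*
 * Equivalent C form of the emitted check (illustrative sketch; assumes
 * cbAlign is a power of two and 'GCPtrEff' stands for the variable's value):
 *
 *      if (GCPtrEff & (cbAlign - 1))
 *          // -> TB exit via kIemNativeExitReason_RaiseGp0, i.e. #GP(0).
 */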
2394
2395
2396/*********************************************************************************************************************************
2397* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2398*********************************************************************************************************************************/
2399
2400/**
2401 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2402 *
2403 * @returns Pointer to the condition stack entry on success, NULL on failure
2404 * (too many nestings)
2405 */
2406DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
2407{
2408#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2409 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
2410#endif
2411
2412 uint32_t const idxStack = pReNative->cCondDepth;
2413 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2414
2415 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2416 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2417
2418 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2419 pEntry->fInElse = false;
2420 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2421 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2422
2423 return pEntry;
2424}
2425
2426
2427/**
2428 * Start of the if-block, snapshotting the register and variable state.
2429 */
2430DECL_INLINE_THROW(void)
2431iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
2432{
2433 Assert(offIfBlock != UINT32_MAX);
2434 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2435 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2436 Assert(!pEntry->fInElse);
2437
2438 /* Define the start of the IF block if requested or for disassembly purposes. */
2439 if (idxLabelIf != UINT32_MAX)
2440 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
2441#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2442 else
2443 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
2444#else
2445 RT_NOREF(offIfBlock);
2446#endif
2447
2448#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2449 Assert(pReNative->Core.offPc == 0);
2450#endif
2451
2452 /* Copy the initial state so we can restore it in the 'else' block. */
2453 pEntry->InitialState = pReNative->Core;
2454}
2455
2456
2457#define IEM_MC_ELSE() } while (0); \
2458 off = iemNativeEmitElse(pReNative, off); \
2459 do {
2460
2461/** Emits code related to IEM_MC_ELSE. */
2462DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2463{
2464 /* Check sanity and get the conditional stack entry. */
2465 Assert(off != UINT32_MAX);
2466 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2467 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2468 Assert(!pEntry->fInElse);
2469
2470#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2471 /* Writeback any dirty shadow registers. */
2472 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2473 * in one of the branches, leaving guest registers that were already dirty before the start
2474 * of the if block alone. */
2475 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2476#endif
2477
2478 /* Jump to the endif */
2479 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
2480
2481 /* Define the else label and enter the else part of the condition. */
2482 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2483 pEntry->fInElse = true;
2484
2485#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2486 Assert(pReNative->Core.offPc == 0);
2487#endif
2488
2489 /* Snapshot the core state so we can do a merge at the endif and restore
2490 the snapshot we took at the start of the if-block. */
2491 pEntry->IfFinalState = pReNative->Core;
2492 pReNative->Core = pEntry->InitialState;
2493
2494 return off;
2495}
2496
2497
2498#define IEM_MC_ENDIF() } while (0); \
2499 off = iemNativeEmitEndIf(pReNative, off)
2500
2501/** Emits code related to IEM_MC_ENDIF. */
2502DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2503{
2504 /* Check sanity and get the conditional stack entry. */
2505 Assert(off != UINT32_MAX);
2506 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2507 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2508
2509#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2510 Assert(pReNative->Core.offPc == 0);
2511#endif
2512#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2513 /* Writeback any dirty shadow registers (else branch). */
2514 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2515 * in one of the branches, leaving guest registers that were already dirty before the start
2516 * of the if block alone. */
2517 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2518#endif
2519
2520 /*
2521 * Now we have to find common ground with the core state at the end of
2522 * the if-block (the IfFinalState). Use the smallest common denominator
2523 * and just drop anything that isn't the same in both states.
2524 */
2525 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
2526 * which is why we're doing this at the end of the else-block.
2527 * But we'd need more info about the future for that to be worth the effort. */
2528 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
2529#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2530 Assert( pOther->bmGstRegShadowDirty == 0
2531 && pReNative->Core.bmGstRegShadowDirty == 0);
2532#endif
2533
2534 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
2535 {
2536 /* shadow guest stuff first. */
2537 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
2538 if (fGstRegs)
2539 {
2540 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
2541 do
2542 {
2543 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2544 fGstRegs &= ~RT_BIT_64(idxGstReg);
2545
2546 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2547 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
2548 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
2549 {
2550 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
2551 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
2552
2553#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2554 /* Writeback any dirty shadow registers we are about to unshadow. */
2555 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
2556#endif
2557 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
2558 }
2559 } while (fGstRegs);
2560 }
2561 else
2562 {
2563 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
2564#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2565 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
2566#endif
2567 }
2568
2569 /* Check variables next. For now we must require them to be identical
2570 or stuff we can recreate. */
2571 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
2572 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
2573 if (fVars)
2574 {
2575 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
2576 do
2577 {
2578 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
2579 fVars &= ~RT_BIT_32(idxVar);
2580
2581 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
2582 {
2583 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
2584 continue;
2585 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2586 {
2587 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2588 if (idxHstReg != UINT8_MAX)
2589 {
2590 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2591 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2592 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
2593 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2594 }
2595 continue;
2596 }
2597 }
2598 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
2599 continue;
2600
2601 /* Irreconcilable, so drop it. */
2602 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2603 if (idxHstReg != UINT8_MAX)
2604 {
2605 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2606 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2607 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
2608 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2609 }
2610 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2611 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2612 } while (fVars);
2613 }
2614
2615 /* Finally, check that the host register allocations matches. */
2616 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
2617 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
2618 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
2619 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2620 }
2621
2622 /*
2623 * Define the endif label and maybe the else one if we're still in the 'if' part.
2624 */
2625 if (!pEntry->fInElse)
2626 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2627 else
2628 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2629 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
2630
2631 /* Pop the conditional stack. */
2632 pReNative->cCondDepth -= 1;
2633
2634 return off;
2635}
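
/*
 * Putting the conditional emitters together: a hypothetical MC block such as
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...
 *      } IEM_MC_ELSE() {
 *          ...
 *      } IEM_MC_ENDIF();
 *
 * is recompiled into roughly this shape (illustrative sketch only):
 *
 *      test the ZF bit in the shadowed EFLAGS, jump to else_N if clear
 *      ... if-block ...
 *      jmp endif_N                     ; emitted by iemNativeEmitElse
 *  else_N:
 *      ... else-block ...
 *  endif_N:                            ; defined by iemNativeEmitEndIf
 *
 * with the register/variable state snapshotted at the start of the if-block
 * and reconciled again at the endif as implemented above.
 */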
2636
2637
2638#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
2639 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
2640 do {
2641
2642/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
2643DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2644{
2645 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2646 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2647
2648 /* Get the eflags. */
2649 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2650 kIemNativeGstRegUse_ReadOnly);
2651
2652 /* Test and jump. */
2653 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2654
2655 /* Free but don't flush the EFlags register. */
2656 iemNativeRegFreeTmp(pReNative, idxEflReg);
2657
2658 /* Make a copy of the core state now as we start the if-block. */
2659 iemNativeCondStartIfBlock(pReNative, off);
2660
2661 return off;
2662}
2663
2664
2665#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
2666 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
2667 do {
2668
2669/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
2670DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2671{
2672 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2673 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2674
2675 /* Get the eflags. */
2676 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2677 kIemNativeGstRegUse_ReadOnly);
2678
2679 /* Test and jump. */
2680 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2681
2682 /* Free but don't flush the EFlags register. */
2683 iemNativeRegFreeTmp(pReNative, idxEflReg);
2684
2685 /* Make a copy of the core state now as we start the if-block. */
2686 iemNativeCondStartIfBlock(pReNative, off);
2687
2688 return off;
2689}
2690
2691
2692#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
2693 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
2694 do {
2695
2696/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
2697DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2698{
2699 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2700 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2701
2702 /* Get the eflags. */
2703 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2704 kIemNativeGstRegUse_ReadOnly);
2705
2706 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2707 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2708
2709 /* Test and jump. */
2710 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2711
2712 /* Free but don't flush the EFlags register. */
2713 iemNativeRegFreeTmp(pReNative, idxEflReg);
2714
2715 /* Make a copy of the core state now as we start the if-block. */
2716 iemNativeCondStartIfBlock(pReNative, off);
2717
2718 return off;
2719}
2720
2721
2722#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
2723 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
2724 do {
2725
2726/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
2727DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2728{
2729 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2730 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2731
2732 /* Get the eflags. */
2733 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2734 kIemNativeGstRegUse_ReadOnly);
2735
2736 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2737 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2738
2739 /* Test and jump. */
2740 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2741
2742 /* Free but don't flush the EFlags register. */
2743 iemNativeRegFreeTmp(pReNative, idxEflReg);
2744
2745 /* Make a copy of the core state now as we start the if-block. */
2746 iemNativeCondStartIfBlock(pReNative, off);
2747
2748 return off;
2749}
2750
2751
2752#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
2753 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
2754 do {
2755
2756#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
2757 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
2758 do {
2759
2760/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
2761DECL_INLINE_THROW(uint32_t)
2762iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2763 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2764{
2765 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
2766 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2767
2768 /* Get the eflags. */
2769 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2770 kIemNativeGstRegUse_ReadOnly);
2771
2772 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2773 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2774
2775 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2776 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2777 Assert(iBitNo1 != iBitNo2);
2778
2779#ifdef RT_ARCH_AMD64
2780 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
2781
2782 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2783 if (iBitNo1 > iBitNo2)
2784 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2785 else
2786 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2787 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2788
2789#elif defined(RT_ARCH_ARM64)
2790 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2791 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2792
2793 /* and tmpreg, eflreg, #1<<iBitNo1 */
2794 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2795
2796 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2797 if (iBitNo1 > iBitNo2)
2798 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2799 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2800 else
2801 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2802 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2803
2804 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2805
2806#else
2807# error "Port me"
2808#endif
2809
2810 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2811 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2812 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2813
2814 /* Free but don't flush the EFlags and tmp registers. */
2815 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2816 iemNativeRegFreeTmp(pReNative, idxEflReg);
2817
2818 /* Make a copy of the core state now as we start the if-block. */
2819 iemNativeCondStartIfBlock(pReNative, off);
2820
2821 return off;
2822}
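
/*
 * The bit-equality trick above in C form (illustrative sketch; 'fEfl' stands
 * for the guest EFLAGS value):
 *
 *      uint32_t uTmp = fEfl & RT_BIT_32(iBitNo1);              // isolate bit 1
 *      uTmp = iBitNo1 > iBitNo2                                // align it with bit 2
 *           ? uTmp >> (iBitNo1 - iBitNo2) : uTmp << (iBitNo2 - iBitNo1);
 *      uTmp ^= fEfl;                                           // bit iBitNo2 is now set iff the two flags differ
 *      if (uTmp & RT_BIT_32(iBitNo2))
 *          // flags differ -> IEM_MC_IF_EFL_BITS_NE path (or the else of _EQ).
 */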
2823
2824
2825#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
2826 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
2827 do {
2828
2829#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
2830 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
2831 do {
2832
2833/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
2834 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
2835DECL_INLINE_THROW(uint32_t)
2836iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
2837 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2838{
2839 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
2840 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2841
2842 /* We need an if-block label for the inverted variant. */
2843 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
2844 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
2845
2846 /* Get the eflags. */
2847 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2848 kIemNativeGstRegUse_ReadOnly);
2849
2850 /* Translate the flag masks to bit numbers. */
2851 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2852 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2853
2854 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2855 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2856 Assert(iBitNo1 != iBitNo);
2857
2858 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2859 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2860 Assert(iBitNo2 != iBitNo);
2861 Assert(iBitNo2 != iBitNo1);
2862
2863#ifdef RT_ARCH_AMD64
2864 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
2865#elif defined(RT_ARCH_ARM64)
2866 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2867#endif
2868
2869 /* Check for the lone bit first. */
2870 if (!fInverted)
2871 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2872 else
2873 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
2874
2875 /* Then extract and compare the other two bits. */
2876#ifdef RT_ARCH_AMD64
2877 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2878 if (iBitNo1 > iBitNo2)
2879 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2880 else
2881 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2882 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2883
2884#elif defined(RT_ARCH_ARM64)
2885 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2886
2887 /* and tmpreg, eflreg, #1<<iBitNo1 */
2888 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2889
2890 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2891 if (iBitNo1 > iBitNo2)
2892 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2893 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2894 else
2895 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2896 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2897
2898 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2899
2900#else
2901# error "Port me"
2902#endif
2903
2904 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2905 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2906 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2907
2908 /* Free but don't flush the EFlags and tmp registers. */
2909 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2910 iemNativeRegFreeTmp(pReNative, idxEflReg);
2911
2912 /* Make a copy of the core state now as we start the if-block. */
2913 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
2914
2915 return off;
2916}
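
/*
 * Example mapping (for illustration; the actual users live in the instruction
 * emitters): the x86 'greater' condition ZF==0 && SF==OF, as tested by
 * JG/SETG/CMOVG, would be expressed as
 *
 *      IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF)
 *
 * while the inverted macro covers the complementary 'less or equal' case.
 */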
2917
2918
2919#define IEM_MC_IF_CX_IS_NZ() \
2920 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
2921 do {
2922
2923/** Emits code for IEM_MC_IF_CX_IS_NZ. */
2924DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2925{
2926 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2927
2928 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2929 kIemNativeGstRegUse_ReadOnly);
2930 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
2931 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2932
2933 iemNativeCondStartIfBlock(pReNative, off);
2934 return off;
2935}
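
/*
 * Note that only the low 16 bits of the shadowed RCX are tested here (the
 * UINT16_MAX mask), i.e. the check is equivalent to '(uint16_t)uRcx != 0',
 * matching the CX counter semantics of the 16-bit address-size variants.
 */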
2936
2937
2938#define IEM_MC_IF_ECX_IS_NZ() \
2939 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
2940 do {
2941
2942#define IEM_MC_IF_RCX_IS_NZ() \
2943 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
2944 do {
2945
2946/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
2947DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2948{
2949 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2950
2951 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2952 kIemNativeGstRegUse_ReadOnly);
2953 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
2954 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2955
2956 iemNativeCondStartIfBlock(pReNative, off);
2957 return off;
2958}
2959
2960
2961#define IEM_MC_IF_CX_IS_NOT_ONE() \
2962 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
2963 do {
2964
2965/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
2966DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2967{
2968 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2969
2970 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2971 kIemNativeGstRegUse_ReadOnly);
2972#ifdef RT_ARCH_AMD64
2973 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2974#else
2975 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2976 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
2977 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2978#endif
2979 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2980
2981 iemNativeCondStartIfBlock(pReNative, off);
2982 return off;
2983}
2984
2985
2986#define IEM_MC_IF_ECX_IS_NOT_ONE() \
2987 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
2988 do {
2989
2990#define IEM_MC_IF_RCX_IS_NOT_ONE() \
2991 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
2992 do {
2993
2994/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
2995DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2996{
2997 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2998
2999 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3000 kIemNativeGstRegUse_ReadOnly);
3001 if (f64Bit)
3002 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3003 else
3004 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3005 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3006
3007 iemNativeCondStartIfBlock(pReNative, off);
3008 return off;
3009}
3010
3011
3012#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3013 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3014 do {
3015
3016#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3017 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3018 do {
3019
3020/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3021 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3022DECL_INLINE_THROW(uint32_t)
3023iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3024{
3025 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3026 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3027
3028 /* We have to load both RCX and EFLAGS before we can start branching,
3029 otherwise we'll end up in the else-block with an inconsistent
3030 register allocator state.
3031 Doing EFLAGS first as it's more likely to be loaded, right? */
3032 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3033 kIemNativeGstRegUse_ReadOnly);
3034 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3035 kIemNativeGstRegUse_ReadOnly);
3036
3037 /** @todo we could reduce this to a single branch instruction by spending a
3038 * temporary register and some setnz stuff. Not sure if loops are
3039 * worth it. */
3040 /* Check CX. */
3041#ifdef RT_ARCH_AMD64
3042 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3043#else
3044 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3045 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3046 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3047#endif
3048
3049 /* Check the EFlags bit. */
3050 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3051 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3052 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3053 !fCheckIfSet /*fJmpIfSet*/);
3054
3055 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3056 iemNativeRegFreeTmp(pReNative, idxEflReg);
3057
3058 iemNativeCondStartIfBlock(pReNative, off);
3059 return off;
3060}
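
/*
 * C equivalent of the combined test above (illustrative sketch; 'uRcx' and
 * 'fEfl' stand for the shadowed RCX and EFLAGS values):
 *
 *      if ((uint16_t)uRcx != 1 && RT_BOOL(fEfl & fBitInEfl) == fCheckIfSet)
 *          // take the if-block, otherwise jump to the else/endif label.
 */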
3061
3062
3063#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3064 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3065 do {
3066
3067#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3068 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3069 do {
3070
3071#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3072 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3073 do {
3074
3075#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3076 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3077 do {
3078
3079/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3080 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3081 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3082 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3083DECL_INLINE_THROW(uint32_t)
3084iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3085 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3086{
3087 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3088 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3089
3090 /* We have to load both RCX and EFLAGS before we can start branching,
3091 otherwise we'll end up in the else-block with an inconsistent
3092 register allocator state.
3093 Doing EFLAGS first as it's more likely to be loaded, right? */
3094 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3095 kIemNativeGstRegUse_ReadOnly);
3096 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3097 kIemNativeGstRegUse_ReadOnly);
3098
3099 /** @todo we could reduce this to a single branch instruction by spending a
3100 * temporary register and some setnz stuff. Not sure if loops are
3101 * worth it. */
3102 /* Check RCX/ECX. */
3103 if (f64Bit)
3104 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3105 else
3106 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3107
3108 /* Check the EFlags bit. */
3109 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3110 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3111 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3112 !fCheckIfSet /*fJmpIfSet*/);
3113
3114 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3115 iemNativeRegFreeTmp(pReNative, idxEflReg);
3116
3117 iemNativeCondStartIfBlock(pReNative, off);
3118 return off;
3119}
3120
3121
3122#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3123 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3124 do {
3125
3126/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3127DECL_INLINE_THROW(uint32_t)
3128iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3129{
3130 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3131
3132 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3133 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3134 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3135 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3136
3137 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3138
3139 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3140
3141 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3142
3143 iemNativeCondStartIfBlock(pReNative, off);
3144 return off;
3145}
3146
3147
3148#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3149 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3150 do {
3151
3152/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3153DECL_INLINE_THROW(uint32_t)
3154iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3155{
3156 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3157 Assert(iGReg < 16);
3158
3159 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3160 kIemNativeGstRegUse_ReadOnly);
3161
3162 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3163
3164 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3165
3166 iemNativeCondStartIfBlock(pReNative, off);
3167 return off;
3168}
3169
3170
3171
3172/*********************************************************************************************************************************
3173* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3174*********************************************************************************************************************************/
3175
3176#define IEM_MC_NOREF(a_Name) \
3177 RT_NOREF_PV(a_Name)
3178
3179#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3180 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3181
3182#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3183 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3184
3185#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3186 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3187
3188#define IEM_MC_LOCAL(a_Type, a_Name) \
3189 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3190
3191#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3192 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3193
3194#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3195 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3196
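/*
 * Usage sketch (hypothetical MC fragment, for illustration only; the names
 * and the u32Imm value are made up): an MC block declares its arguments and
 * locals with the macros above, each yielding a packed variable index:
 *
 *      IEM_MC_ARG(uint32_t *,     pu32Dst,           0);
 *      IEM_MC_ARG_CONST(uint32_t, u32Src,  u32Imm,   1);
 *      IEM_MC_LOCAL(uint32_t,     u32Tmp);
 */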
3197
3198/**
3199 * Sets the host register for @a idxVar to @a idxReg.
3200 *
3201 * The register must not be allocated. Any guest register shadowing will be
3202 * implicitly dropped by this call.
3203 *
3204 * The variable must not have any register associated with it (causes
3205 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3206 * implied.
3207 *
3208 * @returns idxReg
3209 * @param pReNative The recompiler state.
3210 * @param idxVar The variable.
3211 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3212 * @param off For recording in debug info.
3213 *
3214 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3215 */
3216DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3217{
3218 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3219 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3220 Assert(!pVar->fRegAcquired);
3221 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3222 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3223 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3224
3225 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3226 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3227
3228 iemNativeVarSetKindToStack(pReNative, idxVar);
3229 pVar->idxReg = idxReg;
3230
3231 return idxReg;
3232}
3233
3234
3235/**
3236 * A convenient helper function.
3237 */
3238DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3239 uint8_t idxReg, uint32_t *poff)
3240{
3241 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3242 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3243 return idxReg;
3244}
3245
3246
3247/**
3248 * This is called by IEM_MC_END() to clean up all variables.
3249 */
3250DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3251{
3252 uint32_t const bmVars = pReNative->Core.bmVars;
3253 if (bmVars != 0)
3254 iemNativeVarFreeAllSlow(pReNative, bmVars);
3255 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3256 Assert(pReNative->Core.bmStack == 0);
3257}
3258
3259
3260#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3261
3262/**
3263 * This is called by IEM_MC_FREE_LOCAL.
3264 */
3265DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3266{
3267 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3268 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3269 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3270}
3271
3272
3273#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3274
3275/**
3276 * This is called by IEM_MC_FREE_ARG.
3277 */
3278DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3279{
3280 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3281 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3282 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3283}
3284
3285
3286#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3287
3288/**
3289 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3290 */
3291DECL_INLINE_THROW(uint32_t)
3292iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3293{
3294 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3295 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3296 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3297 Assert( pVarDst->cbVar == sizeof(uint16_t)
3298 || pVarDst->cbVar == sizeof(uint32_t));
3299
3300 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3301 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3302 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3303 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3304 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3305
3306 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3307
3308 /*
3309 * Special case for immediates.
3310 */
3311 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
3312 {
3313 switch (pVarDst->cbVar)
3314 {
3315 case sizeof(uint16_t):
3316 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
3317 break;
3318 case sizeof(uint32_t):
3319 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
3320 break;
3321 default: AssertFailed(); break;
3322 }
3323 }
3324 else
3325 {
3326 /*
3327 * The generic solution for now.
3328 */
3329 /** @todo optimize this by having the python script make sure the source
3330 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
3331 * statement. Then we could just transfer the register assignments. */
3332 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
3333 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
3334 switch (pVarDst->cbVar)
3335 {
3336 case sizeof(uint16_t):
3337 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
3338 break;
3339 case sizeof(uint32_t):
3340 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
3341 break;
3342 default: AssertFailed(); break;
3343 }
3344 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
3345 iemNativeVarRegisterRelease(pReNative, idxVarDst);
3346 }
3347 return off;
3348}
3349
3350
3351
3352/*********************************************************************************************************************************
3353* Emitters for IEM_MC_CALL_CIMPL_XXX *
3354*********************************************************************************************************************************/
3355
3356/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
3357DECL_INLINE_THROW(uint32_t)
3358iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
3359 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
3361{
3362 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
3363
3364#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3365 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
3366 when a call clobbers any of the relevant control registers. */
3367# if 1
3368 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
3369 {
3370 /* Likely as long as call+ret are done via cimpl. */
3371 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
3372 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
3373 }
3374 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
3375 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3376 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
3377 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3378 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
3379 else
3380 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3381 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3382 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3383
3384# else
3385 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
3386 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3387 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
3388 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3389 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
3390 || pfnCImpl == (uintptr_t)iemCImpl_callf
3391 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
3392 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
3393 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3394 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3395 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3396# endif
3397#endif
3398
3399 /*
3400 * Do all the call setup and cleanup.
3401 */
3402 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
3403
3404 /*
3405 * Load the two or three hidden arguments.
3406 */
3407#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3408 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3409 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3410 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
3411#else
3412 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3413 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
3414#endif
3415
3416 /*
3417 * Make the call and check the return code.
3418 *
3419 * Shadow PC copies are always flushed here, other stuff depends on flags.
3420 * Segment and general purpose registers are explicitly flushed via the
3421 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
3422 * macros.
3423 */
3424 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
3425#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3426 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3427#endif
3428 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3429 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
3430 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3431 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3432
3433 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3434}
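
/*
 * Net effect of the sequence above (illustrative sketch, ignoring the
 * Windows/AMD64 VBOXSTRICTRC special case where a hidden rcStrict pointer is
 * passed first): the generated code performs
 *
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, ...);
 *
 * and then lets iemNativeEmitCheckCallRetAndPassUp() route any non-zero
 * status to the TB exit path.
 */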
3435
3436
3437#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3438 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
3439
3440/** Emits code for IEM_MC_CALL_CIMPL_1. */
3441DECL_INLINE_THROW(uint32_t)
3442iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3443 uintptr_t pfnCImpl, uint8_t idxArg0)
3444{
3445 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3446 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
3447}
3448
3449
3450#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3451 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
3452
3453/** Emits code for IEM_MC_CALL_CIMPL_2. */
3454DECL_INLINE_THROW(uint32_t)
3455iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3456 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
3457{
3458 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3459 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3460 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
3461}
3462
3463
3464#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3465 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3466 (uintptr_t)a_pfnCImpl, a0, a1, a2)
3467
3468/** Emits code for IEM_MC_CALL_CIMPL_3. */
3469DECL_INLINE_THROW(uint32_t)
3470iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3471 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3472{
3473 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3474 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3475 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3476 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
3477}
3478
3479
3480#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
3481 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3482 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
3483
3484/** Emits code for IEM_MC_CALL_CIMPL_4. */
3485DECL_INLINE_THROW(uint32_t)
3486iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3487 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3488{
3489 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3490 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3491 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3492 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3493 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
3494}
3495
3496
3497#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
3498 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3499 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
3500
3501/** Emits code for IEM_MC_CALL_CIMPL_5. */
3502DECL_INLINE_THROW(uint32_t)
3503iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3504 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
3505{
3506 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3507 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3508 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3509 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3510 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
3511 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
3512}
3513
3514
3515/** Recompiler debugging: Flush guest register shadow copies. */
3516#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
3517
3518
3519
3520/*********************************************************************************************************************************
3521* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
3522*********************************************************************************************************************************/
3523
3524/**
3525 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
3526 */
3527DECL_INLINE_THROW(uint32_t)
3528iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3529 uintptr_t pfnAImpl, uint8_t cArgs)
3530{
3531 if (idxVarRc != UINT8_MAX)
3532 {
3533 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
3534 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
3535 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3536 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3537 }
3538
3539 /*
3540 * Do all the call setup and cleanup.
3541 *
3542 * It is only required to flush pending guest register writes in call volatile registers as
3543 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
3544 * access their parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
3545 * no matter the fFlushPendingWrites parameter.
3546 */
3547 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
3548
3549 /*
3550 * Make the call and update the return code variable if we've got one.
3551 */
3552 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
3553 if (idxVarRc != UINT8_MAX)
3554 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
3555
3556 return off;
3557}
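
/*
 * Resulting call shape for e.g. IEM_MC_CALL_AIMPL_2 (illustrative sketch):
 *
 *      [a_rc =] pfnAImpl(a0, a1);
 *
 * Unlike the CIMPL path there are no hidden pVCpu/cbInstr arguments and no
 * guest shadow flush mask; an optional return value is simply bound to
 * IEMNATIVE_CALL_RET_GREG via iemNativeVarRegisterSet() above.
 */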
3558
3559
3560
3561#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
3562 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
3563
3564#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
3565 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
3566
3567/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
3568DECL_INLINE_THROW(uint32_t)
3569iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
3570{
3571 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
3572}
3573
3574
3575#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
3576 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
3577
3578#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
3579 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
3580
3581/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
3582DECL_INLINE_THROW(uint32_t)
3583iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
3584{
3585 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3586 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
3587}
3588
3589
3590#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
3591 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
3592
3593#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
3594 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
3595
3596/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
3597DECL_INLINE_THROW(uint32_t)
3598iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3599 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
3600{
3601 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3602 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3603 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
3604}
3605
3606
3607#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
3608 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
3609
3610#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
3611 IEM_MC_LOCAL(a_rcType, a_rc); \
3612 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
3613
3614/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
3615DECL_INLINE_THROW(uint32_t)
3616iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3617 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3618{
3619 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3620 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3621 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3622 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
3623}
3624
3625
3626#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
3627 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3628
3629#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
3630 IEM_MC_LOCAL(a_rcType, a_rc); \
3631 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3632
3633/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
3634DECL_INLINE_THROW(uint32_t)
3635iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3636 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3637{
3638 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3639 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3640 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3641 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
3642 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
3643}
3644
3645
3646
3647/*********************************************************************************************************************************
3648* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
3649*********************************************************************************************************************************/
3650
3651#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
3652 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
3653
3654#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3655 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
3656
3657#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3658 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
3659
3660#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3661 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
3662
3663
3664/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
3665 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
3666DECL_INLINE_THROW(uint32_t)
3667iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbZeroExtended)
3668{
3669 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3670 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3671 Assert(iGRegEx < 20);
3672
3673 /* Same discussion as in iemNativeEmitFetchGregU16 */
3674 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3675 kIemNativeGstRegUse_ReadOnly);
3676
3677 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3678 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3679
3680 /* The value is zero-extended to the full 64-bit host register width. */
3681 if (iGRegEx < 16)
3682 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3683 else
3684 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3685
3686 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3687 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3688 return off;
3689}
3690
3691
3692#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3693 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
3694
3695#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3696 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
3697
3698#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3699 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
3700
3701/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
3702DECL_INLINE_THROW(uint32_t)
3703iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
3704{
3705 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3706 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3707 Assert(iGRegEx < 20);
3708
3709 /* Same discussion as in iemNativeEmitFetchGregU16 */
3710 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3711 kIemNativeGstRegUse_ReadOnly);
3712
3713 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3714 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3715
3716 if (iGRegEx < 16)
3717 {
3718 switch (cbSignExtended)
3719 {
3720 case sizeof(uint16_t):
3721 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3722 break;
3723 case sizeof(uint32_t):
3724 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3725 break;
3726 case sizeof(uint64_t):
3727 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3728 break;
3729 default: AssertFailed(); break;
3730 }
3731 }
3732 else
3733 {
3734 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3735 switch (cbSignExtended)
3736 {
3737 case sizeof(uint16_t):
3738 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3739 break;
3740 case sizeof(uint32_t):
3741 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3742 break;
3743 case sizeof(uint64_t):
3744 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3745 break;
3746 default: AssertFailed(); break;
3747 }
3748 }
3749
3750 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3751 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3752 return off;
3753}
3754
3755
3756
3757#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
3758 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
3759
3760#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
3761 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3762
3763#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
3764 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3765
3766/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
3767DECL_INLINE_THROW(uint32_t)
3768iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3769{
3770 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3771 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3772 Assert(iGReg < 16);
3773
3774 /*
3775 * We can either just load the low 16-bit of the GPR into a host register
3776 * for the variable, or we can do so via a shadow copy host register. The
3777 * latter will avoid having to reload it if it's being stored later, but
3778 * will waste a host register if it isn't touched again. Since we don't
3779 * know what's going to happen, we choose the latter for now.
3780 */
3781 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3782 kIemNativeGstRegUse_ReadOnly);
3783
3784 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3785 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3786 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3787 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3788
3789 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3790 return off;
3791}
3792
3793
3794#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
3795 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3796
3797#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
3798 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3799
3800/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
3801DECL_INLINE_THROW(uint32_t)
3802iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
3803{
3804 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3805 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3806 Assert(iGReg < 16);
3807
3808 /*
3809 * We can either just load the low 16-bit of the GPR into a host register
3810 * for the variable, or we can do so via a shadow copy host register. The
3811 * latter will avoid having to reload it if it's being stored later, but
3812 * will waste a host register if it isn't touched again. Since we don't
3813 * know what's going to happen, we choose the latter for now.
3814 */
3815 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3816 kIemNativeGstRegUse_ReadOnly);
3817
3818 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3819 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3820 if (cbSignExtended == sizeof(uint32_t))
3821 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3822 else
3823 {
3824 Assert(cbSignExtended == sizeof(uint64_t));
3825 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3826 }
3827 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3828
3829 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3830 return off;
3831}
3832
3833
3834#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
3835 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
3836
3837#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
3838 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
3839
3840/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
3841DECL_INLINE_THROW(uint32_t)
3842iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3843{
3844 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3845 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3846 Assert(iGReg < 16);
3847
3848 /*
3849 * We can either just load the low 32 bits of the GPR into a host register
3850 * for the variable, or we can do so via a shadow copy host register. The
3851 * latter will avoid having to reload it if it's being stored later, but
3852 * will waste a host register if it isn't touched again. Since we don't
3853 * know what's going to happen, we choose the latter for now.
3854 */
3855 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3856 kIemNativeGstRegUse_ReadOnly);
3857
3858 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3859 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3860 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3861 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3862
3863 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3864 return off;
3865}
3866
3867
3868#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
3869 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
3870
3871/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
3872DECL_INLINE_THROW(uint32_t)
3873iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3874{
3875 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3876 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3877 Assert(iGReg < 16);
3878
3879 /*
3880 * We can either just load the low 32-bit of the GPR into a host register
3881 * for the variable, or we can do so via a shadow copy host register. The
3882 * latter will avoid having to reload it if it's being stored later, but
3883 * will waste a host register if it isn't touched again. Since we don't
3884 * know what's going to happen, we choose the latter for now.
3885 */
3886 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3887 kIemNativeGstRegUse_ReadOnly);
3888
3889 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3890 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3891 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3892 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3893
3894 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3895 return off;
3896}
3897
3898
3899#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
3900 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3901
3902#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
3903 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3904
3905/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
3906 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
3907DECL_INLINE_THROW(uint32_t)
3908iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3909{
3910 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3911 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3912 Assert(iGReg < 16);
3913
3914 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3915 kIemNativeGstRegUse_ReadOnly);
3916
3917 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3918 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3919 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
3920 /** @todo name the register a shadow one already? */
3921 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3922
3923 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3924 return off;
3925}
3926
3927
3928#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3929#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
3930 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
3931
3932/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
3933DECL_INLINE_THROW(uint32_t)
3934iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
3935{
3936 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3937 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3938 Assert(iGRegLo < 16 && iGRegHi < 16);
3939
3940 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3941 kIemNativeGstRegUse_ReadOnly);
3942 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3943 kIemNativeGstRegUse_ReadOnly);
3944
3945 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3946 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
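    /* The low guest register is stored into 64-bit lane 0 of the 128-bit destination, the high one into lane 1. */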
3947 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
3948 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
3949
3950 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3951 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3952 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3953 return off;
3954}
3955#endif
3956
3957
3958/*********************************************************************************************************************************
3959* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
3960*********************************************************************************************************************************/
3961
3962#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
3963 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
3964
3965/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
3966DECL_INLINE_THROW(uint32_t)
3967iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
3968{
3969 Assert(iGRegEx < 20);
3970 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3971 kIemNativeGstRegUse_ForUpdate);
3972#ifdef RT_ARCH_AMD64
3973 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3974
3975 /* To the lowest byte of the register: mov r8, imm8 */
3976 if (iGRegEx < 16)
3977 {
3978 if (idxGstTmpReg >= 8)
3979 pbCodeBuf[off++] = X86_OP_REX_B;
3980 else if (idxGstTmpReg >= 4)
3981 pbCodeBuf[off++] = X86_OP_REX;
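    /* Note: the bare REX emitted for encodings 4-7 is what selects spl/bpl/sil/dil here;
       without any REX prefix those encodings would address ah/ch/dh/bh instead. */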
3982 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
3983 pbCodeBuf[off++] = u8Value;
3984 }
3985 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
3986 else if (idxGstTmpReg < 4)
3987 {
3988 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
3989 pbCodeBuf[off++] = u8Value;
3990 }
3991 else
3992 {
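    /* Host registers with encoding 4 and up have no high-byte alias (ah/ch/dh/bh only
       exist for encodings 0-3 and cannot be combined with a REX prefix), so rotate the
       64-bit register right by 8, write the immediate into the now-low byte and rotate back. */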
3993 /* ror reg64, 8 */
3994 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3995 pbCodeBuf[off++] = 0xc1;
3996 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3997 pbCodeBuf[off++] = 8;
3998
3999 /* mov reg8, imm8 */
4000 if (idxGstTmpReg >= 8)
4001 pbCodeBuf[off++] = X86_OP_REX_B;
4002 else if (idxGstTmpReg >= 4)
4003 pbCodeBuf[off++] = X86_OP_REX;
4004 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4005 pbCodeBuf[off++] = u8Value;
4006
4007 /* rol reg64, 8 */
4008 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4009 pbCodeBuf[off++] = 0xc1;
4010 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4011 pbCodeBuf[off++] = 8;
4012 }
4013
4014#elif defined(RT_ARCH_ARM64)
4015 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4016 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4017 if (iGRegEx < 16)
4018 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4019 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4020 else
4021 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4022 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4023 iemNativeRegFreeTmp(pReNative, idxImmReg);
4024
4025#else
4026# error "Port me!"
4027#endif
4028
4029 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4030
4031#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4032 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4033#endif
4034
4035 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4036 return off;
4037}
4038
4039
4040#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4041 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4042
4043/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4044DECL_INLINE_THROW(uint32_t)
4045iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4046{
4047 Assert(iGRegEx < 20);
4048 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4049
4050 /*
4051 * If it's a constant value (unlikely) we treat this as an
4052 * IEM_MC_STORE_GREG_U8_CONST statement.
4053 */
4054 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4055 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4056 { /* likely */ }
4057 else
4058 {
4059 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4060 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4061 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4062 }
4063
4064 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4065 kIemNativeGstRegUse_ForUpdate);
4066 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4067
4068#ifdef RT_ARCH_AMD64
4069 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4070 if (iGRegEx < 16)
4071 {
4072 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4073 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4074 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4075 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4076 pbCodeBuf[off++] = X86_OP_REX;
4077 pbCodeBuf[off++] = 0x8a;
4078 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4079 }
4080 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
4081 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4082 {
4083 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4084 pbCodeBuf[off++] = 0x8a;
4085 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4086 }
4087 else
4088 {
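    /* At least one of the two host registers cannot be used with the legacy high/low byte
       encodings here, so fall back to the same ror/mov/rol sequence as in
       iemNativeEmitStoreGregU8Const. */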
4089 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4090
4091 /* ror reg64, 8 */
4092 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4093 pbCodeBuf[off++] = 0xc1;
4094 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4095 pbCodeBuf[off++] = 8;
4096
4097 /* mov reg8, reg8(r/m) */
4098 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4099 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4100 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4101 pbCodeBuf[off++] = X86_OP_REX;
4102 pbCodeBuf[off++] = 0x8a;
4103 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4104
4105 /* rol reg64, 8 */
4106 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4107 pbCodeBuf[off++] = 0xc1;
4108 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4109 pbCodeBuf[off++] = 8;
4110 }
4111
4112#elif defined(RT_ARCH_ARM64)
4113 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4114 or
4115 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4116 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4117 if (iGRegEx < 16)
4118 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4119 else
4120 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4121
4122#else
4123# error "Port me!"
4124#endif
4125 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4126
4127 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4128
4129#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4130 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4131#endif
4132 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4133 return off;
4134}
4135
4136
4137
4138#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4139 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4140
4141/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4142DECL_INLINE_THROW(uint32_t)
4143iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4144{
4145 Assert(iGReg < 16);
4146 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4147 kIemNativeGstRegUse_ForUpdate);
4148#ifdef RT_ARCH_AMD64
4149 /* mov reg16, imm16 */
4150 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4151 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4152 if (idxGstTmpReg >= 8)
4153 pbCodeBuf[off++] = X86_OP_REX_B;
4154 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4155 pbCodeBuf[off++] = RT_BYTE1(uValue);
4156 pbCodeBuf[off++] = RT_BYTE2(uValue);
4157
4158#elif defined(RT_ARCH_ARM64)
4159 /* movk xdst, #uValue, lsl #0 */
4160 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4161 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
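    /* Note: movk only replaces bits 15:0 and leaves bits 63:16 untouched, which is exactly
       the 16-bit GPR store semantics needed here. */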
4162
4163#else
4164# error "Port me!"
4165#endif
4166
4167 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4168
4169#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4170 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4171#endif
4172 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4173 return off;
4174}
4175
4176
4177#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4178 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4179
4180/** Emits code for IEM_MC_STORE_GREG_U16. */
4181DECL_INLINE_THROW(uint32_t)
4182iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4183{
4184 Assert(iGReg < 16);
4185 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4186
4187 /*
4188 * If it's a constant value (unlikely) we treat this as an
4189 * IEM_MC_STORE_GREG_U16_CONST statement.
4190 */
4191 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4192 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4193 { /* likely */ }
4194 else
4195 {
4196 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4197 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4198 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4199 }
4200
4201 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4202 kIemNativeGstRegUse_ForUpdate);
4203
4204#ifdef RT_ARCH_AMD64
4205 /* mov reg16, reg16 or [mem16] */
4206 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4207 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4208 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4209 {
4210 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4211 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4212 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4213 pbCodeBuf[off++] = 0x8b;
4214 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4215 }
4216 else
4217 {
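    /* The value variable currently lives only in its stack slot, so read the 16-bit word
       straight from there via a BP-relative access. */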
4218 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4219 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4220 if (idxGstTmpReg >= 8)
4221 pbCodeBuf[off++] = X86_OP_REX_R;
4222 pbCodeBuf[off++] = 0x8b;
4223 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4224 }
4225
4226#elif defined(RT_ARCH_ARM64)
4227 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4228 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4229 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4230 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4231 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4232
4233#else
4234# error "Port me!"
4235#endif
4236
4237 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4238
4239#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4240 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4241#endif
4242 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4243 return off;
4244}
4245
4246
4247#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4248 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4249
4250/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4251DECL_INLINE_THROW(uint32_t)
4252iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4253{
4254 Assert(iGReg < 16);
4255 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4256 kIemNativeGstRegUse_ForFullWrite);
4257 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4258#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4259 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4260#endif
4261 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4262 return off;
4263}
4264
4265
4266#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
4267 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
4268
4269/** Emits code for IEM_MC_STORE_GREG_U32. */
4270DECL_INLINE_THROW(uint32_t)
4271iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4272{
4273 Assert(iGReg < 16);
4274 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4275
4276 /*
4277 * If it's a constant value (unlikely) we treat this as an
4278 * IEM_MC_STORE_GREG_U32_CONST statement.
4279 */
4280 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4281 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4282 { /* likely */ }
4283 else
4284 {
4285 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4286 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4287 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
4288 }
4289
4290 /*
4291 * For the rest we allocate a guest register for the variable and write
4292 * it to the CPUMCTX structure.
4293 */
4294 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4295#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4296 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4297#else
4298 RT_NOREF(idxVarReg);
4299#endif
4300#ifdef VBOX_STRICT
4301 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
4302#endif
4303 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4304 return off;
4305}
4306
4307
4308#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
4309 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
4310
4311/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
4312DECL_INLINE_THROW(uint32_t)
4313iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
4314{
4315 Assert(iGReg < 16);
4316 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4317 kIemNativeGstRegUse_ForFullWrite);
4318 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4319#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4320 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4321#endif
4322 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4323 return off;
4324}
4325
4326
4327#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
4328 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
4329
4330#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
4331 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
4332
4333/** Emits code for IEM_MC_STORE_GREG_U64. */
4334DECL_INLINE_THROW(uint32_t)
4335iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4336{
4337 Assert(iGReg < 16);
4338 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4339
4340 /*
4341 * If it's a constant value (unlikely) we treat this as an
4342 * IEM_MC_STORE_GREG_U64_CONST statement.
4343 */
4344 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4345 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4346 { /* likely */ }
4347 else
4348 {
4349 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4350 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4351 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
4352 }
4353
4354 /*
4355 * For the rest we allocate a guest register for the variable and write
4356 * it to the CPUMCTX structure.
4357 */
4358 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4359#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4360 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4361#else
4362 RT_NOREF(idxVarReg);
4363#endif
4364 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4365 return off;
4366}
4367
4368
4369#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
4370 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
4371
4372/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
4373DECL_INLINE_THROW(uint32_t)
4374iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
4375{
4376 Assert(iGReg < 16);
4377 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4378 kIemNativeGstRegUse_ForUpdate);
4379 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
4380#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4381 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4382#endif
4383 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4384 return off;
4385}
4386
4387
4388#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4389#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
4390 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
4391
4392/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
4393DECL_INLINE_THROW(uint32_t)
4394iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
4395{
4396 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4397 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4398 Assert(iGRegLo < 16 && iGRegHi < 16);
4399
4400 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4401 kIemNativeGstRegUse_ForFullWrite);
4402 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4403 kIemNativeGstRegUse_ForFullWrite);
4404
4405 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4406 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
4407 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
4408 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
4409
4410 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4411 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4412 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4413 return off;
4414}
4415#endif
4416
4417
4418/*********************************************************************************************************************************
4419* General purpose register manipulation (add, sub). *
4420*********************************************************************************************************************************/
4421
4422#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
4423 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
4424
4425/** Emits code for IEM_MC_ADD_GREG_U16. */
4426DECL_INLINE_THROW(uint32_t)
4427iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
4428{
4429 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4430 kIemNativeGstRegUse_ForUpdate);
4431
4432#ifdef RT_ARCH_AMD64
4433 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4434 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4435 if (idxGstTmpReg >= 8)
4436 pbCodeBuf[off++] = X86_OP_REX_B;
4437 if (uAddend == 1)
4438 {
4439 pbCodeBuf[off++] = 0xff; /* inc */
4440 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4441 }
4442 else
4443 {
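    /* add r/m16, imm16 - the operand-size prefix makes the immediate 16 bits wide, hence the explicit zero high byte below. */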
4444 pbCodeBuf[off++] = 0x81;
4445 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4446 pbCodeBuf[off++] = uAddend;
4447 pbCodeBuf[off++] = 0;
4448 }
4449
4450#else
4451 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4452 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4453
4454 /* add tmp, gstgrp, uAddend */
4455 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
4456
4457 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
4458 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4459
4460 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4461#endif
4462
4463 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4464
4465#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4466 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4467#endif
4468
4469 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4470 return off;
4471}
4472
4473
4474#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
4475 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4476
4477#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
4478 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4479
4480/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
4481DECL_INLINE_THROW(uint32_t)
4482iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
4483{
4484 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4485 kIemNativeGstRegUse_ForUpdate);
4486
4487#ifdef RT_ARCH_AMD64
4488 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4489 if (f64Bit)
4490 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4491 else if (idxGstTmpReg >= 8)
4492 pbCodeBuf[off++] = X86_OP_REX_B;
4493 if (uAddend == 1)
4494 {
4495 pbCodeBuf[off++] = 0xff; /* inc */
4496 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4497 }
4498 else if (uAddend < 128)
4499 {
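    /* The 0x83 form sign-extends its 8-bit immediate, so it is only usable while the addend stays below 128. */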
4500 pbCodeBuf[off++] = 0x83; /* add */
4501 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4502 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4503 }
4504 else
4505 {
4506 pbCodeBuf[off++] = 0x81; /* add */
4507 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4508 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4509 pbCodeBuf[off++] = 0;
4510 pbCodeBuf[off++] = 0;
4511 pbCodeBuf[off++] = 0;
4512 }
4513
4514#else
4515 /* add gstgrp, gstgrp, uAddend */
4516 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4517 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
4518
4519#endif
4520
4521 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4522
4523#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4524 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4525#endif
4526
4527 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4528 return off;
4529}
4530
4531
4532
4533#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4534 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4535
4536/** Emits code for IEM_MC_SUB_GREG_U16. */
4537DECL_INLINE_THROW(uint32_t)
4538iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4539{
4540 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4541 kIemNativeGstRegUse_ForUpdate);
4542
4543#ifdef RT_ARCH_AMD64
4544 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4545 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4546 if (idxGstTmpReg >= 8)
4547 pbCodeBuf[off++] = X86_OP_REX_B;
4548 if (uSubtrahend == 1)
4549 {
4550 pbCodeBuf[off++] = 0xff; /* dec */
4551 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4552 }
4553 else
4554 {
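    /* sub r/m16, imm16 - the operand-size prefix makes the immediate 16 bits wide, hence the explicit zero high byte below. */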
4555 pbCodeBuf[off++] = 0x81;
4556 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4557 pbCodeBuf[off++] = uSubtrahend;
4558 pbCodeBuf[off++] = 0;
4559 }
4560
4561#else
4562 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4563 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4564
4565 /* sub tmp, gstgrp, uSubtrahend */
4566 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4567
4568 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
4569 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4570
4571 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4572#endif
4573
4574 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4575
4576#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4577 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4578#endif
4579
4580 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4581 return off;
4582}
4583
4584
4585#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4586 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4587
4588#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4589 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4590
4591/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4592DECL_INLINE_THROW(uint32_t)
4593iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4594{
4595 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4596 kIemNativeGstRegUse_ForUpdate);
4597
4598#ifdef RT_ARCH_AMD64
4599 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4600 if (f64Bit)
4601 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4602 else if (idxGstTmpReg >= 8)
4603 pbCodeBuf[off++] = X86_OP_REX_B;
4604 if (uSubtrahend == 1)
4605 {
4606 pbCodeBuf[off++] = 0xff; /* dec */
4607 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4608 }
4609 else if (uSubtrahend < 128)
4610 {
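    /* The 0x83 form sign-extends its 8-bit immediate, so it is only usable while the subtrahend stays below 128. */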
4611 pbCodeBuf[off++] = 0x83; /* sub */
4612 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4613 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4614 }
4615 else
4616 {
4617 pbCodeBuf[off++] = 0x81; /* sub */
4618 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4619 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4620 pbCodeBuf[off++] = 0;
4621 pbCodeBuf[off++] = 0;
4622 pbCodeBuf[off++] = 0;
4623 }
4624
4625#else
4626 /* sub gstgrp, gstgrp, uSubtrahend */
4627 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4628 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4629
4630#endif
4631
4632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4633
4634#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4635 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4636#endif
4637
4638 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4639 return off;
4640}
4641
4642
4643#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
4644 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4645
4646#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
4647 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4648
4649#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
4650 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4651
4652#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
4653 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4654
4655/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
4656DECL_INLINE_THROW(uint32_t)
4657iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4658{
4659#ifdef VBOX_STRICT
4660 switch (cbMask)
4661 {
4662 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4663 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4664 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4665 case sizeof(uint64_t): break;
4666 default: AssertFailedBreak();
4667 }
4668#endif
4669
4670 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4671 kIemNativeGstRegUse_ForUpdate);
4672
4673 switch (cbMask)
4674 {
4675 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4676 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
4677 break;
4678 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
4679 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
4680 break;
4681 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4682 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4683 break;
4684 case sizeof(uint64_t):
4685 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
4686 break;
4687 default: AssertFailedBreak();
4688 }
4689
4690 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4691
4692#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4693 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4694#endif
4695
4696 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4697 return off;
4698}
4699
4700
4701#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
4702 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4703
4704#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
4705 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4706
4707#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
4708 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4709
4710#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
4711 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4712
4713/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
4714DECL_INLINE_THROW(uint32_t)
4715iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4716{
4717#ifdef VBOX_STRICT
4718 switch (cbMask)
4719 {
4720 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4721 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4722 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4723 case sizeof(uint64_t): break;
4724 default: AssertFailedBreak();
4725 }
4726#endif
4727
4728 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4729 kIemNativeGstRegUse_ForUpdate);
4730
4731 switch (cbMask)
4732 {
4733 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4734 case sizeof(uint16_t):
4735 case sizeof(uint64_t):
4736 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
4737 break;
4738 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4739 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4740 break;
4741 default: AssertFailedBreak();
4742 }
4743
4744 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4745
4746#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4747 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4748#endif
4749
4750 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4751 return off;
4752}
4753
4754
4755/*********************************************************************************************************************************
4756* Local/Argument variable manipulation (add, sub, and, or). *
4757*********************************************************************************************************************************/
4758
4759#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
4760 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4761
4762#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
4763 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4764
4765#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
4766 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4767
4768#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
4769 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4770
4771
4772#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
4773 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
4774
4775#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
4776 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
4777
4778#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
4779 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
4780
4781/** Emits code for AND'ing a local and a constant value. */
4782DECL_INLINE_THROW(uint32_t)
4783iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4784{
4785#ifdef VBOX_STRICT
4786 switch (cbMask)
4787 {
4788 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4789 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4790 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4791 case sizeof(uint64_t): break;
4792 default: AssertFailedBreak();
4793 }
4794#endif
4795
4796 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4797 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4798
4799 if (cbMask <= sizeof(uint32_t))
4800 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
4801 else
4802 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
4803
4804 iemNativeVarRegisterRelease(pReNative, idxVar);
4805 return off;
4806}
4807
4808
4809#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
4810 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4811
4812#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
4813 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4814
4815#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
4816 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4817
4818#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
4819 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4820
4821/** Emits code for OR'ing a local and a constant value. */
4822DECL_INLINE_THROW(uint32_t)
4823iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4824{
4825#ifdef VBOX_STRICT
4826 switch (cbMask)
4827 {
4828 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4829 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4830 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4831 case sizeof(uint64_t): break;
4832 default: AssertFailedBreak();
4833 }
4834#endif
4835
4836 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4837 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4838
4839 if (cbMask <= sizeof(uint32_t))
4840 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
4841 else
4842 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
4843
4844 iemNativeVarRegisterRelease(pReNative, idxVar);
4845 return off;
4846}
4847
4848
4849#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
4850 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
4851
4852#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
4853 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
4854
4855#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
4856 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
4857
4858/** Emits code for reversing the byte order in a local value. */
4859DECL_INLINE_THROW(uint32_t)
4860iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
4861{
4862 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4863 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4864
4865 switch (cbLocal)
4866 {
4867 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
4868 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
4869 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
4870 default: AssertFailedBreak();
4871 }
4872
4873 iemNativeVarRegisterRelease(pReNative, idxVar);
4874 return off;
4875}
4876
4877
4878#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
4879 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4880
4881#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
4882 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4883
4884#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
4885 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4886
4887/** Emits code for shifting left a local value. */
4888DECL_INLINE_THROW(uint32_t)
4889iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4890{
4891#ifdef VBOX_STRICT
4892 switch (cbLocal)
4893 {
4894 case sizeof(uint8_t): Assert(cShift < 8); break;
4895 case sizeof(uint16_t): Assert(cShift < 16); break;
4896 case sizeof(uint32_t): Assert(cShift < 32); break;
4897 case sizeof(uint64_t): Assert(cShift < 64); break;
4898 default: AssertFailedBreak();
4899 }
4900#endif
4901
4902 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4903 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4904
4905 if (cbLocal <= sizeof(uint32_t))
4906 {
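    /* The local is kept in a full host register, so for 8/16-bit locals the bits shifted
       above the variable's declared width must be masked off again after the 32-bit shift. */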
4907 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
4908 if (cbLocal < sizeof(uint32_t))
4909 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
4910 cbLocal == sizeof(uint16_t)
4911 ? UINT32_C(0xffff)
4912 : UINT32_C(0xff));
4913 }
4914 else
4915 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
4916
4917 iemNativeVarRegisterRelease(pReNative, idxVar);
4918 return off;
4919}
4920
4921
4922#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
4923 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4924
4925#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
4926 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4927
4928#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
4929 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4930
4931/** Emits code for arithmetically shifting right a local value. */
4932DECL_INLINE_THROW(uint32_t)
4933iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4934{
4935#ifdef VBOX_STRICT
4936 switch (cbLocal)
4937 {
4938 case sizeof(int8_t): Assert(cShift < 8); break;
4939 case sizeof(int16_t): Assert(cShift < 16); break;
4940 case sizeof(int32_t): Assert(cShift < 32); break;
4941 case sizeof(int64_t): Assert(cShift < 64); break;
4942 default: AssertFailedBreak();
4943 }
4944#endif
4945
4946 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4947 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4948
4949 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
4950 if (cbLocal == sizeof(uint8_t))
4951 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4952 else if (cbLocal == sizeof(uint16_t))
4953 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
4954
4955 if (cbLocal <= sizeof(uint32_t))
4956 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
4957 else
4958 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
4959
4960 iemNativeVarRegisterRelease(pReNative, idxVar);
4961 return off;
4962}
4963
4964
4965#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
4966 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
4967
4968#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
4969 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
4970
4971#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
4972 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
4973
4974/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
4975DECL_INLINE_THROW(uint32_t)
4976iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
4977{
4978 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
4979 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
4980 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4981 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4982
4983 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4984 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
4985
4986 /* Need to sign extend the value. */
4987 if (cbLocal <= sizeof(uint32_t))
4988 {
4989/** @todo ARM64: In case of boredom, the extended add instruction can do the
4990 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
4991 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4992
4993 switch (cbLocal)
4994 {
4995 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
4996 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
4997 default: AssertFailed();
4998 }
4999
5000 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5001 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5002 }
5003 else
5004 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5005
5006 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5007 iemNativeVarRegisterRelease(pReNative, idxVar);
5008 return off;
5009}
5010
5011
5012
5013/*********************************************************************************************************************************
5014* EFLAGS *
5015*********************************************************************************************************************************/
5016
5017#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5018# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5019#else
5020# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5021 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5022
5023DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5024{
5025 if (fEflOutput)
5026 {
5027 PVMCPUCC const pVCpu = pReNative->pVCpu;
5028# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5029 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5030 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5031 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5032# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5033 if (fEflOutput & (a_fEfl)) \
5034 { \
5035 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5036 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5037 else \
5038 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5039 } else do { } while (0)
5040# else
5041 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5042 IEMLIVENESSBIT const LivenessClobbered =
5043 {
5044 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5045 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5046 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5047 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5048 };
5049 IEMLIVENESSBIT const LivenessDelayable =
5050 {
5051 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5052 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5053 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5054 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5055 };
5056# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5057 if (fEflOutput & (a_fEfl)) \
5058 { \
5059 if (LivenessClobbered.a_fLivenessMember) \
5060 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5061 else if (LivenessDelayable.a_fLivenessMember) \
5062 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5063 else \
5064 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5065 } else do { } while (0)
5066# endif
5067 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5068 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5069 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5070 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5071 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5072 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5073 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5074# undef CHECK_FLAG_AND_UPDATE_STATS
5075 }
5076 RT_NOREF(fEflInput);
5077}
5078#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5079
5080#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5081#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5082 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5083
5084/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5085DECL_INLINE_THROW(uint32_t)
5086iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5087 uint32_t fEflInput, uint32_t fEflOutput)
5088{
5089 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5090 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5091 RT_NOREF(fEflInput, fEflOutput);
5092
5093#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5094# ifdef VBOX_STRICT
5095 if ( pReNative->idxCurCall != 0
5096 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5097 {
5098 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5099 uint32_t const fBoth = fEflInput | fEflOutput;
5100# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5101 AssertMsg( !(fBoth & (a_fElfConst)) \
5102 || (!(fEflInput & (a_fElfConst)) \
5103 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5104 : !(fEflOutput & (a_fElfConst)) \
5105 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5106 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5107 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5108 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5109 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5110 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5111 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5112 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5113 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5114 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5115# undef ASSERT_ONE_EFL
5116 }
5117# endif
5118#endif
5119
5120 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5121
5122 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5123 * the existing shadow copy. */
5124 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5125 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5126 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5127 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5128 return off;
5129}
5130
5131
5132
5133/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5134 * start using it with custom native code emission (inlining assembly
5135 * instruction helpers). */
5136#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5137#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5138 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5139 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5140
5141#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5142#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5143 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5144 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5145
5146/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5147DECL_INLINE_THROW(uint32_t)
5148iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5149 bool fUpdateSkipping)
5150{
5151 RT_NOREF(fEflOutput);
5152 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5153 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5154
5155#ifdef VBOX_STRICT
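    /* Strict sanity check on the value being committed: the reserved EFLAGS bit 1
       (X86_EFL_RA1_MASK) must be set and the reserved read-as-zero bits must be
       clear, otherwise the generated code hits a breakpoint (0x2001 / 0x2002) so a
       malformed value is caught immediately. */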
5156 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5157 uint32_t offFixup = off;
5158 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5159 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5160 iemNativeFixupFixedJump(pReNative, offFixup, off);
5161
5162 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5163 offFixup = off;
5164 off = iemNativeEmitJzToFixed(pReNative, off, off);
5165 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5166 iemNativeFixupFixedJump(pReNative, offFixup, off);
5167
5168 /** @todo validate that only bits in the fEflOutput mask changed. */
5169#endif
5170
5171#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5172 if (fUpdateSkipping)
5173 {
5174 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5175 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5176 else
5177 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5178 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5179 }
5180#else
5181 RT_NOREF_PV(fUpdateSkipping);
5182#endif
5183
5184 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5185 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5186 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5187 return off;
5188}
5189
5190
5191typedef enum IEMNATIVEMITEFLOP
5192{
5193 kIemNativeEmitEflOp_Invalid = 0,
5194 kIemNativeEmitEflOp_Set,
5195 kIemNativeEmitEflOp_Clear,
5196 kIemNativeEmitEflOp_Flip
5197} IEMNATIVEMITEFLOP;
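/* The operation is applied directly to the cached EFLAGS register: Set ORs the
   bit in, Clear ANDs its complement, Flip XORs it. Typical users are the simple
   flag instructions, e.g. CLC/STC/CMC working on X86_EFL_CF. */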
5198
5199#define IEM_MC_SET_EFL_BIT(a_fBit) \
5200 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5201
5202#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5203 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5204
5205#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5206 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5207
5208/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5209DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5210{
5211 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5212 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5213
5214 switch (enmOp)
5215 {
5216 case kIemNativeEmitEflOp_Set:
5217 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5218 break;
5219 case kIemNativeEmitEflOp_Clear:
5220 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5221 break;
5222 case kIemNativeEmitEflOp_Flip:
5223 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5224 break;
5225 default:
5226 AssertFailed();
5227 break;
5228 }
5229
5230 /** @todo No delayed writeback for EFLAGS right now. */
5231 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5232
5233 /* Free but don't flush the EFLAGS register. */
5234 iemNativeRegFreeTmp(pReNative, idxEflReg);
5235
5236 return off;
5237}
5238
5239
5240/*********************************************************************************************************************************
5241* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5242*********************************************************************************************************************************/
5243
5244#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5245 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5246
5247#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5248 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5249
5250#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5251 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5252
5253
5254/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5255 * IEM_MC_FETCH_SREG_ZX_U64. */
5256DECL_INLINE_THROW(uint32_t)
5257iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5258{
5259 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5260 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
5261 Assert(iSReg < X86_SREG_COUNT);
5262
5263 /*
5264 * For now, we will not create a shadow copy of a selector. The rationale
5265 * is that since we do not recompile the popping and loading of segment
5266 * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
5267 * pushing and moving to registers, there is only a small chance that the
5268 * shadow copy will be accessed again before the register is reloaded. One
5269 * scenario would be nested calls in 16-bit code, but I doubt it's worth
5270 * the extra register pressure atm.
5271 *
5272 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
5273 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
5274 * store scenario covered at present (r160730).
5275 */
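    /* Note that the same zero-extending 16-bit load below serves IEM_MC_FETCH_SREG_U16
       as well as the ZX_U32/ZX_U64 variants, since the selector value ends up zero
       extended in the host register either way. */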
5276 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5277 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
5278 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
5279 iemNativeVarRegisterRelease(pReNative, idxDstVar);
5280 return off;
5281}
5282
5283
5284
5285/*********************************************************************************************************************************
5286* Register references. *
5287*********************************************************************************************************************************/
5288
5289#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
5290 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
5291
5292#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
5293 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
5294
5295/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
5296DECL_INLINE_THROW(uint32_t)
5297iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
5298{
5299 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
5300 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5301 Assert(iGRegEx < 20);
5302
5303 if (iGRegEx < 16)
5304 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5305 else
5306 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
5307
5308 /* If we've delayed writing back the register value, flush it now. */
5309 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5310
5311 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5312 if (!fConst)
5313 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
5314
5315 return off;
5316}
5317
5318#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
5319 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
5320
5321#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
5322 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
5323
5324#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
5325 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
5326
5327#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
5328 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
5329
5330#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
5331 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
5332
5333#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
5334 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
5335
5336#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
5337 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
5338
5339#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
5340 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
5341
5342#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
5343 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
5344
5345#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
5346 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
5347
5348/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
5349DECL_INLINE_THROW(uint32_t)
5350iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
5351{
5352 Assert(iGReg < 16);
5353 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
5354 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5355
5356 /* If we've delayed writing back the register value, flush it now. */
5357 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
5358
5359 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5360 if (!fConst)
5361 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
5362
5363 return off;
5364}
5365
5366
5367#undef IEM_MC_REF_EFLAGS /* should not be used. */
5368#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
5369 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5370 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
5371
5372/** Handles IEM_MC_REF_EFLAGS. */
5373DECL_INLINE_THROW(uint32_t)
5374iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
5375{
5376 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
5377 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5378
5379#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5380 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5381
5382 /* Updating the skipping according to the outputs is a little early, but
5383 we don't have any other hooks for references atm. */
5384 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5385 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5386 else if (fEflOutput & X86_EFL_STATUS_BITS)
5387 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5388 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5389#else
5390 RT_NOREF(fEflInput, fEflOutput);
5391#endif
5392
5393 /* If we've delayed writing back the register value, flush it now. */
5394 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
5395
5396 /* If there is a shadow copy of guest EFLAGS, flush it now. */
5397 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
5398
5399 return off;
5400}
5401
5402
5403/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
5404 * different code from the threaded recompiler, maybe it would be helpful. For now
5405 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
5406#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
5407
5408
5409#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
5410 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
5411
5412#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
5413 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
5414
5415#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
5416 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
5417
5418#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5419/* Just being paranoid here. */
5420# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
5421AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
5422AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
5423AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
5424AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
5425# endif
5426AssertCompileMemberOffset(X86XMMREG, au64, 0);
5427AssertCompileMemberOffset(X86XMMREG, au32, 0);
5428AssertCompileMemberOffset(X86XMMREG, ar64, 0);
5429AssertCompileMemberOffset(X86XMMREG, ar32, 0);
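/* All these views start at offset zero, which is what allows the U32/U64/R32/R64
   reference macros below to hand out the very same XMM register reference no matter
   which pointer type the instruction helper expects. */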
5430
5431# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
5432 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
5433# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
5434 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
5435# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
5436 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
5437# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
5438 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
5439#endif
5440
5441/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
5442DECL_INLINE_THROW(uint32_t)
5443iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
5444{
5445 Assert(iXReg < 16);
5446 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
5447 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5448
5449 /* If we've delayed writing back the register value, flush it now. */
5450 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
5451
5452#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5453 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5454 if (!fConst)
5455 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
5456#else
5457 RT_NOREF(fConst);
5458#endif
5459
5460 return off;
5461}
5462
5463
5464
5465/*********************************************************************************************************************************
5466* Effective Address Calculation *
5467*********************************************************************************************************************************/
5468#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
5469 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
5470
5471/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
5472 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
5473DECL_INLINE_THROW(uint32_t)
5474iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5475 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
5476{
5477 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5478
5479 /*
5480 * Handle the disp16 form with no registers first.
5481 *
5482 * Convert to an immediate value, as that'll delay the register allocation
5483 * and assignment till the memory access / call / whatever and we can use
5484 * a more appropriate register (or none at all).
5485 */
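    /* Example: 'mov ax, [0x1234]' encodes as mod=0, rm=6 plus a disp16 of 0x1234,
       so the result variable simply becomes the constant 0x1234 here. */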
5486 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
5487 {
5488 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
5489 return off;
5490 }
5491
5492 /* Determine the displacement. */
5493 uint16_t u16EffAddr;
5494 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5495 {
5496 case 0: u16EffAddr = 0; break;
5497 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
5498 case 2: u16EffAddr = u16Disp; break;
5499 default: AssertFailedStmt(u16EffAddr = 0);
5500 }
5501
5502 /* Determine the registers involved. */
5503 uint8_t idxGstRegBase;
5504 uint8_t idxGstRegIndex;
5505 switch (bRm & X86_MODRM_RM_MASK)
5506 {
5507 case 0:
5508 idxGstRegBase = X86_GREG_xBX;
5509 idxGstRegIndex = X86_GREG_xSI;
5510 break;
5511 case 1:
5512 idxGstRegBase = X86_GREG_xBX;
5513 idxGstRegIndex = X86_GREG_xDI;
5514 break;
5515 case 2:
5516 idxGstRegBase = X86_GREG_xBP;
5517 idxGstRegIndex = X86_GREG_xSI;
5518 break;
5519 case 3:
5520 idxGstRegBase = X86_GREG_xBP;
5521 idxGstRegIndex = X86_GREG_xDI;
5522 break;
5523 case 4:
5524 idxGstRegBase = X86_GREG_xSI;
5525 idxGstRegIndex = UINT8_MAX;
5526 break;
5527 case 5:
5528 idxGstRegBase = X86_GREG_xDI;
5529 idxGstRegIndex = UINT8_MAX;
5530 break;
5531 case 6:
5532 idxGstRegBase = X86_GREG_xBP;
5533 idxGstRegIndex = UINT8_MAX;
5534 break;
5535#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
5536 default:
5537#endif
5538 case 7:
5539 idxGstRegBase = X86_GREG_xBX;
5540 idxGstRegIndex = UINT8_MAX;
5541 break;
5542 }
5543
5544 /*
5545 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
5546 */
5547 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5548 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5549 kIemNativeGstRegUse_ReadOnly);
5550 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
5551 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5552 kIemNativeGstRegUse_ReadOnly)
5553 : UINT8_MAX;
5554#ifdef RT_ARCH_AMD64
5555 if (idxRegIndex == UINT8_MAX)
5556 {
5557 if (u16EffAddr == 0)
5558 {
5559 /* movzx ret, base */
5560 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
5561 }
5562 else
5563 {
5564 /* lea ret32, [base64 + disp32] */
5565 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5566 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5567 if (idxRegRet >= 8 || idxRegBase >= 8)
5568 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5569 pbCodeBuf[off++] = 0x8d;
5570 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5571 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
5572 else
5573 {
5574 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
5575 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5576 }
5577 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5578 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5579 pbCodeBuf[off++] = 0;
5580 pbCodeBuf[off++] = 0;
5581 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5582
5583 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5584 }
5585 }
5586 else
5587 {
5588 /* lea ret32, [index64 + base64 (+ disp32)] */
5589 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5590 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5591 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5592 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5593 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5594 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5595 pbCodeBuf[off++] = 0x8d;
5596 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
5597 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5598 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
5599 if (bMod == X86_MOD_MEM4)
5600 {
5601 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5602 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5603 pbCodeBuf[off++] = 0;
5604 pbCodeBuf[off++] = 0;
5605 }
5606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5607 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5608 }
5609
5610#elif defined(RT_ARCH_ARM64)
5611 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5612 if (u16EffAddr == 0)
5613 {
5614 if (idxRegIndex == UINT8_MAX)
5615 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
5616 else
5617 {
5618 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
5619 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5620 }
5621 }
5622 else
5623 {
5624 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
5625 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
5626 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
5627 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5628 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
5629 else
5630 {
5631 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
5632 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5633 }
5634 if (idxRegIndex != UINT8_MAX)
5635 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
5636 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5637 }
5638
5639#else
5640# error "port me"
5641#endif
5642
5643 if (idxRegIndex != UINT8_MAX)
5644 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5645 iemNativeRegFreeTmp(pReNative, idxRegBase);
5646 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5647 return off;
5648}
5649
5650
5651#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
5652 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
5653
5654/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
5655 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
5656DECL_INLINE_THROW(uint32_t)
5657iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5658 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
5659{
5660 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5661
5662 /*
5663 * Handle the disp32 form with no registers first.
5664 *
5665 * Convert to an immediate value, as that'll delay the register allocation
5666 * and assignment till the memory access / call / whatever and we can use
5667 * a more appropriate register (or none at all).
5668 */
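    /* Example: 'mov eax, [0x12345678]' encodes as mod=0, rm=5 plus a disp32, so the
       result variable simply becomes the constant 0x12345678 here. */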
5669 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5670 {
5671 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
5672 return off;
5673 }
5674
5675 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
5676 uint32_t u32EffAddr = 0;
5677 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5678 {
5679 case 0: break;
5680 case 1: u32EffAddr = (int8_t)u32Disp; break;
5681 case 2: u32EffAddr = u32Disp; break;
5682 default: AssertFailed();
5683 }
5684
5685 /* Get the register (or SIB) value. */
5686 uint8_t idxGstRegBase = UINT8_MAX;
5687 uint8_t idxGstRegIndex = UINT8_MAX;
5688 uint8_t cShiftIndex = 0;
5689 switch (bRm & X86_MODRM_RM_MASK)
5690 {
5691 case 0: idxGstRegBase = X86_GREG_xAX; break;
5692 case 1: idxGstRegBase = X86_GREG_xCX; break;
5693 case 2: idxGstRegBase = X86_GREG_xDX; break;
5694 case 3: idxGstRegBase = X86_GREG_xBX; break;
5695 case 4: /* SIB */
5696 {
5697 /* index w/ scaling. */
5698 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5699 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5700 {
5701 case 0: idxGstRegIndex = X86_GREG_xAX; break;
5702 case 1: idxGstRegIndex = X86_GREG_xCX; break;
5703 case 2: idxGstRegIndex = X86_GREG_xDX; break;
5704 case 3: idxGstRegIndex = X86_GREG_xBX; break;
5705 case 4: cShiftIndex = 0; /*no index*/ break;
5706 case 5: idxGstRegIndex = X86_GREG_xBP; break;
5707 case 6: idxGstRegIndex = X86_GREG_xSI; break;
5708 case 7: idxGstRegIndex = X86_GREG_xDI; break;
5709 }
5710
5711 /* base */
5712 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
5713 {
5714 case 0: idxGstRegBase = X86_GREG_xAX; break;
5715 case 1: idxGstRegBase = X86_GREG_xCX; break;
5716 case 2: idxGstRegBase = X86_GREG_xDX; break;
5717 case 3: idxGstRegBase = X86_GREG_xBX; break;
5718 case 4:
5719 idxGstRegBase = X86_GREG_xSP;
5720 u32EffAddr += uSibAndRspOffset >> 8;
5721 break;
5722 case 5:
5723 if ((bRm & X86_MODRM_MOD_MASK) != 0)
5724 idxGstRegBase = X86_GREG_xBP;
5725 else
5726 {
5727 Assert(u32EffAddr == 0);
5728 u32EffAddr = u32Disp;
5729 }
5730 break;
5731 case 6: idxGstRegBase = X86_GREG_xSI; break;
5732 case 7: idxGstRegBase = X86_GREG_xDI; break;
5733 }
5734 break;
5735 }
5736 case 5: idxGstRegBase = X86_GREG_xBP; break;
5737 case 6: idxGstRegBase = X86_GREG_xSI; break;
5738 case 7: idxGstRegBase = X86_GREG_xDI; break;
5739 }
5740
5741 /*
5742 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
5743 * the start of the function.
5744 */
5745 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5746 {
5747 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
5748 return off;
5749 }
5750
5751 /*
5752 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5753 */
5754 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5755 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5756 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5757 kIemNativeGstRegUse_ReadOnly);
5758 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5759 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5760 kIemNativeGstRegUse_ReadOnly);
5761
5762 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5763 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5764 {
5765 idxRegBase = idxRegIndex;
5766 idxRegIndex = UINT8_MAX;
5767 }
5768
5769#ifdef RT_ARCH_AMD64
5770 if (idxRegIndex == UINT8_MAX)
5771 {
5772 if (u32EffAddr == 0)
5773 {
5774 /* mov ret, base */
5775 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5776 }
5777 else
5778 {
5779 /* lea ret32, [base64 + disp32] */
5780 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5781 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5782 if (idxRegRet >= 8 || idxRegBase >= 8)
5783 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5784 pbCodeBuf[off++] = 0x8d;
5785 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5786 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5787 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5788 else
5789 {
5790 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5791 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5792 }
5793 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5794 if (bMod == X86_MOD_MEM4)
5795 {
5796 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5797 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5798 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5799 }
5800 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5801 }
5802 }
5803 else
5804 {
5805 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5806 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5807 if (idxRegBase == UINT8_MAX)
5808 {
5809 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
5810 if (idxRegRet >= 8 || idxRegIndex >= 8)
5811 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5812 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5813 pbCodeBuf[off++] = 0x8d;
5814 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5815 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5816 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5817 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5818 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5819 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5820 }
5821 else
5822 {
5823 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5824 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5825 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5826 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5827 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5828 pbCodeBuf[off++] = 0x8d;
5829 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5830 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5831 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5832 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5833 if (bMod != X86_MOD_MEM0)
5834 {
5835 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5836 if (bMod == X86_MOD_MEM4)
5837 {
5838 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5839 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5840 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5841 }
5842 }
5843 }
5844 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5845 }
5846
5847#elif defined(RT_ARCH_ARM64)
5848 if (u32EffAddr == 0)
5849 {
5850 if (idxRegIndex == UINT8_MAX)
5851 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5852 else if (idxRegBase == UINT8_MAX)
5853 {
5854 if (cShiftIndex == 0)
5855 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
5856 else
5857 {
5858 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5859 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
5860 }
5861 }
5862 else
5863 {
5864 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5865 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5866 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5867 }
5868 }
5869 else
5870 {
5871 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
5872 {
5873 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5874 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
5875 }
5876 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
5877 {
5878 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5879 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5880 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
5881 }
5882 else
5883 {
5884 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
5885 if (idxRegBase != UINT8_MAX)
5886 {
5887 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5888 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5889 }
5890 }
5891 if (idxRegIndex != UINT8_MAX)
5892 {
5893 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5894 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5895 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5896 }
5897 }
5898
5899#else
5900# error "port me"
5901#endif
5902
5903 if (idxRegIndex != UINT8_MAX)
5904 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5905 if (idxRegBase != UINT8_MAX)
5906 iemNativeRegFreeTmp(pReNative, idxRegBase);
5907 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5908 return off;
5909}
5910
5911
5912#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5913 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5914 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5915
5916#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5917 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5918 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5919
5920#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5921 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5922 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
5923
5924/**
5925 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
5926 *
5927 * @returns New off.
5928 * @param pReNative The native recompile state.
5929 * @param off The current code buffer offset.
5930 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
5931 * bit 4 to REX.X. The two bits are part of the
5932 * REG sub-field, which isn't needed in this
5933 * function.
5934 * @param uSibAndRspOffset Two parts:
5935 * - The first 8 bits make up the SIB byte.
5936 * - The next 8 bits are the fixed RSP/ESP offset
5937 * in case of a pop [xSP].
5938 * @param u32Disp The displacement byte/word/dword, if any.
5939 * @param cbInstr The size of the fully decoded instruction. Used
5940 * for RIP relative addressing.
5941 * @param idxVarRet The result variable number.
5942 * @param f64Bit Whether to use a 64-bit or 32-bit address size
5943 * when calculating the address.
5944 *
5945 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
5946 */
5947DECL_INLINE_THROW(uint32_t)
5948iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
5949 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
5950{
5951 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5952
5953 /*
5954 * Special case the rip + disp32 form first.
5955 */
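    /* In 64-bit mode mod=0, rm=5 is RIP relative addressing: the effective address is
       the RIP of the *next* instruction plus disp32, which is why cbInstr is added to
       the displacement below. */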
5956 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5957 {
5958#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5959 /* Need to take the current PC offset into account for the displacement; no need to flush here,
5960 * as the PC is only read and no branching or helper calls are involved. */
5961 u32Disp += pReNative->Core.offPc;
5962#endif
5963
5964 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5965 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
5966 kIemNativeGstRegUse_ReadOnly);
5967#ifdef RT_ARCH_AMD64
5968 if (f64Bit)
5969 {
5970 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
5971 if ((int32_t)offFinalDisp == offFinalDisp)
5972 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
5973 else
5974 {
5975 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
5976 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
5977 }
5978 }
5979 else
5980 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
5981
5982#elif defined(RT_ARCH_ARM64)
5983 if (f64Bit)
5984 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5985 (int64_t)(int32_t)u32Disp + cbInstr);
5986 else
5987 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5988 (int32_t)u32Disp + cbInstr);
5989
5990#else
5991# error "Port me!"
5992#endif
5993 iemNativeRegFreeTmp(pReNative, idxRegPc);
5994 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5995 return off;
5996 }
5997
5998 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
5999 int64_t i64EffAddr = 0;
6000 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6001 {
6002 case 0: break;
6003 case 1: i64EffAddr = (int8_t)u32Disp; break;
6004 case 2: i64EffAddr = (int32_t)u32Disp; break;
6005 default: AssertFailed();
6006 }
6007
6008 /* Get the register (or SIB) value. */
6009 uint8_t idxGstRegBase = UINT8_MAX;
6010 uint8_t idxGstRegIndex = UINT8_MAX;
6011 uint8_t cShiftIndex = 0;
6012 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6013 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6014 else /* SIB: */
6015 {
6016 /* index w/ scaling. */
6017 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6018 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6019 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6020 if (idxGstRegIndex == 4)
6021 {
6022 /* no index */
6023 cShiftIndex = 0;
6024 idxGstRegIndex = UINT8_MAX;
6025 }
6026
6027 /* base */
6028 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6029 if (idxGstRegBase == 4)
6030 {
6031 /* pop [rsp] hack */
6032 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6033 }
6034 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6035 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6036 {
6037 /* mod=0 and base=5 -> disp32, no base reg. */
6038 Assert(i64EffAddr == 0);
6039 i64EffAddr = (int32_t)u32Disp;
6040 idxGstRegBase = UINT8_MAX;
6041 }
6042 }
6043
6044 /*
6045 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6046 * the start of the function.
6047 */
6048 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6049 {
6050 if (f64Bit)
6051 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6052 else
6053 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6054 return off;
6055 }
6056
6057 /*
6058 * Now emit code that calculates:
6059 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6060 * or if !f64Bit:
6061 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6062 */
6063 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6064 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6065 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6066 kIemNativeGstRegUse_ReadOnly);
6067 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6068 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6069 kIemNativeGstRegUse_ReadOnly);
6070
6071 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6072 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6073 {
6074 idxRegBase = idxRegIndex;
6075 idxRegIndex = UINT8_MAX;
6076 }
6077
6078#ifdef RT_ARCH_AMD64
6079 uint8_t bFinalAdj;
6080 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6081 bFinalAdj = 0; /* likely */
6082 else
6083 {
6084 /* pop [rsp] with a problematic disp32 value. Split out the
6085 RSP offset and add it separately afterwards (bFinalAdj). */
6086 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
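        /* Example: 'pop qword [rsp+0x7fffffff]' - the implicit RSP adjustment carried in
           uSibAndRspOffset (presumably 8 for a qword pop) pushes the total past what a
           signed 32-bit LEA displacement can encode, so it is added back with a separate
           ADD below instead. */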
6087 Assert(idxGstRegBase == X86_GREG_xSP);
6088 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6089 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6090 Assert(bFinalAdj != 0);
6091 i64EffAddr -= bFinalAdj;
6092 Assert((int32_t)i64EffAddr == i64EffAddr);
6093 }
6094 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6095//pReNative->pInstrBuf[off++] = 0xcc;
6096
6097 if (idxRegIndex == UINT8_MAX)
6098 {
6099 if (u32EffAddr == 0)
6100 {
6101 /* mov ret, base */
6102 if (f64Bit)
6103 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6104 else
6105 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6106 }
6107 else
6108 {
6109 /* lea ret, [base + disp32] */
6110 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6111 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6112 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6113 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6114 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6115 | (f64Bit ? X86_OP_REX_W : 0);
6116 pbCodeBuf[off++] = 0x8d;
6117 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6118 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6119 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6120 else
6121 {
6122 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6123 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6124 }
6125 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6126 if (bMod == X86_MOD_MEM4)
6127 {
6128 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6129 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6130 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6131 }
6132 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6133 }
6134 }
6135 else
6136 {
6137 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6138 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6139 if (idxRegBase == UINT8_MAX)
6140 {
6141 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6142 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6143 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6144 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6145 | (f64Bit ? X86_OP_REX_W : 0);
6146 pbCodeBuf[off++] = 0x8d;
6147 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6148 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6149 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6150 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6151 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6152 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6153 }
6154 else
6155 {
6156 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6157 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6158 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6159 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6160 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6161 | (f64Bit ? X86_OP_REX_W : 0);
6162 pbCodeBuf[off++] = 0x8d;
6163 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6164 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6165 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6166 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6167 if (bMod != X86_MOD_MEM0)
6168 {
6169 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6170 if (bMod == X86_MOD_MEM4)
6171 {
6172 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6173 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6174 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6175 }
6176 }
6177 }
6178 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6179 }
6180
6181 if (!bFinalAdj)
6182 { /* likely */ }
6183 else
6184 {
6185 Assert(f64Bit);
6186 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6187 }
6188
6189#elif defined(RT_ARCH_ARM64)
6190 if (i64EffAddr == 0)
6191 {
6192 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6193 if (idxRegIndex == UINT8_MAX)
6194 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6195 else if (idxRegBase != UINT8_MAX)
6196 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6197 f64Bit, false /*fSetFlags*/, cShiftIndex);
6198 else
6199 {
6200 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6201 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6202 }
6203 }
6204 else
6205 {
6206 if (f64Bit)
6207 { /* likely */ }
6208 else
6209 i64EffAddr = (int32_t)i64EffAddr;
6210
6211 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6212 {
6213 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6214 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6215 }
6216 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6217 {
6218 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6219 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6220 }
6221 else
6222 {
6223 if (f64Bit)
6224 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6225 else
6226 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6227 if (idxRegBase != UINT8_MAX)
6228 {
6229 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6230 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6231 }
6232 }
6233 if (idxRegIndex != UINT8_MAX)
6234 {
6235 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6236 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6237 f64Bit, false /*fSetFlags*/, cShiftIndex);
6238 }
6239 }
6240
6241#else
6242# error "port me"
6243#endif
6244
6245 if (idxRegIndex != UINT8_MAX)
6246 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6247 if (idxRegBase != UINT8_MAX)
6248 iemNativeRegFreeTmp(pReNative, idxRegBase);
6249 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6250 return off;
6251}
6252
6253
6254/*********************************************************************************************************************************
6255* Memory fetches and stores common *
6256*********************************************************************************************************************************/
6257
6258typedef enum IEMNATIVEMITMEMOP
6259{
6260 kIemNativeEmitMemOp_Store = 0,
6261 kIemNativeEmitMemOp_Fetch,
6262 kIemNativeEmitMemOp_Fetch_Zx_U16,
6263 kIemNativeEmitMemOp_Fetch_Zx_U32,
6264 kIemNativeEmitMemOp_Fetch_Zx_U64,
6265 kIemNativeEmitMemOp_Fetch_Sx_U16,
6266 kIemNativeEmitMemOp_Fetch_Sx_U32,
6267 kIemNativeEmitMemOp_Fetch_Sx_U64
6268} IEMNATIVEMITMEMOP;
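/* The Zx/Sx variants fetch a narrower memory operand and zero/sign extend it to the
   destination width. As the strict asserts below show, the zero extending forms reuse
   the plain fetch helpers (their return value is already zero extended), while the sign
   extending forms require dedicated _Sx_ helpers. */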
6269
6270/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
6271 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
6272 * (with iSegReg = UINT8_MAX). */
6273DECL_INLINE_THROW(uint32_t)
6274iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
6275 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
6276 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
6277{
6278 /*
6279 * Assert sanity.
6280 */
6281 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6282 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6283 Assert( enmOp != kIemNativeEmitMemOp_Store
6284 || pVarValue->enmKind == kIemNativeVarKind_Immediate
6285 || pVarValue->enmKind == kIemNativeVarKind_Stack);
6286 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6287 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6288 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6289 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6290 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6291 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6292#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6293 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
6294 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
6295#else
6296 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
6297#endif
6298 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6299#ifdef VBOX_STRICT
6300 if (iSegReg == UINT8_MAX)
6301 {
6302 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6303 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6304 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6305 switch (cbMem)
6306 {
6307 case 1:
6308 Assert( pfnFunction
6309 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
6310 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6311 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6312 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6313 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6314 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
6315 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
6316 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
6317 : UINT64_C(0xc000b000a0009000) ));
6318 break;
6319 case 2:
6320 Assert( pfnFunction
6321 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
6322 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6323 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6324 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6325 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
6326 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
6327 : UINT64_C(0xc000b000a0009000) ));
6328 break;
6329 case 4:
6330 Assert( pfnFunction
6331 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
6332 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6333 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6334 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
6335 : UINT64_C(0xc000b000a0009000) ));
6336 break;
6337 case 8:
6338 Assert( pfnFunction
6339 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
6340 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
6341 : UINT64_C(0xc000b000a0009000) ));
6342 break;
6343#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6344 case sizeof(RTUINT128U):
6345 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6346 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
6347 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6348 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
6349 || ( enmOp == kIemNativeEmitMemOp_Store
6350 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6351 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
6352 break;
6353 case sizeof(RTUINT256U):
6354 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6355 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
6356 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
6357 || ( enmOp == kIemNativeEmitMemOp_Store
6358 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
6359 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
6360 break;
6361#endif
6362 }
6363 }
6364 else
6365 {
6366 Assert(iSegReg < 6);
6367 switch (cbMem)
6368 {
6369 case 1:
6370 Assert( pfnFunction
6371 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
6372 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
6373 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6374 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6375 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6376 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
6377 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
6378 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
6379 : UINT64_C(0xc000b000a0009000) ));
6380 break;
6381 case 2:
6382 Assert( pfnFunction
6383 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
6384 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
6385 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6386 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6387 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
6388 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
6389 : UINT64_C(0xc000b000a0009000) ));
6390 break;
6391 case 4:
6392 Assert( pfnFunction
6393 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
6394 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
6395 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
6396 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
6397 : UINT64_C(0xc000b000a0009000) ));
6398 break;
6399 case 8:
6400 Assert( pfnFunction
6401 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
6402 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
6403 : UINT64_C(0xc000b000a0009000) ));
6404 break;
6405#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6406 case sizeof(RTUINT128U):
6407 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6408 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
6409 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6410 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
6411 || ( enmOp == kIemNativeEmitMemOp_Store
6412 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6413 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
6414 break;
6415 case sizeof(RTUINT256U):
6416 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6417 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
6418 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
6419 || ( enmOp == kIemNativeEmitMemOp_Store
6420 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
6421 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
6422 break;
6423#endif
6424 }
6425 }
6426#endif
6427
6428#ifdef VBOX_STRICT
6429 /*
6430 * Check that the fExec flags we've got make sense.
6431 */
6432 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6433#endif
6434
6435 /*
6436 * To keep things simple we have to commit any pending writes first as we
6437 * may end up making calls.
6438 */
6439 /** @todo we could postpone this till we make the call and reload the
6440 * registers after returning from the call. Not sure if that's sensible or
6441 * not, though. */
6442#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6443 off = iemNativeRegFlushPendingWrites(pReNative, off);
6444#else
6445 /* The program counter is treated differently for now. */
6446 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
6447#endif
6448
6449#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6450 /*
6451 * Move/spill/flush stuff out of call-volatile registers.
6452 * This is the easy way out. We could contain this to the tlb-miss branch
6453 * by saving and restoring active stuff here.
6454 */
6455 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6456#endif
6457
6458 /*
6459 * Define labels and allocate the result register (trying for the return
6460 * register if we can).
6461 */
6462 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6463#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6464 uint8_t idxRegValueFetch = UINT8_MAX;
6465
6466 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6467 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6468 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
6469 else
6470 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6471 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6472 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6473 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6474#else
6475 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6476 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6477 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6478 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6479#endif
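 /* Note: preferring IEMNATIVE_CALL_RET_GREG for the fetch result above means that,
  * whenever the TlbMiss path ends up calling the helper, its return value typically
  * already sits in the right register and the extra move emitted further down for
  * the mismatch case can be skipped. */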
6480 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
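 /* Note: TlbState.fSkip indicates that no inline TLB lookup will be emitted for this
  * access; in that case the code below falls straight through into the TlbMiss
  * helper-call path and the TlbLookup/TlbDone sections further down are not generated. */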
6481
6482#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6483 uint8_t idxRegValueStore = UINT8_MAX;
6484
6485 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6486 idxRegValueStore = !TlbState.fSkip
6487 && enmOp == kIemNativeEmitMemOp_Store
6488 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6489 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6490 : UINT8_MAX;
6491 else
6492 idxRegValueStore = !TlbState.fSkip
6493 && enmOp == kIemNativeEmitMemOp_Store
6494 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6495 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6496 : UINT8_MAX;
6497
6498#else
6499 uint8_t const idxRegValueStore = !TlbState.fSkip
6500 && enmOp == kIemNativeEmitMemOp_Store
6501 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6502 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6503 : UINT8_MAX;
6504#endif
6505 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6506 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6507 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6508 : UINT32_MAX;
6509
6510 /*
6511 * Jump to the TLB lookup code.
6512 */
6513 if (!TlbState.fSkip)
6514 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6515
6516 /*
6517 * TlbMiss:
6518 *
6519 * Call helper to do the fetching or storing.
6520 * We flush all guest register shadow copies here.
6521 */
6522 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6523
6524#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6525 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6526#else
6527 RT_NOREF(idxInstr);
6528#endif
6529
6530#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6531 if (pReNative->Core.offPc)
6532 {
6533 /*
6534 * Update the program counter but restore it at the end of the TlbMiss branch.
6535 * This should allow delaying more program counter updates for the TlbLookup and hit paths,
6536 * which are hopefully much more frequent, reducing the number of memory accesses.
6537 */
6538 /* Allocate a temporary PC register. */
6539 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6540
6541 /* Perform the addition and store the result. */
6542 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6543 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6544
6545 /* Free and flush the PC register. */
6546 iemNativeRegFreeTmp(pReNative, idxPcReg);
6547 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6548 }
6549#endif
6550
6551#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6552 /* Save variables in volatile registers. */
6553 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6554 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
6555 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
6556 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6557#endif
6558
6559 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
6560 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
6561#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6562 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6563 {
6564 /*
6565 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
6566 *
6567 * @note A host register was already assigned to the variable for the TlbLookup case above.
6568 * It must not be freed here, or the value loaded into that register will not be synced
6569 * back further down the road, because the variable would no longer know it has a register assigned.
6570 *
6571 * @note For loads it is not required to sync what is in the assigned register with the stack slot
6572 * as it will be overwritten anyway.
6573 */
6574 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6575 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
6576 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
6577 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6578 }
6579 else
6580#endif
6581 if (enmOp == kIemNativeEmitMemOp_Store)
6582 {
6583 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6584 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
6585#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6586 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6587#else
6588 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6589 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6590#endif
6591 }
6592
6593 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
6594 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
6595#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6596 fVolGregMask);
6597#else
6598 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
6599#endif
6600
6601 if (iSegReg != UINT8_MAX)
6602 {
6603 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
6604 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6605 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
6606 }
6607
6608 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6609 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6610
6611 /* Done setting up parameters, make the call. */
6612 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6613
6614 /*
6615 * Put the result in the right register if this is a fetch.
6616 */
6617 if (enmOp != kIemNativeEmitMemOp_Store)
6618 {
6619#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6620 if ( cbMem == sizeof(RTUINT128U)
6621 || cbMem == sizeof(RTUINT256U))
6622 {
6623 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
6624
6625 /* Sync the value on the stack with the host register assigned to the variable. */
6626 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
6627 }
6628 else
6629#endif
6630 {
6631 Assert(idxRegValueFetch == pVarValue->idxReg);
6632 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
6633 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
6634 }
6635 }
6636
6637#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6638 /* Restore variables and guest shadow registers to volatile registers. */
6639 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6640 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6641#endif
6642
6643#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6644 if (pReNative->Core.offPc)
6645 {
6646 /*
6647 * Time to restore the program counter to its original value.
6648 */
6649 /* Allocate a temporary PC register. */
6650 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6651
6652 /* Restore the original value. */
6653 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6654 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6655
6656 /* Free and flush the PC register. */
6657 iemNativeRegFreeTmp(pReNative, idxPcReg);
6658 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6659 }
6660#endif
6661
6662#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6663 if (!TlbState.fSkip)
6664 {
6665 /* end of TlbMiss - Jump to the done label. */
6666 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6667 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6668
6669 /*
6670 * TlbLookup:
6671 */
6672 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
6673 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
6674 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
6675
6676 /*
6677 * Emit code to do the actual storing / fetching.
6678 */
6679 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6680# ifdef VBOX_WITH_STATISTICS
6681 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6682 enmOp == kIemNativeEmitMemOp_Store
6683 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
6684 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
6685# endif
6686 switch (enmOp)
6687 {
6688 case kIemNativeEmitMemOp_Store:
6689 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
6690 {
6691 switch (cbMem)
6692 {
6693 case 1:
6694 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6695 break;
6696 case 2:
6697 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6698 break;
6699 case 4:
6700 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6701 break;
6702 case 8:
6703 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6704 break;
6705#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6706 case sizeof(RTUINT128U):
6707 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6708 break;
6709 case sizeof(RTUINT256U):
6710 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6711 break;
6712#endif
6713 default:
6714 AssertFailed();
6715 }
6716 }
6717 else
6718 {
6719 switch (cbMem)
6720 {
6721 case 1:
6722 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
6723 idxRegMemResult, TlbState.idxReg1);
6724 break;
6725 case 2:
6726 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6727 idxRegMemResult, TlbState.idxReg1);
6728 break;
6729 case 4:
6730 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6731 idxRegMemResult, TlbState.idxReg1);
6732 break;
6733 case 8:
6734 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
6735 idxRegMemResult, TlbState.idxReg1);
6736 break;
6737 default:
6738 AssertFailed();
6739 }
6740 }
6741 break;
6742
6743 case kIemNativeEmitMemOp_Fetch:
6744 case kIemNativeEmitMemOp_Fetch_Zx_U16:
6745 case kIemNativeEmitMemOp_Fetch_Zx_U32:
6746 case kIemNativeEmitMemOp_Fetch_Zx_U64:
6747 switch (cbMem)
6748 {
6749 case 1:
6750 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6751 break;
6752 case 2:
6753 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6754 break;
6755 case 4:
6756 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6757 break;
6758 case 8:
6759 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6760 break;
6761#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6762 case sizeof(RTUINT128U):
6763 /*
6764 * No need to sync back the register with the stack, this is done by the generic variable handling
6765 * code if there is a register assigned to a variable and the stack must be accessed.
6766 */
6767 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6768 break;
6769 case sizeof(RTUINT256U):
6770 /*
6771 * No need to sync back the register with the stack, this is done by the generic variable handling
6772 * code if there is a register assigned to a variable and the stack must be accessed.
6773 */
6774 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6775 break;
6776#endif
6777 default:
6778 AssertFailed();
6779 }
6780 break;
6781
6782 case kIemNativeEmitMemOp_Fetch_Sx_U16:
6783 Assert(cbMem == 1);
6784 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6785 break;
6786
6787 case kIemNativeEmitMemOp_Fetch_Sx_U32:
6788 Assert(cbMem == 1 || cbMem == 2);
6789 if (cbMem == 1)
6790 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6791 else
6792 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6793 break;
6794
6795 case kIemNativeEmitMemOp_Fetch_Sx_U64:
6796 switch (cbMem)
6797 {
6798 case 1:
6799 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6800 break;
6801 case 2:
6802 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6803 break;
6804 case 4:
6805 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6806 break;
6807 default:
6808 AssertFailed();
6809 }
6810 break;
6811
6812 default:
6813 AssertFailed();
6814 }
6815
6816 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6817
6818 /*
6819 * TlbDone:
6820 */
6821 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6822
6823 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6824
6825# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6826 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6827 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6828# endif
6829 }
6830#else
6831 RT_NOREF(fAlignMask, idxLabelTlbMiss);
6832#endif
6833
6834 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
6835 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6836 return off;
6837}
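
/*
 * Rough layout of the code generated by iemNativeEmitMemFetchStoreDataCommon above
 * when the TLB lookup is not skipped (orientation sketch only, not literal output):
 *
 *          jmp     TlbLookup
 *      TlbMiss:
 *          <store instruction number, apply delayed PC offset, save volatile vars>
 *          <load pVCpu / GCPtrMem / value / segment register into the call argument regs>
 *          call    pfnFunction
 *          <move fetch result into place, restore vars and shadows, undo PC offset>
 *          jmp     TlbDone
 *      TlbLookup:
 *          <inline TLB probe, branching to TlbMiss when the inline path cannot be used>
 *          <inline load or store via idxRegMemResult>
 *      TlbDone:
 */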
6838
6839
6840
6841/*********************************************************************************************************************************
6842* Memory fetches (IEM_MEM_FETCH_XXX). *
6843*********************************************************************************************************************************/
6844
6845/* 8-bit segmented: */
6846#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
6847 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
6848 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
6849 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6850
6851#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6852 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6853 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6854 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6855
6856#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6857 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6858 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6859 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6860
6861#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6862 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6863 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6864 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6865
6866#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6867 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6868 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6869 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6870
6871#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6872 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6873 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6874 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
6875
6876#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6877 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6878 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6879 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
6880
6881/* 16-bit segmented: */
6882#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6883 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6884 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6885 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6886
6887#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6888 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6889 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6890 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
6891
6892#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6893 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6894 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6895 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6896
6897#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6898 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6899 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6900 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6901
6902#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6903 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6904 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6905 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6906
6907#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6908 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6909 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6910 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
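
/*
 * A note on the fAlignMask arguments used by these wrappers: byte accesses pass 0
 * (a byte can never be misaligned) while the wider accesses pass cbMem - 1, i.e.
 * natural alignment.  The mask is consumed by the inline TLB lookup code, which is
 * expected to route accesses that do not satisfy it to the TlbMiss helper; see
 * iemNativeEmitTlbLookup for the authoritative handling.
 */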
6911
6912
6913/* 32-bit segmented: */
6914#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6915 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6916 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6917 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6918
6919#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6920 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6921 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6922 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6923
6924#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6925 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6926 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6927 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6928
6929#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6930 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6931 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6932 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
6933
6934#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
6935 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
6936 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6937 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6938
6939#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
6940 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
6941 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6942 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6943
6944#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
6945 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
6946 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6947 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6948
6949AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
6950#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
6951 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
6952 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
6953 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6954
6955
6956/* 64-bit segmented: */
6957#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6958 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6959 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6960 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6961
6962AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
6963#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
6964 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
6965 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
6966 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6967
6968
6969/* 8-bit flat: */
6970#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
6971 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
6972 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
6973 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6974
6975#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
6976 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
6977 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6978 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6979
6980#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
6981 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
6982 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6983 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6984
6985#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
6986 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6987 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6988 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6989
6990#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
6991 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
6992 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6993 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6994
6995#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
6996 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
6997 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6998 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
6999
7000#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7001 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7002 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7003 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7004
7005
7006/* 16-bit flat: */
7007#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7008 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7009 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7010 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7011
7012#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7013 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7014 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7015 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7016
7017#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7018 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7019 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7020 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7021
7022#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7023 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7024 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7025 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7026
7027#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7028 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7029 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7030 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7031
7032#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7033 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7034 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7035 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7036
7037/* 32-bit flat: */
7038#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7039 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7040 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7041 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7042
7043#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7044 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7045 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7046 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7047
7048#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7049 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7050 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7051 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7052
7053#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7054 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7055 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7056 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7057
7058#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7059 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7060 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7061 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7062
7063#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7064 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7065 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7066 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7067
7068#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7069 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7070 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7071 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7072
7073#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7074 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7075 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7076 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7077
7078
7079/* 64-bit flat: */
7080#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7081 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7082 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7083 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7084
7085#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7086 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7087 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7088 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7089
7090#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7091/* 128-bit segmented: */
7092#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7093 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7094 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7095 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7096
7097#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7098 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7099 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7100 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7101
7102AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7103#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7104 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, \
7105 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7106 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7107
7108#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7109 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7110 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7111 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7112
7113/* 128-bit flat: */
7114#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7115 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7116 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7117 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7118
7119#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7120 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7121 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7122 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7123
7124#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7125 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7126 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7127 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7128
7129#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7130 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7131 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7132 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7133
7134/* 256-bit segmented: */
7135#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7136 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7137 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7138 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7139
7140#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7141 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7142 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7143 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7144
7145#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7146 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7147 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7148 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7149
7150
7151/* 256-bit flat: */
7152#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7153 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7154 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7155 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7156
7157#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7158 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7159 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7160 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7161
7162#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7163 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7164 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7165 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7166#endif
7167
7168
7169/*********************************************************************************************************************************
7170* Memory stores (IEM_MEM_STORE_XXX). *
7171*********************************************************************************************************************************/
7172
7173#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7174 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7175 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
7176 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7177
7178#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7179 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7180 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7181 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7182
7183#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7184 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
7185 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7186 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7187
7188#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7189 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
7190 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7191 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7192
7193
7194#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7195 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
7196 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
7197 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7198
7199#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7200 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
7201 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7202 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7203
7204#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7205 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
7206 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7207 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7208
7209#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7210 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
7211 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7212 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7213
7214
7215#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
7216 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7217 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7218
7219#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
7220 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7221 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7222
7223#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
7224 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7225 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7226
7227#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
7228 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7229 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7230
7231
7232#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
7233 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7234 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7235
7236#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
7237 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7238 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7239
7240#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
7241 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7242 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7243
7244#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
7245 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7246 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7247
7248/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
7249 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
7250DECL_INLINE_THROW(uint32_t)
7251iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
7252 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
7253{
7254 /*
7255 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
7256 * to do the grunt work.
7257 */
7258 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
7259 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
7260 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
7261 pfnFunction, idxInstr);
7262 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
7263 return off;
7264}
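
/*
 * Usage sketch (illustrative only): IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue)
 * expands to a call of the helper above, which materializes the immediate as a temporary
 * const variable, reuses iemNativeEmitMemFetchStoreDataCommon for the actual store emission
 * (so immediates get the cheap store-immediate path in the TLB-hit code), and then frees
 * the temporary variable again.
 */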
7265
7266
7267#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7268# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
7269 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7270 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7271 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
7272
7273# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
7274 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7275 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7276 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
7277
7278# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
7279 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7280 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7281 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
7282
7283# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
7284 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7285 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7286 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7287
7288
7289# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
7290 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7291 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7292 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
7293
7294# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
7295 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7296 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7297 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
7298
7299# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
7300 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7301 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7302 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
7303
7304# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
7305 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7306 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7307 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7308#endif
7309
7310
7311
7312/*********************************************************************************************************************************
7313* Stack Accesses. *
7314*********************************************************************************************************************************/
7315/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
7316#define IEM_MC_PUSH_U16(a_u16Value) \
7317 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7318 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
7319#define IEM_MC_PUSH_U32(a_u32Value) \
7320 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7321 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
7322#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
7323 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
7324 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
7325#define IEM_MC_PUSH_U64(a_u64Value) \
7326 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7327 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
7328
7329#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
7330 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7331 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7332#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
7333 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7334 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
7335#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
7336 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
7337 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
7338
7339#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
7340 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7341 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7342#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
7343 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7344 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
7345
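/*
 * Worked example of the RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) packing used
 * above: IEM_MC_FLAT64_PUSH_U16 passes RT_MAKE_U32_FROM_U8(16, 64, 0, 0), i.e. push a
 * 16-bit value while running with a flat 64-bit stack and no segment-register special
 * casing.  The emitter below unpacks these again via RT_BYTE1 (cbMem * 8), RT_BYTE2
 * (cBitsFlat) and RT_BYTE3 (fIsSegReg).
 */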
7346
7347/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
7348DECL_INLINE_THROW(uint32_t)
7349iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
7350 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7351{
7352 /*
7353 * Assert sanity.
7354 */
7355 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7356 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7357#ifdef VBOX_STRICT
7358 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7359 {
7360 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7361 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7362 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7363 Assert( pfnFunction
7364 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7365 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
7366 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
7367 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7368 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
7369 : UINT64_C(0xc000b000a0009000) ));
7370 }
7371 else
7372 Assert( pfnFunction
7373 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
7374 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
7375 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
7376 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
7377 : UINT64_C(0xc000b000a0009000) ));
7378#endif
7379
7380#ifdef VBOX_STRICT
7381 /*
7382 * Check that the fExec flags we've got make sense.
7383 */
7384 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7385#endif
7386
7387 /*
7388 * To keep things simple we have to commit any pending writes first as we
7389 * may end up making calls.
7390 */
7391 /** @todo we could postpone this till we make the call and reload the
7392 * registers after returning from the call. Not sure if that's sensible or
7393 * not, though. */
7394 off = iemNativeRegFlushPendingWrites(pReNative, off);
7395
7396 /*
7397 * First we calculate the new RSP and the effective stack pointer value.
7398 * For 64-bit mode and flat 32-bit these two are the same.
7399 * (The code structure here closely mirrors the interpreted PUSH implementation.)
7400 */
7401 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7402 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
7403 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
7404 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
7405 ? cbMem : sizeof(uint16_t);
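 /* Note: on Intel CPUs a segment register push in 32-bit or 64-bit code is performed
  * as a 16-bit write that leaves the rest of the stack slot untouched, which is why
  * cbMemAccess is clamped to 16 bits for that case while RSP is still adjusted by the
  * full cbMem below. */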
7406 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7407 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7408 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7409 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7410 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7411 if (cBitsFlat != 0)
7412 {
7413 Assert(idxRegEffSp == idxRegRsp);
7414 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7415 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7416 if (cBitsFlat == 64)
7417 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
7418 else
7419 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
7420 }
7421 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7422 {
7423 Assert(idxRegEffSp != idxRegRsp);
7424 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7425 kIemNativeGstRegUse_ReadOnly);
7426#ifdef RT_ARCH_AMD64
7427 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7428#else
7429 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7430#endif
7431 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7432 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7433 offFixupJumpToUseOtherBitSp = off;
7434 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7435 {
7436 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7437 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7438 }
7439 else
7440 {
7441 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7442 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7443 }
7444 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7445 }
7446 /* SpUpdateEnd: */
7447 uint32_t const offLabelSpUpdateEnd = off;
7448
7449 /*
7450 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7451 * we're skipping lookup).
7452 */
7453 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7454 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
7455 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7456 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7457 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7458 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7459 : UINT32_MAX;
7460 uint8_t const idxRegValue = !TlbState.fSkip
7461 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7462 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
7463 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
7464 : UINT8_MAX;
7465 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7466
7467
7468 if (!TlbState.fSkip)
7469 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7470 else
7471 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7472
7473 /*
7474 * Use16BitSp:
7475 */
7476 if (cBitsFlat == 0)
7477 {
7478#ifdef RT_ARCH_AMD64
7479 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7480#else
7481 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7482#endif
7483 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7484 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7485 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7486 else
7487 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7488 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7489 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7490 }
7491
7492 /*
7493 * TlbMiss:
7494 *
7495 * Call helper to do the pushing.
7496 */
7497 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7498
7499#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7500 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7501#else
7502 RT_NOREF(idxInstr);
7503#endif
7504
7505 /* Save variables in volatile registers. */
7506 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7507 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7508 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
7509 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
7510 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7511
7512 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
7513 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
7514 {
7515 /* Swap them using ARG0 as temp register: */
7516 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
7517 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
7518 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
7519 }
7520 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
7521 {
7522 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
7523 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
7524 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7525
7526 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
7527 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7528 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7529 }
7530 else
7531 {
7532 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
7533 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7534
7535 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
7536 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
7537 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
7538 }
7539
7540 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7541 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7542
7543 /* Done setting up parameters, make the call. */
7544 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7545
7546 /* Restore variables and guest shadow registers to volatile registers. */
7547 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7548 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7549
7550#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7551 if (!TlbState.fSkip)
7552 {
7553 /* end of TlbMiss - Jump to the done label. */
7554 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7555 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7556
7557 /*
7558 * TlbLookup:
7559 */
7560 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
7561 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7562
7563 /*
7564 * Emit code to do the actual storing / fetching.
7565 */
7566 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7567# ifdef VBOX_WITH_STATISTICS
7568 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7569 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7570# endif
7571 if (idxRegValue != UINT8_MAX)
7572 {
7573 switch (cbMemAccess)
7574 {
7575 case 2:
7576 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7577 break;
7578 case 4:
7579 if (!fIsIntelSeg)
7580 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7581 else
7582 {
7583                        /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
7584                           PUSH FS in real mode, so we have to try to emulate that here.
7585                           We borrow the now unused idxReg1 from the TLB lookup code here. */
7586 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
7587 kIemNativeGstReg_EFlags);
7588 if (idxRegEfl != UINT8_MAX)
7589 {
7590#ifdef RT_ARCH_AMD64
7591 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
7592 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7593 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7594#else
7595 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
7596 off, TlbState.idxReg1, idxRegEfl,
7597 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7598#endif
7599 iemNativeRegFreeTmp(pReNative, idxRegEfl);
7600 }
7601 else
7602 {
7603 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
7604 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
7605 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7606 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7607 }
7608 /* ASSUMES the upper half of idxRegValue is ZERO. */
7609 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
7610 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
7611 }
7612 break;
7613 case 8:
7614 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7615 break;
7616 default:
7617 AssertFailed();
7618 }
7619 }
7620 else
7621 {
7622 switch (cbMemAccess)
7623 {
7624 case 2:
7625 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7626 idxRegMemResult, TlbState.idxReg1);
7627 break;
7628 case 4:
7629 Assert(!fIsSegReg);
7630 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7631 idxRegMemResult, TlbState.idxReg1);
7632 break;
7633 case 8:
7634 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
7635 break;
7636 default:
7637 AssertFailed();
7638 }
7639 }
7640
7641 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7642 TlbState.freeRegsAndReleaseVars(pReNative);
7643
7644 /*
7645 * TlbDone:
7646 *
7647 * Commit the new RSP value.
7648 */
7649 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7650 }
7651#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7652
7653#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7654 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
7655#endif
7656 iemNativeRegFreeTmp(pReNative, idxRegRsp);
7657 if (idxRegEffSp != idxRegRsp)
7658 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7659
7660    /* The value variable is implicitly flushed. */
7661 if (idxRegValue != UINT8_MAX)
7662 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7663 iemNativeVarFreeLocal(pReNative, idxVarValue);
7664
7665 return off;
7666}
7667
7668
7669
7670/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
7671#define IEM_MC_POP_GREG_U16(a_iGReg) \
7672 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7673 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
7674#define IEM_MC_POP_GREG_U32(a_iGReg) \
7675 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7676 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
7677#define IEM_MC_POP_GREG_U64(a_iGReg) \
7678 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7679 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
7680
7681#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
7682 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7683 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7684#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
7685 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7686 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
7687
7688#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
7689 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7690 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7691#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
7692 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7693 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
7694
7695
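/**
 * Emits the 16-bit variant of the POP stack pointer update: copies SP
 * (zero extended) into @a idxRegEffSp and adds @a cbMem to the low 16 bits of
 * @a idxRegRsp, leaving bits 63:16 untouched.  @a idxRegTmp is only needed on
 * arm64.
 */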
7696DECL_FORCE_INLINE_THROW(uint32_t)
7697iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
7698 uint8_t idxRegTmp)
7699{
7700 /* Use16BitSp: */
7701#ifdef RT_ARCH_AMD64
7702 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7703 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
7704 RT_NOREF(idxRegTmp);
7705#else
7706 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
7707 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
7708 /* add tmp, regrsp, #cbMem */
7709 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
7710 /* and tmp, tmp, #0xffff */
7711 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
7712 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
7713    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
7714 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
7715#endif
7716 return off;
7717}
7718
7719
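/**
 * Emits the 32-bit variant of the POP stack pointer update: copies ESP
 * (zero extended) into @a idxRegEffSp and adds @a cbMem to @a idxRegRsp as a
 * 32-bit operation.
 */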
7720DECL_FORCE_INLINE(uint32_t)
7721iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
7722{
7723 /* Use32BitSp: */
7724 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7725 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
7726 return off;
7727}
7728
7729
7730/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
7731DECL_INLINE_THROW(uint32_t)
7732iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
7733 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7734{
7735 /*
7736 * Assert sanity.
7737 */
7738 Assert(idxGReg < 16);
7739#ifdef VBOX_STRICT
7740 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7741 {
7742 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7743 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7744 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7745 Assert( pfnFunction
7746 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7747 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
7748 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7749 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
7750 : UINT64_C(0xc000b000a0009000) ));
7751 }
7752 else
7753 Assert( pfnFunction
7754 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
7755 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
7756 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
7757 : UINT64_C(0xc000b000a0009000) ));
7758#endif
7759
7760#ifdef VBOX_STRICT
7761 /*
7762 * Check that the fExec flags we've got make sense.
7763 */
7764 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7765#endif
7766
7767 /*
7768 * To keep things simple we have to commit any pending writes first as we
7769 * may end up making calls.
7770 */
7771 off = iemNativeRegFlushPendingWrites(pReNative, off);
7772
7773 /*
7774 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
7775 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
7776 * directly as the effective stack pointer.
7777 * (Code structure is very similar to that of PUSH)
7778 */
7779 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7780 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7781 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7782 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7783 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7784 /** @todo can do a better job picking the register here. For cbMem >= 4 this
7785 * will be the resulting register value. */
7786 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
7787
7788 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7789 if (cBitsFlat != 0)
7790 {
7791 Assert(idxRegEffSp == idxRegRsp);
7792 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7793 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7794 }
7795 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7796 {
7797 Assert(idxRegEffSp != idxRegRsp);
7798 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7799 kIemNativeGstRegUse_ReadOnly);
7800#ifdef RT_ARCH_AMD64
7801 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7802#else
7803 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7804#endif
7805 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7806 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7807 offFixupJumpToUseOtherBitSp = off;
7808 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7809 {
7810/** @todo can skip idxRegRsp updating when popping ESP. */
7811 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7812 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7813 }
7814 else
7815 {
7816 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7817 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7818 }
7819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7820 }
7821 /* SpUpdateEnd: */
7822 uint32_t const offLabelSpUpdateEnd = off;
7823
7824 /*
7825 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7826 * we're skipping lookup).
7827 */
7828 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7829 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
7830 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7831 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7832 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7833 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7834 : UINT32_MAX;
7835
7836 if (!TlbState.fSkip)
7837 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7838 else
7839 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7840
7841 /*
7842 * Use16BitSp:
7843 */
7844 if (cBitsFlat == 0)
7845 {
7846#ifdef RT_ARCH_AMD64
7847 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7848#else
7849 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7850#endif
7851 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7852 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7853 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7854 else
7855 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7856 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7857 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7858 }
7859
7860 /*
7861 * TlbMiss:
7862 *
7863     * Call helper to do the popping.
7864 */
7865 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7866
7867#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7868 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7869#else
7870 RT_NOREF(idxInstr);
7871#endif
7872
7873 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7874 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7875 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
7876 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7877
7878
7879 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
7880 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7881 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7882
7883 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7884 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7885
7886 /* Done setting up parameters, make the call. */
7887 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7888
7889 /* Move the return register content to idxRegMemResult. */
7890 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7891 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7892
7893 /* Restore variables and guest shadow registers to volatile registers. */
7894 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7895 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7896
7897#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7898 if (!TlbState.fSkip)
7899 {
7900 /* end of TlbMiss - Jump to the done label. */
7901 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7902 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7903
7904 /*
7905 * TlbLookup:
7906 */
7907 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
7908 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7909
7910 /*
7911 * Emit code to load the value (from idxRegMemResult into idxRegMemResult).
7912 */
7913 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7914# ifdef VBOX_WITH_STATISTICS
7915 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7916 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7917# endif
7918 switch (cbMem)
7919 {
7920 case 2:
7921 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7922 break;
7923 case 4:
7924 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7925 break;
7926 case 8:
7927 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7928 break;
7929 default:
7930 AssertFailed();
7931 }
7932
7933 TlbState.freeRegsAndReleaseVars(pReNative);
7934
7935 /*
7936 * TlbDone:
7937 *
7938     * Set the new RSP value (FLAT accesses need to calculate it first) and
7939 * commit the popped register value.
7940 */
7941 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7942 }
7943#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7944
7945 if (idxGReg != X86_GREG_xSP)
7946 {
7947 /* Set the register. */
7948 if (cbMem >= sizeof(uint32_t))
7949 {
7950#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7951 AssertMsg( pReNative->idxCurCall == 0
7952 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
7953 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
7954#endif
7955 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
7956#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7957 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
7958#endif
7959#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7960 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
7961 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
7962#endif
7963 }
7964 else
7965 {
7966 Assert(cbMem == sizeof(uint16_t));
7967 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
7968 kIemNativeGstRegUse_ForUpdate);
7969 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
7970#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7971 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
7972#endif
7973 iemNativeRegFreeTmp(pReNative, idxRegDst);
7974 }
7975
7976 /* Complete RSP calculation for FLAT mode. */
7977 if (idxRegEffSp == idxRegRsp)
7978 {
7979 if (cBitsFlat == 64)
7980 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
7981 else
7982 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
7983 }
7984 }
7985 else
7986 {
7987        /* We're popping RSP, ESP or SP. Only the 16-bit SP case needs a bit of extra work, of course. */
7988 if (cbMem == sizeof(uint64_t))
7989 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
7990 else if (cbMem == sizeof(uint32_t))
7991 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
7992 else
7993 {
7994 if (idxRegEffSp == idxRegRsp)
7995 {
7996 if (cBitsFlat == 64)
7997 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
7998 else
7999 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
8000 }
8001 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8002 }
8003 }
8004
8005#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8006 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8007#endif
8008
8009 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8010 if (idxRegEffSp != idxRegRsp)
8011 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8012 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8013
8014 return off;
8015}
8016
8017
8018
8019/*********************************************************************************************************************************
8020* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8021*********************************************************************************************************************************/
8022
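/* Note: All the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX wrappers below expand to
   iemNativeEmitMemMapCommon() calls and only differ in the access mode (atomic, read-write,
   write-only, read-only), the operand size / alignment mask and the TLB-miss helper used. */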
8023#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8024 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8025 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
8026 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8027
8028#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8029 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8030 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
8031 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8032
8033#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8034 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8035 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
8036 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8037
8038#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8039 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8040 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
8041 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8042
8043
8044#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8045 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8046 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8047 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8048
8049#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8050 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8051 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8052 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8053
8054#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8055 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8056 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8057 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8058
8059#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8060 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8061 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8062 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8063
8064#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8065 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8066 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8067 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8068
8069
8070#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8071 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8072 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8073 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8074
8075#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8076 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8077 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8078 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8079
8080#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8081 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8082 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8083 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8084
8085#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8086 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8087 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8088 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8089
8090#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8091 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8092 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8093 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8094
8095
8096#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8097 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8098 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8099 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8100
8101#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8102 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8103 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8104 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8105#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8106 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8107 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8108 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8109
8110#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8111 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8112 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8113 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8114
8115#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8116 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8117 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8118 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8119
8120
8121#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8122 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8123 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8124 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8125
8126#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8127 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8128 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
8129 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8130
8131
8132#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8133 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8134 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8135 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8136
8137#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8138 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8139 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8140 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8141
8142#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8143 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8144 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8145 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8146
8147#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8148 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8149 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8150 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8151
8152
8153
8154#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8155 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8156 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
8157 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8158
8159#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8160 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8161 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
8162 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8163
8164#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8165 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8166 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
8167 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8168
8169#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8170 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8171 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
8172 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8173
8174
8175#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8176 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8177 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8178 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8179
8180#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8181 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8182 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8183 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
8184
8185#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8186 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8187 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8188 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8189
8190#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8191 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8192 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8193 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
8194
8195#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
8196 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
8197 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8198 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8199
8200
8201#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8202 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8203 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8204 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
8205
8206#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8207 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8208 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8209 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
8210
8211#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8212 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8213 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8214 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8215
8216#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8217 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8218 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8219 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
8220
8221#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
8222 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
8223 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8224 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8225
8226
8227#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8228 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8229 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8230 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
8231
8232#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8233 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8234 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8235 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
8236
8237#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8238 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8239 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8240 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8241
8242#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8243 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8244 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8245 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
8246
8247#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
8248 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
8249 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8250 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8251
8252
8253#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
8254 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8255 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8256 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
8257
8258#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
8259 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8260 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
8261 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
8262
8263
8264#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8265 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8266 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8267 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
8268
8269#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8270 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8271 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8272 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
8273
8274#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8275 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8276 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8277 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
8278
8279#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8280 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8281 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8282 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
8283
8284
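/**
 * Common emitter for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 * statements (pass @a iSegReg as UINT8_MAX for the flat variants).
 *
 * Emits a TLB lookup for the mapping with a fallback call to the
 * @a pfnFunction helper, returning the host mapping address in @a idxVarMem
 * and the unmap info byte in @a idxVarUnmapInfo.
 */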
8285DECL_INLINE_THROW(uint32_t)
8286iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
8287 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
8288 uintptr_t pfnFunction, uint8_t idxInstr)
8289{
8290 /*
8291 * Assert sanity.
8292 */
8293 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
8294 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
8295 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
8296 && pVarMem->cbVar == sizeof(void *),
8297 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8298
8299 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8300 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8301 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
8302 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
8303 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8304
8305 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
8306 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8307 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
8308 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
8309 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8310
8311 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8312
8313 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8314
8315#ifdef VBOX_STRICT
8316# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
8317 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
8318 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
8319 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
8320 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
8321# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
8322 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
8323 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
8324 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
8325
8326 if (iSegReg == UINT8_MAX)
8327 {
8328 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8329 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8330 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8331 switch (cbMem)
8332 {
8333 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
8334 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
8335 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
8336 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
8337 case 10:
8338 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
8339 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
8340 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8341 break;
8342 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
8343# if 0
8344 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
8345 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
8346# endif
8347 default: AssertFailed(); break;
8348 }
8349 }
8350 else
8351 {
8352 Assert(iSegReg < 6);
8353 switch (cbMem)
8354 {
8355 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
8356 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
8357 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
8358 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
8359 case 10:
8360 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
8361 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
8362 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8363 break;
8364 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
8365# if 0
8366 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
8367 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
8368# endif
8369 default: AssertFailed(); break;
8370 }
8371 }
8372# undef IEM_MAP_HLP_FN
8373# undef IEM_MAP_HLP_FN_NO_AT
8374#endif
8375
8376#ifdef VBOX_STRICT
8377 /*
8378 * Check that the fExec flags we've got make sense.
8379 */
8380 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8381#endif
8382
8383 /*
8384 * To keep things simple we have to commit any pending writes first as we
8385 * may end up making calls.
8386 */
8387 off = iemNativeRegFlushPendingWrites(pReNative, off);
8388
8389#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8390 /*
8391 * Move/spill/flush stuff out of call-volatile registers.
8392 * This is the easy way out. We could contain this to the tlb-miss branch
8393 * by saving and restoring active stuff here.
8394 */
8395 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8396 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8397#endif
8398
8399 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
8400 while the tlb-miss codepath will temporarily put it on the stack.
8401       Set the type to stack here so we don't need to do it twice below. */
8402 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
8403 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
8404 /** @todo use a tmp register from TlbState, since they'll be free after tlb
8405 * lookup is done. */
8406
8407 /*
8408 * Define labels and allocate the result register (trying for the return
8409 * register if we can).
8410 */
8411 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8412 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8413 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
8414 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
8415 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
8416 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8417 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8418 : UINT32_MAX;
8419//off=iemNativeEmitBrk(pReNative, off, 0);
8420 /*
8421 * Jump to the TLB lookup code.
8422 */
8423 if (!TlbState.fSkip)
8424 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8425
8426 /*
8427 * TlbMiss:
8428 *
8429 * Call helper to do the fetching.
8430 * We flush all guest register shadow copies here.
8431 */
8432 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
8433
8434#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8435 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8436#else
8437 RT_NOREF(idxInstr);
8438#endif
8439
8440#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8441 /* Save variables in volatile registers. */
8442 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
8443 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8444#endif
8445
8446 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
8447 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
8448#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8449 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
8450#else
8451 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8452#endif
8453
8454 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
8455 if (iSegReg != UINT8_MAX)
8456 {
8457 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8458 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
8459 }
8460
8461 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
8462 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
8463 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
8464
8465 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8466 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8467
8468 /* Done setting up parameters, make the call. */
8469 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8470
8471 /*
8472 * Put the output in the right registers.
8473 */
8474 Assert(idxRegMemResult == pVarMem->idxReg);
8475 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8476 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8477
8478#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8479 /* Restore variables and guest shadow registers to volatile registers. */
8480 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8481 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8482#endif
8483
8484 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
8485 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
8486
8487#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8488 if (!TlbState.fSkip)
8489 {
8490        /* end of TlbMiss - Jump to the done label. */
8491 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8492 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8493
8494 /*
8495 * TlbLookup:
8496 */
8497 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
8498 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8499# ifdef VBOX_WITH_STATISTICS
8500 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
8501 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
8502# endif
8503
8504 /* [idxVarUnmapInfo] = 0; */
8505 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
8506
8507 /*
8508 * TlbDone:
8509 */
8510 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8511
8512 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
8513
8514# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8515 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
8516 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8517# endif
8518 }
8519#else
8520 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
8521#endif
8522
8523 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8524 iemNativeVarRegisterRelease(pReNative, idxVarMem);
8525
8526 return off;
8527}
8528
8529
8530#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
8531 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
8532 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
8533
8534#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
8535 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
8536 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
8537
8538#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
8539 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
8540 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
8541
8542#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
8543 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
8544 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
8545
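/**
 * Common emitter for the IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC/RW/WO/RO
 * statements.
 *
 * Emits a test of the bUnmapInfo variable and only calls the @a pfnFunction
 * commit-and-unmap helper when it is non-zero; a zero value means no unmap
 * bookkeeping is needed.
 */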
8546DECL_INLINE_THROW(uint32_t)
8547iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
8548 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
8549{
8550 /*
8551 * Assert sanity.
8552 */
8553 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8554#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
8555 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8556#endif
8557 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
8558 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
8559 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
8560#ifdef VBOX_STRICT
8561 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
8562 {
8563 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
8564 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
8565 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
8566 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
8567 case IEM_ACCESS_TYPE_WRITE:
8568 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
8569 case IEM_ACCESS_TYPE_READ:
8570 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
8571 default: AssertFailed();
8572 }
8573#else
8574 RT_NOREF(fAccess);
8575#endif
8576
8577 /*
8578 * To keep things simple we have to commit any pending writes first as we
8579 * may end up making calls (there shouldn't be any at this point, so this
8580 * is just for consistency).
8581 */
8582 /** @todo we could postpone this till we make the call and reload the
8583 * registers after returning from the call. Not sure if that's sensible or
8584 * not, though. */
8585 off = iemNativeRegFlushPendingWrites(pReNative, off);
8586
8587 /*
8588 * Move/spill/flush stuff out of call-volatile registers.
8589 *
8590 * We exclude any register holding the bUnmapInfo variable, as we'll be
8591 * checking it after returning from the call and will free it afterwards.
8592 */
8593 /** @todo save+restore active registers and maybe guest shadows in miss
8594 * scenario. */
8595 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
8596 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
8597
8598 /*
8599      * If the bUnmapInfo value in idxVarUnmapInfo is zero, we can skip all this.
8600      * Otherwise we'll have to call the unmap helper function.
8601      *
8602      * The likelihood of it being zero is higher than that of a TLB hit when doing
8603      * the mapping, as a TLB miss for a well aligned and unproblematic memory
8604      * access should also end up with a mapping that won't need special unmapping.
8605 */
8606 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
8607 * should speed up things for the pure interpreter as well when TLBs
8608 * are enabled. */
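    /*
     * Editorial sketch of the code emitted below on AMD64 (approximate, for
     * orientation only; labels and registers are placeholders):
     *
     *          test    byte [rbp - disp8], 0xff    ; bUnmapInfo stack slot
     *          jz      .done                       ; nothing to unmap specially
     *          mov     <arg1>, bUnmapInfo
     *          mov     <arg0>, pVCpu
     *          call    pfnFunction                 ; iemNativeHlpMemCommitAndUnmapXxx
     *      .done:
     */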
8609#ifdef RT_ARCH_AMD64
8610 if (pVarUnmapInfo->idxReg == UINT8_MAX)
8611 {
8612 /* test byte [rbp - xxx], 0ffh */
8613 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8614 pbCodeBuf[off++] = 0xf6;
8615 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
8616 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8617 pbCodeBuf[off++] = 0xff;
8618 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8619 }
8620 else
8621#endif
8622 {
8623 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
8624 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
8625 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
8626 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8627 }
8628 uint32_t const offJmpFixup = off;
8629     off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
8630
8631 /*
8632 * Call the unmap helper function.
8633 */
8634#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
8635 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8636#else
8637 RT_NOREF(idxInstr);
8638#endif
8639
8640 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
8641 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
8642 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8643
8644 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8645 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8646
8647 /* Done setting up parameters, make the call. */
8648 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8649
8650     /* The bUnmapInfo variable is implicitly freed by these MCs. */
8651 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
8652
8653 /*
8654 * Done, just fixup the jump for the non-call case.
8655 */
8656 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
8657
8658 return off;
8659}
8660
8661
8662
8663/*********************************************************************************************************************************
8664* State and Exceptions *
8665*********************************************************************************************************************************/
8666
8667#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8668#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8669
8670#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8671#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8672#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8673
8674#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8675#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8676#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8677
8678
8679DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
8680{
8681 /** @todo this needs a lot more work later. */
8682 RT_NOREF(pReNative, fForChange);
8683 return off;
8684}
8685
8686
8687
8688/*********************************************************************************************************************************
8689* Emitters for FPU related operations. *
8690*********************************************************************************************************************************/
8691
8692#define IEM_MC_FETCH_FCW(a_u16Fcw) \
8693 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
8694
8695/** Emits code for IEM_MC_FETCH_FCW. */
8696DECL_INLINE_THROW(uint32_t)
8697iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8698{
8699 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8700 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8701
8702 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8703
8704 /* Allocate a temporary FCW register. */
8705 /** @todo eliminate extra register */
8706 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
8707 kIemNativeGstRegUse_ReadOnly);
8708
8709 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
8710
8711 /* Free but don't flush the FCW register. */
8712 iemNativeRegFreeTmp(pReNative, idxFcwReg);
8713 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8714
8715 return off;
8716}
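/*
 * Illustrative note (not from the original source): IEM_MC_FETCH_FCW is the MC a
 * microcode block for something like FNSTCW would use to get the guest FCW into a
 * local before storing it.  A rough, hypothetical sketch (the surrounding MC names
 * are assumptions, not verified against IEMMc.h):
 *
 *      IEM_MC_LOCAL(uint16_t, u16Fcw);
 *      IEM_MC_FETCH_FCW(u16Fcw);
 *      IEM_MC_STORE_MEM_U16(iEffSeg, GCPtrEffDst, u16Fcw);
 */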
8717
8718
8719#define IEM_MC_FETCH_FSW(a_u16Fsw) \
8720 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
8721
8722/** Emits code for IEM_MC_FETCH_FSW. */
8723DECL_INLINE_THROW(uint32_t)
8724iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8725{
8726 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8727 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8728
8729 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
8730 /* Allocate a temporary FSW register. */
8731 /** @todo eliminate extra register */
8732 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
8733 kIemNativeGstRegUse_ReadOnly);
8734
8735 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
8736
8737 /* Free but don't flush the FSW register. */
8738 iemNativeRegFreeTmp(pReNative, idxFswReg);
8739 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8740
8741 return off;
8742}
8743
8744
8745
8746#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8747
8748
8749/*********************************************************************************************************************************
8750* Emitters for SSE/AVX specific operations. *
8751*********************************************************************************************************************************/
8752
8753#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
8754 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
8755
8756/** Emits code for IEM_MC_COPY_XREG_U128. */
8757DECL_INLINE_THROW(uint32_t)
8758iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
8759{
8760    /* This is a nop if the source and destination registers are the same. */
8761 if (iXRegDst != iXRegSrc)
8762 {
8763 /* Allocate destination and source register. */
8764 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
8765 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
8766 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
8767 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8768
8769 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8770
8771 /* Free but don't flush the source and destination register. */
8772 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8773 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8774 }
8775
8776 return off;
8777}
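/*
 * Illustrative note (not from the original source): IEM_MC_COPY_XREG_U128 is the
 * kind of MC a register-to-register MOVAPS/MOVDQA form would emit; copying a full
 * 128-bit register onto itself changes nothing, so the iXRegDst == iXRegSrc case
 * above is a plain nop and needs no code.
 */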
8778
8779
8780#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
8781 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
8782
8783/** Emits code for IEM_MC_FETCH_XREG_U128. */
8784DECL_INLINE_THROW(uint32_t)
8785iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
8786{
8787 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8788 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8789
8790 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8791 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8792
8793 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8794
8795 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8796
8797 /* Free but don't flush the source register. */
8798 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8799 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8800
8801 return off;
8802}
8803
8804
8805#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
8806 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
8807
8808#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
8809 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
8810
8811/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
8812DECL_INLINE_THROW(uint32_t)
8813iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
8814{
8815 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8816 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8817
8818 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8819 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8820
8821 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8822 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8823
8824 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8825
8826 /* Free but don't flush the source register. */
8827 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8828 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8829
8830 return off;
8831}
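/*
 * Editorial note: a_iQWord selects the 64-bit element within the 128-bit XMM
 * register, i.e. iQWord=0 reads bits 63:0 and iQWord=1 reads bits 127:64; only
 * the low 128 bits of the guest SIMD register are loaded for this.
 */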
8832
8833
8834#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
8835 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
8836
8837#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
8838 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
8839
8840/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
8841DECL_INLINE_THROW(uint32_t)
8842iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
8843{
8844 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8845 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8846
8847 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8848 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8849
8850 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8851 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8852
8853 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8854
8855 /* Free but don't flush the source register. */
8856 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8857 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8858
8859 return off;
8860}
8861
8862
8863#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
8864 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
8865
8866/** Emits code for IEM_MC_FETCH_XREG_U16. */
8867DECL_INLINE_THROW(uint32_t)
8868iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
8869{
8870 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8871 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8872
8873 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8874 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8875
8876 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8877 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8878
8879 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
8880
8881 /* Free but don't flush the source register. */
8882 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8883 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8884
8885 return off;
8886}
8887
8888
8889#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
8890 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
8891
8892/** Emits code for IEM_MC_FETCH_XREG_U8. */
8893DECL_INLINE_THROW(uint32_t)
8894iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
8895{
8896 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8897 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
8898
8899 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8900 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8901
8902 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8903 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8904
8905 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
8906
8907 /* Free but don't flush the source register. */
8908 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8909 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8910
8911 return off;
8912}
8913
8914
8915#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
8916 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
8917
8918AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
8919#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
8920 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
8921
8922
8923/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
8924DECL_INLINE_THROW(uint32_t)
8925iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8926{
8927 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8928 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8929
8930 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8931 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
8932 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8933
8934 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8935
8936 /* Free but don't flush the source register. */
8937 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8938 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8939
8940 return off;
8941}
8942
8943
8944#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
8945 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
8946
8947#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
8948 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
8949
8950#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
8951 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
8952
8953#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
8954 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
8955
8956#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
8957 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
8958
8959#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
8960 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
8961
8962/** Emits code for IEM_MC_STORE_XREG_U64/U32/U16/U8 and IEM_MC_STORE_XREG_R64/R32. */
8963DECL_INLINE_THROW(uint32_t)
8964iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
8965 uint8_t cbLocal, uint8_t iElem)
8966{
8967 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8968 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
8969
8970#ifdef VBOX_STRICT
8971 switch (cbLocal)
8972 {
8973 case sizeof(uint64_t): Assert(iElem < 2); break;
8974 case sizeof(uint32_t): Assert(iElem < 4); break;
8975 case sizeof(uint16_t): Assert(iElem < 8); break;
8976 case sizeof(uint8_t): Assert(iElem < 16); break;
8977 default: AssertFailed();
8978 }
8979#endif
8980
8981 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8982 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8983 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
8984
8985 switch (cbLocal)
8986 {
8987 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
8988 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
8989 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
8990 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
8991 default: AssertFailed();
8992 }
8993
8994 /* Free but don't flush the source register. */
8995 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8996 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8997
8998 return off;
8999}
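/*
 * Editorial worked example: storing a 16-bit value with iElem=3 via
 * IEM_MC_STORE_XREG_U16 only touches bits 63:48 of the XMM register; all other
 * bits are preserved, which is why the destination is allocated ForUpdate above
 * rather than ForFullWrite.
 */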
9000
9001
9002#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9003 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9004
9005/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9006DECL_INLINE_THROW(uint32_t)
9007iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9008{
9009 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9010 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9011
9012 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9013 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9014 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9015
9016    /* Zero the vector register first, then store the 64-bit value to the lower 64 bits. */
9017 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9018 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9019
9020 /* Free but don't flush the source register. */
9021 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9022 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9023
9024 return off;
9025}
9026
9027
9028#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
9029 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
9030
9031/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
9032DECL_INLINE_THROW(uint32_t)
9033iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9034{
9035 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9036 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9037
9038 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9039 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9040 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9041
9042 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
9043 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9044 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9045
9046 /* Free but don't flush the source register. */
9047 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9048 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9049
9050 return off;
9051}
9052
9053
9054#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
9055 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
9056
9057/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
9058DECL_INLINE_THROW(uint32_t)
9059iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
9060 uint8_t idxSrcVar, uint8_t iDwSrc)
9061{
9062 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9063 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9064
9065 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9066 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9067 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9068
9069 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
9070 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
9071
9072 /* Free but don't flush the destination register. */
9073 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9074 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9075
9076 return off;
9077}
9078
9079
9080#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9081 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9082
9083/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
9084DECL_INLINE_THROW(uint32_t)
9085iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9086{
9087 /*
9088     * The iYRegSrc == iYRegDst case needs to be treated differently here: if iYRegDst gets allocated first for the full write,
9089     * it won't load the actual value from CPUMCTX. When iYRegSrc is allocated afterwards, it will be duplicated from the already
9090     * allocated host register for iYRegDst, which contains garbage. This would be caught by the guest register value checking in debug builds.
9091 */
9092 if (iYRegDst != iYRegSrc)
9093 {
9094 /* Allocate destination and source register. */
9095 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9096 kIemNativeGstSimdRegLdStSz_256,
9097 kIemNativeGstRegUse_ForFullWrite);
9098 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9099 kIemNativeGstSimdRegLdStSz_Low128,
9100 kIemNativeGstRegUse_ReadOnly);
9101
9102 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9103 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9104
9105 /* Free but don't flush the source and destination register. */
9106 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9107 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9108 }
9109 else
9110 {
9111 /* This effectively only clears the upper 128-bits of the register. */
9112 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9113 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
9114
9115 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9116
9117 /* Free but don't flush the destination register. */
9118 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9119 }
9120
9121 return off;
9122}
9123
9124
9125#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9126 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9127
9128/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
9129DECL_INLINE_THROW(uint32_t)
9130iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9131{
9132 /*
9133     * The iYRegSrc == iYRegDst case needs to be treated differently here: if iYRegDst gets allocated first for the full write,
9134     * it won't load the actual value from CPUMCTX. When iYRegSrc is allocated afterwards, it will be duplicated from the already
9135     * allocated host register for iYRegDst, which contains garbage. This would be caught by the guest register value checking in debug builds.
9136     * For iYRegSrc == iYRegDst this would effectively only clear the upper 256 bits of a zmm register, which we don't support yet, so it is just a nop.
9137 */
9138 if (iYRegDst != iYRegSrc)
9139 {
9140 /* Allocate destination and source register. */
9141 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9142 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
9143 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9144 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9145
9146 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9147
9148 /* Free but don't flush the source and destination register. */
9149 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9150 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9151 }
9152
9153 return off;
9154}
9155
9156
9157#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
9158 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
9159
9160/** Emits code for IEM_MC_FETCH_YREG_U128. */
9161DECL_INLINE_THROW(uint32_t)
9162iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
9163{
9164 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9165 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9166
9167 Assert(iDQWord <= 1);
9168 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9169 iDQWord == 1
9170 ? kIemNativeGstSimdRegLdStSz_High128
9171 : kIemNativeGstSimdRegLdStSz_Low128,
9172 kIemNativeGstRegUse_ReadOnly);
9173
9174 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9175 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9176
9177 if (iDQWord == 1)
9178 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9179 else
9180 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9181
9182 /* Free but don't flush the source register. */
9183 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9184 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9185
9186 return off;
9187}
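/*
 * Editorial note: a_iDQWord selects the 128-bit half of the YMM register,
 * i.e. iDQWord=0 fetches bits 127:0 and iDQWord=1 fetches bits 255:128, which is
 * why only the corresponding half is loaded from the guest state above.
 */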
9188
9189
9190#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
9191 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
9192
9193/** Emits code for IEM_MC_FETCH_YREG_U64. */
9194DECL_INLINE_THROW(uint32_t)
9195iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
9196{
9197 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9198 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9199
9200 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9201 iQWord >= 2
9202 ? kIemNativeGstSimdRegLdStSz_High128
9203 : kIemNativeGstSimdRegLdStSz_Low128,
9204 kIemNativeGstRegUse_ReadOnly);
9205
9206 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9207 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9208
9209 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9210
9211 /* Free but don't flush the source register. */
9212 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9213 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9214
9215 return off;
9216}
9217
9218
9219#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
9220 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
9221
9222/** Emits code for IEM_MC_FETCH_YREG_U32. */
9223DECL_INLINE_THROW(uint32_t)
9224iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
9225{
9226 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9227 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9228
9229 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9230 iDWord >= 4
9231 ? kIemNativeGstSimdRegLdStSz_High128
9232 : kIemNativeGstSimdRegLdStSz_Low128,
9233 kIemNativeGstRegUse_ReadOnly);
9234
9235 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9236 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9237
9238 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9239
9240 /* Free but don't flush the source register. */
9241 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9242 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9243
9244 return off;
9245}
9246
9247
9248#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
9249 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
9250
9251/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
9252DECL_INLINE_THROW(uint32_t)
9253iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9254{
9255 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9256 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
9257
9258 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9259
9260 /* Free but don't flush the register. */
9261 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9262
9263 return off;
9264}
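/*
 * Illustrative note (not from the original source): clearing bits 255:128 is the
 * typical tail of VEX.128 encoded instructions, which architecturally zero the
 * upper half of the destination YMM register, e.g. VMOVAPS xmm1, xmm2 under AVX.
 */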
9265
9266
9267#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
9268 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
9269
9270/** Emits code for IEM_MC_STORE_YREG_U128. */
9271DECL_INLINE_THROW(uint32_t)
9272iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
9273{
9274 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9275 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9276
9277 Assert(iDQword <= 1);
9278 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9279 iDQword == 0
9280 ? kIemNativeGstSimdRegLdStSz_Low128
9281 : kIemNativeGstSimdRegLdStSz_High128,
9282 kIemNativeGstRegUse_ForFullWrite);
9283
9284 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9285
9286 if (iDQword == 0)
9287 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9288 else
9289 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
9290
9291 /* Free but don't flush the source register. */
9292 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9293 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9294
9295 return off;
9296}
9297
9298
9299#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9300 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9301
9302/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
9303DECL_INLINE_THROW(uint32_t)
9304iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9305{
9306 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9307 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9308
9309 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9310 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9311
9312 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9313
9314 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9315 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9316
9317 /* Free but don't flush the source register. */
9318 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9319 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9320
9321 return off;
9322}
9323
9324
9325#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
9326 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
9327
9328/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
9329DECL_INLINE_THROW(uint32_t)
9330iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9331{
9332 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9333 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9334
9335 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9336 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9337
9338 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9339
9340 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9341 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9342
9343 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9344 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9345
9346 return off;
9347}
9348
9349
9350#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
9351 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
9352
9353/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
9354DECL_INLINE_THROW(uint32_t)
9355iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9356{
9357 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9358 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9359
9360 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9361 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9362
9363 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9364
9365 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9366 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9367
9368 /* Free but don't flush the source register. */
9369 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9370 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9371
9372 return off;
9373}
9374
9375
9376#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
9377 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
9378
9379/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
9380DECL_INLINE_THROW(uint32_t)
9381iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9382{
9383 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9384 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9385
9386 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9387 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9388
9389 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9390
9391 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9392 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9393
9394 /* Free but don't flush the source register. */
9395 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9396 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9397
9398 return off;
9399}
9400
9401
9402#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
9403 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
9404
9405/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
9406DECL_INLINE_THROW(uint32_t)
9407iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9408{
9409 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9410 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9411
9412 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9413 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9414
9415 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9416
9417 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9418 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9419
9420 /* Free but don't flush the source register. */
9421 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9422 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9423
9424 return off;
9425}
9426
9427
9428#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
9429 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
9430
9431/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
9432DECL_INLINE_THROW(uint32_t)
9433iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9434{
9435 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9436 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9437
9438 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9439 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9440
9441 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9442
9443 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9444
9445 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9446 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9447
9448 return off;
9449}
9450
9451
9452#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
9453 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
9454
9455/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
9456DECL_INLINE_THROW(uint32_t)
9457iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9458{
9459 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9460 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9461
9462 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9463 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9464
9465 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9466
9467 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9468
9469 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9470 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9471
9472 return off;
9473}
9474
9475
9476#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9477 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9478
9479/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
9480DECL_INLINE_THROW(uint32_t)
9481iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9482{
9483 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9484 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9485
9486 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9487 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9488
9489 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9490
9491 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9492
9493 /* Free but don't flush the source register. */
9494 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9495 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9496
9497 return off;
9498}
9499
9500
9501#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9502 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9503
9504/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
9505DECL_INLINE_THROW(uint32_t)
9506iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9507{
9508 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9509 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9510
9511 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9512 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9513
9514 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9515
9516 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9517
9518 /* Free but don't flush the source register. */
9519 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9520 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9521
9522 return off;
9523}
9524
9525
9526#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9527 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9528
9529/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
9530DECL_INLINE_THROW(uint32_t)
9531iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9532{
9533 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9534 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9535
9536 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9537 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9538
9539 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9540
9541 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
9542
9543 /* Free but don't flush the source register. */
9544 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9545 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9546
9547 return off;
9548}
9549
9550
9551#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9552 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9553
9554/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
9555DECL_INLINE_THROW(uint32_t)
9556iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9557{
9558 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9559 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9560
9561 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9562 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9563
9564 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9565
9566 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9567 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
9568
9569 /* Free but don't flush the source register. */
9570 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9571 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9572
9573 return off;
9574}
9575
9576
9577#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9578 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9579
9580/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
9581DECL_INLINE_THROW(uint32_t)
9582iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9583{
9584 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9585 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9586
9587 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9588 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9589
9590 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9591
9592 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9593 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9594
9595 /* Free but don't flush the source register. */
9596 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9597 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9598
9599 return off;
9600}
9601
9602
9603#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
9604 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
9605
9606/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
9607DECL_INLINE_THROW(uint32_t)
9608iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
9609{
9610 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9611 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9612
9613 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9614 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9615 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9616 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9617 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9618
9619 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9620 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9621 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9622
9623 /* Free but don't flush the source and destination registers. */
9624 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9625 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9626 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9627
9628 return off;
9629}
9630
9631
9632#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
9633 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
9634
9635/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
9636DECL_INLINE_THROW(uint32_t)
9637iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
9638{
9639 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9640 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9641
9642 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9643 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9644 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9645 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9646 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9647
9648 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9649 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
9650 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9651
9652 /* Free but don't flush the source and destination registers. */
9653 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9654 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9655 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9656
9657 return off;
9658}
9659
9660
9661#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
9662 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
9663
9664
9665/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
9666DECL_INLINE_THROW(uint32_t)
9667iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
9668{
9669 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9670 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9671
9672 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
9673 if (bImm8Mask & RT_BIT(0))
9674 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
9675 if (bImm8Mask & RT_BIT(1))
9676 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
9677 if (bImm8Mask & RT_BIT(2))
9678 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
9679 if (bImm8Mask & RT_BIT(3))
9680 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
9681
9682 /* Free but don't flush the destination register. */
9683 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9684
9685 return off;
9686}
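/*
 * Editorial worked example: bImm8Mask = 0x05 (bits 0 and 2 set) zeroes dword
 * elements 0 and 2, i.e. bits 31:0 and 95:64 of the XMM register, leaving the
 * other two dwords untouched.  This mirrors an INSERTPS-style zero mask, though
 * that particular use is an assumption and not taken from this file.
 */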
9687
9688
9689#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
9690 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
9691
9692
9693/** Emits code for IEM_MC_FETCH_YREG_U256. */
9694DECL_INLINE_THROW(uint32_t)
9695iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
9696{
9697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9698 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
9699
9700 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9701 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
9702 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9703
9704 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
9705
9706 /* Free but don't flush the source register. */
9707 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9708 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9709
9710 return off;
9711}
9712
9713
9714#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
9715 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
9716
9717
9718/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
9719DECL_INLINE_THROW(uint32_t)
9720iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
9721{
9722 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9723 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9724
9725 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9726 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9727    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9728
9729 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
9730
9731 /* Free but don't flush the source register. */
9732 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9733 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9734
9735 return off;
9736}
9737
9738
9739#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
9740 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
9741
9742
9743/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
9744DECL_INLINE_THROW(uint32_t)
9745iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
9746 uint8_t idxSrcVar, uint8_t iDwSrc)
9747{
9748 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9749 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9750
9751 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9752 iDwDst < 4
9753 ? kIemNativeGstSimdRegLdStSz_Low128
9754 : kIemNativeGstSimdRegLdStSz_High128,
9755 kIemNativeGstRegUse_ForUpdate);
9756    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9757 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
9758
9759 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
9760 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
9761
9762 /* Free but don't flush the source register. */
9763 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9764 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9765 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9766
9767 return off;
9768}
9769
9770
9771#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
9772 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
9773
9774
9775/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
9776DECL_INLINE_THROW(uint32_t)
9777iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
9778 uint8_t idxSrcVar, uint8_t iQwSrc)
9779{
9780 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9781 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9782
9783 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9784 iQwDst < 2
9785 ? kIemNativeGstSimdRegLdStSz_Low128
9786 : kIemNativeGstSimdRegLdStSz_High128,
9787 kIemNativeGstRegUse_ForUpdate);
9788    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9789 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
9790
9791 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
9792 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
9793
9794 /* Free but don't flush the source register. */
9795 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9796 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9797 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9798
9799 return off;
9800}
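
/** @note  Rough illustrative sketch (not the emitted code) of the guest-visible effect
 *         of the IEM_MC_STORE_YREG_U32_U256 / IEM_MC_STORE_YREG_U64_U256 emitters
 *         above, expressed on the RTUINT256U view of the values: only the selected
 *         element of the destination YMM register is replaced and the rest is
 *         preserved, which is why kIemNativeGstRegUse_ForUpdate is used and only the
 *         128-bit half containing the destination element is loaded.
 * @code
 *      RTUINT256U uSrc;    // the a_u256Value variable
 *      RTUINT256U uDst;    // the guest YMM register a_iYRegDst
 *      uDst.au32[iDwDst] = uSrc.au32[iDwSrc];   // IEM_MC_STORE_YREG_U32_U256
 *      uDst.au64[iQwDst] = uSrc.au64[iQwSrc];   // IEM_MC_STORE_YREG_U64_U256
 * @endcode */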
9801
9802
9803#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
9804 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
9805
9806
9807/** Emits code for IEM_MC_STORE_YREG_U64. */
9808DECL_INLINE_THROW(uint32_t)
9809iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
9810{
9811 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9812 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9813
9814 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9815 iQwDst < 2
9816 ? kIemNativeGstSimdRegLdStSz_Low128
9817 : kIemNativeGstSimdRegLdStSz_High128,
9818 kIemNativeGstRegUse_ForUpdate);
9819
9820 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9821
9822 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
9823
9824 /* Free but don't flush the destination register, and release the source variable's register. */
9825 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9826 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9827
9828 return off;
9829}
9830
9831
9832#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
9833 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
9834
9835/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
9836DECL_INLINE_THROW(uint32_t)
9837iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9838{
9839 RT_NOREF(pReNative, iYReg);
9840 /** @todo Needs to be implemented when support for AVX-512 is added. */
9841 return off;
9842}
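
/** @note  Rough illustrative sketch (not actual emitter code) of the architectural
 *         effect IEM_MC_CLEAR_ZREG_256_UP will have to produce once AVX-512 state is
 *         tracked: bits 511:256 of the selected ZMM register are cleared.  The helper
 *         below is purely hypothetical, standing in for wherever the upper 256 bits
 *         of the ZMM registers end up being stored.
 * @code
 *      RTUINT256U *pUpper256 = pWhereverTheUpperZmmHalfLives(iYReg); // hypothetical helper
 *      pUpper256->au64[0] = pUpper256->au64[1] = 0;
 *      pUpper256->au64[2] = pUpper256->au64[3] = 0;
 * @endcode */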
9843
9844
9845
9846/*********************************************************************************************************************************
9847* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
9848*********************************************************************************************************************************/
9849
9850/**
9851 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
9852 */
9853DECL_INLINE_THROW(uint32_t)
9854iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
9855{
9856 /* Grab the MXCSR register; it must not be call-volatile or we would end up freeing it when setting up the call below. */
9857 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9858 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
9859 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
9860
9861 /*
9862 * Need to do the FPU preparation.
9863 */
9864 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
9865
9866 /*
9867 * Do all the call setup and cleanup.
9868 */
9869 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS, false /*fFlushPendingWrites*/);
9870
9871 /*
9872 * Load the MXCSR register into the first argument and mask out the current exception flags.
9873 */
9874 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
9875 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
9876
9877 /*
9878 * Make the call.
9879 */
9880 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
9881
9882 /*
9883 * The updated MXCSR is in the return register.
9884 */
9885 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);
9886
9887#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9888 /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
9889 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
9890#endif
9891 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9892
9893 return off;
9894}
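
/** @note  Rough illustrative sketch (not the emitted code) of what the sequence
 *         generated by the common worker above computes at runtime, assuming the
 *         SSE/AVX assembly helpers take the MXCSR value as their first argument and
 *         return the updated MXCSR, as the emitter implies:
 * @code
 *      uint32_t const fMxCsrIn  = pVCpu->cpum.GstCtx.XState.x87.MXCSR & ~X86_MXCSR_XCPT_FLAGS;
 *      uint32_t const fMxCsrOut = pfnAImpl(fMxCsrIn, ...);  // does the actual SSE/AVX work
 *      pVCpu->cpum.GstCtx.XState.x87.MXCSR = fMxCsrOut;     // includes any newly raised exception flags
 * @endcode */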
9895
9896
9897#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
9898 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
9899
9900/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
9901DECL_INLINE_THROW(uint32_t)
9902iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
9903{
9904 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9905 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9906 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
9907}
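
/** @note  Hypothetical usage sketch: how IEM_MC_CALL_SSE_AIMPL_2 would typically appear
 *         in an instruction's MC block.  The helper name iemAImpl_someSseBinary_u128 and
 *         the argument variables are placeholders, not identifiers from this file.
 * @code
 *      IEM_MC_ARG(PX86XMMREG,  pDst, 0);
 *      IEM_MC_ARG(PCX86XMMREG, pSrc, 1);
 *      // ... fetch/ref the source and destination registers into pSrc and pDst ...
 *      IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_someSseBinary_u128, pDst, pSrc);
 * @endcode */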
9908
9909
9910#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
9911 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
9912
9913/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
9914DECL_INLINE_THROW(uint32_t)
9915iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9916{
9917 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9918 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9919 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9920 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
9921}
9922
9923
9924/*********************************************************************************************************************************
9925* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
9926*********************************************************************************************************************************/
9927
9928#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
9929 off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
9930
9931/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
9932DECL_INLINE_THROW(uint32_t)
9933iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
9934{
9935 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9936 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9937 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
9938}
9939
9940
9941#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
9942 off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
9943
9944/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
9945DECL_INLINE_THROW(uint32_t)
9946iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9947{
9948 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9949 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9950 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9951 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
9952}
9953#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
9954
9955
9956/*********************************************************************************************************************************
9957* Include instruction emitters. *
9958*********************************************************************************************************************************/
9959#include "target-x86/IEMAllN8veEmit-x86.h"
9960