VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 104956

Last change on this file since 104956 was 104956, checked in by vboxsync, 5 months ago

VMM/IEM: TLB statistics reorg. bugref:10687

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 430.5 KB
1/* $Id: IEMAllN8veRecompiler.cpp 104956 2024-06-18 11:44:59Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80
81
82/*
83 * Narrow down the configs here to avoid wasting time on unused configs.
84 * Note! Same checks in IEMAllThrdRecompiler.cpp.
85 */
86
87#ifndef IEM_WITH_CODE_TLB
88# error The code TLB must be enabled for the recompiler.
89#endif
90
91#ifndef IEM_WITH_DATA_TLB
92# error The data TLB must be enabled for the recompiler.
93#endif
94
95#ifndef IEM_WITH_SETJMP
96# error The setjmp approach must be enabled for the recompiler.
97#endif
98
99/** @todo eliminate this clang build hack. */
100#if RT_CLANG_PREREQ(4, 0)
101# pragma GCC diagnostic ignored "-Wunused-function"
102#endif
103
104
105/*********************************************************************************************************************************
106* Internal Functions *
107*********************************************************************************************************************************/
108#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
109static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
110#endif
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
113 IEMNATIVEGSTREG enmGstReg, uint32_t off);
114DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
115
116
117
118/*********************************************************************************************************************************
119* Native Recompilation *
120*********************************************************************************************************************************/
121
122
123/**
124 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
125 */
126IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
127{
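    /* Credit the instructions executed in this TB before the failing call, then run the
       common status code fiddling (rcPassUp merging).  VINF_IEM_REEXEC_BREAK is an internal
       'stop this TB' status and is therefore folded into VINF_SUCCESS here. */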
128 pVCpu->iem.s.cInstructions += idxInstr;
129 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
130}
131
132
133/**
134 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
135 */
136DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
137{
138 uint64_t fCpu = pVCpu->fLocalForcedActions;
139 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
140 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
141 | VMCPU_FF_TLB_FLUSH
142 | VMCPU_FF_UNHALT );
143 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
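    /* It is fine to continue if nothing relevant is pending, or if only an APIC/PIC interrupt
       is pending while interrupts are disabled (IF=0) or blocked by an interrupt shadow, and
       no VM-wide force flags are set. */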
144 if (RT_LIKELY( ( !fCpu
145 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
146 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
147 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
148 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
149 return false;
150 return true;
151}
152
153
154/**
155 * Used by TB code to try to switch directly to the next TB via a lookup table entry when the physical PC of the next instruction is already known.
156 */
157template <bool const a_fWithIrqCheck>
158IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
159 uint32_t fFlags, RTGCPHYS GCPhysPc))
160{
161 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
162 Assert(idxTbLookup < pTb->cTbLookupEntries);
163 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
164#if 1
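/* Direct linking: look up the next TB and, when everything matches, hand back its native
   code address so execution can continue without going via the outer loop.  The disabled
   #else variant below skips the lookup entirely. */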
165 PIEMTB const pNewTb = *ppNewTb;
166 if (pNewTb)
167 {
168# ifdef VBOX_STRICT
169 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
170 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
171 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
172 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
173 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
174 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
175# endif
176 if (pNewTb->GCPhysPc == GCPhysPc)
177 {
178# ifdef VBOX_STRICT
179 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
180 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
181 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
182 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
183 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
184 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
185 {
186 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
187 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
188 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
189 }
190 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
191 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
192 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
193#endif
194
195 /*
196 * Check them + type.
197 */
198 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
199 {
200 /*
201 * Check for interrupts and stuff.
202 */
203 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
204 * The main problems are the statistics and, to some degree, the logging. :/ */
205 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
206 {
207 /* Do polling. */
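 /* Only poll the timers every 512th native TB execution, i.e. when (cTbExecNative & 511) == 0,
    to keep TMTimerPollBoolWith32BitMilliTS off the hot path. */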
208 uint64_t const cTbExecNative = pVCpu->iem.s.cTbExecNative;
209 if ( RT_LIKELY(cTbExecNative & 511)
210 || !TMTimerPollBoolWith32BitMilliTS(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->iem.s.msRecompilerPollNow) )
211 {
212 /*
213 * Success. Update statistics and switch to the next TB.
214 */
215 pVCpu->iem.s.cTbExecNative = cTbExecNative + 1;
216 if (a_fWithIrqCheck)
217 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
218 else
219 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
220
221 pNewTb->cUsed += 1;
222 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
223 pVCpu->iem.s.pCurTbR3 = pNewTb;
224 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
225 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
226 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
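 /* Hand the address of the next TB's native code back to the TB exit stub so it can branch
    straight to it; a zero return means: go back to the outer execution loop. */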
227 return (uintptr_t)pNewTb->Native.paInstructions;
228 }
229 }
230 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
231 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
232 }
233 else
234 {
235 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
236 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
237 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
238 }
239 }
240 else
241 {
242 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
243 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
244 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
245 }
246 }
247 else
248 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
249#else
250 NOREF(GCPhysPc);
251#endif
252
253 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
254 return 0;
255}
256
257
258/**
259 * Used by TB code to try to switch directly to the next TB via a lookup table entry, performing the code TLB lookup for the flat PC itself.
260 */
261template <bool const a_fWithIrqCheck>
262IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
263{
264 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
265 Assert(idxTbLookup < pTb->cTbLookupEntries);
266 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
267#if 1
268 PIEMTB const pNewTb = *ppNewTb;
269 if (pNewTb)
270 {
271 /*
272 * Calculate the flags for the next TB and check if they match.
273 */
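 /* The flags must reproduce the key the next TB was compiled with: the relevant IEM_F_XXX
    execution-mode bits, the interrupt/NMI inhibit state, and whether CS limit checks are
    needed, so a single compare against pNewTb->fFlags suffices. */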
274 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
275 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
276 { /* likely */ }
277 else
278 {
279 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
280 fFlags |= IEMTB_F_INHIBIT_SHADOW;
281 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
282 fFlags |= IEMTB_F_INHIBIT_NMI;
283 }
284 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
285 {
286 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
287 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
288 { /* likely */ }
289 else
290 fFlags |= IEMTB_F_CS_LIM_CHECKS;
291 }
292 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
293
294 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
295 {
296 /*
297 * Do the TLB lookup for flat RIP and compare the result with the next TB.
298 *
299 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
300 */
301 /* Calc the effective PC. */
302 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
303 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
304 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
305
306 /* Advance within the current buffer (PAGE) when possible. */
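 /* If the flat PC still falls inside the instruction buffer page we already have mapped,
    GCPhysPc can be derived directly from GCPhysInstrBuf; otherwise iemOpcodeFetchBytesJmp
    re-walks the code TLB (and may longjmp on failure). */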
307 RTGCPHYS GCPhysPc;
308 uint64_t off;
309 if ( pVCpu->iem.s.pbInstrBuf
310 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
311 {
312 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
313 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
314 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
315 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
316 else
317 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
318 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
319 }
320 else
321 {
322 pVCpu->iem.s.pbInstrBuf = NULL;
323 pVCpu->iem.s.offCurInstrStart = 0;
324 pVCpu->iem.s.offInstrNextByte = 0;
325 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
326 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
327 }
328
329 if (pNewTb->GCPhysPc == GCPhysPc)
330 {
331 /*
332 * Check for interrupts and stuff.
333 */
334 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
335 * The main problems are the statistics and, to some degree, the logging. :/ */
336 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
337 {
338 /* Do polling. */
339 uint64_t const cTbExecNative = pVCpu->iem.s.cTbExecNative;
340 if ( RT_LIKELY(cTbExecNative & 511)
341 || !TMTimerPollBoolWith32BitMilliTS(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->iem.s.msRecompilerPollNow) )
342 {
343 /*
344 * Success. Update statistics and switch to the next TB.
345 */
346 pVCpu->iem.s.cTbExecNative = cTbExecNative + 1;
347 if (a_fWithIrqCheck)
348 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
349 else
350 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
351
352 pNewTb->cUsed += 1;
353 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
354 pVCpu->iem.s.pCurTbR3 = pNewTb;
355 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
356 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
357 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
358 return (uintptr_t)pNewTb->Native.paInstructions;
359 }
360 }
361 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
362 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
363 }
364 else
365 {
366 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
367 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
368 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
369 }
370 }
371 else
372 {
373 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
374 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
375 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
376 }
377 }
378 else
379 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
380#else
381 NOREF(fFlags);
382 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
383#endif
384
385 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
386 return 0;
387}
388
389
390/**
391 * Used by TB code when it wants to raise a \#DE.
392 */
393IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
394{
395 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
396 iemRaiseDivideErrorJmp(pVCpu);
397#ifndef _MSC_VER
398 return VINF_IEM_RAISED_XCPT; /* not reached */
399#endif
400}
401
402
403/**
404 * Used by TB code when it wants to raise a \#UD.
405 */
406IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
407{
408 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
409 iemRaiseUndefinedOpcodeJmp(pVCpu);
410#ifndef _MSC_VER
411 return VINF_IEM_RAISED_XCPT; /* not reached */
412#endif
413}
414
415
416/**
417 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
418 *
419 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
420 */
421IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
422{
423 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
424 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
425 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
426 iemRaiseUndefinedOpcodeJmp(pVCpu);
427 else
428 iemRaiseDeviceNotAvailableJmp(pVCpu);
429#ifndef _MSC_VER
430 return VINF_IEM_RAISED_XCPT; /* not reached */
431#endif
432}
433
434
435/**
436 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
437 *
438 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
439 */
440IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
441{
442 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
443 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
444 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
445 iemRaiseUndefinedOpcodeJmp(pVCpu);
446 else
447 iemRaiseDeviceNotAvailableJmp(pVCpu);
448#ifndef _MSC_VER
449 return VINF_IEM_RAISED_XCPT; /* not reached */
450#endif
451}
452
453
454/**
455 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
456 *
457 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
458 */
459IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
460{
461 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
462 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
463 iemRaiseSimdFpExceptionJmp(pVCpu);
464 else
465 iemRaiseUndefinedOpcodeJmp(pVCpu);
466#ifndef _MSC_VER
467 return VINF_IEM_RAISED_XCPT; /* not reached */
468#endif
469}
470
471
472/**
473 * Used by TB code when it wants to raise a \#NM.
474 */
475IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
476{
477 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
478 iemRaiseDeviceNotAvailableJmp(pVCpu);
479#ifndef _MSC_VER
480 return VINF_IEM_RAISED_XCPT; /* not reached */
481#endif
482}
483
484
485/**
486 * Used by TB code when it wants to raise a \#GP(0).
487 */
488IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
489{
490 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
491 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
492#ifndef _MSC_VER
493 return VINF_IEM_RAISED_XCPT; /* not reached */
494#endif
495}
496
497
498/**
499 * Used by TB code when it wants to raise a \#MF.
500 */
501IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
502{
503 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
504 iemRaiseMathFaultJmp(pVCpu);
505#ifndef _MSC_VER
506 return VINF_IEM_RAISED_XCPT; /* not reached */
507#endif
508}
509
510
511/**
512 * Used by TB code when it wants to raise a \#XF.
513 */
514IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
515{
516 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
517 iemRaiseSimdFpExceptionJmp(pVCpu);
518#ifndef _MSC_VER
519 return VINF_IEM_RAISED_XCPT; /* not reached */
520#endif
521}
522
523
524/**
525 * Used by TB code when detecting opcode changes.
526 * @see iemThreadeFuncWorkerObsoleteTb
527 */
528IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
529{
530 /* We set fSafeToFree to false as we're being called in the context
531 of a TB callback function, which for native TBs means we cannot release
532 the executable memory until we've returned all the way back to iemTbExec,
533 since that return path goes via the native code generated for the TB. */
534 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
535 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
536 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
537 return VINF_IEM_REEXEC_BREAK;
538}
539
540
541/**
542 * Used by TB code when we need to switch to a TB with CS.LIM checking.
543 */
544IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
545{
546 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
547 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
548 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
549 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
550 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
551 return VINF_IEM_REEXEC_BREAK;
552}
553
554
555/**
556 * Used by TB code when we missed a PC check after a branch.
557 */
558IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
559{
560 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
561 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
562 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
563 pVCpu->iem.s.pbInstrBuf));
564 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
565 return VINF_IEM_REEXEC_BREAK;
566}
567
568
569
570/*********************************************************************************************************************************
571* Helpers: Segmented memory fetches and stores. *
572*********************************************************************************************************************************/
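/* Note: Each helper below has two bodies.  When IEMNATIVE_WITH_TLB_LOOKUP_FETCH /
   IEMNATIVE_WITH_TLB_LOOKUP_STORE is defined, the recompiler emits the data TLB lookup
   inline (see IEMN8veRecompilerTlbLookup.h) and these helpers are only reached on the
   slow path, so the 'Safe' workers are used; otherwise the regular 'Jmp' workers do the
   whole job here. */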
573
574/**
575 * Used by TB code to load unsigned 8-bit data w/ segmentation.
576 */
577IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
578{
579#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
580 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
581#else
582 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
583#endif
584}
585
586
587/**
588 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
589 * to 16 bits.
590 */
591IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
592{
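    /* Cast chain: int8_t sign-extends the byte, uint16_t truncates to the 16-bit result,
       and the final uint64_t zero-extends it for the return register. */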
593#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
594 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
595#else
596 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
597#endif
598}
599
600
601/**
602 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
603 * to 32 bits.
604 */
605IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
606{
607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
608 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
609#else
610 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
611#endif
612}
613
614/**
615 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
616 * to 64 bits.
617 */
618IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
619{
620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
621 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
622#else
623 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
624#endif
625}
626
627
628/**
629 * Used by TB code to load unsigned 16-bit data w/ segmentation.
630 */
631IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
632{
633#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
634 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
635#else
636 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
637#endif
638}
639
640
641/**
642 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
643 * to 32 bits.
644 */
645IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
646{
647#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
648 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
649#else
650 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
651#endif
652}
653
654
655/**
656 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
657 * to 64 bits.
658 */
659IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
660{
661#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
662 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
663#else
664 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
665#endif
666}
667
668
669/**
670 * Used by TB code to load unsigned 32-bit data w/ segmentation.
671 */
672IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
673{
674#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
675 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
676#else
677 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
678#endif
679}
680
681
682/**
683 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
684 * to 64 bits.
685 */
686IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
687{
688#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
689 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
690#else
691 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
692#endif
693}
694
695
696/**
697 * Used by TB code to load unsigned 64-bit data w/ segmentation.
698 */
699IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
700{
701#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
702 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
703#else
704 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
705#endif
706}
707
708
709#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
710/**
711 * Used by TB code to load 128-bit data w/ segmentation.
712 */
713IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
714{
715#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
716 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
717#else
718 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
719#endif
720}
721
722
723/**
724 * Used by TB code to load 128-bit data w/ segmentation, enforcing SSE alignment.
725 */
726IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
727{
728#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
729 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
730#else
731 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
732#endif
733}
734
735
736/**
737 * Used by TB code to load 128-bit data w/ segmentation, without alignment checking.
738 */
739IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
740{
741#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
742 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
743#else
744 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
745#endif
746}
747
748
749/**
750 * Used by TB code to load 256-bit data w/ segmentation, without alignment checking.
751 */
752IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
753{
754#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
755 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
756#else
757 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
758#endif
759}
760
761
762/**
763 * Used by TB code to load 256-bit data w/ segmentation, enforcing AVX alignment.
764 */
765IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
766{
767#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
768 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
769#else
770 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
771#endif
772}
773#endif
774
775
776/**
777 * Used by TB code to store unsigned 8-bit data w/ segmentation.
778 */
779IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
780{
781#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
782 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
783#else
784 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
785#endif
786}
787
788
789/**
790 * Used by TB code to store unsigned 16-bit data w/ segmentation.
791 */
792IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
793{
794#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
795 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
796#else
797 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
798#endif
799}
800
801
802/**
803 * Used by TB code to store unsigned 32-bit data w/ segmentation.
804 */
805IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
806{
807#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
808 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
809#else
810 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
811#endif
812}
813
814
815/**
816 * Used by TB code to store unsigned 64-bit data w/ segmentation.
817 */
818IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
819{
820#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
821 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
822#else
823 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
824#endif
825}
826
827
828#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
829/**
830 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing SSE alignment.
831 */
832IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
833{
834#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
835 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
836#else
837 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
838#endif
839}
840
841
842/**
843 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checking.
844 */
845IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
846{
847#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
848 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
849#else
850 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
851#endif
852}
853
854
855/**
856 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checking.
857 */
858IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
859{
860#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
861 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
862#else
863 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
864#endif
865}
866
867
868/**
869 * Used by TB code to store unsigned 256-bit data w/ segmentation, enforcing AVX alignment.
870 */
871IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
872{
873#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
874 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
875#else
876 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
877#endif
878}
879#endif
880
881
882
883/**
884 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
885 */
886IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
887{
888#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
889 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
890#else
891 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
892#endif
893}
894
895
896/**
897 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
898 */
899IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
900{
901#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
902 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
903#else
904 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
905#endif
906}
907
908
909/**
910 * Used by TB code to store a 32-bit selector value onto a generic stack.
911 *
912 * Intel CPUs don't write a whole dword, hence the special function.
913 */
914IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
915{
916#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
917 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
918#else
919 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
920#endif
921}
922
923
924/**
925 * Used by TB code to push an unsigned 64-bit value onto a generic stack.
926 */
927IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
928{
929#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
930 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
931#else
932 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
933#endif
934}
935
936
937/**
938 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
939 */
940IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
941{
942#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
943 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
944#else
945 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
946#endif
947}
948
949
950/**
951 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
952 */
953IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
954{
955#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
956 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
957#else
958 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
959#endif
960}
961
962
963/**
964 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
965 */
966IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
967{
968#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
969 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
970#else
971 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
972#endif
973}
974
975
976
977/*********************************************************************************************************************************
978* Helpers: Flat memory fetches and stores. *
979*********************************************************************************************************************************/
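/* Note: 'Flat' means no segment base/limit is applied.  The 'Safe' workers are therefore
   called with iSegReg = UINT8_MAX, which the memory code treats as a flat (already
   linearized) address. */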
980
981/**
982 * Used by TB code to load unsigned 8-bit data w/ flat address.
983 * @note Zero extending the value to 64-bit to simplify assembly.
984 */
985IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
986{
987#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
988 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
989#else
990 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
991#endif
992}
993
994
995/**
996 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
997 * to 16 bits.
998 * @note Zero extending the value to 64-bit to simplify assembly.
999 */
1000IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1001{
1002#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1003 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1004#else
1005 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1006#endif
1007}
1008
1009
1010/**
1011 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1012 * to 32 bits.
1013 * @note Zero extending the value to 64-bit to simplify assembly.
1014 */
1015IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1016{
1017#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1018 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1019#else
1020 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1021#endif
1022}
1023
1024
1025/**
1026 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1027 * to 64 bits.
1028 */
1029IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1030{
1031#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1032 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1033#else
1034 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1035#endif
1036}
1037
1038
1039/**
1040 * Used by TB code to load unsigned 16-bit data w/ flat address.
1041 * @note Zero extending the value to 64-bit to simplify assembly.
1042 */
1043IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1044{
1045#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1046 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1047#else
1048 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1049#endif
1050}
1051
1052
1053/**
1054 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1055 * to 32 bits.
1056 * @note Zero extending the value to 64-bit to simplify assembly.
1057 */
1058IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1059{
1060#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1061 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1062#else
1063 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1064#endif
1065}
1066
1067
1068/**
1069 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1070 * to 64 bits.
1071 * @note Zero extending the value to 64-bit to simplify assembly.
1072 */
1073IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1074{
1075#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1076 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1077#else
1078 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1079#endif
1080}
1081
1082
1083/**
1084 * Used by TB code to load unsigned 32-bit data w/ flat address.
1085 * @note Zero extending the value to 64-bit to simplify assembly.
1086 */
1087IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1088{
1089#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1090 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1091#else
1092 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1093#endif
1094}
1095
1096
1097/**
1098 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1099 * to 64 bits.
1100 * @note Zero extending the value to 64-bit to simplify assembly.
1101 */
1102IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1103{
1104#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1105 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1106#else
1107 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1108#endif
1109}
1110
1111
1112/**
1113 * Used by TB code to load unsigned 64-bit data w/ flat address.
1114 */
1115IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1116{
1117#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1118 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1119#else
1120 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1121#endif
1122}
1123
1124
1125#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1126/**
1127 * Used by TB code to load unsigned 128-bit data w/ flat address.
1128 */
1129IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1130{
1131#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1132 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1133#else
1134 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1135#endif
1136}
1137
1138
1139/**
1140 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing SSE alignment.
1141 */
1142IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1143{
1144#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1145 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1146#else
1147 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1148#endif
1149}
1150
1151
1152/**
1153 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checking.
1154 */
1155IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1156{
1157#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1158 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1159#else
1160 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1161#endif
1162}
1163
1164
1165/**
1166 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checking.
1167 */
1168IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1169{
1170#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1171 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1172#else
1173 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1174#endif
1175}
1176
1177
1178/**
1179 * Used by TB code to load unsigned 256-bit data w/ flat address, enforcing AVX alignment.
1180 */
1181IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1182{
1183#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1184 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1185#else
1186 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1187#endif
1188}
1189#endif
1190
1191
1192/**
1193 * Used by TB code to store unsigned 8-bit data w/ flat address.
1194 */
1195IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1196{
1197#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1198 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1199#else
1200 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1201#endif
1202}
1203
1204
1205/**
1206 * Used by TB code to store unsigned 16-bit data w/ flat address.
1207 */
1208IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1209{
1210#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1211 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1212#else
1213 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1214#endif
1215}
1216
1217
1218/**
1219 * Used by TB code to store unsigned 32-bit data w/ flat address.
1220 */
1221IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1222{
1223#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1224 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1225#else
1226 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1227#endif
1228}
1229
1230
1231/**
1232 * Used by TB code to store unsigned 64-bit data w/ flat address.
1233 */
1234IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1235{
1236#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1237 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1238#else
1239 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1240#endif
1241}
1242
1243
1244#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1245/**
1246 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing SSE alignment.
1247 */
1248IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1249{
1250#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1251 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1252#else
1253 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1254#endif
1255}
1256
1257
1258/**
1259 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checking.
1260 */
1261IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1262{
1263#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1264 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1265#else
1266 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1267#endif
1268}
1269
1270
1271/**
1272 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checking.
1273 */
1274IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1275{
1276#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1277 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1278#else
1279 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1280#endif
1281}
1282
1283
1284/**
1285 * Used by TB code to store unsigned 256-bit data w/ flat address, enforcing AVX alignment.
1286 */
1287IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1288{
1289#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1290 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1291#else
1292 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1293#endif
1294}
1295#endif
1296
1297
1298
1299/**
1300 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1301 */
1302IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1303{
1304#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1305 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1306#else
1307 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1308#endif
1309}
1310
1311
1312/**
1313 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1314 */
1315IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1316{
1317#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1318 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1319#else
1320 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1321#endif
1322}
1323
1324
1325/**
1326 * Used by TB code to store a segment selector value onto a flat stack.
1327 *
1328 * Intel CPUs don't write a whole dword, hence the special function.
1329 */
1330IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1331{
1332#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1333 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1334#else
1335 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1336#endif
1337}
1338
1339
1340/**
1341 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1342 */
1343IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1344{
1345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1346 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1347#else
1348 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1349#endif
1350}
1351
1352
1353/**
1354 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1355 */
1356IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1357{
1358#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1359 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1360#else
1361 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1362#endif
1363}
1364
1365
1366/**
1367 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1368 */
1369IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1370{
1371#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1372 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1373#else
1374 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1375#endif
1376}
1377
1378
1379/**
1380 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1381 */
1382IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1383{
1384#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1385 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1386#else
1387 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1388#endif
1389}
1390
1391
1392
1393/*********************************************************************************************************************************
1394* Helpers: Segmented memory mapping. *
1395*********************************************************************************************************************************/
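/* Note: These return a host pointer to the guest memory (or to a bounce buffer) and fill
   *pbUnmapInfo with the token the corresponding commit/unmap helper needs to complete the
   access later. */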
1396
1397/**
1398 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1399 * segmentation.
1400 */
1401IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1402 RTGCPTR GCPtrMem, uint8_t iSegReg))
1403{
1404#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1405 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1406#else
1407 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1408#endif
1409}
1410
1411
1412/**
1413 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1414 */
1415IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1416 RTGCPTR GCPtrMem, uint8_t iSegReg))
1417{
1418#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1419 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1420#else
1421 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1422#endif
1423}
1424
1425
1426/**
1427 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1428 */
1429IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1430 RTGCPTR GCPtrMem, uint8_t iSegReg))
1431{
1432#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1433 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1434#else
1435 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1436#endif
1437}
1438
1439
1440/**
1441 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1442 */
1443IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1444 RTGCPTR GCPtrMem, uint8_t iSegReg))
1445{
1446#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1447 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1448#else
1449 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1450#endif
1451}
1452
1453
1454/**
1455 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1456 * segmentation.
1457 */
1458IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1459 RTGCPTR GCPtrMem, uint8_t iSegReg))
1460{
1461#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1462 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1463#else
1464 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1465#endif
1466}
1467
1468
1469/**
1470 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1471 */
1472IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1473 RTGCPTR GCPtrMem, uint8_t iSegReg))
1474{
1475#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1476 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1477#else
1478 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1479#endif
1480}
1481
1482
1483/**
1484 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1485 */
1486IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1487 RTGCPTR GCPtrMem, uint8_t iSegReg))
1488{
1489#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1490 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1491#else
1492 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1493#endif
1494}
1495
1496
1497/**
1498 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1499 */
1500IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1501 RTGCPTR GCPtrMem, uint8_t iSegReg))
1502{
1503#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1504 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1505#else
1506 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1507#endif
1508}
1509
1510
1511/**
1512 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1513 * segmentation.
1514 */
1515IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1516 RTGCPTR GCPtrMem, uint8_t iSegReg))
1517{
1518#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1519 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1520#else
1521 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1522#endif
1523}
1524
1525
1526/**
1527 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1528 */
1529IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1530 RTGCPTR GCPtrMem, uint8_t iSegReg))
1531{
1532#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1533 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1534#else
1535 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1536#endif
1537}
1538
1539
1540/**
1541 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1542 */
1543IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1544 RTGCPTR GCPtrMem, uint8_t iSegReg))
1545{
1546#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1547 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1548#else
1549 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1550#endif
1551}
1552
1553
1554/**
1555 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1556 */
1557IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1558 RTGCPTR GCPtrMem, uint8_t iSegReg))
1559{
1560#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1561 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1562#else
1563 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1564#endif
1565}
1566
1567
1568/**
1569 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1570 * segmentation.
1571 */
1572IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1573 RTGCPTR GCPtrMem, uint8_t iSegReg))
1574{
1575#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1576 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1577#else
1578 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1579#endif
1580}
1581
1582
1583/**
1584 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1585 */
1586IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1587 RTGCPTR GCPtrMem, uint8_t iSegReg))
1588{
1589#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1590 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1591#else
1592 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1593#endif
1594}
1595
1596
1597/**
1598 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1599 */
1600IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1601 RTGCPTR GCPtrMem, uint8_t iSegReg))
1602{
1603#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1604 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1605#else
1606 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1607#endif
1608}
1609
1610
1611/**
1612 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1613 */
1614IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1615 RTGCPTR GCPtrMem, uint8_t iSegReg))
1616{
1617#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1618 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1619#else
1620 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1621#endif
1622}
1623
1624
1625/**
1626 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1629 RTGCPTR GCPtrMem, uint8_t iSegReg))
1630{
1631#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1632 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1633#else
1634 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1635#endif
1636}
1637
1638
1639/**
1640 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1641 */
1642IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1643 RTGCPTR GCPtrMem, uint8_t iSegReg))
1644{
1645#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1646 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1647#else
1648 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1649#endif
1650}
1651
1652
1653/**
1654 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1655 * segmentation.
1656 */
1657IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1658 RTGCPTR GCPtrMem, uint8_t iSegReg))
1659{
1660#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1661 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1662#else
1663 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1664#endif
1665}
1666
1667
1668/**
1669 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1670 */
1671IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1672 RTGCPTR GCPtrMem, uint8_t iSegReg))
1673{
1674#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1675 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1676#else
1677 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1678#endif
1679}
1680
1681
1682/**
1683 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1684 */
1685IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1686 RTGCPTR GCPtrMem, uint8_t iSegReg))
1687{
1688#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1689 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1690#else
1691 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1692#endif
1693}
1694
1695
1696/**
1697 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1698 */
1699IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1700 RTGCPTR GCPtrMem, uint8_t iSegReg))
1701{
1702#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1703 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1704#else
1705 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1706#endif
1707}
1708
1709
1710/*********************************************************************************************************************************
1711* Helpers: Flat memory mapping. *
1712*********************************************************************************************************************************/
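
/*
 * Note: These differ from the segmented helpers above only in that no segment
 * register applies.  When IEMNATIVE_WITH_TLB_LOOKUP_MAPPED is active they
 * reuse the segmented safe workers, passing UINT8_MAX as the segment register
 * index; otherwise they call the dedicated flat variants.
 */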
1713
1714/**
1715 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1716 * address.
1717 */
1718IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1719{
1720#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1721 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1722#else
1723 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1724#endif
1725}
1726
1727
1728/**
1729 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1730 */
1731IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1732{
1733#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1734 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1735#else
1736 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1737#endif
1738}
1739
1740
1741/**
1742 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1745{
1746#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1747 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1748#else
1749 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1750#endif
1751}
1752
1753
1754/**
1755 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1756 */
1757IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1758{
1759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1760 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1761#else
1762 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1763#endif
1764}
1765
1766
1767/**
1768 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1769 * address.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1772{
1773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1774 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1775#else
1776 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1777#endif
1778}
1779
1780
1781/**
1782 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1783 */
1784IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1785{
1786#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1787 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1788#else
1789 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1790#endif
1791}
1792
1793
1794/**
1795 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1796 */
1797IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1798{
1799#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1800 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1801#else
1802 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1803#endif
1804}
1805
1806
1807/**
1808 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1809 */
1810IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1811{
1812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1813 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1814#else
1815 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1816#endif
1817}
1818
1819
1820/**
1821 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1822 * address.
1823 */
1824IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1825{
1826#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1827 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1828#else
1829 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1830#endif
1831}
1832
1833
1834/**
1835 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1836 */
1837IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1838{
1839#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1840 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1841#else
1842 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1843#endif
1844}
1845
1846
1847/**
1848 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1849 */
1850IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1851{
1852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1853 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1854#else
1855 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1856#endif
1857}
1858
1859
1860/**
1861 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1866 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1867#else
1868 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1869#endif
1870}
1871
1872
1873/**
1874 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1875 * address.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1880 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1881#else
1882 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1883#endif
1884}
1885
1886
1887/**
1888 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1889 */
1890IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1891{
1892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1893 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1894#else
1895 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1896#endif
1897}
1898
1899
1900/**
1901 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1902 */
1903IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1904{
1905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1906 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1907#else
1908 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1909#endif
1910}
1911
1912
1913/**
1914 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1917{
1918#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1919 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1920#else
1921 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1922#endif
1923}
1924
1925
1926/**
1927 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1928 */
1929IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1930{
1931#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1932 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1933#else
1934 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1935#endif
1936}
1937
1938
1939/**
1940 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1941 */
1942IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1943{
1944#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1945 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1946#else
1947 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1948#endif
1949}
1950
1951
1952/**
1953 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1954 * address.
1955 */
1956IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1957{
1958#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1959 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1960#else
1961 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1962#endif
1963}
1964
1965
1966/**
1967 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1968 */
1969IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1970{
1971#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1972 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1973#else
1974 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1975#endif
1976}
1977
1978
1979/**
1980 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1983{
1984#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1985 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1986#else
1987 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1988#endif
1989}
1990
1991
1992/**
1993 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1994 */
1995IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1996{
1997#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1998 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1999#else
2000 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2001#endif
2002}
2003
2004
2005/*********************************************************************************************************************************
2006* Helpers: Commit, rollback & unmap *
2007*********************************************************************************************************************************/
2008
2009/**
2010 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2011 */
2012IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2013{
2014 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2015}
2016
2017
2018/**
2019 * Used by TB code to commit and unmap a read-write memory mapping.
2020 */
2021IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2022{
2023 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2024}
2025
2026
2027/**
2028 * Used by TB code to commit and unmap a write-only memory mapping.
2029 */
2030IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2031{
2032 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2033}
2034
2035
2036/**
2037 * Used by TB code to commit and unmap a read-only memory mapping.
2038 */
2039IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2040{
2041 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2042}
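
/*
 * Illustrative pairing of the map and commit-and-unmap helpers (a sketch
 * only; the recompiler emits these as native helper calls, they are not
 * invoked like this from C, and GCPtrMem/u16Value are placeholders):
 *
 *     uint8_t   bUnmapInfo;
 *     uint16_t *pu16Dst = iemNativeHlpMemFlatMapDataU16Wo(pVCpu, &bUnmapInfo, GCPtrMem);
 *     *pu16Dst = u16Value;
 *     iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo);
 *
 * The access type of the unmap helper is expected to match the one used for
 * mapping (atomic, read-write, write-only or read-only).
 */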
2043
2044
2045/**
2046 * Reinitializes the native recompiler state.
2047 *
2048 * Called before starting a new recompile job.
2049 */
2050static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2051{
2052 pReNative->cLabels = 0;
2053 pReNative->bmLabelTypes = 0;
2054 pReNative->cFixups = 0;
2055#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2056 pReNative->pDbgInfo->cEntries = 0;
2057 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2058#endif
2059 pReNative->pTbOrg = pTb;
2060 pReNative->cCondDepth = 0;
2061 pReNative->uCondSeqNo = 0;
2062 pReNative->uCheckIrqSeqNo = 0;
2063 pReNative->uTlbSeqNo = 0;
2064
2065#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2066 pReNative->Core.offPc = 0;
2067 pReNative->Core.cInstrPcUpdateSkipped = 0;
2068#endif
2069#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2070 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2071#endif
2072 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2073#if IEMNATIVE_HST_GREG_COUNT < 32
2074 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2075#endif
2076 ;
2077 pReNative->Core.bmHstRegsWithGstShadow = 0;
2078 pReNative->Core.bmGstRegShadows = 0;
2079#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2080 pReNative->Core.bmGstRegShadowDirty = 0;
2081#endif
2082 pReNative->Core.bmVars = 0;
2083 pReNative->Core.bmStack = 0;
2084 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2085 pReNative->Core.u64ArgVars = UINT64_MAX;
2086
2087 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 22);
2088 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2089 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2090 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2091 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2092 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2093 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2094 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2095 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2096 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2097 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2098 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2099 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2100 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2101 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2102 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2103 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2104 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2105 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2106 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2107 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2108 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2110
2111 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2112
2113 /* Full host register reinit: */
2114 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2115 {
2116 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2117 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2118 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2119 }
2120
2121 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2122 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2123#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2124 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2125#endif
2126#ifdef IEMNATIVE_REG_FIXED_TMP0
2127 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2128#endif
2129#ifdef IEMNATIVE_REG_FIXED_TMP1
2130 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2131#endif
2132#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2133 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2134#endif
2135 );
2136 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2137 {
2138 fRegs &= ~RT_BIT_32(idxReg);
2139 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2140 }
2141
2142 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2143#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2144 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2145#endif
2146#ifdef IEMNATIVE_REG_FIXED_TMP0
2147 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2148#endif
2149#ifdef IEMNATIVE_REG_FIXED_TMP1
2150 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2151#endif
2152#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2153 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2154#endif
2155
2156#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2157 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2158# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2159 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2160# endif
2161 ;
2162 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2163 pReNative->Core.bmGstSimdRegShadows = 0;
2164 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2165 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2166
2167 /* Full host register reinit: */
2168 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2169 {
2170 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2171 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2172 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2173 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2174 }
2175
2176 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2177 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2178 {
2179 fRegs &= ~RT_BIT_32(idxReg);
2180 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2181 }
2182
2183# ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2184 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2185# endif
2186
2187#endif
2188
2189 return pReNative;
2190}
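
/*
 * Worked example of the Core.bmHstRegs seeding above, assuming an AMD64 host
 * with 16 general purpose registers: IEMNATIVE_REG_FIXED_MASK marks the
 * permanently reserved registers (the pVCpu pointer, the fixed temporaries,
 * and so on) as allocated, and because IEMNATIVE_HST_GREG_COUNT (16) is below
 * 32 the ~(RT_BIT(16) - 1U) term additionally sets bits 16..31, so the
 * allocator can never hand out a register that does not exist on the host.
 */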
2191
2192
2193/**
2194 * Allocates and initializes the native recompiler state.
2195 *
2196 * This is called the first time an EMT wants to recompile something.
2197 *
2198 * @returns Pointer to the new recompiler state.
2199 * @param pVCpu The cross context virtual CPU structure of the calling
2200 * thread.
2201 * @param pTb The TB that's about to be recompiled.
2202 * @thread EMT(pVCpu)
2203 */
2204static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2205{
2206 VMCPU_ASSERT_EMT(pVCpu);
2207
2208 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2209 AssertReturn(pReNative, NULL);
2210
2211 /*
2212 * Try allocate all the buffers and stuff we need.
2213 */
2214 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2215 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
2216 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
2217#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2218 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
2219#endif
2220 if (RT_LIKELY( pReNative->pInstrBuf
2221 && pReNative->paLabels
2222 && pReNative->paFixups)
2223#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2224 && pReNative->pDbgInfo
2225#endif
2226 )
2227 {
2228 /*
2229 * Set the buffer & array sizes on success.
2230 */
2231 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2232 pReNative->cLabelsAlloc = _8K;
2233 pReNative->cFixupsAlloc = _16K;
2234#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2235 pReNative->cDbgInfoAlloc = _16K;
2236#endif
2237
2238 /* Other constant stuff: */
2239 pReNative->pVCpu = pVCpu;
2240
2241 /*
2242 * Done, just need to save it and reinit it.
2243 */
2244 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
2245 return iemNativeReInit(pReNative, pTb);
2246 }
2247
2248 /*
2249 * Failed. Cleanup and return.
2250 */
2251 AssertFailed();
2252 RTMemFree(pReNative->pInstrBuf);
2253 RTMemFree(pReNative->paLabels);
2254 RTMemFree(pReNative->paFixups);
2255#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2256 RTMemFree(pReNative->pDbgInfo);
2257#endif
2258 RTMemFree(pReNative);
2259 return NULL;
2260}
2261
2262
2263/**
2264 * Creates a label
2265 *
2266 * If the label does not yet have a defined position,
2267 * call iemNativeLabelDefine() later to set it.
2268 *
2269 * @returns Label ID. Throws VBox status code on failure, so no need to check
2270 * the return value.
2271 * @param pReNative The native recompile state.
2272 * @param enmType The label type.
2273 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2274 * label is not yet defined (default).
2275 * @param uData Data associated with the label. Only applicable to
2276 * certain types of labels. Default is zero.
2277 */
2278DECL_HIDDEN_THROW(uint32_t)
2279iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2280 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2281{
2282 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2283
2284 /*
2285 * Locate existing label definition.
2286 *
2287 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2288 * and uData is zero.
2289 */
2290 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2291 uint32_t const cLabels = pReNative->cLabels;
2292 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2293#ifndef VBOX_STRICT
2294 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2295 && offWhere == UINT32_MAX
2296 && uData == 0
2297#endif
2298 )
2299 {
2300#ifndef VBOX_STRICT
2301 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2302 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2303 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2304 if (idxLabel < pReNative->cLabels)
2305 return idxLabel;
2306#else
2307 for (uint32_t i = 0; i < cLabels; i++)
2308 if ( paLabels[i].enmType == enmType
2309 && paLabels[i].uData == uData)
2310 {
2311 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2312 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2313 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2314 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2315 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2316 return i;
2317 }
2318 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2319 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2320#endif
2321 }
2322
2323 /*
2324 * Make sure we've got room for another label.
2325 */
2326 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2327 { /* likely */ }
2328 else
2329 {
2330 uint32_t cNew = pReNative->cLabelsAlloc;
2331 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2332 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2333 cNew *= 2;
2334 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2335 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2336 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2337 pReNative->paLabels = paLabels;
2338 pReNative->cLabelsAlloc = cNew;
2339 }
2340
2341 /*
2342 * Define a new label.
2343 */
2344 paLabels[cLabels].off = offWhere;
2345 paLabels[cLabels].enmType = enmType;
2346 paLabels[cLabels].uData = uData;
2347 pReNative->cLabels = cLabels + 1;
2348
2349 Assert((unsigned)enmType < 64);
2350 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2351
2352 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2353 {
2354 Assert(uData == 0);
2355 pReNative->aidxUniqueLabels[enmType] = cLabels;
2356 }
2357
2358 if (offWhere != UINT32_MAX)
2359 {
2360#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2361 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2362 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2363#endif
2364 }
2365 return cLabels;
2366}
2367
2368
2369/**
2370 * Defines the location of an existing label.
2371 *
2372 * @param pReNative The native recompile state.
2373 * @param idxLabel The label to define.
2374 * @param offWhere The position.
2375 */
2376DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2377{
2378 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2379 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2380 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2381 pLabel->off = offWhere;
2382#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2383 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2384 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2385#endif
2386}
2387
2388
2389/**
2390 * Looks up a label.
2391 *
2392 * @returns Label ID if found, UINT32_MAX if not.
2393 */
2394static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2395 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2396{
2397 Assert((unsigned)enmType < 64);
2398 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2399 {
2400 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2401 return pReNative->aidxUniqueLabels[enmType];
2402
2403 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2404 uint32_t const cLabels = pReNative->cLabels;
2405 for (uint32_t i = 0; i < cLabels; i++)
2406 if ( paLabels[i].enmType == enmType
2407 && paLabels[i].uData == uData
2408 && ( paLabels[i].off == offWhere
2409 || offWhere == UINT32_MAX
2410 || paLabels[i].off == UINT32_MAX))
2411 return i;
2412 }
2413 return UINT32_MAX;
2414}
2415
2416
2417/**
2418 * Adds a fixup.
2419 *
2420 * @throws VBox status code (int) on failure.
2421 * @param pReNative The native recompile state.
2422 * @param offWhere The instruction offset of the fixup location.
2423 * @param idxLabel The target label ID for the fixup.
2424 * @param enmType The fixup type.
2425 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2426 */
2427DECL_HIDDEN_THROW(void)
2428iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2429 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2430{
2431 Assert(idxLabel <= UINT16_MAX);
2432 Assert((unsigned)enmType <= UINT8_MAX);
2433#ifdef RT_ARCH_ARM64
2434 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2435 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2436 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2437#endif
2438
2439 /*
2440 * Make sure we've room.
2441 */
2442 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2443 uint32_t const cFixups = pReNative->cFixups;
2444 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2445 { /* likely */ }
2446 else
2447 {
2448 uint32_t cNew = pReNative->cFixupsAlloc;
2449 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2450 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2451 cNew *= 2;
2452 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2453 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2454 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2455 pReNative->paFixups = paFixups;
2456 pReNative->cFixupsAlloc = cNew;
2457 }
2458
2459 /*
2460 * Add the fixup.
2461 */
2462 paFixups[cFixups].off = offWhere;
2463 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2464 paFixups[cFixups].enmType = enmType;
2465 paFixups[cFixups].offAddend = offAddend;
2466 pReNative->cFixups = cFixups + 1;
2467}
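
/*
 * Sketch of how the label and fixup APIs above are typically combined when
 * emitting a forward branch (simplified; the real emitters also write the
 * branch instruction itself and pick the fixup type matching the host
 * architecture, and the enum values below are placeholders):
 *
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType); // position not yet known
 *     // ... emit the branch instruction at 'offBranch' ...
 *     iemNativeAddFixup(pReNative, offBranch, idxLabel, enmFixupType);
 *     // ... emit more code ...
 *     iemNativeLabelDefine(pReNative, idxLabel, offTarget);  // resolve the label position
 *
 * The recorded fixups are applied in a later pass, once all label positions
 * are known.
 */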
2468
2469
2470/**
2471 * Slow code path for iemNativeInstrBufEnsure.
2472 */
2473DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2474{
2475 /* Double the buffer size till we meet the request. */
2476 uint32_t cNew = pReNative->cInstrBufAlloc;
2477 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2478 do
2479 cNew *= 2;
2480 while (cNew < off + cInstrReq);
2481
2482 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2483#ifdef RT_ARCH_ARM64
2484 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2485#else
2486 uint32_t const cbMaxInstrBuf = _2M;
2487#endif
2488 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2489
2490 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2491 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2492
2493#ifdef VBOX_STRICT
2494 pReNative->offInstrBufChecked = off + cInstrReq;
2495#endif
2496 pReNative->cInstrBufAlloc = cNew;
2497 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2498}
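
/*
 * Growth example (assuming a host where IEMNATIVEINSTR is 4 bytes, i.e. the
 * initial _64K byte buffer holds 16384 instruction units): a request with
 * off + cInstrReq = 40000 doubles twice, 16384 -> 32768 -> 65536 units,
 * giving cbNew = 256 KiB, well below the 1MB/2MB architecture limits above.
 */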
2499
2500#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2501
2502/**
2503 * Grows the static debug info array used during recompilation.
2504 *
2505 * @returns Pointer to the new debug info block; throws VBox status code on
2506 * failure, so no need to check the return value.
2507 */
2508DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2509{
2510 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2511 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2512 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2513 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2514 pReNative->pDbgInfo = pDbgInfo;
2515 pReNative->cDbgInfoAlloc = cNew;
2516 return pDbgInfo;
2517}
2518
2519
2520/**
2521 * Adds a new, uninitialized debug info entry, returning the pointer to it.
2522 */
2523DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2524{
2525 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2526 { /* likely */ }
2527 else
2528 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2529 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2530}
2531
2532
2533/**
2534 * Debug Info: Adds a native offset record, if necessary.
2535 */
2536DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2537{
2538 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2539
2540 /*
2541 * Do we need this one?
2542 */
2543 uint32_t const offPrev = pDbgInfo->offNativeLast;
2544 if (offPrev == off)
2545 return;
2546 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2547
2548 /*
2549 * Add it.
2550 */
2551 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2552 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2553 pEntry->NativeOffset.offNative = off;
2554 pDbgInfo->offNativeLast = off;
2555}
2556
2557
2558/**
2559 * Debug Info: Record info about a label.
2560 */
2561static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2562{
2563 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2564 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2565 pEntry->Label.uUnused = 0;
2566 pEntry->Label.enmLabel = (uint8_t)enmType;
2567 pEntry->Label.uData = uData;
2568}
2569
2570
2571/**
2572 * Debug Info: Record info about a threaded call.
2573 */
2574static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2575{
2576 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2577 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2578 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2579 pEntry->ThreadedCall.uUnused = 0;
2580 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2581}
2582
2583
2584/**
2585 * Debug Info: Record info about a new guest instruction.
2586 */
2587static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2588{
2589 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2590 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2591 pEntry->GuestInstruction.uUnused = 0;
2592 pEntry->GuestInstruction.fExec = fExec;
2593}
2594
2595
2596/**
2597 * Debug Info: Record info about guest register shadowing.
2598 */
2599DECL_HIDDEN_THROW(void)
2600iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2601 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2602{
2603 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2604 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2605 pEntry->GuestRegShadowing.uUnused = 0;
2606 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2607 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2608 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2609#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2610 Assert( idxHstReg != UINT8_MAX
2611 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2612#endif
2613}
2614
2615
2616# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2617/**
2618 * Debug Info: Record info about guest SIMD register shadowing.
2619 */
2620DECL_HIDDEN_THROW(void)
2621iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2622 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2623{
2624 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2625 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2626 pEntry->GuestSimdRegShadowing.uUnused = 0;
2627 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2628 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2629 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2630}
2631# endif
2632
2633
2634# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2635/**
2636 * Debug Info: Record info about delayed RIP updates.
2637 */
2638DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
2639{
2640 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2641 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2642 pEntry->DelayedPcUpdate.offPc = offPc;
2643 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2644}
2645# endif
2646
2647# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2648
2649/**
2650 * Debug Info: Record info about a dirty guest register.
2651 */
2652DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2653 uint8_t idxGstReg, uint8_t idxHstReg)
2654{
2655 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2656 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2657 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2658 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2659 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2660}
2661
2662
2663/**
2664 * Debug Info: Record info about a dirty guest register writeback operation.
2665 */
2666DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2667{
2668 unsigned const cBitsGstRegMask = 25;
2669 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2670
2671 /* The first block of 25 bits: */
2672 if (fGstReg & fGstRegMask)
2673 {
2674 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2675 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2676 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2677 pEntry->GuestRegWriteback.cShift = 0;
2678 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2679 fGstReg &= ~(uint64_t)fGstRegMask;
2680 if (!fGstReg)
2681 return;
2682 }
2683
2684 /* The second block of 25 bits: */
2685 fGstReg >>= cBitsGstRegMask;
2686 if (fGstReg & fGstRegMask)
2687 {
2688 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2689 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2690 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2691 pEntry->GuestRegWriteback.cShift = 1;
2692 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2693 fGstReg &= ~(uint64_t)fGstRegMask;
2694 if (!fGstReg)
2695 return;
2696 }
2697
2698 /* The last block with 14 bits: */
2699 fGstReg >>= cBitsGstRegMask;
2700 Assert(fGstReg & fGstRegMask);
2701 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2702 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2703 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2704 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2705 pEntry->GuestRegWriteback.cShift = 2;
2706 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2707}
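
/*
 * Example of the 25+25+14 bit splitting above: a dirty mask with bits 3 and
 * 30 set produces two debug entries, {cShift=0, fGstReg=RT_BIT_32(3)} and
 * {cShift=1, fGstReg=RT_BIT_32(5)} (30 - 25 = 5).  A consumer can thus
 * reassemble the original mask as (uint64_t)fGstReg << (cShift * 25) per
 * entry (the reassembly side is not shown in this file).
 */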
2708
2709# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2710
2711#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2712
2713
2714/*********************************************************************************************************************************
2715* Register Allocator *
2716*********************************************************************************************************************************/
2717
2718/**
2719 * Register parameter indexes (indexed by argument number).
2720 */
2721DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2722{
2723 IEMNATIVE_CALL_ARG0_GREG,
2724 IEMNATIVE_CALL_ARG1_GREG,
2725 IEMNATIVE_CALL_ARG2_GREG,
2726 IEMNATIVE_CALL_ARG3_GREG,
2727#if defined(IEMNATIVE_CALL_ARG4_GREG)
2728 IEMNATIVE_CALL_ARG4_GREG,
2729# if defined(IEMNATIVE_CALL_ARG5_GREG)
2730 IEMNATIVE_CALL_ARG5_GREG,
2731# if defined(IEMNATIVE_CALL_ARG6_GREG)
2732 IEMNATIVE_CALL_ARG6_GREG,
2733# if defined(IEMNATIVE_CALL_ARG7_GREG)
2734 IEMNATIVE_CALL_ARG7_GREG,
2735# endif
2736# endif
2737# endif
2738#endif
2739};
2740AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2741
2742/**
2743 * Call register masks indexed by argument count.
2744 */
2745DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2746{
2747 0,
2748 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2749 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2750 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2751 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2752 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2753#if defined(IEMNATIVE_CALL_ARG4_GREG)
2754 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2755 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2756# if defined(IEMNATIVE_CALL_ARG5_GREG)
2757 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2758 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2759# if defined(IEMNATIVE_CALL_ARG6_GREG)
2760 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2761 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2762 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2763# if defined(IEMNATIVE_CALL_ARG7_GREG)
2764 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2765 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2766 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2767# endif
2768# endif
2769# endif
2770#endif
2771};
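
/*
 * Usage note: g_aidxIemNativeCallRegs[i] yields the host register carrying
 * call argument i, while g_afIemNativeCallRegs[cArgs] is the combined mask of
 * the registers used by the first cArgs arguments, e.g. handy for masking
 * those registers out of an allocation request before setting up a helper
 * call (illustrative use, not a specific call site in this file):
 *
 *     uint32_t fFree = fRegMask & ~g_afIemNativeCallRegs[4];
 */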
2772
2773#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2774/**
2775 * BP offset of the stack argument slots.
2776 *
2777 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2778 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2779 */
2780DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2781{
2782 IEMNATIVE_FP_OFF_STACK_ARG0,
2783# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2784 IEMNATIVE_FP_OFF_STACK_ARG1,
2785# endif
2786# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2787 IEMNATIVE_FP_OFF_STACK_ARG2,
2788# endif
2789# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2790 IEMNATIVE_FP_OFF_STACK_ARG3,
2791# endif
2792};
2793AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2794#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2795
2796/**
2797 * Info about shadowed guest register values.
2798 * @see IEMNATIVEGSTREG
2799 */
2800DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2801{
2802#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2803 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2804 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2805 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2806 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2807 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2808 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2809 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2810 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2811 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2812 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2813 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2814 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2815 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2816 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2817 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2818 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2819 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2820 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2821 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2822 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2823 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2824 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2825 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2826 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2827 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2828 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2829 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2830 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2831 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2832 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2833 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2834 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2835 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2836 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2837 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2838 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2839 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2840 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2841 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2842 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2843 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2844 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2845 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2846 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2847 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2848 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2849 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2850 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2851#undef CPUMCTX_OFF_AND_SIZE
2852};
2853AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
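
/*
 * Example lookup in the table above: g_aGstShadowInfo[kIemNativeGstReg_GprFirst
 * + X86_GREG_xAX] describes guest RAX, with .off giving the byte offset of
 * cpum.GstCtx.rax within VMCPU, .cb its size (8 bytes) and .pszName "rax".
 * The store emitters below (e.g. iemNativeEmitStoreGprWithGstShadowReg) use
 * .off and .cb to pick the right VCpu store width.
 */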
2854
2855
2856/** Host CPU general purpose register names. */
2857DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2858{
2859#ifdef RT_ARCH_AMD64
2860 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2861#elif defined(RT_ARCH_ARM64)
2862 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2863 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2864#else
2865# error "port me"
2866#endif
2867};
2868
2869
2870#if 0 /* unused */
2871/**
2872 * Tries to locate a suitable register in the given register mask.
2873 *
2874 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2875 * failed.
2876 *
2877 * @returns Host register number on success, returns UINT8_MAX on failure.
2878 */
2879static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2880{
2881 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2882 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2883 if (fRegs)
2884 {
2885 /** @todo pick better here: */
2886 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2887
2888 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2889 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2890 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2891 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2892
2893 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2894 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2895 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2896 return idxReg;
2897 }
2898 return UINT8_MAX;
2899}
2900#endif /* unused */
2901
2902
2903#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2904/**
2905 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2906 *
2907 * @returns New code buffer offset on success, UINT32_MAX on failure.
2908 * @param pReNative The native recompile state.
2909 * @param off The current code buffer position.
2910 * @param enmGstReg The guest register to store to.
2911 * @param idxHstReg The host register to store from.
2912 */
2913DECL_FORCE_INLINE_THROW(uint32_t)
2914iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2915{
2916 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2917 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2918
2919 switch (g_aGstShadowInfo[enmGstReg].cb)
2920 {
2921 case sizeof(uint64_t):
2922 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2923 case sizeof(uint32_t):
2924 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2925 case sizeof(uint16_t):
2926 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2927#if 0 /* not present in the table. */
2928 case sizeof(uint8_t):
2929 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2930#endif
2931 default:
2932 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
2933 }
2934}
2935
2936
2937/**
2938 * Emits code to flush a pending write of the given guest register if any.
2939 *
2940 * @returns New code buffer offset.
2941 * @param pReNative The native recompile state.
2942 * @param off Current code buffer position.
2943 * @param enmGstReg The guest register to flush.
2944 */
2945DECL_HIDDEN_THROW(uint32_t)
2946iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
2947{
2948 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2949
2950 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
2951 && enmGstReg <= kIemNativeGstReg_GprLast)
2952 || enmGstReg == kIemNativeGstReg_MxCsr);
2953 Assert( idxHstReg != UINT8_MAX
2954 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
2955 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
2956 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
2957
2958 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
2959
2960 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
2961 return off;
2962}
2963
2964
2965/**
2966 * Flush the given set of guest registers if marked as dirty.
2967 *
2968 * @returns New code buffer offset.
2969 * @param pReNative The native recompile state.
2970 * @param off Current code buffer position.
2971 * @param fFlushGstReg The guest register set to flush (default is flush everything).
2972 */
2973DECL_HIDDEN_THROW(uint32_t)
2974iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
2975{
2976 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
2977 if (bmGstRegShadowDirty)
2978 {
2979# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2980 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2981 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
2982# endif
2983 do
2984 {
2985 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2986 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2987 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2988 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2989 } while (bmGstRegShadowDirty);
2990 }
2991
2992 return off;
2993}
2994
2995
2996/**
2997 * Flush all shadowed guest registers marked as dirty for the given host register.
2998 *
2999 * @returns New code buffer offset.
3000 * @param pReNative The native recompile state.
3001 * @param off Current code buffer position.
3002 * @param idxHstReg The host register.
3003 *
3004 * @note This doesn't do any unshadowing of guest registers from the host register.
3005 */
3006DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3007{
3008 /* We need to flush any pending guest register writes this host register shadows. */
3009 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3010 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3011 {
3012# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3013 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3014 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3015# endif
3016 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
3017 * likely to only have a single bit set. It'll be in the 0..15 range,
3018 * but still it's 15 unnecessary loops for the last guest register. */
3019
3020 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3021 do
3022 {
3023 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3024 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3025 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3026 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3027 } while (bmGstRegShadowDirty);
3028 }
3029
3030 return off;
3031}
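
/*
 * Usage sketch (mirrors the call in iemNativeRegAllocFindFree below): write
 * back whatever dirty guest shadows the host register still holds before it
 * is repurposed; idxHstReg is a stand-in name for the caller's register:
 *
 *      off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
 */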
3032#endif
3033
3034
3035/**
3036 * Locate a register, possibly freeing one up.
3037 *
3038 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3039 * failed.
3040 *
3041 * @returns Host register number on success. Returns UINT8_MAX if no register is
3042 * found; the caller is supposed to deal with this and raise an
3043 * allocation type specific status code (if desired).
3044 *
3045 * @throws VBox status code if we run into trouble spilling a variable or
3046 * recording debug info. Does NOT throw anything if we're out of
3047 * registers, though.
3048 */
3049static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3050 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3051{
3052 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3053 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3054 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3055
3056 /*
3057 * Try a freed register that's shadowing a guest register.
3058 */
3059 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3060 if (fRegs)
3061 {
3062 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3063
3064#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3065 /*
3066 * When we have liveness information, we use it to kick out all shadowed
3067 * guest registers that will not be needed any more in this TB. If we're
3068 * lucky, this may prevent us from ending up here again.
3069 *
3070 * Note! We must consider the previous entry here so we don't free
3071 * anything that the current threaded function requires (current
3072 * entry is produced by the next threaded function).
3073 */
3074 uint32_t const idxCurCall = pReNative->idxCurCall;
3075 if (idxCurCall > 0)
3076 {
3077 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3078
3079# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3080 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3081 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3082 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state. */
3083# else
3084 /* Construct a mask of the registers not in the read or write state.
3085 Note! We could skip writes, if they aren't from us, as this is just
3086 a hack to prevent trashing registers that have just been written
3087 or will be written when we retire the current instruction. */
3088 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3089 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3090 & IEMLIVENESSBIT_MASK;
3091# endif
3092 /* Merge EFLAGS. */
3093 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3094 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3095 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3096 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3097 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3098
3099 /* If it matches any shadowed registers. */
3100 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3101 {
3102#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3103 /* Writeback any dirty shadow registers we are about to unshadow. */
3104 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3105#endif
3106
3107 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3108 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3109 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3110
3111 /* See if we've got any unshadowed registers we can return now. */
3112 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3113 if (fUnshadowedRegs)
3114 {
3115 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3116 return (fPreferVolatile
3117 ? ASMBitFirstSetU32(fUnshadowedRegs)
3118 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3119 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3120 - 1;
3121 }
3122 }
3123 }
3124#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3125
3126 unsigned const idxReg = (fPreferVolatile
3127 ? ASMBitFirstSetU32(fRegs)
3128 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3129 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3130 - 1;
3131
3132 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3133 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3134 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3135 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3136
3137#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3138 /* We need to flush any pending guest register writes this host register shadows. */
3139 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3140#endif
3141
3142 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3143 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3144 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3145 return idxReg;
3146 }
3147
3148 /*
3149 * Try free up a variable that's in a register.
3150 *
3151 * We do two rounds here, first evacuating variables that don't need to be
3152 * saved on the stack, then in the second round moving things to the stack.
3153 */
3154 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3155 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3156 {
3157 uint32_t fVars = pReNative->Core.bmVars;
3158 while (fVars)
3159 {
3160 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3161 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3162#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3163 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
3164 continue;
3165#endif
3166
3167 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3168 && (RT_BIT_32(idxReg) & fRegMask)
3169 && ( iLoop == 0
3170 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3171 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3172 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3173 {
3174 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3175 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3176 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3177 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3178 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3179 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3180#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3181 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3182#endif
3183
3184 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3185 {
3186 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3187 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3188 }
3189
3190 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3191 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3192
3193 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3194 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3195 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3196 return idxReg;
3197 }
3198 fVars &= ~RT_BIT_32(idxVar);
3199 }
3200 }
3201
3202 return UINT8_MAX;
3203}
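
/*
 * Calling pattern sketch, mirroring the slow paths of iemNativeRegAllocTmp
 * and iemNativeRegAllocTmpEx below (UINT8_MAX means nothing could be freed
 * up and the caller raises its own status code):
 *
 *      uint8_t const idxReg = iemNativeRegAllocFindFree(pReNative, &off, true);
 *      AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
 */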
3204
3205
3206/**
3207 * Reassigns a variable to a different register specified by the caller.
3208 *
3209 * @returns The new code buffer position.
3210 * @param pReNative The native recompile state.
3211 * @param off The current code buffer position.
3212 * @param idxVar The variable index.
3213 * @param idxRegOld The old host register number.
3214 * @param idxRegNew The new host register number.
3215 * @param pszCaller The caller for logging.
3216 */
3217static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3218 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3219{
3220 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3221 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3222#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3223 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3224#endif
3225 RT_NOREF(pszCaller);
3226
3227#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3228 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3229#endif
3230 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3231
3232 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3233#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3234 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3235#endif
3236 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3237 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3238 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3239
3240 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3241 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3242 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3243 if (fGstRegShadows)
3244 {
3245 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3246 | RT_BIT_32(idxRegNew);
3247 while (fGstRegShadows)
3248 {
3249 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3250 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3251
3252 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3253 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3254 }
3255 }
3256
3257 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3258 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3259 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3260 return off;
3261}
3262
3263
3264/**
3265 * Moves a variable to a different register or spills it onto the stack.
3266 *
3267 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3268 * kinds can easily be recreated if needed later.
3269 *
3270 * @returns The new code buffer position.
3271 * @param pReNative The native recompile state.
3272 * @param off The current code buffer position.
3273 * @param idxVar The variable index.
3274 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3275 * call-volatile registers.
3276 */
3277DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3278 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3279{
3280 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3281 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3282 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3283 Assert(!pVar->fRegAcquired);
3284
3285 uint8_t const idxRegOld = pVar->idxReg;
3286 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3287 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3288 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3289 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3290 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3291 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3292 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3293 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3294#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3295 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3296#endif
3297
3298
3299 /** @todo Add statistics on this.*/
3300 /** @todo Implement basic variable liveness analysis (python) so variables
3301 * can be freed immediately once no longer used. Without this we risk
3302 * trashing registers and stack for dead variables.
3303 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3304
3305 /*
3306 * First try move it to a different register, as that's cheaper.
3307 */
3308 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3309 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3310 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3311 if (fRegs)
3312 {
3313 /* Avoid using shadow registers, if possible. */
3314 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3315 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3316 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3317 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3318 }
3319
3320 /*
3321 * Otherwise we must spill the register onto the stack.
3322 */
3323 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3324 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3325 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3326 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3327
3328 pVar->idxReg = UINT8_MAX;
3329 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3330 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3331 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3332 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3333 return off;
3334}
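
/*
 * Usage sketch (as done by iemNativeRegAllocArgs below): evacuate a stack
 * variable from a host register that is about to be repurposed, using the
 * default call-volatile forbidden mask; idxVar is a stand-in name here:
 *
 *      off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
 */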
3335
3336
3337/**
3338 * Allocates a temporary host general purpose register.
3339 *
3340 * This may emit code to save register content onto the stack in order to free
3341 * up a register.
3342 *
3343 * @returns The host register number; throws VBox status code on failure,
3344 * so no need to check the return value.
3345 * @param pReNative The native recompile state.
3346 * @param poff Pointer to the variable with the code buffer position.
3347 * This will be updated if we need to move a variable from
3348 * register to stack in order to satisfy the request.
3349 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3350 * registers (@c true, default) or the other way around
3351 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3352 */
3353DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3354{
3355 /*
3356 * Try find a completely unused register, preferably a call-volatile one.
3357 */
3358 uint8_t idxReg;
3359 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3360 & ~pReNative->Core.bmHstRegsWithGstShadow
3361 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3362 if (fRegs)
3363 {
3364 if (fPreferVolatile)
3365 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3366 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3367 else
3368 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3369 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3370 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3371 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3372 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3373 }
3374 else
3375 {
3376 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3377 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3378 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3379 }
3380 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3381}
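
/*
 * Usage sketch (hypothetical scratch register use; the immediate loader is
 * the same helper iemNativeRegAllocTmpImm uses below):
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1000));
 *      ... use idxTmpReg in emitted code ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */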
3382
3383
3384/**
3385 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3386 * registers.
3387 *
3388 * @returns The host register number; throws VBox status code on failure,
3389 * so no need to check the return value.
3390 * @param pReNative The native recompile state.
3391 * @param poff Pointer to the variable with the code buffer position.
3392 * This will be updated if we need to move a variable from
3393 * register to stack in order to satisfy the request.
3394 * @param fRegMask Mask of acceptable registers.
3395 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3396 * registers (@c true, default) or the other way around
3397 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3398 */
3399DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3400 bool fPreferVolatile /*= true*/)
3401{
3402 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3403 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3404
3405 /*
3406 * Try find a completely unused register, preferably a call-volatile one.
3407 */
3408 uint8_t idxReg;
3409 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3410 & ~pReNative->Core.bmHstRegsWithGstShadow
3411 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3412 & fRegMask;
3413 if (fRegs)
3414 {
3415 if (fPreferVolatile)
3416 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3417 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3418 else
3419 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3420 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3421 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3422 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3423 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3424 }
3425 else
3426 {
3427 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3428 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3429 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3430 }
3431 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3432}
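
/*
 * Usage sketch (assumed scenario): allocate a scratch register that must be
 * call-preserved because a helper call will be emitted while the value is
 * still live; the mask mirrors the fNoVolatileRegs case in
 * iemNativeRegAllocTmpForGuestReg below:
 *
 *      uint8_t const idxReg = iemNativeRegAllocTmpEx(pReNative, &off,
 *                                                    IEMNATIVE_HST_GREG_MASK
 *                                                    & ~IEMNATIVE_REG_FIXED_MASK
 *                                                    & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
 *                                                    false);
 */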
3433
3434
3435/**
3436 * Allocates a temporary register for loading an immediate value into.
3437 *
3438 * This will emit code to load the immediate, unless there happens to be an
3439 * unused register with the value already loaded.
3440 *
3441 * The caller will not modify the returned register, it must be considered
3442 * read-only. Free using iemNativeRegFreeTmpImm.
3443 *
3444 * @returns The host register number; throws VBox status code on failure, so no
3445 * need to check the return value.
3446 * @param pReNative The native recompile state.
3447 * @param poff Pointer to the variable with the code buffer position.
3448 * @param uImm The immediate value that the register must hold upon
3449 * return.
3450 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3451 * registers (@c true, default) or the other way around
3452 * (@c false).
3453 *
3454 * @note Reusing immediate values has not been implemented yet.
3455 */
3456DECL_HIDDEN_THROW(uint8_t)
3457iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3458{
3459 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3460 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3461 return idxReg;
3462}
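
/*
 * Usage sketch: materialize a constant in a read-only scratch register and
 * release it again once the emitted code no longer needs it:
 *
 *      uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffffffff));
 *      ... use idxImmReg as a read-only source operand ...
 *      iemNativeRegFreeTmpImm(pReNative, idxImmReg);
 */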
3463
3464
3465/**
3466 * Allocates a temporary host general purpose register for keeping a guest
3467 * register value.
3468 *
3469 * Since we may already have a register holding the guest register value,
3470 * code will be emitted to do the loading if that's not the case. Code may also
3471 * be emitted if we have to free up a register to satisfy the request.
3472 *
3473 * @returns The host register number; throws VBox status code on failure, so no
3474 * need to check the return value.
3475 * @param pReNative The native recompile state.
3476 * @param poff Pointer to the variable with the code buffer
3477 * position. This will be updated if we need to move a
3478 * variable from register to stack in order to satisfy
3479 * the request.
3480 * @param enmGstReg The guest register that is to be updated.
3481 * @param enmIntendedUse How the caller will be using the host register.
3482 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3483 * register is okay (default). The ASSUMPTION here is
3484 * that the caller has already flushed all volatile
3485 * registers, so this is only applied if we allocate a
3486 * new register.
3487 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3488 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3489 */
3490DECL_HIDDEN_THROW(uint8_t)
3491iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3492 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3493 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3494{
3495 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3496#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3497 AssertMsg( fSkipLivenessAssert
3498 || pReNative->idxCurCall == 0
3499 || enmGstReg == kIemNativeGstReg_Pc
3500 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3501 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3502 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3503 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3504 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3505 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3506#endif
3507 RT_NOREF(fSkipLivenessAssert);
3508#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3509 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3510#endif
3511 uint32_t const fRegMask = !fNoVolatileRegs
3512 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3513 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3514
3515 /*
3516 * First check if the guest register value is already in a host register.
3517 */
3518 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3519 {
3520 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3521 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3522 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3523 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3524
3525 /* It's not supposed to be allocated... */
3526 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3527 {
3528 /*
3529 * If the register will trash the guest shadow copy, try find a
3530 * completely unused register we can use instead. If that fails,
3531 * we need to disassociate the host reg from the guest reg.
3532 */
3533 /** @todo would be nice to know if preserving the register is in any way helpful. */
3534 /* If the purpose is calculations, try duplicate the register value as
3535 we'll be clobbering the shadow. */
3536 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3537 && ( ~pReNative->Core.bmHstRegs
3538 & ~pReNative->Core.bmHstRegsWithGstShadow
3539 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3540 {
3541 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3542
3543 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3544
3545 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3546 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3547 g_apszIemNativeHstRegNames[idxRegNew]));
3548 idxReg = idxRegNew;
3549 }
3550 /* If the current register matches the restrictions, go ahead and allocate
3551 it for the caller. */
3552 else if (fRegMask & RT_BIT_32(idxReg))
3553 {
3554 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3555 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3556 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3557 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3558 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3559 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3560 else
3561 {
3562 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3563 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3564 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3565 }
3566 }
3567 /* Otherwise, allocate a register that satisfies the caller and transfer
3568 the shadowing if compatible with the intended use. (This basically
3569 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3570 else
3571 {
3572 Assert(fNoVolatileRegs);
3573 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3574 !fNoVolatileRegs
3575 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3576 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3577 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3578 {
3579 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3580 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3581 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3582 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3583 }
3584 else
3585 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3586 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3587 g_apszIemNativeHstRegNames[idxRegNew]));
3588 idxReg = idxRegNew;
3589 }
3590 }
3591 else
3592 {
3593 /*
3594 * Oops. Shadowed guest register already allocated!
3595 *
3596 * Allocate a new register, copy the value and, if updating, the
3597 * guest shadow copy assignment to the new register.
3598 */
3599 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3600 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3601 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3602 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3603
3604 /** @todo share register for readonly access. */
3605 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3606 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3607
3608 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3609 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3610
3611 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3612 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3613 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3614 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3615 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3616 else
3617 {
3618 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3619 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3620 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3621 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3622 }
3623 idxReg = idxRegNew;
3624 }
3625 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3626
3627#ifdef VBOX_STRICT
3628 /* Strict builds: Check that the value is correct. */
3629 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3630#endif
3631
3632#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3633 /** @todo r=aeichner Implement for registers other than GPR as well. */
3634 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3635 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3636 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3637 && enmGstReg <= kIemNativeGstReg_GprLast)
3638 || enmGstReg == kIemNativeGstReg_MxCsr))
3639 {
3640# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3641 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3642 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3643# endif
3644 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3645 }
3646#endif
3647
3648 return idxReg;
3649 }
3650
3651 /*
3652 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3653 */
3654 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3655
3656 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3657 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3658
3659 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3660 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3661 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3662 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3663
3664#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3665 /** @todo r=aeichner Implement for registers other than GPR as well. */
3666 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3667 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3668 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3669 && enmGstReg <= kIemNativeGstReg_GprLast)
3670 || enmGstReg == kIemNativeGstReg_MxCsr))
3671 {
3672# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3673 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3674 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3675# endif
3676 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3677 }
3678#endif
3679
3680 return idxRegNew;
3681}
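
/*
 * Usage sketch (assumed read-modify-write of a guest GPR): the shadow copy is
 * loaded or reused as needed, and with delayed register writeback the shadow
 * is marked dirty here so the caller does not emit an explicit store:
 *
 *      uint8_t const idxGstGprReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_GprFirst,
 *                                                                   kIemNativeGstRegUse_ForUpdate);
 *      ... emit code updating idxGstGprReg ...
 *      iemNativeRegFreeTmp(pReNative, idxGstGprReg);
 */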
3682
3683
3684/**
3685 * Allocates a temporary host general purpose register that already holds the
3686 * given guest register value.
3687 *
3688 * The use case for this function is places where the shadowing state cannot be
3689 * modified due to branching and such. This will fail if we don't have a
3690 * current shadow copy handy or if it's incompatible. The only code that will
3691 * be emitted here is value checking code in strict builds.
3692 *
3693 * The intended use can only be readonly!
3694 *
3695 * @returns The host register number, UINT8_MAX if not present.
3696 * @param pReNative The native recompile state.
3697 * @param poff Pointer to the instruction buffer offset.
3698 * Will be updated in strict builds if a register is
3699 * found.
3700 * @param enmGstReg The guest register that is to be read.
3701 * @note In strict builds, this may throw instruction buffer growth failures.
3702 * Non-strict builds will not throw anything.
3703 * @sa iemNativeRegAllocTmpForGuestReg
3704 */
3705DECL_HIDDEN_THROW(uint8_t)
3706iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3707{
3708 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3709#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3710 AssertMsg( pReNative->idxCurCall == 0
3711 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3712 || enmGstReg == kIemNativeGstReg_Pc,
3713 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3714#endif
3715
3716 /*
3717 * First check if the guest register value is already in a host register.
3718 */
3719 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3720 {
3721 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3722 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3723 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3724 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3725
3726 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3727 {
3728 /*
3729 * We only do readonly use here, so easy compared to the other
3730 * variant of this code.
3731 */
3732 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3733 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3734 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3735 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3736 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3737
3738#ifdef VBOX_STRICT
3739 /* Strict builds: Check that the value is correct. */
3740 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3741#else
3742 RT_NOREF(poff);
3743#endif
3744 return idxReg;
3745 }
3746 }
3747
3748 return UINT8_MAX;
3749}
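
/*
 * Usage sketch: opportunistically use an existing shadow copy without
 * disturbing the shadowing state (no code is emitted on a miss):
 *
 *      uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxPcReg != UINT8_MAX)
 *      {
 *          ... read the PC value from idxPcReg ...
 *          iemNativeRegFreeTmp(pReNative, idxPcReg);
 *      }
 */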
3750
3751
3752/**
3753 * Allocates argument registers for a function call.
3754 *
3755 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3756 * need to check the return value.
3757 * @param pReNative The native recompile state.
3758 * @param off The current code buffer offset.
3759 * @param cArgs The number of arguments the function call takes.
3760 */
3761DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3762{
3763 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3764 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3765 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3766 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3767
3768 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3769 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3770 else if (cArgs == 0)
3771 return off;
3772
3773 /*
3774 * Do we get lucky and all registers are free and not shadowing anything?
3775 */
3776 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3777 for (uint32_t i = 0; i < cArgs; i++)
3778 {
3779 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3780 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3781 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3782 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3783 }
3784 /*
3785 * Okay, not lucky so we have to free up the registers.
3786 */
3787 else
3788 for (uint32_t i = 0; i < cArgs; i++)
3789 {
3790 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3791 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3792 {
3793 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3794 {
3795 case kIemNativeWhat_Var:
3796 {
3797 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3798 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3799 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3800 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3801 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3802#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3803 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3804#endif
3805
3806 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3807 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3808 else
3809 {
3810 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3811 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3812 }
3813 break;
3814 }
3815
3816 case kIemNativeWhat_Tmp:
3817 case kIemNativeWhat_Arg:
3818 case kIemNativeWhat_rc:
3819 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3820 default:
3821 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3822 }
3823
3824 }
3825 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3826 {
3827 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3828 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3829 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3830#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3831 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3832#endif
3833 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3834 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3835 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3836 }
3837 else
3838 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3839 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3840 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3841 }
3842 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3843 return off;
3844}
3845
3846
3847DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3848
3849
3850#if 0
3851/**
3852 * Frees a register assignment of any type.
3853 *
3854 * @param pReNative The native recompile state.
3855 * @param idxHstReg The register to free.
3856 *
3857 * @note Does not update variables.
3858 */
3859DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3860{
3861 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3862 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3863 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3864 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3865 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3866 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3867 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3868 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3869 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3870 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3871 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3872 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3873 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3874 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3875
3876 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3877 /* no flushing, right:
3878 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3879 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3880 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3881 */
3882}
3883#endif
3884
3885
3886/**
3887 * Frees a temporary register.
3888 *
3889 * Any shadow copies of guest registers assigned to the host register will not
3890 * be flushed by this operation.
3891 */
3892DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3893{
3894 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3895 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3896 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3897 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3898 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3899}
3900
3901
3902/**
3903 * Frees a temporary immediate register.
3904 *
3905 * It is assumed that the caller has not modified the register, so it still holds
3906 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3907 */
3908DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3909{
3910 iemNativeRegFreeTmp(pReNative, idxHstReg);
3911}
3912
3913
3914/**
3915 * Frees a register assigned to a variable.
3916 *
3917 * The register will be disassociated from the variable.
3918 */
3919DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3920{
3921 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3922 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3923 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3924 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3925 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3926#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3927 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3928#endif
3929
3930 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3931 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3932 if (!fFlushShadows)
3933 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3934 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3935 else
3936 {
3937 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3938 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3939#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3940 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
3941#endif
3942 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3943 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3944 uint64_t fGstRegShadows = fGstRegShadowsOld;
3945 while (fGstRegShadows)
3946 {
3947 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3948 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3949
3950 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
3951 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
3952 }
3953 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3954 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3955 }
3956}
3957
3958
3959#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3960# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
3961/** Host CPU SIMD register names. */
3962DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
3963{
3964# ifdef RT_ARCH_AMD64
3965 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
3966# elif defined(RT_ARCH_ARM64)
3967 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
3968 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
3969# else
3970# error "port me"
3971# endif
3972};
3973# endif
3974
3975
3976/**
3977 * Frees a SIMD register assigned to a variable.
3978 *
3979 * The register will be disassociated from the variable.
3980 */
3981DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3982{
3983 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
3984 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3985 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
3986 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3987 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3988 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3989
3990 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3991 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
3992 if (!fFlushShadows)
3993 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3994 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
3995 else
3996 {
3997 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3998 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
3999 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4000 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4001 uint64_t fGstRegShadows = fGstRegShadowsOld;
4002 while (fGstRegShadows)
4003 {
4004 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4005 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4006
4007 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4008 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4009 }
4010 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4011 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4012 }
4013}
4014
4015
4016/**
4017 * Reassigns a variable to a different SIMD register specified by the caller.
4018 *
4019 * @returns The new code buffer position.
4020 * @param pReNative The native recompile state.
4021 * @param off The current code buffer position.
4022 * @param idxVar The variable index.
4023 * @param idxRegOld The old host register number.
4024 * @param idxRegNew The new host register number.
4025 * @param pszCaller The caller for logging.
4026 */
4027static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4028 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4029{
4030 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4031 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4032 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4033 RT_NOREF(pszCaller);
4034
4035 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4036 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4037 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4038
4039 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4040 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4041 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4042
4043 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4044 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4046
4047 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4048 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4049 else
4050 {
4051 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4052 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4053 }
4054
4055 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4056 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4057 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4058 if (fGstRegShadows)
4059 {
4060 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4061 | RT_BIT_32(idxRegNew);
4062 while (fGstRegShadows)
4063 {
4064 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4065 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4066
4067 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4068 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4069 }
4070 }
4071
4072 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4073 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4074 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4075 return off;
4076}
4077
4078
4079/**
4080 * Moves a variable to a different register or spills it onto the stack.
4081 *
4082 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4083 * kinds can easily be recreated if needed later.
4084 *
4085 * @returns The new code buffer position.
4086 * @param pReNative The native recompile state.
4087 * @param off The current code buffer position.
4088 * @param idxVar The variable index.
4089 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4090 * call-volatile registers.
4091 */
4092DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4093 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4094{
4095 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4096 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4097 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4098 Assert(!pVar->fRegAcquired);
4099 Assert(!pVar->fSimdReg);
4100
4101 uint8_t const idxRegOld = pVar->idxReg;
4102 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4103 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4104 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4105 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4106 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4107 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4108 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4109 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4110 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4111 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4112
4113 /** @todo Add statistics on this.*/
4114 /** @todo Implement basic variable liveness analysis (python) so variables
4115 * can be freed immediately once no longer used. Without this we risk
4116 * trashing registers and stack for dead variables.
4117 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4118
4119 /*
4120 * First try move it to a different register, as that's cheaper.
4121 */
4122 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4123 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4124 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4125 if (fRegs)
4126 {
4127 /* Avoid using shadow registers, if possible. */
4128 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4129 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4130 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4131 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4132 }
4133
4134 /*
4135 * Otherwise we must spill the register onto the stack.
4136 */
4137 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4138 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4139 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4140
4141 if (pVar->cbVar == sizeof(RTUINT128U))
4142 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4143 else
4144 {
4145 Assert(pVar->cbVar == sizeof(RTUINT256U));
4146 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4147 }
4148
4149 pVar->idxReg = UINT8_MAX;
4150 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4151 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4152 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4153 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4154 return off;
4155}
4156
4157
4158/**
4159 * Called right before emitting a call instruction to move anything important
4160 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4161 * optionally freeing argument variables.
4162 *
4163 * @returns New code buffer offset, UINT32_MAX on failure.
4164 * @param pReNative The native recompile state.
4165 * @param off The code buffer offset.
4166 * @param cArgs The number of arguments the function call takes.
4167 * It is presumed that the host register part of these has
4168 * been allocated as such already and won't need moving,
4169 * just freeing.
4170 * @param fKeepVars Mask of variables that should keep their register
4171 * assignments. Caller must take care to handle these.
4172 */
4173DECL_HIDDEN_THROW(uint32_t)
4174iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4175{
4176 Assert(!cArgs); RT_NOREF(cArgs);
4177
4178 /* fKeepVars will reduce this mask. */
4179 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4180
4181 /*
4182 * Move anything important out of volatile registers.
4183 */
4184 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4185#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4186 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4187#endif
4188 ;
4189
4190 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4191 if (!fSimdRegsToMove)
4192 { /* likely */ }
4193 else
4194 {
4195 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4196 while (fSimdRegsToMove != 0)
4197 {
4198 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4199 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4200
4201 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4202 {
4203 case kIemNativeWhat_Var:
4204 {
4205 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4206 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4207 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4208 Assert(pVar->idxReg == idxSimdReg);
4209 Assert(pVar->fSimdReg);
4210 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4211 {
4212 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4213 idxVar, pVar->enmKind, pVar->idxReg));
4214 if (pVar->enmKind != kIemNativeVarKind_Stack)
4215 pVar->idxReg = UINT8_MAX;
4216 else
4217 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4218 }
4219 else
4220 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4221 continue;
4222 }
4223
4224 case kIemNativeWhat_Arg:
4225 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4226 continue;
4227
4228 case kIemNativeWhat_rc:
4229 case kIemNativeWhat_Tmp:
4230 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4231 continue;
4232
4233 case kIemNativeWhat_FixedReserved:
4234#ifdef RT_ARCH_ARM64
4235 continue; /* On ARM the upper half of the virtual 256-bit register. */
4236#endif
4237
4238 case kIemNativeWhat_FixedTmp:
4239 case kIemNativeWhat_pVCpuFixed:
4240 case kIemNativeWhat_pCtxFixed:
4241 case kIemNativeWhat_PcShadow:
4242 case kIemNativeWhat_Invalid:
4243 case kIemNativeWhat_End:
4244 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4245 }
4246 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4247 }
4248 }
4249
4250 /*
4251 * Do the actual freeing.
4252 */
4253 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4254 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4255 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4256 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4257
4258 /* If there are guest register shadows in any call-volatile register, we
4259 have to clear the corresponding guest register masks for each register. */
4260 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4261 if (fHstSimdRegsWithGstShadow)
4262 {
4263 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4264 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4265 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4266 do
4267 {
4268 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4269 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4270
4271 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4272
4273#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4274 /*
4275 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4276 * to call volatile registers).
4277 */
4278 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4279 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4280 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4281#endif
4282 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4283 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4284
4285 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4286 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4287 } while (fHstSimdRegsWithGstShadow != 0);
4288 }
4289
4290 return off;
4291}
4292#endif
4293
4294
4295/**
4296 * Called right before emitting a call instruction to move anything important
4297 * out of call-volatile registers, free and flush the call-volatile registers,
4298 * optionally freeing argument variables.
4299 *
4300 * @returns New code buffer offset, UINT32_MAX on failure.
4301 * @param pReNative The native recompile state.
4302 * @param off The code buffer offset.
4303 * @param cArgs The number of arguments the function call takes.
4304 * It is presumed that the host register part of these have
4305 * been allocated as such already and won't need moving,
4306 * just freeing.
4307 * @param fKeepVars Mask of variables that should keep their register
4308 * assignments. Caller must take care to handle these.
4309 */
4310DECL_HIDDEN_THROW(uint32_t)
4311iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4312{
4313 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4314
4315 /* fKeepVars will reduce this mask. */
4316 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4317
4318 /*
4319 * Move anything important out of volatile registers.
4320 */
4321 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4322 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4323 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4324#ifdef IEMNATIVE_REG_FIXED_TMP0
4325 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4326#endif
4327#ifdef IEMNATIVE_REG_FIXED_TMP1
4328 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4329#endif
4330#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4331 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4332#endif
4333 & ~g_afIemNativeCallRegs[cArgs];
4334
4335 fRegsToMove &= pReNative->Core.bmHstRegs;
4336 if (!fRegsToMove)
4337 { /* likely */ }
4338 else
4339 {
4340 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4341 while (fRegsToMove != 0)
4342 {
4343 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4344 fRegsToMove &= ~RT_BIT_32(idxReg);
4345
4346 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4347 {
4348 case kIemNativeWhat_Var:
4349 {
4350 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4351 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4352 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4353 Assert(pVar->idxReg == idxReg);
4354#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4355 Assert(!pVar->fSimdReg);
4356#endif
4357 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4358 {
4359 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4360 idxVar, pVar->enmKind, pVar->idxReg));
4361 if (pVar->enmKind != kIemNativeVarKind_Stack)
4362 pVar->idxReg = UINT8_MAX;
4363 else
4364 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4365 }
4366 else
4367 fRegsToFree &= ~RT_BIT_32(idxReg);
4368 continue;
4369 }
4370
4371 case kIemNativeWhat_Arg:
4372 AssertMsgFailed(("What?!?: %u\n", idxReg));
4373 continue;
4374
4375 case kIemNativeWhat_rc:
4376 case kIemNativeWhat_Tmp:
4377 AssertMsgFailed(("Missing free: %u\n", idxReg));
4378 continue;
4379
4380 case kIemNativeWhat_FixedTmp:
4381 case kIemNativeWhat_pVCpuFixed:
4382 case kIemNativeWhat_pCtxFixed:
4383 case kIemNativeWhat_PcShadow:
4384 case kIemNativeWhat_FixedReserved:
4385 case kIemNativeWhat_Invalid:
4386 case kIemNativeWhat_End:
4387 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4388 }
4389 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4390 }
4391 }
4392
4393 /*
4394 * Do the actual freeing.
4395 */
4396 if (pReNative->Core.bmHstRegs & fRegsToFree)
4397 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4398 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4399 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4400
4401 /* If there are guest register shadows in any call-volatile register, we
4402 have to clear the corresponding guest register masks for each register. */
4403 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4404 if (fHstRegsWithGstShadow)
4405 {
4406 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4407 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4408 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4409 do
4410 {
4411 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4412 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4413
4414 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4415
4416#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4417 /*
4418 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4419 * to call volatile registers).
4420 */
4421 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4422 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4423 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4424#endif
4425
4426 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4427 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4428 } while (fHstRegsWithGstShadow != 0);
4429 }
4430
4431#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4432 /* Now for the SIMD registers, no argument support for now. */
4433 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4434#endif
4435
4436 return off;
4437}
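/*
 * Illustrative sketch only (not upstream code): a hypothetical call site showing how the
 * helper above is typically paired with a shadow flush once the call has been emitted.
 * The argument count and the elided call emitter are assumptions for the example.
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2, 0);  // 2 register args, keep no variables
 *      // ... load the two argument registers and emit the actual call here ...
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);               // the callee may have modified guest state
 */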
4438
4439
4440/**
4441 * Flushes a set of guest register shadow copies.
4442 *
4443 * This is usually done after calling a threaded function or a C-implementation
4444 * of an instruction.
4445 *
4446 * @param pReNative The native recompile state.
4447 * @param fGstRegs Set of guest registers to flush.
4448 */
4449DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4450{
4451 /*
4452 * Reduce the mask by what's currently shadowed
4453 */
4454 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4455 fGstRegs &= bmGstRegShadowsOld;
4456 if (fGstRegs)
4457 {
4458 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4459 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4460 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4461 if (bmGstRegShadowsNew)
4462 {
4463 /*
4464 * Partial.
4465 */
4466 do
4467 {
4468 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4469 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4470 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4471 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4472 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4473#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4474 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4475#endif
4476
4477 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4478 fGstRegs &= ~fInThisHstReg;
4479 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4480 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4481 if (!fGstRegShadowsNew)
4482 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4483 } while (fGstRegs != 0);
4484 }
4485 else
4486 {
4487 /*
4488 * Clear all.
4489 */
4490 do
4491 {
4492 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4493 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4494 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4495 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4496 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4497#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4498 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4499#endif
4500
4501 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4502 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4503 } while (fGstRegs != 0);
4504 pReNative->Core.bmHstRegsWithGstShadow = 0;
4505 }
4506 }
4507}
4508
4509
4510/**
4511 * Flushes guest register shadow copies held by a set of host registers.
4512 *
4513 * This is used with the TLB lookup code for ensuring that we don't carry on
4514 * with any guest shadows in volatile registers, as these will get corrupted by
4515 * a TLB miss.
4516 *
4517 * @param pReNative The native recompile state.
4518 * @param fHstRegs Set of host registers to flush guest shadows for.
4519 */
4520DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4521{
4522 /*
4523 * Reduce the mask by what's currently shadowed.
4524 */
4525 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4526 fHstRegs &= bmHstRegsWithGstShadowOld;
4527 if (fHstRegs)
4528 {
4529 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4530 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4531 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4532 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4533 if (bmHstRegsWithGstShadowNew)
4534 {
4535 /*
4536 * Partial (likely).
4537 */
4538 uint64_t fGstShadows = 0;
4539 do
4540 {
4541 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4542 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4543 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4544 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4545#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4546 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4547#endif
4548
4549 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4550 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4551 fHstRegs &= ~RT_BIT_32(idxHstReg);
4552 } while (fHstRegs != 0);
4553 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4554 }
4555 else
4556 {
4557 /*
4558 * Clear all.
4559 */
4560 do
4561 {
4562 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4563 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4564 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4565 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4566#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4567 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4568#endif
4569
4570 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4571 fHstRegs &= ~RT_BIT_32(idxHstReg);
4572 } while (fHstRegs != 0);
4573 pReNative->Core.bmGstRegShadows = 0;
4574 }
4575 }
4576}
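/*
 * Illustrative sketch only: before branching to a TLB miss path the lookup code would
 * typically drop all guest shadows held in call-volatile GPRs, e.g.:
 *
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */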
4577
4578
4579/**
4580 * Restores guest shadow copies in volatile registers.
4581 *
4582 * This is used after calling a helper function (think TLB miss) to restore the
4583 * register state of volatile registers.
4584 *
4585 * @param pReNative The native recompile state.
4586 * @param off The code buffer offset.
4587 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4588 * be active (allocated) w/o asserting. Hack.
4589 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4590 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4591 */
4592DECL_HIDDEN_THROW(uint32_t)
4593iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4594{
4595 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4596 if (fHstRegs)
4597 {
4598 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4599 do
4600 {
4601 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4602
4603 /* It's not fatal if a register is active holding a variable that is
4604 shadowing a guest register, ASSUMING all pending guest register
4605 writes were flushed prior to the helper call. However, we'll be
4606 emitting duplicate restores, so it wastes code space. */
4607 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4608 RT_NOREF(fHstRegsActiveShadows);
4609
4610 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4611#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4612 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4613#endif
4614 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4615 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4616 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4617
4618 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4619 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4620
4621 fHstRegs &= ~RT_BIT_32(idxHstReg);
4622 } while (fHstRegs != 0);
4623 }
4624 return off;
4625}
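/*
 * Illustrative sketch only: after a helper call for which the guest shadow bookkeeping was
 * kept (see the iemNativeVarSave/RestoreVolatileRegs* functions referenced above), the
 * clobbered volatile registers can be re-materialized like this; passing 0 means no
 * register is exempt from the sanity assertion:
 *
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 */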
4626
4627
4628
4629
4630/*********************************************************************************************************************************
4631* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4632*********************************************************************************************************************************/
4633#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4634
4635/**
4636 * Info about shadowed guest SIMD register values.
4637 * @see IEMNATIVEGSTSIMDREG
4638 */
4639static struct
4640{
4641 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4642 uint32_t offXmm;
4643 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4644 uint32_t offYmm;
4645 /** Name (for logging). */
4646 const char *pszName;
4647} const g_aGstSimdShadowInfo[] =
4648{
4649#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4650 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4651 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4652 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4653 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4654 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4655 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4656 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4657 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4658 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4659 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4660 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4661 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4662 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4663 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4664 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4665 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4666 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4667#undef CPUMCTX_OFF_AND_SIZE
4668};
4669AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4670
4671
4672/**
4673 * Frees a temporary SIMD register.
4674 *
4675 * Any shadow copies of guest registers assigned to the host register will not
4676 * be flushed by this operation.
4677 */
4678DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4679{
4680 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4681 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4682 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4683 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4684 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4685}
4686
4687
4688/**
4689 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
4690 *
4691 * @returns New code buffer offset.
4692 * @param pReNative The native recompile state.
4693 * @param off Current code buffer position.
4694 * @param enmGstSimdReg The guest SIMD register to flush.
4695 */
4696DECL_HIDDEN_THROW(uint32_t)
4697iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4698{
4699 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4700
4701 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4702 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4703 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4704 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4705
4706 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4707 {
4708 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4709 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4710 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4711 }
4712
4713 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4714 {
4715 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4716 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4717 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4718 }
4719
4720 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4721 return off;
4722}
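/*
 * Note for clarity: the two 128-bit halves are tracked and written back independently via
 * bmGstSimdRegShadowDirtyLo128 / bmGstSimdRegShadowDirtyHi128, so e.g. a recompiled legacy
 * SSE write that only dirtied the XMM half results in a single 128-bit store above, while
 * a 256-bit modification produces both stores.
 */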
4723
4724
4725/**
4726 * Flush the given set of guest SIMD registers if marked as dirty.
4727 *
4728 * @returns New code buffer offset.
4729 * @param pReNative The native recompile state.
4730 * @param off Current code buffer position.
4731 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4732 */
4733DECL_HIDDEN_THROW(uint32_t)
4734iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4735{
4736 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4737 & fFlushGstSimdReg;
4738 if (bmGstSimdRegShadowDirty)
4739 {
4740# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4741 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4742 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4743# endif
4744
4745 do
4746 {
4747 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4748 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4749 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4750 } while (bmGstSimdRegShadowDirty);
4751 }
4752
4753 return off;
4754}
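/*
 * Illustrative sketch only: flushing just one dirty guest register, assuming the usual
 * IEMNATIVEGSTSIMDREG_SIMD() index mapping; the default (no mask) flushes everything:
 *
 *      off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(0)));
 */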
4755
4756
4757#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4758/**
4759 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4760 *
4761 * @returns New code buffer offset.
4762 * @param pReNative The native recompile state.
4763 * @param off Current code buffer position.
4764 * @param idxHstSimdReg The host SIMD register.
4765 *
4766 * @note This doesn't do any unshadowing of guest registers from the host register.
4767 */
4768DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4769{
4770 /* We need to flush any pending guest register writes this host register shadows. */
4771 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4772 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4773 if (bmGstSimdRegShadowDirty)
4774 {
4775# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4776 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4777 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4778# endif
4779
4780 do
4781 {
4782 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4783 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4784 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4785 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4786 } while (bmGstSimdRegShadowDirty);
4787 }
4788
4789 return off;
4790}
4791#endif
4792
4793
4794/**
4795 * Locate a register, possibly freeing one up.
4796 *
4797 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4798 * failed.
4799 *
4800 * @returns Host register number on success. Returns UINT8_MAX if no registers
4801 * found, in which case the caller is supposed to deal with this and raise an
4802 * allocation type specific status code (if desired).
4803 *
4804 * @throws VBox status code if we run into trouble spilling a variable or
4805 * recording debug info. Does NOT throw anything if we're out of
4806 * registers, though.
4807 */
4808static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4809 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4810{
4811 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4812 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4813 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4814
4815 /*
4816 * Try a freed register that's shadowing a guest register.
4817 */
4818 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4819 if (fRegs)
4820 {
4821 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4822
4823#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4824 /*
4825 * When we have liveness information, we use it to kick out all shadowed
4826 * guest registers that will not be needed any more in this TB. If we're
4827 * lucky, this may prevent us from ending up here again.
4828 *
4829 * Note! We must consider the previous entry here so we don't free
4830 * anything that the current threaded function requires (current
4831 * entry is produced by the next threaded function).
4832 */
4833 uint32_t const idxCurCall = pReNative->idxCurCall;
4834 if (idxCurCall > 0)
4835 {
4836 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4837
4838# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4839 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4840 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4841 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4842#else
4843 /* Construct a mask of the registers not in the read or write state.
4844 Note! We could skip writes, if they aren't from us, as this is just
4845 a hack to prevent trashing registers that have just been written
4846 or will be written when we retire the current instruction. */
4847 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4848 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4849 & IEMLIVENESSBIT_MASK;
4850#endif
4851 /* If it matches any shadowed registers. */
4852 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4853 {
4854 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4855 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4856 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4857
4858 /* See if we've got any unshadowed registers we can return now. */
4859 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4860 if (fUnshadowedRegs)
4861 {
4862 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4863 return (fPreferVolatile
4864 ? ASMBitFirstSetU32(fUnshadowedRegs)
4865 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4866 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4867 - 1;
4868 }
4869 }
4870 }
4871#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4872
4873 unsigned const idxReg = (fPreferVolatile
4874 ? ASMBitFirstSetU32(fRegs)
4875 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4876 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4877 - 1;
4878
4879 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4880 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4881 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4882 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4883
4884 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4885 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4886
4887 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4888 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4889 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4890 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4891 return idxReg;
4892 }
4893
4894 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
4895
4896 /*
4897 * Try free up a variable that's in a register.
4898 *
4899 * We do two rounds here, first evacuating variables we don't need to be
4900 * saved on the stack, then in the second round move things to the stack.
4901 */
4902 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
4903 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4904 {
4905 uint32_t fVars = pReNative->Core.bmVars;
4906 while (fVars)
4907 {
4908 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4909 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4910 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
4911 { fVars &= ~RT_BIT_32(idxVar); continue; } /* (clear the bit before continuing or we would loop forever) */
4912
4913 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
4914 && (RT_BIT_32(idxReg) & fRegMask)
4915 && ( iLoop == 0
4916 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4917 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4918 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4919 {
4920 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
4921 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
4922 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4923 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4924 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
4925 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
4926
4927 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4928 {
4929 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4930 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4931 }
4932
4933 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4934 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
4935
4936 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4937 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4938 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4939 return idxReg;
4940 }
4941 fVars &= ~RT_BIT_32(idxVar);
4942 }
4943 }
4944
4945 AssertFailed();
4946 return UINT8_MAX;
4947}
4948
4949
4950/**
4951 * Flushes a set of guest register shadow copies.
4952 *
4953 * This is usually done after calling a threaded function or a C-implementation
4954 * of an instruction.
4955 *
4956 * @param pReNative The native recompile state.
4957 * @param fGstSimdRegs Set of guest SIMD registers to flush.
4958 */
4959DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
4960{
4961 /*
4962 * Reduce the mask by what's currently shadowed
4963 */
4964 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
4965 fGstSimdRegs &= bmGstSimdRegShadows;
4966 if (fGstSimdRegs)
4967 {
4968 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
4969 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
4970 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
4971 if (bmGstSimdRegShadowsNew)
4972 {
4973 /*
4974 * Partial.
4975 */
4976 do
4977 {
4978 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4979 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4980 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4981 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4982 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4983 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4984
4985 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
4986 fGstSimdRegs &= ~fInThisHstReg;
4987 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4988 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4989 if (!fGstRegShadowsNew)
4990 {
4991 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4992 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4993 }
4994 } while (fGstSimdRegs != 0);
4995 }
4996 else
4997 {
4998 /*
4999 * Clear all.
5000 */
5001 do
5002 {
5003 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5004 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5005 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5006 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5007 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5008 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5009
5010 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5011 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5012 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5013 } while (fGstSimdRegs != 0);
5014 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5015 }
5016 }
5017}
5018
5019
5020/**
5021 * Allocates a temporary host SIMD register.
5022 *
5023 * This may emit code to save register content onto the stack in order to free
5024 * up a register.
5025 *
5026 * @returns The host register number; throws VBox status code on failure,
5027 * so no need to check the return value.
5028 * @param pReNative The native recompile state.
5029 * @param poff Pointer to the variable with the code buffer position.
5030 * This will be update if we need to move a variable from
5031 * register to stack in order to satisfy the request.
5032 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5033 * registers (@c true, default) or the other way around
5034 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5035 */
5036DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5037{
5038 /*
5039 * Try find a completely unused register, preferably a call-volatile one.
5040 */
5041 uint8_t idxSimdReg;
5042 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5043 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5044 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5045 if (fRegs)
5046 {
5047 if (fPreferVolatile)
5048 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5049 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5050 else
5051 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5052 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5053 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5054 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5055
5056 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5057 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5058 }
5059 else
5060 {
5061 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5062 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5063 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5064 }
5065
5066 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5067 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5068}
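/*
 * Illustrative sketch only: typical pairing with iemNativeSimdRegFreeTmp() when a scratch
 * vector register is needed while emitting a few instructions:
 *
 *      uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... emit code that uses idxSimdRegTmp ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
 */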
5069
5070
5071/**
5072 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5073 * registers.
5074 *
5075 * @returns The host register number; throws VBox status code on failure,
5076 * so no need to check the return value.
5077 * @param pReNative The native recompile state.
5078 * @param poff Pointer to the variable with the code buffer position.
5079 * This will be updated if we need to move a variable from
5080 * register to stack in order to satisfy the request.
5081 * @param fRegMask Mask of acceptable registers.
5082 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5083 * registers (@c true, default) or the other way around
5084 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5085 */
5086DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5087 bool fPreferVolatile /*= true*/)
5088{
5089 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5090 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5091
5092 /*
5093 * Try find a completely unused register, preferably a call-volatile one.
5094 */
5095 uint8_t idxSimdReg;
5096 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5097 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5098 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5099 & fRegMask;
5100 if (fRegs)
5101 {
5102 if (fPreferVolatile)
5103 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5104 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5105 else
5106 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5107 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5108 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5109 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5110
5111 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5112 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5113 }
5114 else
5115 {
5116 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5117 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5118 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5119 }
5120
5121 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5122 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5123}
5124
5125
5126/**
5127 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5128 *
5129 * @param pReNative The native recompile state.
5130 * @param idxHstSimdReg The host SIMD register to update the state for.
5131 * @param enmLoadSz The load size to set.
5132 */
5133DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5134 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5135{
5136 /* Everything valid already? -> nothing to do. */
5137 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5138 return;
5139
5140 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5141 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5142 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5143 {
5144 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5145 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5146 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5147 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5148 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5149 }
5150}
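/*
 * Load-size merge summary for the function above (rows: current state, columns: enmLoadSz):
 *
 *      current \ load      Low128      High128     256
 *      Invalid             Low128      High128     256
 *      Low128              Low128      256         (asserts)
 *      High128             256         High128     (asserts)
 *      256                 256         256         256
 */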
5151
5152
5153static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5154 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5155{
5156 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5157 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5158 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5159 {
5160# ifdef RT_ARCH_ARM64
5161 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5162 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5163# endif
5164
5165 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5166 {
5167 switch (enmLoadSzDst)
5168 {
5169 case kIemNativeGstSimdRegLdStSz_256:
5170 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5171 break;
5172 case kIemNativeGstSimdRegLdStSz_Low128:
5173 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5174 break;
5175 case kIemNativeGstSimdRegLdStSz_High128:
5176 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5177 break;
5178 default:
5179 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5180 }
5181
5182 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5183 }
5184 }
5185 else
5186 {
5187 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5188 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5189 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5190 }
5191
5192 return off;
5193}
5194
5195
5196/**
5197 * Allocates a temporary host SIMD register for keeping a guest
5198 * SIMD register value.
5199 *
5200 * Since we may already have a register holding the guest register value,
5201 * code will be emitted to do the loading if that's not the case. Code may also
5202 * be emitted if we have to free up a register to satisfy the request.
5203 *
5204 * @returns The host register number; throws VBox status code on failure, so no
5205 * need to check the return value.
5206 * @param pReNative The native recompile state.
5207 * @param poff Pointer to the variable with the code buffer
5208 * position. This will be updated if we need to move a
5209 * variable from register to stack in order to satisfy
5210 * the request.
5211 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5212 * @param enmIntendedUse How the caller will be using the host register.
5213 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5214 * register is okay (default). The ASSUMPTION here is
5215 * that the caller has already flushed all volatile
5216 * registers, so this is only applied if we allocate a
5217 * new register.
5218 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5219 */
5220DECL_HIDDEN_THROW(uint8_t)
5221iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5222 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5223 bool fNoVolatileRegs /*= false*/)
5224{
5225 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5226#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5227 AssertMsg( pReNative->idxCurCall == 0
5228 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5229 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5230 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5231 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5232 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5233 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5234#endif
5235#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5236 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5237#endif
5238 uint32_t const fRegMask = !fNoVolatileRegs
5239 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5240 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5241
5242 /*
5243 * First check if the guest register value is already in a host register.
5244 */
5245 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5246 {
5247 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5248 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5249 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5250 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5251
5252 /* It's not supposed to be allocated... */
5253 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5254 {
5255 /*
5256 * If the register will trash the guest shadow copy, try find a
5257 * completely unused register we can use instead. If that fails,
5258 * we need to disassociate the host reg from the guest reg.
5259 */
5260 /** @todo would be nice to know if preserving the register is in any way helpful. */
5261 /* If the purpose is calculations, try duplicate the register value as
5262 we'll be clobbering the shadow. */
5263 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5264 && ( ~pReNative->Core.bmHstSimdRegs
5265 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5266 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5267 {
5268 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5269
5270 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5271
5272 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5273 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5274 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5275 idxSimdReg = idxRegNew;
5276 }
5277 /* If the current register matches the restrictions, go ahead and allocate
5278 it for the caller. */
5279 else if (fRegMask & RT_BIT_32(idxSimdReg))
5280 {
5281 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5282 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5283 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5284 {
5285 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5286 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5287 else
5288 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5289 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5290 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5291 }
5292 else
5293 {
5294 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5295 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5296 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5297 }
5298 }
5299 /* Otherwise, allocate a register that satisfies the caller and transfer
5300 the shadowing if compatible with the intended use. (This basically
5301 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5302 else
5303 {
5304 Assert(fNoVolatileRegs);
5305 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5306 !fNoVolatileRegs
5307 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5308 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5309 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5310 {
5311 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5312 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5313 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5314 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5315 }
5316 else
5317 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5318 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5319 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5320 idxSimdReg = idxRegNew;
5321 }
5322 }
5323 else
5324 {
5325 /*
5326 * Oops. Shadowed guest register already allocated!
5327 *
5328 * Allocate a new register, copy the value and, if updating, the
5329 * guest shadow copy assignment to the new register.
5330 */
5331 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5332 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5333 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5334 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5335
5336 /** @todo share register for readonly access. */
5337 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5338 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5339
5340 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5341 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5342 else
5343 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5344
5345 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5346 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5347 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5348 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5349 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5350 else
5351 {
5352 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5353 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5354 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5355 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5356 }
5357 idxSimdReg = idxRegNew;
5358 }
5359 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5360
5361#ifdef VBOX_STRICT
5362 /* Strict builds: Check that the value is correct. */
5363 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5364 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5365#endif
5366
5367 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5368 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5369 {
5370# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5371 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5372 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5373# endif
5374
5375 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5376 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5377 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5378 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5379 else
5380 {
5381 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5382 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5383 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5384 }
5385 }
5386
5387 return idxSimdReg;
5388 }
5389
5390 /*
5391 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5392 */
5393 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5394
5395 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5396 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5397 else
5398 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5399
5400 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5401 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5402
5403 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5404 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5405 {
5406# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5407 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5408 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5409# endif
5410
5411 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5412 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5413 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5414 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5415 else
5416 {
5417 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5418 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5419 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5420 }
5421 }
5422
5423 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5424 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5425
5426 return idxRegNew;
5427}
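/*
 * Illustrative sketch only (hypothetical instruction body): fetching a destination for a
 * full 256-bit write and a read-only 128-bit source; the register indices are made up.
 *
 *      uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(1),
 *                                                                            kIemNativeGstSimdRegLdStSz_256,
 *                                                                            kIemNativeGstRegUse_ForFullWrite);
 *      uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(2),
 *                                                                            kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                            kIemNativeGstRegUse_ReadOnly);
 *      // ... emit the actual operation on idxSimdRegDst / idxSimdRegSrc ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
 */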
5428
5429
5430/**
5431 * Flushes guest SIMD register shadow copies held by a set of host registers.
5432 *
5433 * This is used when calling an external helper, to ensure that we don't carry on
5434 * with any guest shadows in volatile registers, as these will get corrupted by the callee.
5435 *
5436 * @param pReNative The native recompile state.
5437 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5438 */
5439DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5440{
5441 /*
5442 * Reduce the mask by what's currently shadowed.
5443 */
5444 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5445 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5446 if (fHstSimdRegs)
5447 {
5448 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5449 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5450 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5451 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5452 if (bmHstSimdRegsWithGstShadowNew)
5453 {
5454 /*
5455 * Partial (likely).
5456 */
5457 uint64_t fGstShadows = 0;
5458 do
5459 {
5460 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5461 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5462 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5463 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5464 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5465 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5466
5467 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5468 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5469 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5470 } while (fHstSimdRegs != 0);
5471 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5472 }
5473 else
5474 {
5475 /*
5476 * Clear all.
5477 */
5478 do
5479 {
5480 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5481 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5482 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5483 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5484 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5485 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5486
5487 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5488 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5489 } while (fHstSimdRegs != 0);
5490 pReNative->Core.bmGstSimdRegShadows = 0;
5491 }
5492 }
5493}
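/*
 * Illustrative sketch only: dropping the SIMD shadow info for all call-volatile vector
 * registers right before emitting a helper call:
 *
 *      iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 */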
5494#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5495
5496
5497
5498/*********************************************************************************************************************************
5499* Code emitters for flushing pending guest register writes and sanity checks *
5500*********************************************************************************************************************************/
5501
5502#ifdef VBOX_STRICT
5503/**
5504 * Does internal register allocator sanity checks.
5505 */
5506DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5507{
5508 /*
5509 * Iterate host registers building a guest shadowing set.
5510 */
5511 uint64_t bmGstRegShadows = 0;
5512 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5513 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5514 while (bmHstRegsWithGstShadow)
5515 {
5516 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5517 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5518 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5519
5520 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5521 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5522 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5523 bmGstRegShadows |= fThisGstRegShadows;
5524 while (fThisGstRegShadows)
5525 {
5526 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5527 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5528 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5529 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5530 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5531 }
5532 }
5533 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5534 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5535 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5536
5537 /*
5538 * Now the other way around, checking the guest to host index array.
5539 */
5540 bmHstRegsWithGstShadow = 0;
5541 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5542 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5543 while (bmGstRegShadows)
5544 {
5545 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5546 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5547 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5548
5549 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5550 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5551 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5552 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5553 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5554 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5555 }
5556 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5557 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5558 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5559}
5560#endif /* VBOX_STRICT */
5561
5562
5563/**
5564 * Flushes any delayed guest register writes.
5565 *
5566 * This must be called prior to calling CImpl functions and any helpers that use
5567 * the guest state (like raising exceptions) and such.
5568 *
5569 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5570 * the caller if it wishes to do so.
5571 */
5572DECL_HIDDEN_THROW(uint32_t)
5573iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5574{
5575#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5576 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
5577 off = iemNativeEmitPcWriteback(pReNative, off);
5578#else
5579 RT_NOREF(pReNative, fGstShwExcept);
5580#endif
5581
5582#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5583 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5584#endif
5585
5586#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5587 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5588#endif
5589
5590 return off;
5591}
5592
5593
5594#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5595/**
5596 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5597 */
5598DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5599{
5600 Assert(pReNative->Core.offPc);
5601# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5602 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5603 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5604# endif
5605
5606# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5607 /* Allocate a temporary PC register. */
5608 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5609
5610 /* Perform the addition and store the result. */
5611 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5612 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5613
5614 /* Free but don't flush the PC register. */
5615 iemNativeRegFreeTmp(pReNative, idxPcReg);
5616# else
5617 /* Compare the shadow with the context value, they should match. */
5618 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5619 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5620# endif
5621
5622 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5623 pReNative->Core.offPc = 0;
5624 pReNative->Core.cInstrPcUpdateSkipped = 0;
5625
5626 return off;
5627}
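
/*
 * Note: conceptually the normal (non IEMNATIVE_REG_FIXED_PC_DBG) path above
 * is the emitted form of
 *      pVCpu->cpum.GstCtx.rip += pReNative->Core.offPc;
 * where offPc is a constant known at recompile time, so only an add on the
 * shadowed PC register and a store end up in the translation block; the
 * IEMNATIVE_REG_FIXED_PC_DBG build instead adds and then value-checks.
 */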
5628#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5629
5630
5631/*********************************************************************************************************************************
5632* Code Emitters (larger snippets) *
5633*********************************************************************************************************************************/
5634
5635/**
5636 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5637 * extending to 64-bit width.
5638 *
5639 * @returns New code buffer offset; throws VBox status code on error.
5640 * @param pReNative The native recompile state.
5641 * @param off The current code buffer position.
5642 * @param idxHstReg The host register to load the guest register value into.
5643 * @param enmGstReg The guest register to load.
5644 *
5645 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg;
5646 * that is something the caller needs to do if applicable.
5647 */
5648DECL_HIDDEN_THROW(uint32_t)
5649iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5650{
5651 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5652 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5653
5654 switch (g_aGstShadowInfo[enmGstReg].cb)
5655 {
5656 case sizeof(uint64_t):
5657 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5658 case sizeof(uint32_t):
5659 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5660 case sizeof(uint16_t):
5661 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5662#if 0 /* not present in the table. */
5663 case sizeof(uint8_t):
5664 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5665#endif
5666 default:
5667 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5668 }
5669}
5670
5671
5672#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5673/**
5674 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5675 *
5676 * @returns New code buffer offset; throws VBox status code on error.
5677 * @param pReNative The recompiler state.
5678 * @param off The current code buffer position.
5679 * @param idxHstSimdReg The host register to load the guest register value into.
5680 * @param enmGstSimdReg The guest register to load.
5681 * @param enmLoadSz The load size of the register.
5682 *
5683 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg;
5684 * that is something the caller needs to do if applicable.
5685 */
5686DECL_HIDDEN_THROW(uint32_t)
5687iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5688 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5689{
5690 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5691
5692 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5693 switch (enmLoadSz)
5694 {
5695 case kIemNativeGstSimdRegLdStSz_256:
5696 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5697 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5698 case kIemNativeGstSimdRegLdStSz_Low128:
5699 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5700 case kIemNativeGstSimdRegLdStSz_High128:
5701 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5702 default:
5703 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5704 }
5705}
5706#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5707
5708#ifdef VBOX_STRICT
5709
5710/**
5711 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5712 *
5713 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5714 * Trashes EFLAGS on AMD64.
5715 */
5716DECL_HIDDEN_THROW(uint32_t)
5717iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5718{
5719# ifdef RT_ARCH_AMD64
5720 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5721
5722 /* rol reg64, 32 */
5723 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5724 pbCodeBuf[off++] = 0xc1;
5725 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5726 pbCodeBuf[off++] = 32;
5727
5728 /* test reg32, ffffffffh */
5729 if (idxReg >= 8)
5730 pbCodeBuf[off++] = X86_OP_REX_B;
5731 pbCodeBuf[off++] = 0xf7;
5732 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5733 pbCodeBuf[off++] = 0xff;
5734 pbCodeBuf[off++] = 0xff;
5735 pbCodeBuf[off++] = 0xff;
5736 pbCodeBuf[off++] = 0xff;
5737
5738 /* je/jz +1 */
5739 pbCodeBuf[off++] = 0x74;
5740 pbCodeBuf[off++] = 0x01;
5741
5742 /* int3 */
5743 pbCodeBuf[off++] = 0xcc;
5744
5745 /* rol reg64, 32 */
5746 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5747 pbCodeBuf[off++] = 0xc1;
5748 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5749 pbCodeBuf[off++] = 32;
5750
5751# elif defined(RT_ARCH_ARM64)
5752 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5753 /* lsr tmp0, reg64, #32 */
5754 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5755 /* cbz tmp0, +1 */
5756 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5757 /* brk #0x1100 */
5758 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5759
5760# else
5761# error "Port me!"
5762# endif
5763 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5764 return off;
5765}
5766
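/*
 * Note: both variants above amount to the check
 *      if ((uReg >> 32) != 0)  raise a breakpoint;
 * (uReg standing in for the value held by @a idxReg).  The AMD64 form rotates
 * the register so the high dword can be tested with a 32-bit TEST and then
 * rotates it back, leaving the register value unchanged.
 */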
5767
5768/**
5769 * Emits code that checks that the content of register @a idxReg is the same
5770 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5771 * instruction if that's not the case.
5772 *
5773 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5774 * Trashes EFLAGS on AMD64.
5775 */
5776DECL_HIDDEN_THROW(uint32_t)
5777iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5778{
5779#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5780 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5781 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5782 return off;
5783#endif
5784
5785# ifdef RT_ARCH_AMD64
5786 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5787
5788 /* cmp reg, [mem] */
5789 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5790 {
5791 if (idxReg >= 8)
5792 pbCodeBuf[off++] = X86_OP_REX_R;
5793 pbCodeBuf[off++] = 0x38;
5794 }
5795 else
5796 {
5797 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5798 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5799 else
5800 {
5801 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5802 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5803 else
5804 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5805 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5806 if (idxReg >= 8)
5807 pbCodeBuf[off++] = X86_OP_REX_R;
5808 }
5809 pbCodeBuf[off++] = 0x39;
5810 }
5811 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5812
5813 /* je/jz +1 */
5814 pbCodeBuf[off++] = 0x74;
5815 pbCodeBuf[off++] = 0x01;
5816
5817 /* int3 */
5818 pbCodeBuf[off++] = 0xcc;
5819
5820 /* For values smaller than the register size, we must check that the rest
5821 of the register is all zeros. */
5822 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5823 {
5824 /* test reg64, imm32 */
5825 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5826 pbCodeBuf[off++] = 0xf7;
5827 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5828 pbCodeBuf[off++] = 0;
5829 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5830 pbCodeBuf[off++] = 0xff;
5831 pbCodeBuf[off++] = 0xff;
5832
5833 /* je/jz +1 */
5834 pbCodeBuf[off++] = 0x74;
5835 pbCodeBuf[off++] = 0x01;
5836
5837 /* int3 */
5838 pbCodeBuf[off++] = 0xcc;
5839 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5840 }
5841 else
5842 {
5843 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5844 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5845 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5846 }
5847
5848# elif defined(RT_ARCH_ARM64)
5849 /* mov TMP0, [gstreg] */
5850 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5851
5852 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5853 /* sub tmp0, tmp0, idxReg */
5854 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5855 /* cbz tmp0, +1 */
5856 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5857 /* brk #0x1000+enmGstReg */
5858 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5859 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5860
5861# else
5862# error "Port me!"
5863# endif
5864 return off;
5865}
5866
5867
5868# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5869# ifdef RT_ARCH_AMD64
5870/**
5871 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
5872 */
5873DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
5874{
5875 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5876 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5877 if (idxSimdReg >= 8)
5878 pbCodeBuf[off++] = X86_OP_REX_R;
5879 pbCodeBuf[off++] = 0x0f;
5880 pbCodeBuf[off++] = 0x38;
5881 pbCodeBuf[off++] = 0x29;
5882 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
5883
5884 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5885 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5886 pbCodeBuf[off++] = X86_OP_REX_W
5887 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5888 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5889 pbCodeBuf[off++] = 0x0f;
5890 pbCodeBuf[off++] = 0x3a;
5891 pbCodeBuf[off++] = 0x16;
5892 pbCodeBuf[off++] = 0xeb;
5893 pbCodeBuf[off++] = 0x00;
5894
5895 /* cmp tmp0, 0xffffffffffffffff. */
5896 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5897 pbCodeBuf[off++] = 0x83;
5898 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5899 pbCodeBuf[off++] = 0xff;
5900
5901 /* je/jz +1 */
5902 pbCodeBuf[off++] = 0x74;
5903 pbCodeBuf[off++] = 0x01;
5904
5905 /* int3 */
5906 pbCodeBuf[off++] = 0xcc;
5907
5908 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
5909 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5910 pbCodeBuf[off++] = X86_OP_REX_W
5911 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5912 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5913 pbCodeBuf[off++] = 0x0f;
5914 pbCodeBuf[off++] = 0x3a;
5915 pbCodeBuf[off++] = 0x16;
5916 pbCodeBuf[off++] = 0xeb;
5917 pbCodeBuf[off++] = 0x01;
5918
5919 /* cmp tmp0, 0xffffffffffffffff. */
5920 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5921 pbCodeBuf[off++] = 0x83;
5922 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5923 pbCodeBuf[off++] = 0xff;
5924
5925 /* je/jz +1 */
5926 pbCodeBuf[off++] = 0x74;
5927 pbCodeBuf[off++] = 0x01;
5928
5929 /* int3 */
5930 pbCodeBuf[off++] = 0xcc;
5931
5932 return off;
5933}
5934# endif
5935
5936
5937/**
5938 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
5939 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5940 * instruction if that's not the case.
5941 *
5942 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5943 * Trashes EFLAGS on AMD64.
5944 */
5945DECL_HIDDEN_THROW(uint32_t)
5946iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5947 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5948{
5949 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5950 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5951 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5952 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5953 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5954 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5955 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5956 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5957 return off;
5958
5959# ifdef RT_ARCH_AMD64
5960 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5961 {
5962 /* movdqa vectmp0, idxSimdReg */
5963 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5964
5965 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5966
5967 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5968 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5969 }
5970
5971 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5972 {
5973 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
5974 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
5975
5976 /* vextracti128 vectmp0, idxSimdReg, 1 */
5977 pbCodeBuf[off++] = X86_OP_VEX3;
5978 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
5979 | X86_OP_VEX3_BYTE1_X
5980 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
5981 | 0x03; /* Opcode map */
5982 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
5983 pbCodeBuf[off++] = 0x39;
5984 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
5985 pbCodeBuf[off++] = 0x01;
5986
5987 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5988 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5989 }
5990# elif defined(RT_ARCH_ARM64)
5991 /* mov vectmp0, [gstreg] */
5992 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
5993
5994 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5995 {
5996 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5997 /* eor vectmp0, vectmp0, idxSimdReg */
5998 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5999 /* uaddlv vectmp0, vectmp0.16B */
6000 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6001 /* umov tmp0, vectmp0.H[0] */
6002 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6003 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6004 /* cbz tmp0, +1 */
6005 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6006 /* brk #0x1000+enmGstReg */
6007 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6008 }
6009
6010 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6011 {
6012 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6013 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6014 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6015 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6016 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6017 /* umov tmp0, (vectmp0 + 1).H[0] */
6018 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6019 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6020 /* cbz tmp0, +1 */
6021 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6022 /* brk #0x1000+enmGstReg */
6023 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6024 }
6025
6026# else
6027# error "Port me!"
6028# endif
6029
6030 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6031 return off;
6032}
6033# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6034
6035
6036/**
6037 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
6038 * important bits.
6039 *
6040 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6041 * Trashes EFLAGS on AMD64.
6042 */
6043DECL_HIDDEN_THROW(uint32_t)
6044iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6045{
6046 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6047 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6048 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6049 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6050
6051#ifdef RT_ARCH_AMD64
6052 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6053
6054 /* je/jz +1 */
6055 pbCodeBuf[off++] = 0x74;
6056 pbCodeBuf[off++] = 0x01;
6057
6058 /* int3 */
6059 pbCodeBuf[off++] = 0xcc;
6060
6061# elif defined(RT_ARCH_ARM64)
6062 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6063
6064 /* b.eq +1 */
6065 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6066 /* brk #0x2000 */
6067 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6068
6069# else
6070# error "Port me!"
6071# endif
6072 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6073
6074 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6075 return off;
6076}
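
/*
 * Note: the sequence above is the emitted form of
 *      if (   (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
 *          != (fExec & IEMTB_F_KEY_MASK))
 *          raise a breakpoint;
 * i.e. it traps when the execution mode the TB was compiled for no longer
 * matches the key bits of the current IEMCPU::fExec value.
 */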
6077
6078#endif /* VBOX_STRICT */
6079
6080
6081#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6082/**
6083 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6084 */
6085DECL_HIDDEN_THROW(uint32_t)
6086iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6087{
6088 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6089
6090 fEflNeeded &= X86_EFL_STATUS_BITS;
6091 if (fEflNeeded)
6092 {
6093# ifdef RT_ARCH_AMD64
6094 /* test dword [pVCpu + offVCpu], imm32 */
6095 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6096 if (fEflNeeded <= 0xff)
6097 {
6098 pCodeBuf[off++] = 0xf6;
6099 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6100 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6101 }
6102 else
6103 {
6104 pCodeBuf[off++] = 0xf7;
6105 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6106 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6107 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6108 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6109 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6110 }
6111 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6112
6113# else
6114 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6115 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6116 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6117# ifdef RT_ARCH_ARM64
6118 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6119 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6120# else
6121# error "Port me!"
6122# endif
6123 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6124# endif
6125 }
6126 return off;
6127}
6128#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6129
6130
6131/**
6132 * Emits code for checking the return code of a call and rcPassUp, returning
6133 * from the code if either is non-zero.
6134 */
6135DECL_HIDDEN_THROW(uint32_t)
6136iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6137{
6138#ifdef RT_ARCH_AMD64
6139 /*
6140 * AMD64: eax = call status code.
6141 */
6142
6143 /* edx = rcPassUp */
6144 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6145# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6146 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6147# endif
6148
6149 /* edx = eax | rcPassUp */
6150 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6151 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6152 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6153 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6154
6155 /* Jump to non-zero status return path. */
6156 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6157
6158 /* done. */
6159
6160#elif RT_ARCH_ARM64
6161 /*
6162 * ARM64: w0 = call status code.
6163 */
6164# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6165 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6166# endif
6167 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6168
6169 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6170
6171 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6172
6173 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6174 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6175 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6176
6177#else
6178# error "port me"
6179#endif
6180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6181 RT_NOREF_PV(idxInstr);
6182 return off;
6183}
6184
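/*
 * Note: the single OR above suffices because VINF_SUCCESS is zero, so the
 * emitted code corresponds to
 *      if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
 *          goto NonZeroRetOrPassUp;
 * with rcCall standing in for the status code returned by the preceding call;
 * one flag test thus covers both a failing call and a pending rcPassUp.
 */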
6185
6186/**
6187 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6188 * raising a \#GP(0) if it isn't.
6189 *
6190 * @returns New code buffer offset; throws VBox status code on error.
6191 * @param pReNative The native recompile state.
6192 * @param off The code buffer offset.
6193 * @param idxAddrReg The host register with the address to check.
6194 * @param idxInstr The current instruction.
6195 */
6196DECL_HIDDEN_THROW(uint32_t)
6197iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6198{
6199 /*
6200 * Make sure we don't have any outstanding guest register writes as we may
6201 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6202 */
6203 off = iemNativeRegFlushPendingWrites(pReNative, off);
6204
6205#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6206 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6207#else
6208 RT_NOREF(idxInstr);
6209#endif
6210
6211#ifdef RT_ARCH_AMD64
6212 /*
6213 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6214 * return raisexcpt();
6215 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6216 */
6217 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6218
6219 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6220 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6221 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6222 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6223 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeExitReason_RaiseGp0);
6224
6225 iemNativeRegFreeTmp(pReNative, iTmpReg);
6226
6227#elif defined(RT_ARCH_ARM64)
6228 /*
6229 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6230 * return raisexcpt();
6231 * ----
6232 * mov x1, 0x800000000000
6233 * add x1, x0, x1
6234 * cmp xzr, x1, lsr 48
6235 * b.ne .Lraisexcpt
6236 */
6237 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6238
6239 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6240 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6241 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6242 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeExitReason_RaiseGp0);
6243
6244 iemNativeRegFreeTmp(pReNative, iTmpReg);
6245
6246#else
6247# error "Port me"
6248#endif
6249 return off;
6250}
6251
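/*
 * Note (illustration): assuming the usual 48-bit canonical rule where bits
 * 63:47 must all equal bit 47, the AMD64 variant above is the emitted form of
 *      uint32_t const uHi = (uint32_t)(uAddr >> 32);   // uAddr = idxAddrReg value
 *      bool const fCanonical = ((uHi + UINT32_C(0x8000)) >> 16) == 0;
 * Canonical low-half addresses have uHi in [0x00000000, 0x00007fff] and
 * canonical high-half ones have uHi in [0xffff8000, 0xffffffff]; adding 0x8000
 * maps both ranges (the latter by wrapping) into [0x0000, 0xffff], so anything
 * surviving the >> 16 is non-canonical.  The ARM64 variant achieves the same
 * with a single 64-bit add of bit 47 followed by a shift by 48.
 */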
6252
6253/**
6254 * Emits code to check that the content of @a idxAddrReg is within the limit
6255 * of CS, raising a \#GP(0) if it isn't.
6256 *
6257 * @returns New code buffer offset; throws VBox status code on error.
6258 * @param pReNative The native recompile state.
6259 * @param off The code buffer offset.
6260 * @param idxAddrReg The host register (32-bit) with the address to
6261 * check.
6262 * @param idxInstr The current instruction.
6263 */
6264DECL_HIDDEN_THROW(uint32_t)
6265iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6266 uint8_t idxAddrReg, uint8_t idxInstr)
6267{
6268 /*
6269 * Make sure we don't have any outstanding guest register writes as we may
6270 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6271 */
6272 off = iemNativeRegFlushPendingWrites(pReNative, off);
6273
6274#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6275 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6276#else
6277 RT_NOREF(idxInstr);
6278#endif
6279
6280 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6281 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6282 kIemNativeGstRegUse_ReadOnly);
6283
6284 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6285 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6286
6287 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6288 return off;
6289}
6290
6291
6292/**
6293 * Emits a call to a CImpl function or something similar.
6294 */
6295DECL_HIDDEN_THROW(uint32_t)
6296iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6297 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6298{
6299 /* Writeback everything. */
6300 off = iemNativeRegFlushPendingWrites(pReNative, off);
6301
6302 /*
6303 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6304 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6305 */
6306 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6307 fGstShwFlush
6308 | RT_BIT_64(kIemNativeGstReg_Pc)
6309 | RT_BIT_64(kIemNativeGstReg_EFlags));
6310 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6311
6312 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6313
6314 /*
6315 * Load the parameters.
6316 */
6317#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6318 /* Special handling for the hidden VBOXSTRICTRC return pointer. */
6319 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6320 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6321 if (cAddParams > 0)
6322 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6323 if (cAddParams > 1)
6324 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6325 if (cAddParams > 2)
6326 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6327 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6328
6329#else
6330 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6331 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6332 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6333 if (cAddParams > 0)
6334 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6335 if (cAddParams > 1)
6336 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6337 if (cAddParams > 2)
6338# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6339 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6340# else
6341 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6342# endif
6343#endif
6344
6345 /*
6346 * Make the call.
6347 */
6348 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6349
6350#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6351 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6352#endif
6353
6354 /*
6355 * Check the status code.
6356 */
6357 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6358}
6359
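/*
 * Note: in strict VBOXSTRICTRC builds on Windows the return type is a class,
 * which the MSC AMD64 ABI returns via a hidden pointer passed as the first
 * argument; that is why the parameter registers are shifted by one in the
 * RT_OS_WINDOWS + VBOXSTRICTRC_STRICT_ENABLED path above and the status code
 * is read back from the shadow argument slot after the call.
 */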
6360
6361/**
6362 * Emits a call to a threaded worker function.
6363 */
6364DECL_HIDDEN_THROW(uint32_t)
6365iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6366{
6367 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6368
6369 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6370 off = iemNativeRegFlushPendingWrites(pReNative, off);
6371
6372 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6373 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6374
6375#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6376 /* The threaded function may throw / long jmp, so set current instruction
6377 number if we're counting. */
6378 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6379#endif
6380
6381 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6382
6383#ifdef RT_ARCH_AMD64
6384 /* Load the parameters and emit the call. */
6385# ifdef RT_OS_WINDOWS
6386# ifndef VBOXSTRICTRC_STRICT_ENABLED
6387 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6388 if (cParams > 0)
6389 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6390 if (cParams > 1)
6391 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6392 if (cParams > 2)
6393 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6394# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6395 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6396 if (cParams > 0)
6397 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6398 if (cParams > 1)
6399 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6400 if (cParams > 2)
6401 {
6402 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6403 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6404 }
6405 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6406# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6407# else
6408 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6409 if (cParams > 0)
6410 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6411 if (cParams > 1)
6412 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6413 if (cParams > 2)
6414 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6415# endif
6416
6417 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6418
6419# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6420 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6421# endif
6422
6423#elif RT_ARCH_ARM64
6424 /*
6425 * ARM64:
6426 */
6427 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6428 if (cParams > 0)
6429 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6430 if (cParams > 1)
6431 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6432 if (cParams > 2)
6433 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6434
6435 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6436
6437#else
6438# error "port me"
6439#endif
6440
6441 /*
6442 * Check the status code.
6443 */
6444 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6445
6446 return off;
6447}
6448
6449#ifdef VBOX_WITH_STATISTICS
6450
6451/**
6452 * Emits code to update the thread call statistics.
6453 */
6454DECL_INLINE_THROW(uint32_t)
6455iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6456{
6457 /*
6458 * Update threaded function stats.
6459 */
6460 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6461 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6462# if defined(RT_ARCH_ARM64)
6463 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6464 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6465 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6466 iemNativeRegFreeTmp(pReNative, idxTmp1);
6467 iemNativeRegFreeTmp(pReNative, idxTmp2);
6468# else
6469 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6470# endif
6471 return off;
6472}
6473
6474
6475/**
6476 * Emits code to update the TB exit reason statistics.
6477 */
6478DECL_INLINE_THROW(uint32_t)
6479iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6480{
6481 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6482 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6483 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6484 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6485 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6486
6487 return off;
6488}
6489
6490#endif /* VBOX_WITH_STATISTICS */
6491
6492/**
6493 * Worker for iemNativeEmitReturnBreakViaLookup.
6494 */
6495static uint32_t iemNativeEmitViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabelReturnBreak,
6496 IEMNATIVELABELTYPE enmLabel, uintptr_t pfnHelper)
6497{
6498 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
6499 if (idxLabel != UINT32_MAX)
6500 {
6501 iemNativeLabelDefine(pReNative, idxLabel, off);
6502
6503 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6504 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6505
6506 /* Jump to ReturnBreak if the return register is NULL. */
6507 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6508 true /*f64Bit*/, idxLabelReturnBreak);
6509
6510 /* Okay, continue executing the next TB. */
6511 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6512 }
6513 return off;
6514}
6515
6516/**
6517 * Emits the code at the ReturnBreakViaLookup, ReturnBreakViaLookupWithIrq,
6518 * ReturnBreakViaLookupWithTlb and ReturnBreakViaLookupWithTlbAndIrq labels
6519 * (returns VINF_IEM_REEXEC_BREAK or jumps to the next TB).
6520 */
6521static uint32_t iemNativeEmitReturnBreakViaLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6522{
6523 uint32_t const idxLabelReturnBreak = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
6524
6525 /*
6526 * The lookup table index is in IEMNATIVE_CALL_ARG1_GREG for all.
6527 * The GCPhysPc is in IEMNATIVE_CALL_ARG2_GREG for ReturnBreakViaLookupWithPc.
6528 */
6529 off = iemNativeEmitViaLookupDoOne(pReNative, off, idxLabelReturnBreak, kIemNativeLabelType_ReturnBreakViaLookup,
6530 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/>);
6531 off = iemNativeEmitViaLookupDoOne(pReNative, off, idxLabelReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
6532 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/>);
6533 off = iemNativeEmitViaLookupDoOne(pReNative, off, idxLabelReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
6534 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/>);
6535 off = iemNativeEmitViaLookupDoOne(pReNative, off, idxLabelReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
6536 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/>);
6537 return off;
6538}
6539
6540
6541/**
6542 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6543 */
6544static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6545{
6546 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6547 if (idxLabel != UINT32_MAX)
6548 {
6549 iemNativeLabelDefine(pReNative, idxLabel, off);
6550 /* set the return status */
6551 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6552 /* jump back to the return sequence. */
6553 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6554 }
6555 return off;
6556}
6557
6558
6559/**
6560 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6561 */
6562static uint32_t iemNativeEmitReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6563{
6564 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreakFF);
6565 if (idxLabel != UINT32_MAX)
6566 {
6567 iemNativeLabelDefine(pReNative, idxLabel, off);
6568 /* set the return status */
6569 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6570 /* jump back to the return sequence. */
6571 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6572 }
6573 return off;
6574}
6575
6576
6577/**
6578 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6579 */
6580static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6581{
6582 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6583 if (idxLabel != UINT32_MAX)
6584 {
6585 iemNativeLabelDefine(pReNative, idxLabel, off);
6586 /* set the return status */
6587 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6588 /* jump back to the return sequence. */
6589 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6590 }
6591 return off;
6592}
6593
6594
6595/**
6596 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6597 */
6598static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6599{
6600 /*
6601 * Generate the rc + rcPassUp fiddling code if needed.
6602 */
6603 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6604 if (idxLabel != UINT32_MAX)
6605 {
6606 iemNativeLabelDefine(pReNative, idxLabel, off);
6607
6608 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6609#ifdef RT_ARCH_AMD64
6610# ifdef RT_OS_WINDOWS
6611# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6612 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6613# endif
6614 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6615 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6616# else
6617 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6618 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6619# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6620 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6621# endif
6622# endif
6623# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6624 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6625# endif
6626
6627#else
6628 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6629 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6630 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6631#endif
6632
6633 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6634 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6635 }
6636 return off;
6637}
6638
6639
6640/**
6641 * Emits a standard epilog.
6642 */
6643static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6644{
6645 *pidxReturnLabel = UINT32_MAX;
6646
6647 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6648 off = iemNativeRegFlushPendingWrites(pReNative, off);
6649
6650 /*
6651 * Successful return, so clear the return register (eax, w0).
6652 */
6653 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6654 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6655
6656 /*
6657 * Define label for common return point.
6658 */
6659 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6660 *pidxReturnLabel = idxReturn;
6661
6662 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6663
6664#ifdef IEMNATIVE_WITH_RECOMPILER_EPILOGUE_SINGLETON
6665 //off = iemNativeEmitBrk(pReNative, off, 0x7777);
6666 off = iemNativeEmitJmpImm(pReNative, off, (uintptr_t)iemNativeTbEpilog);
6667#else
6668 /*
6669 * Restore registers and return.
6670 */
6671# ifdef RT_ARCH_AMD64
6672 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6673
6674 /* Reposition rsp at the r15 restore point. */
6675 pbCodeBuf[off++] = X86_OP_REX_W;
6676 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6677 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6678 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6679
6680 /* Pop non-volatile registers and return */
6681 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6682 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6683 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6684 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6685 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6686 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6687 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6688 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6689# ifdef RT_OS_WINDOWS
6690 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6691 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6692# endif
6693 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6694 pbCodeBuf[off++] = 0xc9; /* leave */
6695 pbCodeBuf[off++] = 0xc3; /* ret */
6696 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6697
6698# elif RT_ARCH_ARM64
6699 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6700
6701 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
6702 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6703 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6704 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6705 IEMNATIVE_FRAME_VAR_SIZE / 8);
6706 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6707 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6708 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6709 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6710 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6711 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6712 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6713 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6714 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6715 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6716 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6717 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6718
6719 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6720 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6721 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6722 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6723
6724 /* retab / ret */
6725# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6726 if (1)
6727 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6728 else
6729# endif
6730 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6731
6732# else
6733# error "port me"
6734# endif
6735 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6736#endif /* IEMNATIVE_WITH_RECOMPILER_EPILOGUE_SINGLETON */
6737
6738 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6739 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6740
6741 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6742}
6743
6744
6745#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
6746/**
6747 * Emits a standard prolog.
6748 */
6749static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6750{
6751#ifdef RT_ARCH_AMD64
6752 /*
6753 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6754 * reserving 64 bytes for stack variables plus 4 non-register argument
6755 * slots. Fixed register assignment: xBX = pVCpu;
6756 *
6757 * Since we always do the same register spilling, we can use the same
6758 * unwind description for all the code.
6759 */
6760 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6761 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6762 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6763 pbCodeBuf[off++] = 0x8b;
6764 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6765 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6766 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6767# ifdef RT_OS_WINDOWS
6768 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6769 pbCodeBuf[off++] = 0x8b;
6770 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6771 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6772 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6773# else
6774 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6775 pbCodeBuf[off++] = 0x8b;
6776 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6777# endif
6778 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6779 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6780 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6781 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6782 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6783 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6784 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6785 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6786
6787# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6788 /* Save the frame pointer. */
6789 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6790# endif
6791
6792 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6793 X86_GREG_xSP,
6794 IEMNATIVE_FRAME_ALIGN_SIZE
6795 + IEMNATIVE_FRAME_VAR_SIZE
6796 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6797 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6798 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6799 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6800 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6801
6802#elif RT_ARCH_ARM64
6803 /*
6804 * We set up a stack frame exactly like on x86, only we have to push the
6805 * return address ourselves here. We save all non-volatile registers.
6806 */
6807 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6808
6809# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further as we've been unable
6810 * to figure out where the BRK following AUTHB*+XPACB* stuff comes from in libunwind. It's
6811 * definitely the dwarf stepping code, but until it's found it's very tedious to figure out whether it's
6812 * in any way conditional, so just emit this instruction now and hope for the best... */
6813 /* pacibsp */
6814 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6815# endif
6816
6817 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6818 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6819 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6820 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6821 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6822 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6823 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6824 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6825 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6826 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6827 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6828 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6829 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6830 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6831 /* Save the BP and LR (ret address) registers at the top of the frame. */
6832 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6833 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6834 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
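    /* So the register save area holds exactly twelve 64-bit registers
       (x19 thru x28 + BP + LR), i.e. 96 bytes. */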
6835 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6836 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6837 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6838
6839 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6840 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6841
6842 /* mov r28, r0 */
6843 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6844 /* mov r27, r1 */
6845 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6846
6847# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6848 /* Save the frame pointer. */
6849 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6850 ARMV8_A64_REG_X2);
6851# endif
6852
6853#else
6854# error "port me"
6855#endif
6856 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6857 return off;
6858}
6859#endif
6860
6861
6862/*********************************************************************************************************************************
6863* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6864*********************************************************************************************************************************/
6865
6866/**
6867 * Internal work that allocates a variable with kind set to
6868 * kIemNativeVarKind_Invalid and no current stack allocation.
6869 *
6870 * The kind will either be set by the caller or later when the variable is first
6871 * assigned a value.
6872 *
6873 * @returns Unpacked index.
6874 * @internal
6875 */
6876static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6877{
6878 Assert(cbType > 0 && cbType <= 64);
6879 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6880 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6881 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6882 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6883 pReNative->Core.aVars[idxVar].cbVar = cbType;
6884 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6885 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6886 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6887 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6888 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6889 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6890 pReNative->Core.aVars[idxVar].u.uValue = 0;
6891#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6892 pReNative->Core.aVars[idxVar].fSimdReg = false;
6893#endif
6894 return idxVar;
6895}
6896
6897
6898/**
6899 * Internal work that allocates an argument variable w/o setting enmKind.
6900 *
6901 * @returns Unpacked index.
6902 * @internal
6903 */
6904static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6905{
6906 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6907 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6908 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6909
6910 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6911 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6912 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6913 return idxVar;
6914}
6915
6916
6917/**
6918 * Gets the stack slot for a stack variable, allocating one if necessary.
6919 *
6920 * Calling this function implies that the stack slot will contain a valid
6921 * variable value. The caller deals with any register currently assigned to the
6922 * variable, typically by spilling it into the stack slot.
6923 *
6924 * @returns The stack slot number.
6925 * @param pReNative The recompiler state.
6926 * @param idxVar The variable.
6927 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6928 */
6929DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6930{
6931 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6932 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6933 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
6934
6935 /* Already got a slot? */
6936 uint8_t const idxStackSlot = pVar->idxStackSlot;
6937 if (idxStackSlot != UINT8_MAX)
6938 {
6939 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6940 return idxStackSlot;
6941 }
6942
6943 /*
6944 * A single slot is easy to allocate.
6945 * Allocate them from the top end, closest to BP, to reduce the displacement.
6946 */
6947 if (pVar->cbVar <= sizeof(uint64_t))
6948 {
6949 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6950 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6951 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6952 pVar->idxStackSlot = (uint8_t)iSlot;
6953 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
6954 return (uint8_t)iSlot;
6955 }
6956
6957 /*
6958 * We need more than one stack slot.
6959 *
6960 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6961 */
6962 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6963 Assert(pVar->cbVar <= 64);
6964 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
6965 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
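    /* Worked example: cbVar == 32 gives fBitAlignMask == 3 and fBitAllocMask == 0xf,
       i.e. four consecutive slots starting at a slot index that is a multiple of four
       (32-byte aligned). */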
6966 uint32_t bmStack = pReNative->Core.bmStack;
6967 while (bmStack != UINT32_MAX)
6968 {
6969 unsigned iSlot = ASMBitLastSetU32(~bmStack);
6970 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6971 iSlot = (iSlot - 1) & ~fBitAlignMask;
6972 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
6973 {
6974 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6975 pVar->idxStackSlot = (uint8_t)iSlot;
6976 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
6977 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
6978 return (uint8_t)iSlot;
6979 }
6980
6981 bmStack |= (fBitAllocMask << iSlot);
6982 }
6983 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6984}
6985
6986
6987/**
6988 * Changes the variable to a stack variable.
6989 *
6990 * Currently this is only possible the first time the variable is used;
6991 * switching later could be implemented but isn't done.
6992 *
6993 * @param pReNative The recompiler state.
6994 * @param idxVar The variable.
6995 * @throws VERR_IEM_VAR_IPE_2
6996 */
6997DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6998{
6999 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7000 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7001 if (pVar->enmKind != kIemNativeVarKind_Stack)
7002 {
7003 /* We could in theory transition from immediate to stack as well, but it
7004 would involve the caller doing work storing the value on the stack. So,
7005 till that's required we only allow transition from invalid. */
7006 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7007 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7008 pVar->enmKind = kIemNativeVarKind_Stack;
7009
7010 /* Note! We don't allocate a stack slot here, that's only done when a
7011 slot is actually needed to hold a variable value. */
7012 }
7013}
7014
7015
7016/**
7017 * Sets the variable to a constant (immediate) value.
7018 *
7019 * This does not require stack storage as we know the value and can always
7020 * reload it, unless of course it's referenced.
7021 *
7022 * @param pReNative The recompiler state.
7023 * @param idxVar The variable.
7024 * @param uValue The immediate value.
7025 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7026 */
7027DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7028{
7029 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7030 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7031 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7032 {
7033 /* Only simple transitions for now. */
7034 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7035 pVar->enmKind = kIemNativeVarKind_Immediate;
7036 }
7037 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7038
7039 pVar->u.uValue = uValue;
7040 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7041 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7042 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7043}
7044
7045
7046/**
7047 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7048 *
7049 * This does not require stack storage as we know the value and can always
7050 * reload it. Loading is postponed till needed.
7051 *
7052 * @param pReNative The recompiler state.
7053 * @param idxVar The variable. Unpacked.
7054 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7055 *
7056 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7057 * @internal
7058 */
7059static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7060{
7061 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7062 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7063
7064 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7065 {
7066 /* Only simple transitions for now. */
7067 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7068 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7069 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7070 }
7071 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7072
7073 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7074
7075 /* Update the other variable, ensure it's a stack variable. */
7076 /** @todo handle variables with const values... that'll go boom now. */
7077 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7078 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7079}
7080
7081
7082/**
7083 * Sets the variable to a reference (pointer) to a guest register reference.
7084 *
7085 * This does not require stack storage as we know the value and can always
7086 * reload it. Loading is postponed till needed.
7087 *
7088 * @param pReNative The recompiler state.
7089 * @param idxVar The variable.
7090 * @param enmRegClass The class of guest registers to reference.
7091 * @param idxReg The register within @a enmRegClass to reference.
7092 *
7093 * @throws VERR_IEM_VAR_IPE_2
7094 */
7095DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7096 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7097{
7098 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7099 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7100
7101 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7102 {
7103 /* Only simple transitions for now. */
7104 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7105 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7106 }
7107 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7108
7109 pVar->u.GstRegRef.enmClass = enmRegClass;
7110 pVar->u.GstRegRef.idx = idxReg;
7111}
7112
7113
7114DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7115{
7116 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7117}
7118
7119
7120DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7121{
7122 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7123
7124 /* Since we're using a generic uint64_t value type, we must truncate it if
7125 the variable is smaller, otherwise we may end up with too large a value when
7126 scaling up an imm8 w/ sign-extension.
7127
7128 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7129 in the bios, bx=1) when running on arm, because clang expects 16-bit
7130 register parameters to have bits 16 and up set to zero. Instead of
7131 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7132 CF value in the result. */
7133 switch (cbType)
7134 {
7135 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7136 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7137 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7138 }
7139 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7140 return idxVar;
7141}
7142
7143
7144DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7145{
7146 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7147 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7148 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7149 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7150 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7151 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7152
7153 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7154 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7155 return idxArgVar;
7156}
7157
7158
7159DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7160{
7161 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7162 /* Don't set to stack now, leave that to the first use as for instance
7163 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7164 return idxVar;
7165}
7166
7167
7168DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7169{
7170 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7171
7172 /* Since we're using a generic uint64_t value type, we must truncate it if
7173 the variable is smaller, otherwise we may end up with too large a value when
7174 scaling up an imm8 w/ sign-extension. */
7175 switch (cbType)
7176 {
7177 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7178 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7179 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7180 }
7181 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7182 return idxVar;
7183}
7184
7185
7186DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
7187{
7188 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7189 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7190
7191 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
7192 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7193
7194 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7195
7196 /* Truncate the value to this variable's size. */
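    /* Note: a 32-bit AND (which the Gpr32 emitters below produce) also clears bits 63:32 of
       the destination on both AMD64 and ARM64, so the uint32_t case needs no 64-bit masking. */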
7197 switch (cbType)
7198 {
7199 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7200 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7201 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7202 }
7203
7204 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7205 iemNativeVarRegisterRelease(pReNative, idxVar);
7206 return idxVar;
7207}
7208
7209
7210/**
7211 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7212 * fixed till we call iemNativeVarRegisterRelease.
7213 *
7214 * @returns The host register number.
7215 * @param pReNative The recompiler state.
7216 * @param idxVar The variable.
7217 * @param poff Pointer to the instruction buffer offset.
7218 * In case a register needs to be freed up or the value
7219 * loaded off the stack.
7220 * @param fInitialized Set if the variable must already have been initialized.
7221 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7222 * the case.
7223 * @param idxRegPref Preferred register number or UINT8_MAX.
7224 */
7225DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7226 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7227{
7228 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7229 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7230 Assert(pVar->cbVar <= 8);
7231 Assert(!pVar->fRegAcquired);
7232
7233 uint8_t idxReg = pVar->idxReg;
7234 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7235 {
7236 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7237 && pVar->enmKind < kIemNativeVarKind_End);
7238 pVar->fRegAcquired = true;
7239 return idxReg;
7240 }
7241
7242 /*
7243 * If the kind of variable has not yet been set, default to 'stack'.
7244 */
7245 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7246 && pVar->enmKind < kIemNativeVarKind_End);
7247 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7248 iemNativeVarSetKindToStack(pReNative, idxVar);
7249
7250 /*
7251 * We have to allocate a register for the variable, even if it's a stack one,
7252 * as we don't know if there are modifications being made to it before it's
7253 * finalized (todo: analyze and insert hints about that?).
7254 *
7255 * If we can, we try to get the correct register for argument variables. This
7256 * assumes that most argument variables are fetched as close as possible
7257 * to the actual call, so that there aren't any interfering hidden calls
7258 * (memory accesses, etc.) in between.
7259 *
7260 * If we cannot, or it's a regular (non-argument) variable, we make sure no
7261 * argument registers that will be used by this MC block are allocated here,
7262 * and we always prefer non-volatile registers to avoid having to spill
7263 * stuff for internal calls.
7264 */
7265 /** @todo Detect too early argument value fetches and warn about hidden
7266 * calls causing less optimal code to be generated in the python script. */
7267
7268 uint8_t const uArgNo = pVar->uArgNo;
7269 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7270 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7271 {
7272 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7273
7274#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7275 /* Writeback any dirty shadow registers we are about to unshadow. */
7276 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7277#endif
7278
7279 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7280 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7281 }
7282 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7283 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7284 {
7285 /** @todo there must be a better way for this and boot cArgsX? */
7286 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
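        /* Candidates: free registers that aren't fixed, don't currently shadow any guest
           register, and won't be needed as call argument registers by this MC block. */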
7287 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7288 & ~pReNative->Core.bmHstRegsWithGstShadow
7289 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7290 & fNotArgsMask;
7291 if (fRegs)
7292 {
7293 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7294 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7295 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7296 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7297 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7298 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7299 }
7300 else
7301 {
7302 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7303 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7304 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7305 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7306 }
7307 }
7308 else
7309 {
7310 idxReg = idxRegPref;
7311 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7312 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7313 }
7314 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7315 pVar->idxReg = idxReg;
7316
7317#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7318 pVar->fSimdReg = false;
7319#endif
7320
7321 /*
7322 * Load it off the stack if we've got a stack slot.
7323 */
7324 uint8_t const idxStackSlot = pVar->idxStackSlot;
7325 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7326 {
7327 Assert(fInitialized);
7328 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7329 switch (pVar->cbVar)
7330 {
7331 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7332 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7333 case 3: AssertFailed(); RT_FALL_THRU();
7334 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7335 default: AssertFailed(); RT_FALL_THRU();
7336 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7337 }
7338 }
7339 else
7340 {
7341 Assert(idxStackSlot == UINT8_MAX);
7342 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7343 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7344 else
7345 {
7346 /*
7347 * Convert from immediate to stack/register. This is currently only
7348 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7349 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7350 */
7351 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7352 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7353 idxVar, idxReg, pVar->u.uValue));
7354 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7355 pVar->enmKind = kIemNativeVarKind_Stack;
7356 }
7357 }
7358
7359 pVar->fRegAcquired = true;
7360 return idxReg;
7361}
7362
7363
7364#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7365/**
7366 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7367 * fixed till we call iemNativeVarRegisterRelease.
7368 *
7369 * @returns The host register number.
7370 * @param pReNative The recompiler state.
7371 * @param idxVar The variable.
7372 * @param poff Pointer to the instruction buffer offset.
7373 * In case a register needs to be freed up or the value
7374 * loaded off the stack.
7375 * @param fInitialized Set if the variable must already have been initialized.
7376 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7377 * the case.
7378 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7379 */
7380DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7381 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7382{
7383 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7384 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7385 Assert( pVar->cbVar == sizeof(RTUINT128U)
7386 || pVar->cbVar == sizeof(RTUINT256U));
7387 Assert(!pVar->fRegAcquired);
7388
7389 uint8_t idxReg = pVar->idxReg;
7390 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7391 {
7392 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7393 && pVar->enmKind < kIemNativeVarKind_End);
7394 pVar->fRegAcquired = true;
7395 return idxReg;
7396 }
7397
7398 /*
7399 * If the kind of variable has not yet been set, default to 'stack'.
7400 */
7401 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7402 && pVar->enmKind < kIemNativeVarKind_End);
7403 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7404 iemNativeVarSetKindToStack(pReNative, idxVar);
7405
7406 /*
7407 * We have to allocate a register for the variable, even if it's a stack one,
7408 * as we don't know if there are modifications being made to it before it's
7409 * finalized (todo: analyze and insert hints about that?).
7410 *
7411 * If we can, we try to get the correct register for argument variables. This
7412 * assumes that most argument variables are fetched as close as possible
7413 * to the actual call, so that there aren't any interfering hidden calls
7414 * (memory accesses, etc.) in between.
7415 *
7416 * If we cannot, or it's a regular (non-argument) variable, we make sure no
7417 * argument registers that will be used by this MC block are allocated here,
7418 * and we always prefer non-volatile registers to avoid having to spill
7419 * stuff for internal calls.
7420 */
7421 /** @todo Detect too early argument value fetches and warn about hidden
7422 * calls causing less optimal code to be generated in the python script. */
7423
7424 uint8_t const uArgNo = pVar->uArgNo;
7425 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7426
7427 /* SIMD is a bit simpler for now because there is no support for arguments. */
7428 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7429 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7430 {
7431 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7432 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7433 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7434 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7435 & fNotArgsMask;
7436 if (fRegs)
7437 {
7438 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7439 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7440 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7441 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7442 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7443 }
7444 else
7445 {
7446 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7447 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7448 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7449 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7450 }
7451 }
7452 else
7453 {
7454 idxReg = idxRegPref;
7455 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7456 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7457 }
7458 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7459
7460 pVar->fSimdReg = true;
7461 pVar->idxReg = idxReg;
7462
7463 /*
7464 * Load it off the stack if we've got a stack slot.
7465 */
7466 uint8_t const idxStackSlot = pVar->idxStackSlot;
7467 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7468 {
7469 Assert(fInitialized);
7470 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7471 switch (pVar->cbVar)
7472 {
7473 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7474 default: AssertFailed(); RT_FALL_THRU();
7475 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7476 }
7477 }
7478 else
7479 {
7480 Assert(idxStackSlot == UINT8_MAX);
7481 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7482 }
7483 pVar->fRegAcquired = true;
7484 return idxReg;
7485}
7486#endif
7487
7488
7489/**
7490 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7491 * guest register.
7492 *
7493 * This function makes sure there is a register for it and sets it to be the
7494 * current shadow copy of @a enmGstReg.
7495 *
7496 * @returns The host register number.
7497 * @param pReNative The recompiler state.
7498 * @param idxVar The variable.
7499 * @param enmGstReg The guest register this variable will be written to
7500 * after this call.
7501 * @param poff Pointer to the instruction buffer offset.
7502 * In case a register needs to be freed up or if the
7503 * variable content needs to be loaded off the stack.
7504 *
7505 * @note We DO NOT expect @a idxVar to be an argument variable,
7506 * because this function is only used in the commit stage of an
7507 * instruction.
7508 */
7509DECL_HIDDEN_THROW(uint8_t)
7510iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7511{
7512 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7513 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7514 Assert(!pVar->fRegAcquired);
7515 AssertMsgStmt( pVar->cbVar <= 8
7516 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7517 || pVar->enmKind == kIemNativeVarKind_Stack),
7518 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7519 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7520 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7521
7522 /*
7523 * This shouldn't ever be used for arguments, unless it's in a weird else
7524 * branch that doesn't do any calling and even then it's questionable.
7525 *
7526 * However, in case someone writes crazy wrong MC code and does register
7527 * updates before making calls, just use the regular register allocator to
7528 * ensure we get a register suitable for the intended argument number.
7529 */
7530 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7531
7532 /*
7533 * If there is already a register for the variable, we transfer/set the
7534 * guest shadow copy assignment to it.
7535 */
7536 uint8_t idxReg = pVar->idxReg;
7537 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7538 {
7539#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7540 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7541 {
7542# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7543 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7544 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7545# endif
7546 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7547 }
7548#endif
7549
7550 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7551 {
7552 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7553 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7554 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7555 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7556 }
7557 else
7558 {
7559 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7560 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7561 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7562 }
7563 /** @todo figure this one out. We need some way of making sure the register isn't
7564 * modified after this point, just in case we start writing crappy MC code. */
7565 pVar->enmGstReg = enmGstReg;
7566 pVar->fRegAcquired = true;
7567 return idxReg;
7568 }
7569 Assert(pVar->uArgNo == UINT8_MAX);
7570
7571 /*
7572 * Because this is supposed to be the commit stage, we just tag along with the
7573 * temporary register allocator and upgrade the register to a variable register.
7574 */
7575 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7576 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7577 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7578 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7579 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7580 pVar->idxReg = idxReg;
7581
7582 /*
7583 * Now we need to load the register value.
7584 */
7585 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7586 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7587 else
7588 {
7589 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7590 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7591 switch (pVar->cbVar)
7592 {
7593 case sizeof(uint64_t):
7594 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7595 break;
7596 case sizeof(uint32_t):
7597 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7598 break;
7599 case sizeof(uint16_t):
7600 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7601 break;
7602 case sizeof(uint8_t):
7603 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7604 break;
7605 default:
7606 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7607 }
7608 }
7609
7610 pVar->fRegAcquired = true;
7611 return idxReg;
7612}
7613
7614
7615/**
7616 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7617 *
7618 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7619 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7620 * requirement of flushing anything in volatile host registers when making a
7621 * call.
7622 *
7623 * @returns New @a off value.
7624 * @param pReNative The recompiler state.
7625 * @param off The code buffer position.
7626 * @param fHstRegsNotToSave Set of registers not to save & restore.
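 *
 * Intended pairing (a simplified sketch):
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      ... emit the call to the helper ...
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);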
7627 */
7628DECL_HIDDEN_THROW(uint32_t)
7629iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7630{
7631 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7632 if (fHstRegs)
7633 {
7634 do
7635 {
7636 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7637 fHstRegs &= ~RT_BIT_32(idxHstReg);
7638
7639 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7640 {
7641 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7642 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7643 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7644 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7645 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7646 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7647 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7648 {
7649 case kIemNativeVarKind_Stack:
7650 {
7651 /* Temporarily spill the variable register. */
7652 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7653 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7654 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7655 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7656 continue;
7657 }
7658
7659 case kIemNativeVarKind_Immediate:
7660 case kIemNativeVarKind_VarRef:
7661 case kIemNativeVarKind_GstRegRef:
7662 /* It is weird to have any of these loaded at this point. */
7663 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7664 continue;
7665
7666 case kIemNativeVarKind_End:
7667 case kIemNativeVarKind_Invalid:
7668 break;
7669 }
7670 AssertFailed();
7671 }
7672 else
7673 {
7674 /*
7675 * Allocate a temporary stack slot and spill the register to it.
7676 */
7677 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7678 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7679 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7680 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7681 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7682 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7683 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7684 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7685 }
7686 } while (fHstRegs);
7687 }
7688#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7689
7690 /*
7691 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7692 * which would be more difficult as they would span multiple stack slots and come in different sizes
7693 * (besides, we only have a limited number of slots at the moment).
7694 *
7695 * However, the shadows do need to be flushed out as the guest SIMD registers might get corrupted by
7696 * the callee. This asserts that the registers were written back earlier and are not in a dirty state.
7697 */
7698 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7699
7700 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7701 if (fHstRegs)
7702 {
7703 do
7704 {
7705 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7706 fHstRegs &= ~RT_BIT_32(idxHstReg);
7707
7708 /* Fixed reserved and temporary registers don't need saving. */
7709 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7710 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7711 continue;
7712
7713 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7714
7715 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7716 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7717 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7718 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7719 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7720 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7721 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7722 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7723 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7724 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7725 {
7726 case kIemNativeVarKind_Stack:
7727 {
7728 /* Temporarily spill the variable register. */
7729 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7730 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7731 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7732 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7733 if (cbVar == sizeof(RTUINT128U))
7734 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7735 else
7736 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7737 continue;
7738 }
7739
7740 case kIemNativeVarKind_Immediate:
7741 case kIemNativeVarKind_VarRef:
7742 case kIemNativeVarKind_GstRegRef:
7743 /* It is weird to have any of these loaded at this point. */
7744 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7745 continue;
7746
7747 case kIemNativeVarKind_End:
7748 case kIemNativeVarKind_Invalid:
7749 break;
7750 }
7751 AssertFailed();
7752 } while (fHstRegs);
7753 }
7754#endif
7755 return off;
7756}
7757
7758
7759/**
7760 * Emit code to restore volatile registers after a call to a helper.
7761 *
7762 * @returns New @a off value.
7763 * @param pReNative The recompiler state.
7764 * @param off The code buffer position.
7765 * @param fHstRegsNotToSave Set of registers not to save & restore.
7766 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7767 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7768 */
7769DECL_HIDDEN_THROW(uint32_t)
7770iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7771{
7772 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7773 if (fHstRegs)
7774 {
7775 do
7776 {
7777 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7778 fHstRegs &= ~RT_BIT_32(idxHstReg);
7779
7780 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7781 {
7782 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7783 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7784 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7785 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7786 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7787 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7788 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7789 {
7790 case kIemNativeVarKind_Stack:
7791 {
7792 /* Unspill the variable register. */
7793 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7794 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7795 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7796 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7797 continue;
7798 }
7799
7800 case kIemNativeVarKind_Immediate:
7801 case kIemNativeVarKind_VarRef:
7802 case kIemNativeVarKind_GstRegRef:
7803 /* It is weird to have any of these loaded at this point. */
7804 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7805 continue;
7806
7807 case kIemNativeVarKind_End:
7808 case kIemNativeVarKind_Invalid:
7809 break;
7810 }
7811 AssertFailed();
7812 }
7813 else
7814 {
7815 /*
7816 * Restore from temporary stack slot.
7817 */
7818 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7819 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7820 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7821 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7822
7823 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7824 }
7825 } while (fHstRegs);
7826 }
7827#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7828 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7829 if (fHstRegs)
7830 {
7831 do
7832 {
7833 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7834 fHstRegs &= ~RT_BIT_32(idxHstReg);
7835
7836 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7837 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7838 continue;
7839 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7840
7841 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7842 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7843 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7844 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7845 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7846 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7847 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7848 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7849 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7850 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7851 {
7852 case kIemNativeVarKind_Stack:
7853 {
7854 /* Unspill the variable register. */
7855 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7856 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7857 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7858 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7859
7860 if (cbVar == sizeof(RTUINT128U))
7861 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7862 else
7863 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7864 continue;
7865 }
7866
7867 case kIemNativeVarKind_Immediate:
7868 case kIemNativeVarKind_VarRef:
7869 case kIemNativeVarKind_GstRegRef:
7870 /* It is weird to have any of these loaded at this point. */
7871 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7872 continue;
7873
7874 case kIemNativeVarKind_End:
7875 case kIemNativeVarKind_Invalid:
7876 break;
7877 }
7878 AssertFailed();
7879 } while (fHstRegs);
7880 }
7881#endif
7882 return off;
7883}
7884
7885
7886/**
7887 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
7888 *
7889 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7890 *
7891 * ASSUMES that @a idxVar is valid and unpacked.
7892 */
7893DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7894{
7895 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7896 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7897 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7898 {
7899 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7900 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7901 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
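    /* E.g. a 32 byte variable occupies cSlots == 4 with fAllocMask == 0xf, mirroring the
       multi-slot allocation done by iemNativeVarGetStackSlot(). */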
7902 Assert(cSlots > 0);
7903 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7904 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7905 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7906 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7907 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7908 }
7909 else
7910 Assert(idxStackSlot == UINT8_MAX);
7911}
7912
7913
7914/**
7915 * Worker that frees a single variable.
7916 *
7917 * ASSUMES that @a idxVar is valid and unpacked.
7918 */
7919DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7920{
7921 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7922 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7923 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7924
7925 /* Free the host register first if any assigned. */
7926 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7927#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7928 if ( idxHstReg != UINT8_MAX
7929 && pReNative->Core.aVars[idxVar].fSimdReg)
7930 {
7931 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7932 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7933 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
7934 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
7935 }
7936 else
7937#endif
7938 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7939 {
7940 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7941 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7942 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7943 }
7944
7945 /* Free argument mapping. */
7946 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7947 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7948 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7949
7950 /* Free the stack slots. */
7951 iemNativeVarFreeStackSlots(pReNative, idxVar);
7952
7953 /* Free the actual variable. */
7954 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7955 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7956}
7957
7958
7959/**
7960 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7961 */
7962DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7963{
7964 while (bmVars != 0)
7965 {
7966 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7967 bmVars &= ~RT_BIT_32(idxVar);
7968
7969#if 1 /** @todo optimize by simplifying this later... */
7970 iemNativeVarFreeOneWorker(pReNative, idxVar);
7971#else
7972 /* Only need to free the host register, the rest is done as bulk updates below. */
7973 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7974 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7975 {
7976 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7977 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7978 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7979 }
7980#endif
7981 }
7982#if 0 /** @todo optimize by simplifying this later... */
7983 pReNative->Core.bmVars = 0;
7984 pReNative->Core.bmStack = 0;
7985 pReNative->Core.u64ArgVars = UINT64_MAX;
7986#endif
7987}
7988
7989
7990
7991/*********************************************************************************************************************************
7992* Emitters for IEM_MC_CALL_CIMPL_XXX *
7993*********************************************************************************************************************************/
7994
7995/**
7996 * Emits code to load a reference to the given guest register into @a idxGprDst.
7997 */
7998DECL_HIDDEN_THROW(uint32_t)
7999iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8000 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8001{
8002#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8003 /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
8004#endif
8005
8006 /*
8007 * Get the offset relative to the CPUMCTX structure.
8008 */
8009 uint32_t offCpumCtx;
8010 switch (enmClass)
8011 {
8012 case kIemNativeGstRegRef_Gpr:
8013 Assert(idxRegInClass < 16);
8014 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8015 break;
8016
8017 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8018 Assert(idxRegInClass < 4);
8019 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8020 break;
8021
8022 case kIemNativeGstRegRef_EFlags:
8023 Assert(idxRegInClass == 0);
8024 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8025 break;
8026
8027 case kIemNativeGstRegRef_MxCsr:
8028 Assert(idxRegInClass == 0);
8029 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8030 break;
8031
8032 case kIemNativeGstRegRef_FpuReg:
8033 Assert(idxRegInClass < 8);
8034 AssertFailed(); /** @todo what kind of indexing? */
8035 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8036 break;
8037
8038 case kIemNativeGstRegRef_MReg:
8039 Assert(idxRegInClass < 8);
8040 AssertFailed(); /** @todo what kind of indexing? */
8041 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8042 break;
8043
8044 case kIemNativeGstRegRef_XReg:
8045 Assert(idxRegInClass < 16);
8046 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8047 break;
8048
8049 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8050 Assert(idxRegInClass == 0);
8051 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8052 break;
8053
8054 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8055 Assert(idxRegInClass == 0);
8056 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8057 break;
8058
8059 default:
8060 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8061 }
8062
8063 /*
8064 * Load the value into the destination register.
8065 */
8066#ifdef RT_ARCH_AMD64
8067 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8068
8069#elif defined(RT_ARCH_ARM64)
8070 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
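    /* offCpumCtx must fit the 12-bit unsigned immediate of the ADD emitted below, hence the assertion. */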
8071 Assert(offCpumCtx < 4096);
8072 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8073
8074#else
8075# error "Port me!"
8076#endif
8077
8078 return off;
8079}
8080
8081
8082/**
8083 * Common code for CIMPL and AIMPL calls.
8084 *
8085 * These are calls that use argument variables and such. They should not be
8086 * confused with internal calls required to implement an MC operation,
8087 * like a TLB load and similar.
8088 *
8089 * Upon return all that is left to do is to load any hidden arguments and
8090 * perform the call. All argument variables are freed.
8091 *
8092 * @returns New code buffer offset; throws VBox status code on error.
8093 * @param pReNative The native recompile state.
8094 * @param off The code buffer offset.
8095 * @param cArgs The total number of arguments (includes hidden
8096 * count).
8097 * @param cHiddenArgs The number of hidden arguments. The hidden
8098 * arguments must not have any variable declared for
8099 * them, whereas all the regular arguments must
8100 * (tstIEMCheckMc ensures this).
8101 * @param fFlushPendingWrites Whether to flush pending writes (default true). Pending
8102 * writes in call-volatile registers are still flushed even when false.
8103 */
8104DECL_HIDDEN_THROW(uint32_t)
8105iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8106 bool fFlushPendingWrites /*= true*/)
8107{
8108#ifdef VBOX_STRICT
8109 /*
8110 * Assert sanity.
8111 */
8112 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8113 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8114 for (unsigned i = 0; i < cHiddenArgs; i++)
8115 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8116 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8117 {
8118 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8119 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8120 }
8121 iemNativeRegAssertSanity(pReNative);
8122#endif
8123
8124 /* We don't know what the called function makes use of, so flush any pending register writes. */
8125 RT_NOREF(fFlushPendingWrites);
8126#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8127 if (fFlushPendingWrites)
8128#endif
8129 off = iemNativeRegFlushPendingWrites(pReNative, off);
8130
8131 /*
8132 * Before we do anything else, go over variables that are referenced and
8133 * make sure they are not in a register.
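 * (A referrer passes the stack address of the variable it refers to, so the current value
 * must live in the variable's stack slot rather than in a host register.)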
8134 */
8135 uint32_t bmVars = pReNative->Core.bmVars;
8136 if (bmVars)
8137 {
8138 do
8139 {
8140 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8141 bmVars &= ~RT_BIT_32(idxVar);
8142
8143 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8144 {
8145 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8146#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8147 if ( idxRegOld != UINT8_MAX
8148 && pReNative->Core.aVars[idxVar].fSimdReg)
8149 {
8150 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8151 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8152
8153 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8154 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8155 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8156 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8157 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8158 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8159 else
8160 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8161
8162 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8163 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8164
8165 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8166 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8167 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8168 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8169 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8170 }
8171 else
8172#endif
8173 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8174 {
8175 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8176 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8177 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8178 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8179 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8180
8181 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8182 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8183 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8184 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8185 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8186 }
8187 }
8188 } while (bmVars != 0);
8189#if 0 //def VBOX_STRICT
8190 iemNativeRegAssertSanity(pReNative);
8191#endif
8192 }
8193
8194 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8195
8196#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8197 /*
8198 * As the very first step, go over the host registers that will be used for arguments
8199 * and make sure they don't shadow anything which needs writing back first.
8200 */
8201 for (uint32_t i = 0; i < cRegArgs; i++)
8202 {
8203 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8204
8205 /* Writeback any dirty guest shadows before using this register. */
8206 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8207 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8208 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8209 }
8210#endif
8211
8212 /*
8213 * First, go over the host registers that will be used for arguments and make
8214 * sure they either hold the desired argument or are free.
8215 */
8216 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8217 {
8218 for (uint32_t i = 0; i < cRegArgs; i++)
8219 {
8220 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8221 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8222 {
8223 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8224 {
8225 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8226 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8227 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8228 Assert(pVar->idxReg == idxArgReg);
8229 uint8_t const uArgNo = pVar->uArgNo;
8230 if (uArgNo == i)
8231 { /* perfect */ }
8232 /* The variable allocator logic should make sure this is impossible,
8233 except for when the return register is used as a parameter (ARM,
8234 but not x86). */
8235#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8236 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8237 {
8238# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8239# error "Implement this"
8240# endif
8241 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8242 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8243 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8244 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8245 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8246 }
8247#endif
8248 else
8249 {
8250 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8251
8252 if (pVar->enmKind == kIemNativeVarKind_Stack)
8253 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8254 else
8255 {
8256 /* just free it, can be reloaded if used again */
8257 pVar->idxReg = UINT8_MAX;
8258 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8259 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8260 }
8261 }
8262 }
8263 else
8264 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8265 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8266 }
8267 }
8268#if 0 //def VBOX_STRICT
8269 iemNativeRegAssertSanity(pReNative);
8270#endif
8271 }
8272
8273 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8274
8275#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8276 /*
8277 * If there are any stack arguments, make sure they are in their place as well.
8278 *
8279 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8280 * the caller) will be loading it later and it must be free (see the first loop).
8281 */
8282 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8283 {
8284 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8285 {
8286 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8287 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8288 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8289 {
8290 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8291 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8292 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8293 pVar->idxReg = UINT8_MAX;
8294 }
8295 else
8296 {
8297 /* Use ARG0 as temp for stuff we need registers for. */
8298 switch (pVar->enmKind)
8299 {
8300 case kIemNativeVarKind_Stack:
8301 {
8302 uint8_t const idxStackSlot = pVar->idxStackSlot;
8303 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8304 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8305 iemNativeStackCalcBpDisp(idxStackSlot));
8306 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8307 continue;
8308 }
8309
8310 case kIemNativeVarKind_Immediate:
8311 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8312 continue;
8313
8314 case kIemNativeVarKind_VarRef:
8315 {
8316 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8317 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8318 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8319 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8320 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8321# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8322 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8323 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8324 if ( fSimdReg
8325 && idxRegOther != UINT8_MAX)
8326 {
8327 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8328 if (cbVar == sizeof(RTUINT128U))
8329 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8330 else
8331 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8332 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8333 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8334 }
8335 else
8336# endif
8337 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8338 {
8339 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8340 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8341 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8342 }
8343 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8344 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8345 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8346 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8347 continue;
8348 }
8349
8350 case kIemNativeVarKind_GstRegRef:
8351 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8352 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8353 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8354 continue;
8355
8356 case kIemNativeVarKind_Invalid:
8357 case kIemNativeVarKind_End:
8358 break;
8359 }
8360 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8361 }
8362 }
8363# if 0 //def VBOX_STRICT
8364 iemNativeRegAssertSanity(pReNative);
8365# endif
8366 }
8367#else
8368 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8369#endif
8370
8371 /*
8372 * Make sure the argument variables are loaded into their respective registers.
8373 *
8374 * We can optimize this by ASSUMING that any register allocations are for
8375 * registers that have already been loaded and are ready. The previous step
8376 * saw to that.
8377 */
8378 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8379 {
8380 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8381 {
8382 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8383 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8384 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8385 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8386 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8387 else
8388 {
8389 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8390 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8391 {
8392 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8393 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8394 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8395 | RT_BIT_32(idxArgReg);
8396 pVar->idxReg = idxArgReg;
8397 }
8398 else
8399 {
8400 /* Use ARG0 as temp for stuff we need registers for. */
8401 switch (pVar->enmKind)
8402 {
8403 case kIemNativeVarKind_Stack:
8404 {
8405 uint8_t const idxStackSlot = pVar->idxStackSlot;
8406 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8407 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8408 continue;
8409 }
8410
8411 case kIemNativeVarKind_Immediate:
8412 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8413 continue;
8414
8415 case kIemNativeVarKind_VarRef:
8416 {
8417 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8418 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8419 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8420 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8421 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8422 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8423#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8424 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8425 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8426 if ( fSimdReg
8427 && idxRegOther != UINT8_MAX)
8428 {
8429 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8430 if (cbVar == sizeof(RTUINT128U))
8431 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8432 else
8433 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8434 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8435 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8436 }
8437 else
8438#endif
8439 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8440 {
8441 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8442 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8443 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8444 }
8445 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8446 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8447 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8448 continue;
8449 }
8450
8451 case kIemNativeVarKind_GstRegRef:
8452 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8453 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8454 continue;
8455
8456 case kIemNativeVarKind_Invalid:
8457 case kIemNativeVarKind_End:
8458 break;
8459 }
8460 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8461 }
8462 }
8463 }
8464#if 0 //def VBOX_STRICT
8465 iemNativeRegAssertSanity(pReNative);
8466#endif
8467 }
8468#ifdef VBOX_STRICT
8469 else
8470 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8471 {
8472 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8473 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8474 }
8475#endif
8476
8477 /*
8478 * Free all argument variables (simplified).
8479 * Their lifetime always expires with the call they are for.
8480 */
8481 /** @todo Make the python script check that arguments aren't used after
8482 * IEM_MC_CALL_XXXX. */
8483 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8484 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8485 * an argument value. There is also some FPU stuff. */
8486 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8487 {
8488 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8489 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8490
8491 /* no need to free registers: */
8492 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8493 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8494 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8495 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8496 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8497 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8498
8499 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8500 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8501 iemNativeVarFreeStackSlots(pReNative, idxVar);
8502 }
8503 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8504
8505 /*
8506 * Flush volatile registers as we make the call.
8507 */
8508 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8509
8510 return off;
8511}
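/*
 * Illustrative usage sketch (added for clarity, not part of the original source):
 * after iemNativeEmitCallCommon has placed the regular arguments, a CIMPL/AIMPL
 * call emitter is assumed to only need to load the hidden pVCpu argument and
 * emit the call itself, roughly like this (iemNativeEmitCallImm is an
 * assumption; the other emitter names appear elsewhere in this file):
 *
 *     off = iemNativeEmitCallCommon(pReNative, off, cArgs + 1, 1);
 *     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 */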
8512
8513
8514
8515/*********************************************************************************************************************************
8516* TLB Lookup. *
8517*********************************************************************************************************************************/
8518
8519/**
8520 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8521 */
8522DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8523{
8524 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8525 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8526 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8527 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8528
8529 /* Do the lookup manually. */
8530 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8531 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8532 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8533 if (RT_LIKELY(pTlbe->uTag == uTag))
8534 {
8535 /*
8536 * Check TLB page table level access flags.
8537 */
8538 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8539 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8540 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8541 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8542 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8543 | IEMTLBE_F_PG_UNASSIGNED
8544 | IEMTLBE_F_PT_NO_ACCESSED
8545 | fNoWriteNoDirty | fNoUser);
8546 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8547 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8548 {
8549 /*
8550 * Return the address.
8551 */
8552 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8553 if ((uintptr_t)pbAddr == uResult)
8554 return;
8555 RT_NOREF(cbMem);
8556 AssertFailed();
8557 }
8558 else
8559 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8560 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8561 }
8562 else
8563 AssertFailed();
8564 RT_BREAKPOINT();
8565}
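/*
 * Packing sketch (added for clarity, not part of the original source): the
 * unpacking above with RT_BYTE1, RT_BYTE2 and a 16-bit shift implies that the
 * caller is assumed to pack the parameter like this:
 *
 *     uint32_t const uSegAndSizeAndAccess = (uint32_t)iSegReg
 *                                         | ((uint32_t)cbMem << 8)
 *                                         | (fAccess << 16);
 */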
8566
8567/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8568
8569
8570
8571/*********************************************************************************************************************************
8572* Recompiler Core. *
8573*********************************************************************************************************************************/
8574
8575/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8576static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8577{
8578 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8579 pDis->cbCachedInstr += cbMaxRead;
8580 RT_NOREF(cbMinRead);
8581 return VERR_NO_DATA;
8582}
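/*
 * Note (added for clarity, not part of the original source): by zero filling
 * the requested bytes and returning VERR_NO_DATA, this dummy callback makes
 * DISInstrWithPrefetchedBytes decode only the opcode bytes already recorded
 * in the TB instead of fetching additional guest memory.
 */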
8583
8584
8585DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8586{
8587 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8588 {
8589#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8590 ENTRY(fLocalForcedActions),
8591 ENTRY(iem.s.rcPassUp),
8592 ENTRY(iem.s.fExec),
8593 ENTRY(iem.s.pbInstrBuf),
8594 ENTRY(iem.s.uInstrBufPc),
8595 ENTRY(iem.s.GCPhysInstrBuf),
8596 ENTRY(iem.s.cbInstrBufTotal),
8597 ENTRY(iem.s.idxTbCurInstr),
8598 ENTRY(iem.s.fSkippingEFlags),
8599#ifdef VBOX_WITH_STATISTICS
8600 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8601 ENTRY(iem.s.StatNativeTlbHitsForStore),
8602 ENTRY(iem.s.StatNativeTlbHitsForStack),
8603 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8604 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8605 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8606 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8607 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8608#endif
8609 ENTRY(iem.s.DataTlb.uTlbRevision),
8610 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8611 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8612 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8613 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8614 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8615 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8616 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8617 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8618 ENTRY(iem.s.DataTlb.aEntries),
8619 ENTRY(iem.s.CodeTlb.uTlbRevision),
8620 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8621 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8622 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8623 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8624 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8625 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8626 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8627 ENTRY(iem.s.CodeTlb.aEntries),
8628 ENTRY(pVMR3),
8629 ENTRY(cpum.GstCtx.rax),
8630 ENTRY(cpum.GstCtx.ah),
8631 ENTRY(cpum.GstCtx.rcx),
8632 ENTRY(cpum.GstCtx.ch),
8633 ENTRY(cpum.GstCtx.rdx),
8634 ENTRY(cpum.GstCtx.dh),
8635 ENTRY(cpum.GstCtx.rbx),
8636 ENTRY(cpum.GstCtx.bh),
8637 ENTRY(cpum.GstCtx.rsp),
8638 ENTRY(cpum.GstCtx.rbp),
8639 ENTRY(cpum.GstCtx.rsi),
8640 ENTRY(cpum.GstCtx.rdi),
8641 ENTRY(cpum.GstCtx.r8),
8642 ENTRY(cpum.GstCtx.r9),
8643 ENTRY(cpum.GstCtx.r10),
8644 ENTRY(cpum.GstCtx.r11),
8645 ENTRY(cpum.GstCtx.r12),
8646 ENTRY(cpum.GstCtx.r13),
8647 ENTRY(cpum.GstCtx.r14),
8648 ENTRY(cpum.GstCtx.r15),
8649 ENTRY(cpum.GstCtx.es.Sel),
8650 ENTRY(cpum.GstCtx.es.u64Base),
8651 ENTRY(cpum.GstCtx.es.u32Limit),
8652 ENTRY(cpum.GstCtx.es.Attr),
8653 ENTRY(cpum.GstCtx.cs.Sel),
8654 ENTRY(cpum.GstCtx.cs.u64Base),
8655 ENTRY(cpum.GstCtx.cs.u32Limit),
8656 ENTRY(cpum.GstCtx.cs.Attr),
8657 ENTRY(cpum.GstCtx.ss.Sel),
8658 ENTRY(cpum.GstCtx.ss.u64Base),
8659 ENTRY(cpum.GstCtx.ss.u32Limit),
8660 ENTRY(cpum.GstCtx.ss.Attr),
8661 ENTRY(cpum.GstCtx.ds.Sel),
8662 ENTRY(cpum.GstCtx.ds.u64Base),
8663 ENTRY(cpum.GstCtx.ds.u32Limit),
8664 ENTRY(cpum.GstCtx.ds.Attr),
8665 ENTRY(cpum.GstCtx.fs.Sel),
8666 ENTRY(cpum.GstCtx.fs.u64Base),
8667 ENTRY(cpum.GstCtx.fs.u32Limit),
8668 ENTRY(cpum.GstCtx.fs.Attr),
8669 ENTRY(cpum.GstCtx.gs.Sel),
8670 ENTRY(cpum.GstCtx.gs.u64Base),
8671 ENTRY(cpum.GstCtx.gs.u32Limit),
8672 ENTRY(cpum.GstCtx.gs.Attr),
8673 ENTRY(cpum.GstCtx.rip),
8674 ENTRY(cpum.GstCtx.eflags),
8675 ENTRY(cpum.GstCtx.uRipInhibitInt),
8676 ENTRY(cpum.GstCtx.cr0),
8677 ENTRY(cpum.GstCtx.cr4),
8678 ENTRY(cpum.GstCtx.aXcr[0]),
8679 ENTRY(cpum.GstCtx.aXcr[1]),
8680#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8681 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8682 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8683 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8684 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8685 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8686 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8687 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8688 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8689 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8690 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8691 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8692 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8693 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8694 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8695 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8696 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8697 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8698 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8699 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8700 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8701 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8702 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8703 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8704 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8705 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8706 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8707 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8708 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8709 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8710 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8711 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8712 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8713 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8714#endif
8715#undef ENTRY
8716 };
8717#ifdef VBOX_STRICT
8718 static bool s_fOrderChecked = false;
8719 if (!s_fOrderChecked)
8720 {
8721 s_fOrderChecked = true;
8722 uint32_t offPrev = s_aMembers[0].off;
8723 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8724 {
8725 Assert(s_aMembers[i].off > offPrev);
8726 offPrev = s_aMembers[i].off;
8727 }
8728 }
8729#endif
8730
8731 /*
8732 * Binary lookup.
8733 */
8734 unsigned iStart = 0;
8735 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8736 for (;;)
8737 {
8738 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8739 uint32_t const offCur = s_aMembers[iCur].off;
8740 if (off < offCur)
8741 {
8742 if (iCur != iStart)
8743 iEnd = iCur;
8744 else
8745 break;
8746 }
8747 else if (off > offCur)
8748 {
8749 if (iCur + 1 < iEnd)
8750 iStart = iCur + 1;
8751 else
8752 break;
8753 }
8754 else
8755 return s_aMembers[iCur].pszName;
8756 }
8757#ifdef VBOX_WITH_STATISTICS
8758 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8759 return "iem.s.acThreadedFuncStats[iFn]";
8760#endif
8761 return NULL;
8762}
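/*
 * Usage sketch (added for clarity, not part of the original source): the
 * disassembler below feeds VMCPU-relative displacements to this helper to
 * annotate memory operands, e.g. (assuming the table above stays sorted):
 *
 *     const char *pszName = iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));
 *     Assert(!strcmp(pszName, "cpum.GstCtx.rip"));
 */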
8763
8764
8765DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8766{
8767 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8768#if defined(RT_ARCH_AMD64)
8769 static const char * const a_apszMarkers[] =
8770 {
8771 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8772 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8773 };
8774#endif
8775
8776 char szDisBuf[512];
8777 DISSTATE Dis;
8778 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8779 uint32_t const cNative = pTb->Native.cInstructions;
8780 uint32_t offNative = 0;
8781#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8782 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8783#endif
8784 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8785 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8786 : DISCPUMODE_64BIT;
8787#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8788 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8789#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8790 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8791#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8792# error "Port me"
8793#else
8794 csh hDisasm = ~(size_t)0;
8795# if defined(RT_ARCH_AMD64)
8796 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8797# elif defined(RT_ARCH_ARM64)
8798 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8799# else
8800# error "Port me"
8801# endif
8802 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8803
8804 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8805 //Assert(rcCs == CS_ERR_OK);
8806#endif
8807
8808 /*
8809 * Print TB info.
8810 */
8811 pHlp->pfnPrintf(pHlp,
8812 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8813 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8814 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8815 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8816#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8817 if (pDbgInfo && pDbgInfo->cEntries > 1)
8818 {
8819 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8820
8821 /*
8822 * This disassembly is driven by the debug info which follows the native
8823 * code and indicates where the next guest instruction starts, where
8824 * labels are, and such things.
8825 */
8826 uint32_t idxThreadedCall = 0;
8827 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8828 uint8_t idxRange = UINT8_MAX;
8829 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8830 uint32_t offRange = 0;
8831 uint32_t offOpcodes = 0;
8832 uint32_t const cbOpcodes = pTb->cbOpcodes;
8833 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8834 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8835 uint32_t iDbgEntry = 1;
8836 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8837
8838 while (offNative < cNative)
8839 {
8840 /* If we're at or have passed the point where the next chunk of debug
8841 info starts, process it. */
8842 if (offDbgNativeNext <= offNative)
8843 {
8844 offDbgNativeNext = UINT32_MAX;
8845 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8846 {
8847 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8848 {
8849 case kIemTbDbgEntryType_GuestInstruction:
8850 {
8851 /* Did the exec flag change? */
8852 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8853 {
8854 pHlp->pfnPrintf(pHlp,
8855 " fExec change %#08x -> %#08x %s\n",
8856 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8857 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8858 szDisBuf, sizeof(szDisBuf)));
8859 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8860 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8861 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8862 : DISCPUMODE_64BIT;
8863 }
8864
8865 /* New opcode range? We need to cope with a spurious debug info entry here for cases
8866 where the compilation was aborted before the opcode was recorded and the actual
8867 instruction was translated to a threaded call. This may happen when we run out
8868 of ranges, or when some complicated interrupts/FFs are found to be pending or
8869 similar. So, we just deal with it here rather than in the compiler code as it
8870 is a lot simpler to do here. */
8871 if ( idxRange == UINT8_MAX
8872 || idxRange >= cRanges
8873 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8874 {
8875 idxRange += 1;
8876 if (idxRange < cRanges)
8877 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8878 else
8879 continue;
8880 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8881 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8882 + (pTb->aRanges[idxRange].idxPhysPage == 0
8883 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8884 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8885 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8886 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8887 pTb->aRanges[idxRange].idxPhysPage);
8888 GCPhysPc += offRange;
8889 }
8890
8891 /* Disassemble the instruction. */
8892 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8893 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8894 uint32_t cbInstr = 1;
8895 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8896 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8897 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8898 if (RT_SUCCESS(rc))
8899 {
8900 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8901 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8902 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8903 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8904
8905 static unsigned const s_offMarker = 55;
8906 static char const s_szMarker[] = " ; <--- guest";
8907 if (cch < s_offMarker)
8908 {
8909 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8910 cch = s_offMarker;
8911 }
8912 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8913 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8914
8915 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8916 }
8917 else
8918 {
8919 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8920 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8921 cbInstr = 1;
8922 }
8923 GCPhysPc += cbInstr;
8924 offOpcodes += cbInstr;
8925 offRange += cbInstr;
8926 continue;
8927 }
8928
8929 case kIemTbDbgEntryType_ThreadedCall:
8930 pHlp->pfnPrintf(pHlp,
8931 " Call #%u to %s (%u args) - %s\n",
8932 idxThreadedCall,
8933 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8934 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8935 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8936 idxThreadedCall++;
8937 continue;
8938
8939 case kIemTbDbgEntryType_GuestRegShadowing:
8940 {
8941 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8942 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8943 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8944 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8945 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8946 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8947 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
8948 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8949 else
8950 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8951 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8952 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8953 continue;
8954 }
8955
8956#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8957 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8958 {
8959 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8960 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8961 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8962 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8963 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8964 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8965 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8966 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8967 else
8968 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8969 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8970 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8971 continue;
8972 }
8973#endif
8974
8975 case kIemTbDbgEntryType_Label:
8976 {
8977 const char *pszName = "what_the_fudge";
8978 const char *pszComment = "";
8979 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8980 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8981 {
8982 case kIemNativeLabelType_Return: pszName = "Return"; break;
8983 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
8984 case kIemNativeLabelType_ReturnBreakFF: pszName = "ReturnBreakFF"; break;
8985 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
8986 case kIemNativeLabelType_ReturnBreakViaLookup: pszName = "ReturnBreakViaLookup"; break;
8987 case kIemNativeLabelType_ReturnBreakViaLookupWithIrq: pszName = "ReturnBreakViaLookupWithIrq"; break;
8988 case kIemNativeLabelType_ReturnBreakViaLookupWithTlb: pszName = "ReturnBreakViaLookupWithTlb"; break;
8989 case kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq: pszName = "ReturnBreakViaLookupWithTlbAndIrq"; break;
8990 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
8991 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
8992 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
8993 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
8994 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
8995 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
8996 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
8997 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
8998 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
8999 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9000 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9001 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9002 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9003 case kIemNativeLabelType_If:
9004 pszName = "If";
9005 fNumbered = true;
9006 break;
9007 case kIemNativeLabelType_Else:
9008 pszName = "Else";
9009 fNumbered = true;
9010 pszComment = " ; regs state restored pre-if-block";
9011 break;
9012 case kIemNativeLabelType_Endif:
9013 pszName = "Endif";
9014 fNumbered = true;
9015 break;
9016 case kIemNativeLabelType_CheckIrq:
9017 pszName = "CheckIrq_CheckVM";
9018 fNumbered = true;
9019 break;
9020 case kIemNativeLabelType_TlbLookup:
9021 pszName = "TlbLookup";
9022 fNumbered = true;
9023 break;
9024 case kIemNativeLabelType_TlbMiss:
9025 pszName = "TlbMiss";
9026 fNumbered = true;
9027 break;
9028 case kIemNativeLabelType_TlbDone:
9029 pszName = "TlbDone";
9030 fNumbered = true;
9031 break;
9032 case kIemNativeLabelType_Invalid:
9033 case kIemNativeLabelType_End:
9034 break;
9035 }
9036 if (fNumbered)
9037 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9038 else
9039 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9040 continue;
9041 }
9042
9043 case kIemTbDbgEntryType_NativeOffset:
9044 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9045 Assert(offDbgNativeNext >= offNative);
9046 break;
9047
9048#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9049 case kIemTbDbgEntryType_DelayedPcUpdate:
9050 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9051 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9052 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9053 continue;
9054#endif
9055
9056#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9057 case kIemTbDbgEntryType_GuestRegDirty:
9058 {
9059 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9060 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9061 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9062 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9063 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9064 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9065 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9066 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9067 pszGstReg, pszHstReg);
9068 continue;
9069 }
9070
9071 case kIemTbDbgEntryType_GuestRegWriteback:
9072 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX64)\n",
9073 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9074 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9075 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9076 continue;
9077#endif
9078
9079 default:
9080 AssertFailed();
9081 }
9082 iDbgEntry++;
9083 break;
9084 }
9085 }
9086
9087 /*
9088 * Disassemble the next native instruction.
9089 */
9090 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9091# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9092 uint32_t cbInstr = sizeof(paNative[0]);
9093 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9094 if (RT_SUCCESS(rc))
9095 {
9096# if defined(RT_ARCH_AMD64)
9097 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9098 {
9099 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9100 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9101 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9102 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9103 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9104 uInfo & 0x8000 ? "recompiled" : "todo");
9105 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9106 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9107 else
9108 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9109 }
9110 else
9111# endif
9112 {
9113 const char *pszAnnotation = NULL;
9114# ifdef RT_ARCH_AMD64
9115 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9116 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9117 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9118 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9119 PCDISOPPARAM pMemOp;
9120 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9121 pMemOp = &Dis.Param1;
9122 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9123 pMemOp = &Dis.Param2;
9124 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9125 pMemOp = &Dis.Param3;
9126 else
9127 pMemOp = NULL;
9128 if ( pMemOp
9129 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9130 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9131 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9132 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9133
9134#elif defined(RT_ARCH_ARM64)
9135 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9136 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9137 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9138# else
9139# error "Port me"
9140# endif
9141 if (pszAnnotation)
9142 {
9143 static unsigned const s_offAnnotation = 55;
9144 size_t const cchAnnotation = strlen(pszAnnotation);
9145 size_t cchDis = strlen(szDisBuf);
9146 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9147 {
9148 if (cchDis < s_offAnnotation)
9149 {
9150 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9151 cchDis = s_offAnnotation;
9152 }
9153 szDisBuf[cchDis++] = ' ';
9154 szDisBuf[cchDis++] = ';';
9155 szDisBuf[cchDis++] = ' ';
9156 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9157 }
9158 }
9159 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9160 }
9161 }
9162 else
9163 {
9164# if defined(RT_ARCH_AMD64)
9165 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9166 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9167# elif defined(RT_ARCH_ARM64)
9168 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9169# else
9170# error "Port me"
9171# endif
9172 cbInstr = sizeof(paNative[0]);
9173 }
9174 offNative += cbInstr / sizeof(paNative[0]);
9175
9176# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9177 cs_insn *pInstr;
9178 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9179 (uintptr_t)pNativeCur, 1, &pInstr);
9180 if (cInstrs > 0)
9181 {
9182 Assert(cInstrs == 1);
9183 const char *pszAnnotation = NULL;
9184# if defined(RT_ARCH_ARM64)
9185 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9186 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9187 {
9188 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9189 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9190 char *psz = strchr(pInstr->op_str, '[');
9191 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9192 {
9193 uint32_t const offVCpu = psz[3] == '8'? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9194 int32_t off = -1;
9195 psz += 4;
9196 if (*psz == ']')
9197 off = 0;
9198 else if (*psz == ',')
9199 {
9200 psz = RTStrStripL(psz + 1);
9201 if (*psz == '#')
9202 off = RTStrToInt32(&psz[1]);
9203 /** @todo deal with index registers and LSL as well... */
9204 }
9205 if (off >= 0)
9206 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9207 }
9208 }
9209# endif
9210
9211 size_t const cchOp = strlen(pInstr->op_str);
9212# if defined(RT_ARCH_AMD64)
9213 if (pszAnnotation)
9214 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9215 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9216 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9217 else
9218 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9219 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9220
9221# else
9222 if (pszAnnotation)
9223 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9224 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9225 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9226 else
9227 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9228 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9229# endif
9230 offNative += pInstr->size / sizeof(*pNativeCur);
9231 cs_free(pInstr, cInstrs);
9232 }
9233 else
9234 {
9235# if defined(RT_ARCH_AMD64)
9236 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9237 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9238# else
9239 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9240# endif
9241 offNative++;
9242 }
9243# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9244 }
9245 }
9246 else
9247#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9248 {
9249 /*
9250 * No debug info, just disassemble the x86 code and then the native code.
9251 *
9252 * First the guest code:
9253 */
9254 for (unsigned i = 0; i < pTb->cRanges; i++)
9255 {
9256 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9257 + (pTb->aRanges[i].idxPhysPage == 0
9258 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9259 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9260 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9261 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9262 unsigned off = pTb->aRanges[i].offOpcodes;
9263 /** @todo this ain't working when crossing pages! */
9264 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9265 while (off < cbOpcodes)
9266 {
9267 uint32_t cbInstr = 1;
9268 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9269 &pTb->pabOpcodes[off], cbOpcodes - off,
9270 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9271 if (RT_SUCCESS(rc))
9272 {
9273 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9274 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9275 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9276 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9277 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9278 GCPhysPc += cbInstr;
9279 off += cbInstr;
9280 }
9281 else
9282 {
9283 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9284 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9285 break;
9286 }
9287 }
9288 }
9289
9290 /*
9291 * Then the native code:
9292 */
9293 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9294 while (offNative < cNative)
9295 {
9296 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9297# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9298 uint32_t cbInstr = sizeof(paNative[0]);
9299 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9300 if (RT_SUCCESS(rc))
9301 {
9302# if defined(RT_ARCH_AMD64)
9303 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9304 {
9305 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9306 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9307 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9308 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9309 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9310 uInfo & 0x8000 ? "recompiled" : "todo");
9311 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9312 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9313 else
9314 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9315 }
9316 else
9317# endif
9318 {
9319# ifdef RT_ARCH_AMD64
9320 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9321 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9322 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9323 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9324# elif defined(RT_ARCH_ARM64)
9325 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9326 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9327 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9328# else
9329# error "Port me"
9330# endif
9331 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9332 }
9333 }
9334 else
9335 {
9336# if defined(RT_ARCH_AMD64)
9337 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9338 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9339# else
9340 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9341# endif
9342 cbInstr = sizeof(paNative[0]);
9343 }
9344 offNative += cbInstr / sizeof(paNative[0]);
9345
9346# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9347 cs_insn *pInstr;
9348 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9349 (uintptr_t)pNativeCur, 1, &pInstr);
9350 if (cInstrs > 0)
9351 {
9352 Assert(cInstrs == 1);
9353# if defined(RT_ARCH_AMD64)
9354 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9355 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9356# else
9357 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9358 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9359# endif
9360 offNative += pInstr->size / sizeof(*pNativeCur);
9361 cs_free(pInstr, cInstrs);
9362 }
9363 else
9364 {
9365# if defined(RT_ARCH_AMD64)
9366 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9367 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9368# else
9369 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9370# endif
9371 offNative++;
9372 }
9373# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9374 }
9375 }
9376
9377#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9378 /* Cleanup. */
9379 cs_close(&hDisasm);
9380#endif
9381}
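/*
 * Usage sketch (added for clarity, not part of the original source): a DBGF
 * info handler is assumed to invoke the disassembler along these lines, any
 * PCDBGFINFOHLP will do (DBGFR3InfoLogHlp is an assumption):
 *
 *     if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
 *         iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
 */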
9382
9383
9384/**
9385 * Recompiles the given threaded TB into a native one.
9386 *
9387 * In case of failure the translation block will be returned as-is.
9388 *
9389 * @returns pTb.
9390 * @param pVCpu The cross context virtual CPU structure of the calling
9391 * thread.
9392 * @param pTb The threaded translation to recompile to native.
9393 */
9394DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9395{
9396#if 0 /* For profiling the native recompiler code. */
9397l_profile_again:
9398#endif
9399 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9400
9401 /*
9402 * The first time thru, we allocate the recompiler state; the other times
9403 * we just need to reset it before using it again.
9404 */
9405 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9406 if (RT_LIKELY(pReNative))
9407 iemNativeReInit(pReNative, pTb);
9408 else
9409 {
9410 pReNative = iemNativeInit(pVCpu, pTb);
9411 AssertReturn(pReNative, pTb);
9412 }
9413
9414#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9415 /*
9416 * First do liveness analysis. This is done backwards.
9417 */
9418 {
9419 uint32_t idxCall = pTb->Thrd.cCalls;
9420 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9421 { /* likely */ }
9422 else
9423 {
9424 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9425 while (idxCall > cAlloc)
9426 cAlloc *= 2;
9427 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9428 AssertReturn(pvNew, pTb);
9429 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9430 pReNative->cLivenessEntriesAlloc = cAlloc;
9431 }
9432 AssertReturn(idxCall > 0, pTb);
9433 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9434
9435 /* The initial (final) entry. */
9436 idxCall--;
9437 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9438
9439 /* Loop backwards thru the calls and fill in the other entries. */
9440 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9441 while (idxCall > 0)
9442 {
9443 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9444 if (pfnLiveness)
9445 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9446 else
9447 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9448 pCallEntry--;
9449 idxCall--;
9450 }
9451
9452# ifdef VBOX_WITH_STATISTICS
9453 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9454 to 'clobbered' rather than 'input'. */
9455 /** @todo */
9456# endif
9457 }
9458#endif
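 /*
  * Note (added for clarity, not part of the original source): the liveness
  * loop above walks the threaded calls backwards, so entry N describes what
  * is still needed by the code following call N.  Conceptually:
  *
  *     paLivenessEntries[cCalls - 1] = all unused;
  *     for (uint32_t i = cCalls - 1; i > 0; i--)
  *         paLivenessEntries[i - 1] = pfnLiveness(call[i], paLivenessEntries[i]);
  */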
9459
9460 /*
9461 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9462 * for aborting if an error happens.
9463 */
9464 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9465#ifdef LOG_ENABLED
9466 uint32_t const cCallsOrg = cCallsLeft;
9467#endif
9468 uint32_t off = 0;
9469 int rc = VINF_SUCCESS;
9470 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9471 {
9472#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
9473 /*
9474 * Emit prolog code (fixed).
9475 */
9476 off = iemNativeEmitProlog(pReNative, off);
9477#endif
9478
9479 /*
9480 * Convert the calls to native code.
9481 */
9482#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9483 int32_t iGstInstr = -1;
9484#endif
9485#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9486 uint32_t cThreadedCalls = 0;
9487 uint32_t cRecompiledCalls = 0;
9488#endif
9489#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9490 uint32_t idxCurCall = 0;
9491#endif
9492 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9493 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9494 while (cCallsLeft-- > 0)
9495 {
9496 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9497#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9498 pReNative->idxCurCall = idxCurCall;
9499#endif
9500
9501 /*
9502 * Debug info, assembly markup and statistics.
9503 */
9504#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9505 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9506 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9507#endif
9508#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9509 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9510 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9511 {
9512 if (iGstInstr < (int32_t)pTb->cInstructions)
9513 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9514 else
9515 Assert(iGstInstr == pTb->cInstructions);
9516 iGstInstr = pCallEntry->idxInstr;
9517 }
9518 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9519#endif
9520#if defined(VBOX_STRICT)
9521 off = iemNativeEmitMarker(pReNative, off,
9522 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9523#endif
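 /*
  * Note (added for clarity, not part of the original source): the marker
  * payload emitted above mirrors what the disassembler in this file decodes
  * (uInfo & 0x7fff, bit 15, RT_HIWORD), i.e. the low 15 bits carry the call
  * index, bit 15 the recompiled flag and the high word the threaded function:
  *
  *     uint32_t const uInfo = RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction);
  */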
9524#if defined(VBOX_STRICT)
9525 iemNativeRegAssertSanity(pReNative);
9526#endif
9527#ifdef VBOX_WITH_STATISTICS
9528 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9529#endif
9530
9531 /*
9532 * Actual work.
9533 */
9534 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9535 pfnRecom ? "(recompiled)" : "(todo)"));
9536 if (pfnRecom) /** @todo stats on this. */
9537 {
9538 off = pfnRecom(pReNative, off, pCallEntry);
9539 STAM_REL_STATS({cRecompiledCalls++;});
9540 }
9541 else
9542 {
9543 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9544 STAM_REL_STATS({cThreadedCalls++;});
9545 }
9546 Assert(off <= pReNative->cInstrBufAlloc);
9547 Assert(pReNative->cCondDepth == 0);
9548
9549#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9550 if (LogIs2Enabled())
9551 {
9552 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9553# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9554 static const char s_achState[] = "CUXI";
9555# else
9556 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9557# endif
9558
9559 char szGpr[17];
9560 for (unsigned i = 0; i < 16; i++)
9561 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9562 szGpr[16] = '\0';
9563
9564 char szSegBase[X86_SREG_COUNT + 1];
9565 char szSegLimit[X86_SREG_COUNT + 1];
9566 char szSegAttrib[X86_SREG_COUNT + 1];
9567 char szSegSel[X86_SREG_COUNT + 1];
9568 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9569 {
9570 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9571 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9572 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9573 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9574 }
9575 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9576 = szSegSel[X86_SREG_COUNT] = '\0';
9577
9578 char szEFlags[8];
9579 for (unsigned i = 0; i < 7; i++)
9580 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9581 szEFlags[7] = '\0';
9582
9583            Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9584 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9585 }
9586#endif
9587
9588 /*
9589 * Advance.
9590 */
9591 pCallEntry++;
9592#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9593 idxCurCall++;
9594#endif
9595 }
9596
9597 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9598 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9599 if (!cThreadedCalls)
9600 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9601
9602#ifdef VBOX_WITH_STATISTICS
9603 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
9604#endif
9605
9606 /*
9607 * Emit the epilog code.
9608 */
9609 uint32_t idxReturnLabel;
9610 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9611
9612 /*
9613 * Generate special jump labels.
9614 */
9615 if (pReNative->bmLabelTypes & ( RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookup)
9616 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithIrq)
9617 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlb)
9618 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq) ))
9619 off = iemNativeEmitReturnBreakViaLookup(pReNative, off); /* Must come before ReturnBreak! */
9620
9621 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9622 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9623
9624 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreakFF))
9625 off = iemNativeEmitReturnBreakFF(pReNative, off, idxReturnLabel);
9626
9627 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9628 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9629
9630 /*
9631     * Generate simple TB tail labels that just call a helper with a pVCpu
9632     * arg and either return or longjmp/throw a non-zero status.
9633 *
9634 * The array entries must be ordered by enmLabel value so we can index
9635 * using fTailLabels bit numbers.
9636 */
9637 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9638 static struct
9639 {
9640 IEMNATIVELABELTYPE enmLabel;
9641 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9642 } const g_aSimpleTailLabels[] =
9643 {
9644 { kIemNativeLabelType_Invalid, NULL },
9645 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9646 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9647 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9648 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9649 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
9650 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9651 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9652 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9653 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9654 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9655 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9656 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9657 };
9658
9659 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9660 AssertCompile(kIemNativeLabelType_Invalid == 0);
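    /* RT_BIT_64(LastSimple + 1) - 2 yields a mask with bits 1 thru LastSimple set,
       i.e. every simple tail label type while excluding the Invalid entry at bit 0. */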
9661 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9662 if (fTailLabels)
9663 {
9664 do
9665 {
9666 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9667 fTailLabels &= ~RT_BIT_64(enmLabel);
9668 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9669
9670 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9671 Assert(idxLabel != UINT32_MAX);
9672 if (idxLabel != UINT32_MAX)
9673 {
9674 iemNativeLabelDefine(pReNative, idxLabel, off);
9675
9676 /* int pfnCallback(PVMCPUCC pVCpu) */
9677 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9678 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9679
9680 /* jump back to the return sequence. */
9681 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9682 }
9683
9684 } while (fTailLabels);
9685 }
9686 }
9687 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9688 {
9689 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9690 return pTb;
9691 }
9692 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9693 Assert(off <= pReNative->cInstrBufAlloc);
9694
9695 /*
9696     * Make sure all labels have been defined.
9697 */
9698 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9699#ifdef VBOX_STRICT
9700 uint32_t const cLabels = pReNative->cLabels;
9701 for (uint32_t i = 0; i < cLabels; i++)
9702 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9703#endif
9704
9705#if 0 /* For profiling the native recompiler code. */
9706 if (pTb->Thrd.cCalls >= 136)
9707 {
9708 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9709 goto l_profile_again;
9710 }
9711#endif
9712
9713 /*
9714 * Allocate executable memory, copy over the code we've generated.
9715 */
9716 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9717 if (pTbAllocator->pDelayedFreeHead)
9718 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9719
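    /* The executable memory allocator appears to hand back a writable mapping
       (paFinalInstrBuf) plus an RX alias (paFinalInstrBufRx); the generated code
       is copied into the former and only published for execution via the
       iemExecMemAllocatorReadyForUse() call further down. */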
9720 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
9721 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb, (void **)&paFinalInstrBufRx);
9722 AssertReturn(paFinalInstrBuf, pTb);
9723 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9724
9725 /*
9726 * Apply fixups.
9727 */
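    /* Note: label and fixup offsets are in IEMNATIVEINSTR units, which presumably
             means bytes on AMD64 and 32-bit instruction words on ARM64. */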
9728 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9729 uint32_t const cFixups = pReNative->cFixups;
9730 for (uint32_t i = 0; i < cFixups; i++)
9731 {
9732 Assert(paFixups[i].off < off);
9733 Assert(paFixups[i].idxLabel < cLabels);
9734 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9735 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9736 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9737 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9738 switch (paFixups[i].enmType)
9739 {
9740#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9741 case kIemNativeFixupType_Rel32:
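            /* The x86 rel32 displacement is relative to the end of the instruction;
               offAddend presumably compensates for the distance from the fixup
               position (the start of the 32-bit immediate) to that end. */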
9742 Assert(paFixups[i].off + 4 <= off);
9743 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9744 continue;
9745
9746#elif defined(RT_ARCH_ARM64)
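            /* These patch the signed immediate fields of AArch64 branches, with the
               displacement counted in instructions: imm26 at bit 0 (B/BL), imm19 at
               bit 5 (B.cond, CBZ/CBNZ) and imm14 at bit 5 (TBZ/TBNZ). */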
9747 case kIemNativeFixupType_RelImm26At0:
9748 {
9749 Assert(paFixups[i].off < off);
9750 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9751 Assert(offDisp >= -262144 && offDisp < 262144);
9752 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9753 continue;
9754 }
9755
9756 case kIemNativeFixupType_RelImm19At5:
9757 {
9758 Assert(paFixups[i].off < off);
9759 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9760 Assert(offDisp >= -262144 && offDisp < 262144);
9761 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9762 continue;
9763 }
9764
9765 case kIemNativeFixupType_RelImm14At5:
9766 {
9767 Assert(paFixups[i].off < off);
9768 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9769 Assert(offDisp >= -8192 && offDisp < 8192);
9770 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9771 continue;
9772 }
9773
9774#endif
9775 case kIemNativeFixupType_Invalid:
9776 case kIemNativeFixupType_End:
9777 break;
9778 }
9779 AssertFailed();
9780 }
9781
9782 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
9783 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9784
9785 /*
9786 * Convert the translation block.
9787 */
9788 RTMemFree(pTb->Thrd.paCalls);
9789 pTb->Native.paInstructions = paFinalInstrBufRx;
9790 pTb->Native.cInstructions = off;
9791 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
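    /* At this point the TB owns the RX mapping and is flagged as native; the
       threaded call table it was generated from has just been freed. */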
9792#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9793    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9794 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9795#endif
9796
9797 Assert(pTbAllocator->cThreadedTbs > 0);
9798 pTbAllocator->cThreadedTbs -= 1;
9799 pTbAllocator->cNativeTbs += 1;
9800 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9801
9802#ifdef LOG_ENABLED
9803 /*
9804 * Disassemble to the log if enabled.
9805 */
9806 if (LogIs3Enabled())
9807 {
9808 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9809 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9810# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9811 RTLogFlush(NULL);
9812# endif
9813 }
9814#endif
9815 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9816
9817 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9818 return pTb;
9819}
9820