VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@107044

Last change on this file since 107044 was 106724, checked in by vboxsync, 4 weeks ago

VMM/IEM: Unwind info for win.arm64 (attempt at it, anyway). jiraref:1253

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 456.6 KB
1/* $Id: IEMAllN8veRecompiler.cpp 106724 2024-10-27 01:07:47Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : Delayed PC updating.
11 * - Level 5 (Log5) : Postponed and skipped EFLAGS calculations.
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80#include "target-x86/IEMAllN8veEmit-x86.h"
81
82
83/*
84 * Narrow down configs here to avoid wasting time on unused configs.
85 * Note! Same checks in IEMAllThrdRecompiler.cpp.
86 */
87
88#ifndef IEM_WITH_CODE_TLB
89# error The code TLB must be enabled for the recompiler.
90#endif
91
92#ifndef IEM_WITH_DATA_TLB
93# error The data TLB must be enabled for the recompiler.
94#endif
95
96#ifndef IEM_WITH_SETJMP
97# error The setjmp approach must be enabled for the recompiler.
98#endif
99
100/** @todo eliminate this clang build hack. */
101#if RT_CLANG_PREREQ(4, 0)
102# pragma GCC diagnostic ignored "-Wunused-function"
103#endif
104
105
106/*********************************************************************************************************************************
107* Internal Functions *
108*********************************************************************************************************************************/
109#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
110static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
111#endif
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
113DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
114 IEMNATIVEGSTREG enmGstReg, uint32_t off);
115DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
116static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
117
118
119
120/*********************************************************************************************************************************
121* Native Recompilation *
122*********************************************************************************************************************************/
123
124
125/**
126 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
127 */
128IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
129{
130 pVCpu->iem.s.cInstructions += idxInstr;
131 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
132}
133
134
135/**
136 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
137 */
138DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
139{
140 uint64_t fCpu = pVCpu->fLocalForcedActions;
141 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
142 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
143 | VMCPU_FF_TLB_FLUSH
144 | VMCPU_FF_UNHALT );
145 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
146 if (RT_LIKELY( ( !fCpu
147 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
148 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
149 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
150 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
151 return false;
152 return true;
153}
154
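/*
 * Illustrative sketch (not part of the recompiler): the nested RT_LIKELY
 * condition above, reduced to plain booleans.  The parameter names are
 * hypothetical simplifications of the force-flag state it inspects.
 */
#if 0
static bool sketchIsIrqOrForceFlagPending(bool fAnyVCpuForceFlags, bool fOnlyApicOrPicIrqs,
                                          bool fInterruptsEnabled, bool fInInterruptShadow,
                                          bool fAnyVmWideForceFlags)
{
    /* Nothing relevant pending on the vCPU and nothing VM wide: keep linking TBs. */
    if (!fAnyVCpuForceFlags && !fAnyVmWideForceFlags)
        return false;
    /* Only maskable interrupts pending, but they cannot be delivered right now. */
    if (   fOnlyApicOrPicIrqs
        && (!fInterruptsEnabled || fInInterruptShadow)
        && !fAnyVmWideForceFlags)
        return false;
    return true; /* Leave the TB so the outer loop can service whatever is pending. */
}
#endif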
155
156/**
157 * Used by TB code at the end of a TB to look up the next TB via the TB lookup
158 * table and switch directly to it when possible (direct linking).
158 */
159template<bool const a_fWithIrqCheck>
160IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
161 uint32_t fFlags, RTGCPHYS GCPhysPc))
162{
163 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
164 Assert(idxTbLookup < pTb->cTbLookupEntries);
165 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
166#if 1
167 PIEMTB const pNewTb = *ppNewTb;
168 if (pNewTb)
169 {
170# ifdef VBOX_STRICT
171 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
172 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
173 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
174 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
175 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
176 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
177# endif
178 if (pNewTb->GCPhysPc == GCPhysPc)
179 {
180# ifdef VBOX_STRICT
181 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
182 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
183 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
184 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
185 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
186# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
187 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
188# else
189 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
190 {
191 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
192 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
193 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
194 }
195# endif
196 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
197 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
198 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
199#endif
200
201 /*
202 * Check them + type.
203 */
204 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
205 {
206 /*
207 * Check for interrupts and stuff.
208 */
209 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
210 *        The main problems are the statistics and, to some degree, the logging. :/ */
211 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
212 {
213 /* Do polling. */
214 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
215 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
216 {
217 /*
218 * Success. Update statistics and switch to the next TB.
219 */
220 if (a_fWithIrqCheck)
221 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
222 else
223 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
224
225 pNewTb->cUsed += 1;
226 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
227 pVCpu->iem.s.pCurTbR3 = pNewTb;
228 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
229 pVCpu->iem.s.cTbExecNative += 1;
230 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
231 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
232 return (uintptr_t)pNewTb->Native.paInstructions;
233 }
234 }
235 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
236 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
237 }
238 else
239 {
240 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
241 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
242 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
243 }
244 }
245 else
246 {
247 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
248 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
249 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
250 }
251 }
252 else
253 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
254#else
255 NOREF(GCPhysPc);
256#endif
257
258 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
259 return 0;
260}
261
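/*
 * Illustrative sketch (assumption, simplified): the TB epilogue calling the
 * helper above treats its return value as "address of the next TB's native
 * code, or zero".  Conceptually:
 */
#if 0
static void sketchTbDirectLinkTail(uintptr_t uNextTbNativeCode)
{
    if (uNextTbNativeCode)
        ((void (*)(void))uNextTbNativeCode)(); /* tail-jump straight into the next TB */
    /* else: return to the dispatcher loop, which picks up pVCpu->iem.s.ppTbLookupEntryR3. */
}
#endif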
262
263/**
264 * Used by TB code at the end of a TB to look up the next TB via the TB lookup
265 * table, doing its own TLB lookup for the flat PC, and switch directly to it
266 * when possible (direct linking).
265 */
266template <bool const a_fWithIrqCheck>
267IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
268{
269 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
270 Assert(idxTbLookup < pTb->cTbLookupEntries);
271 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
272#if 1
273 PIEMTB const pNewTb = *ppNewTb;
274 if (pNewTb)
275 {
276 /*
277 * Calculate the flags for the next TB and check if they match.
278 */
279 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
280 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
281 { /* likely */ }
282 else
283 {
284 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
285 fFlags |= IEMTB_F_INHIBIT_SHADOW;
286 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
287 fFlags |= IEMTB_F_INHIBIT_NMI;
288 }
289 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
290 {
291 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
292 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
293 { /* likely */ }
294 else
295 fFlags |= IEMTB_F_CS_LIM_CHECKS;
296 }
297 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
298
299 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
300 {
301 /*
302 * Do the TLB lookup for flat RIP and compare the result with the next TB.
303 *
304 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
305 */
306 /* Calc the effective PC. */
307 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
308 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
309 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
310
311 /* Advance within the current buffer (PAGE) when possible. */
312 RTGCPHYS GCPhysPc;
313 uint64_t off;
314 if ( pVCpu->iem.s.pbInstrBuf
315 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
316 {
317 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
318 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
319 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
320 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
321 else
322 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
323 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
324 }
325 else
326 {
327 pVCpu->iem.s.pbInstrBuf = NULL;
328 pVCpu->iem.s.offCurInstrStart = 0;
329 pVCpu->iem.s.offInstrNextByte = 0;
330 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
331 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
332 }
333
334 if (pNewTb->GCPhysPc == GCPhysPc)
335 {
336 /*
337 * Check for interrupts and stuff.
338 */
339 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
340 *        The main problems are the statistics and, to some degree, the logging. :/ */
341 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
342 {
343 /* Do polling. */
344 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
345 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
346 {
347 /*
348 * Success. Update statistics and switch to the next TB.
349 */
350 if (a_fWithIrqCheck)
351 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
352 else
353 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
354
355 pNewTb->cUsed += 1;
356 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
357 pVCpu->iem.s.pCurTbR3 = pNewTb;
358 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
359 pVCpu->iem.s.cTbExecNative += 1;
360 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
361 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
362 return (uintptr_t)pNewTb->Native.paInstructions;
363 }
364 }
365 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
366 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
367 }
368 else
369 {
370 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
371 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
372 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
373 }
374 }
375 else
376 {
377 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
378 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
379 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
380 }
381 }
382 else
383 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
384#else
385 NOREF(fFlags);
386 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
387#endif
388
389 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
390 return 0;
391}
392
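/*
 * Illustrative sketch (not part of the recompiler): the unsigned "advance
 * within the current buffer" test used above.  Because the subtraction is
 * unsigned, a PC below uInstrBufPc wraps to a huge value and fails the same
 * single bounds check as a PC past the end of the buffer.
 */
#if 0
static bool sketchPcStillInInstrBuf(uint64_t uPc, uint64_t uInstrBufPc,
                                    uint32_t cbInstrBufTotal, uint64_t *poffInBuf)
{
    uint64_t const off = uPc - uInstrBufPc; /* wraps around if uPc < uInstrBufPc */
    *poffInBuf = off;
    return off < cbInstrBufTotal;           /* one compare covers both ends */
}
#endif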
393
394/**
395 * Used by TB code when it wants to raise a \#DE.
396 */
397IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
398{
399 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
400 iemRaiseDivideErrorJmp(pVCpu);
401#ifndef _MSC_VER
402 return VINF_IEM_RAISED_XCPT; /* not reached */
403#endif
404}
405
406
407/**
408 * Used by TB code when it wants to raise a \#UD.
409 */
410IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
411{
412 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
413 iemRaiseUndefinedOpcodeJmp(pVCpu);
414#ifndef _MSC_VER
415 return VINF_IEM_RAISED_XCPT; /* not reached */
416#endif
417}
418
419
420/**
421 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
422 *
423 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
424 */
425IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
426{
427 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
428 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
429 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
430 iemRaiseUndefinedOpcodeJmp(pVCpu);
431 else
432 iemRaiseDeviceNotAvailableJmp(pVCpu);
433#ifndef _MSC_VER
434 return VINF_IEM_RAISED_XCPT; /* not reached */
435#endif
436}
437
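/*
 * Note (inference from IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT, hedged): the emitted
 * TB code has presumably already established that an exception is due (CR0.EM or
 * CR0.TS set, or CR4.OSFXSR clear), so this helper only has to pick between
 * \#UD (emulation forced / FXSAVE support disabled) and \#NM (the remaining
 * CR0.TS case).
 */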
438
439/**
440 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
441 *
442 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
443 */
444IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
445{
446 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
447 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
448 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
449 iemRaiseUndefinedOpcodeJmp(pVCpu);
450 else
451 iemRaiseDeviceNotAvailableJmp(pVCpu);
452#ifndef _MSC_VER
453 return VINF_IEM_RAISED_XCPT; /* not reached */
454#endif
455}
456
457
458/**
459 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
460 *
461 * See IEM_MC_CALL_AVX_XXX/IEM_MC_CALL_SSE_XXX.
462 */
463IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
464{
465 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
466 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
467 iemRaiseSimdFpExceptionJmp(pVCpu);
468 else
469 iemRaiseUndefinedOpcodeJmp(pVCpu);
470#ifndef _MSC_VER
471 return VINF_IEM_RAISED_XCPT; /* not reached */
472#endif
473}
474
475
476/**
477 * Used by TB code when it wants to raise a \#NM.
478 */
479IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
480{
481 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
482 iemRaiseDeviceNotAvailableJmp(pVCpu);
483#ifndef _MSC_VER
484 return VINF_IEM_RAISED_XCPT; /* not reached */
485#endif
486}
487
488
489/**
490 * Used by TB code when it wants to raise a \#GP(0).
491 */
492IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
493{
494 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
495 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
496#ifndef _MSC_VER
497 return VINF_IEM_RAISED_XCPT; /* not reached */
498#endif
499}
500
501
502/**
503 * Used by TB code when it wants to raise a \#MF.
504 */
505IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
506{
507 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
508 iemRaiseMathFaultJmp(pVCpu);
509#ifndef _MSC_VER
510 return VINF_IEM_RAISED_XCPT; /* not reached */
511#endif
512}
513
514
515/**
516 * Used by TB code when it wants to raise a \#XF.
517 */
518IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
519{
520 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
521 iemRaiseSimdFpExceptionJmp(pVCpu);
522#ifndef _MSC_VER
523 return VINF_IEM_RAISED_XCPT; /* not reached */
524#endif
525}
526
527
528/**
529 * Used by TB code when detecting opcode changes.
530 * @see iemThreadeFuncWorkerObsoleteTb
531 */
532IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
533{
534 /* We set fSafeToFree to false because we're being called in the context
535 of a TB callback function, which for native TBs means we cannot release
536 the executable memory till we've returned all the way back to iemTbExec, as
537 that return path goes via the native code generated for the TB. */
538 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
539 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
540 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
541 return VINF_IEM_REEXEC_BREAK;
542}
543
544
545/**
546 * Used by TB code when we need to switch to a TB with CS.LIM checking.
547 */
548IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
549{
550 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
551 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
552 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
553 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
554 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
555 return VINF_IEM_REEXEC_BREAK;
556}
557
558
559/**
560 * Used by TB code when we missed a PC check after a branch.
561 */
562IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
563{
564 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
565 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
566 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
567 pVCpu->iem.s.pbInstrBuf));
568 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
569 return VINF_IEM_REEXEC_BREAK;
570}
571
572
573
574/*********************************************************************************************************************************
575* Helpers: Segmented memory fetches and stores. *
576*********************************************************************************************************************************/
577
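/*
 * Note on the pattern below (based on the #ifdef structure, hedged): when
 * IEMNATIVE_WITH_TLB_LOOKUP_FETCH/_STORE/_PUSH/_POP is defined, the recompiled
 * code performs the TLB lookup inline and these helpers are only reached on
 * the slow path, hence they call the *SafeJmp workers directly; otherwise the
 * helpers call the regular *Jmp workers, which do the full lookup themselves.
 */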
578/**
579 * Used by TB code to load unsigned 8-bit data w/ segmentation.
580 */
581IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
582{
583#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
584 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
585#else
586 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
587#endif
588}
589
590
591/**
592 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
593 * to 16 bits.
594 */
595IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
596{
597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
598 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
599#else
600 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
601#endif
602}
603
604
605/**
606 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
607 * to 32 bits.
608 */
609IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
610{
611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
612 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
613#else
614 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
615#endif
616}
617
618/**
619 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
620 * to 64 bits.
621 */
622IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
623{
624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
625 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
626#else
627 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
628#endif
629}
630
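/*
 * Illustrative sketch (not part of the recompiler): what the cast chains in
 * the sign-extending fetch helpers above do - sign-extend to the intermediate
 * width, then zero-extend the result to 64 bits so the generated code always
 * receives a fully defined 64-bit value.
 */
#if 0
static void sketchSignExtendingCasts(void)
{
    uint8_t const bValue = 0x80; /* -128 as a signed 8-bit value */
    Assert((uint64_t)(uint16_t)(int16_t)(int8_t)bValue == UINT64_C(0x000000000000ff80));
    Assert((uint64_t)(uint32_t)(int32_t)(int8_t)bValue == UINT64_C(0x00000000ffffff80));
    Assert((uint64_t)(int64_t)(int8_t)bValue           == UINT64_C(0xffffffffffffff80));
}
#endif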
631
632/**
633 * Used by TB code to load unsigned 16-bit data w/ segmentation.
634 */
635IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
636{
637#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
638 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
639#else
640 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
641#endif
642}
643
644
645/**
646 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
647 * to 32 bits.
648 */
649IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
650{
651#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
652 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
653#else
654 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
655#endif
656}
657
658
659/**
660 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
661 * to 64 bits.
662 */
663IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
664{
665#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
666 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
667#else
668 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
669#endif
670}
671
672
673/**
674 * Used by TB code to load unsigned 32-bit data w/ segmentation.
675 */
676IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
677{
678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
679 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
680#else
681 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
682#endif
683}
684
685
686/**
687 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
688 * to 64 bits.
689 */
690IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
691{
692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
693 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
694#else
695 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
696#endif
697}
698
699
700/**
701 * Used by TB code to load unsigned 64-bit data w/ segmentation.
702 */
703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
704{
705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
706 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
707#else
708 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
709#endif
710}
711
712
713/**
714 * Used by TB code to load 128-bit data w/ segmentation.
715 */
716IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
717{
718#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
719 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
720#else
721 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
722#endif
723}
724
725
726/**
727 * Used by TB code to load 128-bit data w/ segmentation, enforcing SSE alignment.
728 */
729IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
730{
731#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
732 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
733#else
734 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
735#endif
736}
737
738
739/**
740 * Used by TB code to load 128-bit data w/ segmentation, without alignment checks.
741 */
742IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
743{
744#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
745 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
746#else
747 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
748#endif
749}
750
751
752/**
753 * Used by TB code to load 256-bit data w/ segmentation, without alignment checks.
754 */
755IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
756{
757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
758 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
759#else
760 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
761#endif
762}
763
764
765/**
766 * Used by TB code to load 256-bit data w/ segmentation, enforcing AVX alignment.
767 */
768IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
769{
770#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
771 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
772#else
773 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
774#endif
775}
776
777
778/**
779 * Used by TB code to store unsigned 8-bit data w/ segmentation.
780 */
781IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
782{
783#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
784 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
785#else
786 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
787#endif
788}
789
790
791/**
792 * Used by TB code to store unsigned 16-bit data w/ segmentation.
793 */
794IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
795{
796#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
797 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
798#else
799 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
800#endif
801}
802
803
804/**
805 * Used by TB code to store unsigned 32-bit data w/ segmentation.
806 */
807IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
808{
809#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
810 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
811#else
812 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
813#endif
814}
815
816
817/**
818 * Used by TB code to store unsigned 64-bit data w/ segmentation.
819 */
820IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
821{
822#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
823 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
824#else
825 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
826#endif
827}
828
829
830/**
831 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing SSE alignment.
832 */
833IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
834{
835#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
836 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
837#else
838 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
839#endif
840}
841
842
843/**
844 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checks.
845 */
846IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
847{
848#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
849 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
850#else
851 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
852#endif
853}
854
855
856/**
857 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checks.
858 */
859IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
860{
861#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
862 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
863#else
864 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
865#endif
866}
867
868
869/**
870 * Used by TB code to store unsigned 256-bit data w/ segmentation, enforcing AVX alignment.
871 */
872IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
873{
874#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
875 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
876#else
877 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
878#endif
879}
880
881
882/**
883 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
884 */
885IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
886{
887#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
888 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
889#else
890 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
891#endif
892}
893
894
895/**
896 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
897 */
898IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
899{
900#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
901 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
902#else
903 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
904#endif
905}
906
907
908/**
909 * Used by TB code to store a 32-bit selector value onto a generic stack.
910 *
911 * Intel CPUs don't write a whole dword, hence the special function.
912 */
913IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
914{
915#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
916 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
917#else
918 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
919#endif
920}
921
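/*
 * Illustrative sketch (simplification, not the real stack code): with a 32-bit
 * operand size, pushing a segment register on recent Intel CPUs only writes
 * the low word of the dword stack slot, leaving the upper word as it was -
 * hence the dedicated SReg helper above.
 */
#if 0
static void sketchStoreSRegIntoDwordSlot(uint8_t *pbDwordSlot, uint16_t uSel)
{
    pbDwordSlot[0] = (uint8_t)uSel;        /* only the low 16 bits are written,   */
    pbDwordSlot[1] = (uint8_t)(uSel >> 8); /* bytes 2 and 3 keep their old value. */
}
#endif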
922
923/**
924 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
925 */
926IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
927{
928#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
929 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
930#else
931 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
932#endif
933}
934
935
936/**
937 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
938 */
939IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
940{
941#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
942 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
943#else
944 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
945#endif
946}
947
948
949/**
950 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
951 */
952IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
953{
954#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
955 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
956#else
957 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
958#endif
959}
960
961
962/**
963 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
964 */
965IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
966{
967#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
968 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
969#else
970 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
971#endif
972}
973
974
975
976/*********************************************************************************************************************************
977* Helpers: Flat memory fetches and stores. *
978*********************************************************************************************************************************/
979
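/*
 * Note (based on the calls below): the flat variants pass UINT8_MAX as the
 * segment register index to the *SafeJmp workers, which is how IEM encodes
 * "no segment / flat address" on that path.
 */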
980/**
981 * Used by TB code to load unsigned 8-bit data w/ flat address.
982 * @note Zero extending the value to 64-bit to simplify assembly.
983 */
984IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
985{
986#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
987 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
988#else
989 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
990#endif
991}
992
993
994/**
995 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
996 * to 16 bits.
997 * @note Zero extending the value to 64-bit to simplify assembly.
998 */
999IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1000{
1001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1002 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1003#else
1004 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1005#endif
1006}
1007
1008
1009/**
1010 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1011 * to 32 bits.
1012 * @note Zero extending the value to 64-bit to simplify assembly.
1013 */
1014IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1015{
1016#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1017 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1018#else
1019 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1020#endif
1021}
1022
1023
1024/**
1025 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1026 * to 64 bits.
1027 */
1028IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1029{
1030#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1031 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1032#else
1033 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1034#endif
1035}
1036
1037
1038/**
1039 * Used by TB code to load unsigned 16-bit data w/ flat address.
1040 * @note Zero extending the value to 64-bit to simplify assembly.
1041 */
1042IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1043{
1044#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1045 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1046#else
1047 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1048#endif
1049}
1050
1051
1052/**
1053 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1054 * to 32 bits.
1055 * @note Zero extending the value to 64-bit to simplify assembly.
1056 */
1057IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1058{
1059#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1060 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1061#else
1062 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1063#endif
1064}
1065
1066
1067/**
1068 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1069 * to 64 bits.
1070 * @note Zero extending the value to 64-bit to simplify assembly.
1071 */
1072IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1073{
1074#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1075 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1076#else
1077 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1078#endif
1079}
1080
1081
1082/**
1083 * Used by TB code to load unsigned 32-bit data w/ flat address.
1084 * @note Zero extending the value to 64-bit to simplify assembly.
1085 */
1086IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1087{
1088#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1089 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1090#else
1091 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1092#endif
1093}
1094
1095
1096/**
1097 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1098 * to 64 bits.
1099 * @note Zero extending the value to 64-bit to simplify assembly.
1100 */
1101IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1102{
1103#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1104 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1105#else
1106 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1107#endif
1108}
1109
1110
1111/**
1112 * Used by TB code to load unsigned 64-bit data w/ flat address.
1113 */
1114IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1115{
1116#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1117 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1118#else
1119 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1120#endif
1121}
1122
1123
1124/**
1125 * Used by TB code to load unsigned 128-bit data w/ flat address.
1126 */
1127IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1128{
1129#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1130 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1131#else
1132 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1133#endif
1134}
1135
1136
1137/**
1138 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing SSE alignment.
1139 */
1140IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1141{
1142#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1143 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1144#else
1145 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1146#endif
1147}
1148
1149
1150/**
1151 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checks.
1152 */
1153IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1154{
1155#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1156 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1157#else
1158 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1159#endif
1160}
1161
1162
1163/**
1164 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checks.
1165 */
1166IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1167{
1168#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1169 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1170#else
1171 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1172#endif
1173}
1174
1175
1176/**
1177 * Used by TB code to load unsigned 256-bit data w/ flat address, enforcing AVX alignment.
1178 */
1179IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1180{
1181#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1182 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1183#else
1184 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1185#endif
1186}
1187
1188
1189/**
1190 * Used by TB code to store unsigned 8-bit data w/ flat address.
1191 */
1192IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1193{
1194#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1195 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1196#else
1197 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1198#endif
1199}
1200
1201
1202/**
1203 * Used by TB code to store unsigned 16-bit data w/ flat address.
1204 */
1205IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1206{
1207#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1208 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1209#else
1210 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1211#endif
1212}
1213
1214
1215/**
1216 * Used by TB code to store unsigned 32-bit data w/ flat address.
1217 */
1218IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1219{
1220#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1221 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1222#else
1223 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1224#endif
1225}
1226
1227
1228/**
1229 * Used by TB code to store unsigned 64-bit data w/ flat address.
1230 */
1231IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1232{
1233#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1234 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1235#else
1236 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1237#endif
1238}
1239
1240
1241/**
1242 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing SSE alignment.
1243 */
1244IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1245{
1246#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1247 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1248#else
1249 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1250#endif
1251}
1252
1253
1254/**
1255 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checks.
1256 */
1257IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1258{
1259#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1260 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1261#else
1262 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1263#endif
1264}
1265
1266
1267/**
1268 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checks.
1269 */
1270IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1271{
1272#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1273 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1274#else
1275 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1276#endif
1277}
1278
1279
1280/**
1281 * Used by TB code to store unsigned 256-bit data w/ flat address, enforcing AVX alignment.
1282 */
1283IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1284{
1285#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1286 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1287#else
1288 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1289#endif
1290}
1291
1292
1293/**
1294 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1295 */
1296IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1297{
1298#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1299 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1300#else
1301 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1302#endif
1303}
1304
1305
1306/**
1307 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1308 */
1309IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1310{
1311#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1312 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1313#else
1314 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1315#endif
1316}
1317
1318
1319/**
1320 * Used by TB code to store a segment selector value onto a flat stack.
1321 *
1322 * Intel CPUs don't write a whole dword, hence the special function.
1323 */
1324IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1325{
1326#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1327 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1328#else
1329 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1330#endif
1331}
1332
1333
1334/**
1335 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1336 */
1337IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1338{
1339#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1340 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1341#else
1342 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1343#endif
1344}
1345
1346
1347/**
1348 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1349 */
1350IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1351{
1352#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1353 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1354#else
1355 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1356#endif
1357}
1358
1359
1360/**
1361 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1362 */
1363IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1364{
1365#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1366 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1367#else
1368 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1369#endif
1370}
1371
1372
1373/**
1374 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1375 */
1376IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1377{
1378#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1379 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1380#else
1381 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1382#endif
1383}
1384
1385
1386
1387/*********************************************************************************************************************************
1388* Helpers: Segmented memory mapping. *
1389*********************************************************************************************************************************/
1390
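/*
 * Illustrative sketch (assumption, simplified): the typical way TB code uses
 * these mapping helpers - map, modify through the returned pointer, then
 * commit and unmap.  The commit/unmap helper named below is assumed and not
 * part of this excerpt.
 */
#if 0
static void sketchReadModifyWriteU32(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg)
{
    uint8_t   bUnmapInfo = 0;
    uint32_t *pu32 = iemNativeHlpMemMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem, iSegReg);
    *pu32 += 1;                                         /* operate directly on the guest page mapping */
    iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo); /* hypothetical commit & unmap counterpart */
}
#endif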
1391/**
1392 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1393 * segmentation.
1394 */
1395IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1396 RTGCPTR GCPtrMem, uint8_t iSegReg))
1397{
1398#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1399 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1400#else
1401 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1402#endif
1403}
1404
1405
1406/**
1407 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1408 */
1409IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1410 RTGCPTR GCPtrMem, uint8_t iSegReg))
1411{
1412#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1413 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1414#else
1415 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1416#endif
1417}
1418
1419
1420/**
1421 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1422 */
1423IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1424 RTGCPTR GCPtrMem, uint8_t iSegReg))
1425{
1426#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1427 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1428#else
1429 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1430#endif
1431}
1432
1433
1434/**
1435 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1436 */
1437IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1438 RTGCPTR GCPtrMem, uint8_t iSegReg))
1439{
1440#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1441 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1442#else
1443 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1444#endif
1445}
1446
1447
1448/**
1449 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1450 * segmentation.
1451 */
1452IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1453 RTGCPTR GCPtrMem, uint8_t iSegReg))
1454{
1455#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1456 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1457#else
1458 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1459#endif
1460}
1461
1462
1463/**
1464 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1465 */
1466IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1467 RTGCPTR GCPtrMem, uint8_t iSegReg))
1468{
1469#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1470 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1471#else
1472 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1473#endif
1474}
1475
1476
1477/**
1478 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1479 */
1480IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1481 RTGCPTR GCPtrMem, uint8_t iSegReg))
1482{
1483#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1484 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1485#else
1486 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1487#endif
1488}
1489
1490
1491/**
1492 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1493 */
1494IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1495 RTGCPTR GCPtrMem, uint8_t iSegReg))
1496{
1497#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1498 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1499#else
1500 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1501#endif
1502}
1503
1504
1505/**
1506 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1507 * segmentation.
1508 */
1509IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1510 RTGCPTR GCPtrMem, uint8_t iSegReg))
1511{
1512#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1513 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1514#else
1515 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1516#endif
1517}
1518
1519
1520/**
1521 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1522 */
1523IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1524 RTGCPTR GCPtrMem, uint8_t iSegReg))
1525{
1526#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1527 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1528#else
1529 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1530#endif
1531}
1532
1533
1534/**
1535 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1536 */
1537IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1538 RTGCPTR GCPtrMem, uint8_t iSegReg))
1539{
1540#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1541 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1542#else
1543 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1544#endif
1545}
1546
1547
1548/**
1549 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1550 */
1551IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1552 RTGCPTR GCPtrMem, uint8_t iSegReg))
1553{
1554#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1555 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1556#else
1557 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1558#endif
1559}
1560
1561
1562/**
1563 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1564 * segmentation.
1565 */
1566IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1567 RTGCPTR GCPtrMem, uint8_t iSegReg))
1568{
1569#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1570 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1571#else
1572 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1573#endif
1574}
1575
1576
1577/**
1578 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1581 RTGCPTR GCPtrMem, uint8_t iSegReg))
1582{
1583#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1584 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1585#else
1586 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1587#endif
1588}
1589
1590
1591/**
1592 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1593 */
1594IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1595 RTGCPTR GCPtrMem, uint8_t iSegReg))
1596{
1597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1598 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1599#else
1600 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1607 */
1608IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1609 RTGCPTR GCPtrMem, uint8_t iSegReg))
1610{
1611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1612 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1613#else
1614 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1615#endif
1616}
1617
1618
1619/**
1620 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1621 */
1622IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1623 RTGCPTR GCPtrMem, uint8_t iSegReg))
1624{
1625#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1626 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1627#else
1628 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1629#endif
1630}
1631
1632
1633/**
1634 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1635 */
1636IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1637 RTGCPTR GCPtrMem, uint8_t iSegReg))
1638{
1639#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1640 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1641#else
1642 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1643#endif
1644}
1645
1646
1647/**
1648 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1649 * segmentation.
1650 */
1651IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1652 RTGCPTR GCPtrMem, uint8_t iSegReg))
1653{
1654#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1655 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1656#else
1657 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1658#endif
1659}
1660
1661
1662/**
1663 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1664 */
1665IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1666 RTGCPTR GCPtrMem, uint8_t iSegReg))
1667{
1668#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1669 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1670#else
1671 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1672#endif
1673}
1674
1675
1676/**
1677 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1678 */
1679IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1680 RTGCPTR GCPtrMem, uint8_t iSegReg))
1681{
1682#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1683 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1684#else
1685 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1686#endif
1687}
1688
1689
1690/**
1691 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1694 RTGCPTR GCPtrMem, uint8_t iSegReg))
1695{
1696#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1697 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1698#else
1699 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1700#endif
1701}
1702
1703
1704/*********************************************************************************************************************************
1705* Helpers: Flat memory mapping. *
1706*********************************************************************************************************************************/
1707
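/*
 * All the flat helpers below follow the same pattern: when
 * IEMNATIVE_WITH_TLB_LOOKUP_MAPPED is defined they fall back on the segmented
 * iemMemMapData*SafeJmp workers, passing UINT8_MAX as the segment register to
 * indicate a flat address; otherwise they call the dedicated
 * iemMemFlatMapData*Jmp workers directly.
 */
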
1708/**
1709 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1710 * address.
1711 */
1712IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1713{
1714#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1715 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1716#else
1717 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1718#endif
1719}
1720
1721
1722/**
1723 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1724 */
1725IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1726{
1727#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1728 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1729#else
1730 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1731#endif
1732}
1733
1734
1735/**
1736 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1737 */
1738IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1739{
1740#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1741 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1742#else
1743 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1744#endif
1745}
1746
1747
1748/**
1749 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1750 */
1751IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1752{
1753#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1754 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1755#else
1756 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1757#endif
1758}
1759
1760
1761/**
1762 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1763 * address.
1764 */
1765IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1766{
1767#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1768 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1769#else
1770 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1771#endif
1772}
1773
1774
1775/**
1776 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1777 */
1778IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1779{
1780#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1781 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1782#else
1783 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1784#endif
1785}
1786
1787
1788/**
1789 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1790 */
1791IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1792{
1793#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1794 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1795#else
1796 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1797#endif
1798}
1799
1800
1801/**
1802 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1803 */
1804IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1805{
1806#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1807 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1808#else
1809 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1810#endif
1811}
1812
1813
1814/**
1815 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1816 * address.
1817 */
1818IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1819{
1820#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1821 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1822#else
1823 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1824#endif
1825}
1826
1827
1828/**
1829 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1830 */
1831IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1832{
1833#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1834 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1835#else
1836 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1837#endif
1838}
1839
1840
1841/**
1842 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1843 */
1844IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1845{
1846#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1847 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1848#else
1849 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1850#endif
1851}
1852
1853
1854/**
1855 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1856 */
1857IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1858{
1859#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1860 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1861#else
1862 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1863#endif
1864}
1865
1866
1867/**
1868 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1869 * address.
1870 */
1871IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1872{
1873#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1874 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1875#else
1876 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1877#endif
1878}
1879
1880
1881/**
1882 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1883 */
1884IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1885{
1886#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1887 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1888#else
1889 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1890#endif
1891}
1892
1893
1894/**
1895 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1896 */
1897IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1898{
1899#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1900 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1901#else
1902 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1903#endif
1904}
1905
1906
1907/**
1908 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1909 */
1910IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1911{
1912#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1913 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1914#else
1915 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1916#endif
1917}
1918
1919
1920/**
1921 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1922 */
1923IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1924{
1925#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1926 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1927#else
1928 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1929#endif
1930}
1931
1932
1933/**
1934 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1935 */
1936IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1937{
1938#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1939 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1940#else
1941 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1942#endif
1943}
1944
1945
1946/**
1947 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1948 * address.
1949 */
1950IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1951{
1952#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1953 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1954#else
1955 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1956#endif
1957}
1958
1959
1960/**
1961 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1962 */
1963IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1964{
1965#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1966 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1967#else
1968 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1969#endif
1970}
1971
1972
1973/**
1974 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1975 */
1976IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1977{
1978#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1979 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1980#else
1981 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1982#endif
1983}
1984
1985
1986/**
1987 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1988 */
1989IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1990{
1991#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1992 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1993#else
1994 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1995#endif
1996}
1997
1998
1999/*********************************************************************************************************************************
2000* Helpers: Commit, rollback & unmap *
2001*********************************************************************************************************************************/
2002
2003/**
2004 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2005 */
2006IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2007{
2008 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2009}
2010
2011
2012/**
2013 * Used by TB code to commit and unmap a read-write memory mapping.
2014 */
2015IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2016{
2017 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2018}
2019
2020
2021/**
2022 * Used by TB code to commit and unmap a write-only memory mapping.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2025{
2026 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2027}
2028
2029
2030/**
2031 * Used by TB code to commit and unmap a read-only memory mapping.
2032 */
2033IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2034{
2035 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2036}
2037
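/*
 * Illustrative pairing of the helpers above (a plain C sketch, not actual
 * recompiler output; GCPtrMem and uValue are placeholders): a mapping
 * established by one of the map helpers is released again through the
 * matching commit-and-unmap helper using the bUnmapInfo cookie.
 *
 * @code
 *      uint8_t   bUnmapInfo;
 *      uint32_t *pu32Dst = iemNativeHlpMemFlatMapDataU32Wo(pVCpu, &bUnmapInfo, GCPtrMem);
 *      *pu32Dst = uValue;
 *      iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo);
 * @endcode
 */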
2038
2039/**
2040 * Reinitializes the native recompiler state.
2041 *
2042 * Called before starting a new recompile job.
2043 */
2044static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2045{
2046 pReNative->cLabels = 0;
2047 pReNative->bmLabelTypes = 0;
2048 pReNative->cFixups = 0;
2049 pReNative->cTbExitFixups = 0;
2050#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2051 pReNative->pDbgInfo->cEntries = 0;
2052 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2053#endif
2054 pReNative->pTbOrg = pTb;
2055 pReNative->cCondDepth = 0;
2056 pReNative->uCondSeqNo = 0;
2057 pReNative->uCheckIrqSeqNo = 0;
2058 pReNative->uTlbSeqNo = 0;
2059#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
2060 pReNative->fSkippingEFlags = 0;
2061#endif
2062#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2063 pReNative->PostponedEfl.fEFlags = 0;
2064 pReNative->PostponedEfl.enmOp = kIemNativePostponedEflOp_Invalid;
2065 pReNative->PostponedEfl.cOpBits = 0;
2066 pReNative->PostponedEfl.idxReg1 = UINT8_MAX;
2067 pReNative->PostponedEfl.idxReg2 = UINT8_MAX;
2068#endif
2069
2070#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2071 pReNative->Core.offPc = 0;
2072# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(VBOX_WITH_STATISTICS)
2073 pReNative->idxInstrPlusOneOfLastPcUpdate = 0;
2074# endif
2075# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2076 pReNative->Core.fDebugPcInitialized = false;
2077# endif
2078#endif
2079 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2080 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2081#if IEMNATIVE_HST_GREG_COUNT < 32
2082 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2083#endif
2084 ;
2085 pReNative->Core.bmHstRegsWithGstShadow = 0;
2086 pReNative->Core.bmGstRegShadows = 0;
2087#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2088 pReNative->Core.bmGstRegShadowDirty = 0;
2089#endif
2090 pReNative->Core.bmVars = 0;
2091 pReNative->Core.bmStack = 0;
2092 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2093 pReNative->Core.u64ArgVars = UINT64_MAX;
2094
2095 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 23);
2096 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2097 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2098 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2099 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2100 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2101 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2102 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2103 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2104 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2105 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2106 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2107 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2108 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2118 pReNative->aidxUniqueLabels[22] = UINT32_MAX;
2119
2120 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2121
2122 /* Full host register reinit: */
2123 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2124 {
2125 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2126 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2127 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2128 }
2129
2130 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2131 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2132#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2133 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2134#endif
2135#ifdef IEMNATIVE_REG_FIXED_TMP0
2136 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2137#endif
2138#ifdef IEMNATIVE_REG_FIXED_TMP1
2139 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2140#endif
2141#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2142 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2143#endif
2144 );
2145 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2146 {
2147 fRegs &= ~RT_BIT_32(idxReg);
2148        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2149 }
2150
2151 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2152#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2153 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2154#endif
2155#ifdef IEMNATIVE_REG_FIXED_TMP0
2156 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2157#endif
2158#ifdef IEMNATIVE_REG_FIXED_TMP1
2159 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2160#endif
2161#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2162 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2163#endif
2164
2165 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2166#if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2167 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2168#endif
2169 ;
2170 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2171 pReNative->Core.bmGstSimdRegShadows = 0;
2172 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2173 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2174
2175 /* Full host register reinit: */
2176 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2177 {
2178 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2179 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2180 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2181 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2182 }
2183
2184 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2185 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2186 {
2187 fRegs &= ~RT_BIT_32(idxReg);
2188 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2189 }
2190
2191#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2192 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2193#endif
2194
2195 return pReNative;
2196}
2197
2198
2199/**
2200 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2201 */
2202static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2203{
2204 RTMemFree(pReNative->pInstrBuf);
2205 RTMemFree(pReNative->paLabels);
2206 RTMemFree(pReNative->paFixups);
2207 RTMemFree(pReNative->paTbExitFixups);
2208#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2209 RTMemFree(pReNative->pDbgInfo);
2210#endif
2211 RTMemFree(pReNative);
2212}
2213
2214
2215/**
2216 * Allocates and initializes the native recompiler state.
2217 *
2218 * This is called the first time an EMT wants to recompile something.
2219 *
2220 * @returns Pointer to the new recompiler state.
2221 * @param pVCpu The cross context virtual CPU structure of the calling
2222 * thread.
2223 * @param pTb The TB that's about to be recompiled. When this is NULL,
2224 * the recompiler state is for emitting the common per-chunk
2225 * code from iemNativeRecompileAttachExecMemChunkCtx.
2226 * @thread EMT(pVCpu)
2227 */
2228static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2229{
2230 VMCPU_ASSERT_EMT(pVCpu);
2231
2232 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2233 AssertReturn(pReNative, NULL);
2234
2235 /*
2236 * Try allocate all the buffers and stuff we need.
2237 */
2238 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2239 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2240 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2241 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2242 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2243#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2244 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2245#endif
2246 if (RT_LIKELY( pReNative->pInstrBuf
2247 && pReNative->paLabels
2248 && pReNative->paFixups
2249 && pReNative->paTbExitFixups)
2250#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2251 && pReNative->pDbgInfo
2252#endif
2253 )
2254 {
2255 /*
2256 * Set the buffer & array sizes on success.
2257 */
2258 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2259 pReNative->cLabelsAlloc = _8K / cFactor;
2260 pReNative->cFixupsAlloc = _16K / cFactor;
2261 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2263 pReNative->cDbgInfoAlloc = _16K / cFactor;
2264#endif
2265
2266 /* Other constant stuff: */
2267 pReNative->pVCpu = pVCpu;
2268
2269 /*
2270 * Done, just reinit it.
2271 */
2272 return iemNativeReInit(pReNative, pTb);
2273 }
2274
2275 /*
2276 * Failed. Cleanup and return.
2277 */
2278 AssertFailed();
2279 iemNativeTerm(pReNative);
2280 return NULL;
2281}
2282
2283
2284/**
2285 * Creates a label
2286 *
2287 * If the label does not yet have a defined position,
2288 * call iemNativeLabelDefine() later to set it.
2289 *
2290 * @returns Label ID. Throws VBox status code on failure, so no need to check
2291 * the return value.
2292 * @param pReNative The native recompile state.
2293 * @param enmType The label type.
2294 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2295 * label is not yet defined (default).
2296 * @param uData Data associated with the label. Only applicable to
2297 * certain type of labels. Default is zero.
2298 */
2299DECL_HIDDEN_THROW(uint32_t)
2300iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2301 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2302{
2303 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2304#if defined(RT_ARCH_AMD64)
2305 Assert(enmType >= kIemNativeLabelType_LoopJumpTarget);
2306#endif
2307
2308 /*
2309 * Locate existing label definition.
2310 *
2311 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2312 * and uData is zero.
2313 */
2314 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2315 uint32_t const cLabels = pReNative->cLabels;
2316 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2317#ifndef VBOX_STRICT
2318 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2319 && offWhere == UINT32_MAX
2320 && uData == 0
2321#endif
2322 )
2323 {
2324#ifndef VBOX_STRICT
2325 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2326 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2327 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2328 if (idxLabel < pReNative->cLabels)
2329 return idxLabel;
2330#else
2331 for (uint32_t i = 0; i < cLabels; i++)
2332 if ( paLabels[i].enmType == enmType
2333 && paLabels[i].uData == uData)
2334 {
2335 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2336 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2337 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2338 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2339 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2340 return i;
2341 }
2342 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2343 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2344#endif
2345 }
2346
2347 /*
2348 * Make sure we've got room for another label.
2349 */
2350 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2351 { /* likely */ }
2352 else
2353 {
2354 uint32_t cNew = pReNative->cLabelsAlloc;
2355 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2356 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2357 cNew *= 2;
2358 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
2359 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2360 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2361 pReNative->paLabels = paLabels;
2362 pReNative->cLabelsAlloc = cNew;
2363 }
2364
2365 /*
2366 * Define a new label.
2367 */
2368 paLabels[cLabels].off = offWhere;
2369 paLabels[cLabels].enmType = enmType;
2370 paLabels[cLabels].uData = uData;
2371 pReNative->cLabels = cLabels + 1;
2372
2373 Assert((unsigned)enmType < 64);
2374 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2375
2376 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2377 {
2378 Assert(uData == 0);
2379 pReNative->aidxUniqueLabels[enmType] = cLabels;
2380 }
2381
2382 if (offWhere != UINT32_MAX)
2383 {
2384#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2385 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2386 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2387#endif
2388 }
2389 return cLabels;
2390}
2391
2392
2393/**
2394 * Defines the location of an existing label.
2395 *
2396 * @param pReNative The native recompile state.
2397 * @param idxLabel The label to define.
2398 * @param offWhere The position.
2399 */
2400DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2401{
2402 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2403 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2404 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2405 pLabel->off = offWhere;
2406#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2407 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2408 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2409#endif
2410}
2411
2412
2413/**
2414 * Looks up a label.
2415 *
2416 * @returns Label ID if found, UINT32_MAX if not.
2417 */
2418DECLHIDDEN(uint32_t) iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2419 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
2420{
2421 Assert((unsigned)enmType < 64);
2422 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2423 {
2424 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2425 return pReNative->aidxUniqueLabels[enmType];
2426
2427 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2428 uint32_t const cLabels = pReNative->cLabels;
2429 for (uint32_t i = 0; i < cLabels; i++)
2430 if ( paLabels[i].enmType == enmType
2431 && paLabels[i].uData == uData
2432 && ( paLabels[i].off == offWhere
2433 || offWhere == UINT32_MAX
2434 || paLabels[i].off == UINT32_MAX))
2435 return i;
2436 }
2437 return UINT32_MAX;
2438}
2439
2440
2441/**
2442 * Adds a fixup.
2443 *
2444 * @throws VBox status code (int) on failure.
2445 * @param pReNative The native recompile state.
2446 * @param offWhere The instruction offset of the fixup location.
2447 * @param idxLabel The target label ID for the fixup.
2448 * @param enmType The fixup type.
2449 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2450 */
2451DECL_HIDDEN_THROW(void)
2452iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2453 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2454{
2455 Assert(idxLabel <= UINT16_MAX);
2456 Assert((unsigned)enmType <= UINT8_MAX);
2457#ifdef RT_ARCH_ARM64
2458 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2459 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2460 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2461#endif
2462
2463 /*
2464 * Make sure we've room.
2465 */
2466 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2467 uint32_t const cFixups = pReNative->cFixups;
2468 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2469 { /* likely */ }
2470 else
2471 {
2472 uint32_t cNew = pReNative->cFixupsAlloc;
2473 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2474 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2475 cNew *= 2;
2476 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2477 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2478 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2479 pReNative->paFixups = paFixups;
2480 pReNative->cFixupsAlloc = cNew;
2481 }
2482
2483 /*
2484 * Add the fixup.
2485 */
2486 paFixups[cFixups].off = offWhere;
2487 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2488 paFixups[cFixups].enmType = enmType;
2489 paFixups[cFixups].offAddend = offAddend;
2490 pReNative->cFixups = cFixups + 1;
2491}
2492
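/*
 * Illustrative sketch of how the label and fixup APIs above combine for a
 * forward branch (enmLabelType, uData, offOfBranchImm and off are placeholders
 * for this example; the fixup type shown is the AMD64 rel32 one, ARM64 uses
 * the RelImm* variants):
 *
 * @code
 *      // Forward-declare a label whose position isn't known yet (offWhere = UINT32_MAX).
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, uData);
 *      // Emit the branch instruction, then record a fixup against the unresolved label.
 *      iemNativeAddFixup(pReNative, offOfBranchImm, idxLabel, kIemNativeFixupType_Rel32, 0);
 *      ... more recompiled code ...
 *      // Once code generation reaches the target, pin the label to the current offset.
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 * @endcode
 */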
2493
2494/**
2495 * Adds a fixup to the per-chunk tail code.
2496 *
2497 * @throws VBox status code (int) on failure.
2498 * @param pReNative The native recompile state.
2499 * @param offWhere The instruction offset of the fixup location.
2500 * @param enmExitReason The exit reason to jump to.
2501 */
2502DECL_HIDDEN_THROW(void)
2503iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2504{
2505 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2506
2507 /*
2508 * Make sure we've room.
2509 */
2510 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2511 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2512 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2513 { /* likely */ }
2514 else
2515 {
2516 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2517 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2518 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2519 cNew *= 2;
2520 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2521 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2522 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2523 pReNative->paTbExitFixups = paTbExitFixups;
2524 pReNative->cTbExitFixupsAlloc = cNew;
2525 }
2526
2527 /*
2528 * Add the fixup.
2529 */
2530 paTbExitFixups[cTbExitFixups].off = offWhere;
2531 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2532 pReNative->cTbExitFixups = cTbExitFixups + 1;
2533}
2534
2535
2536/**
2537 * Slow code path for iemNativeInstrBufEnsure.
2538 */
2539DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2540{
2541 /* Double the buffer size till we meet the request. */
2542 uint32_t cNew = pReNative->cInstrBufAlloc;
2543 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2544 do
2545 cNew *= 2;
2546 while (cNew < off + cInstrReq);
2547
2548 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2549#ifdef RT_ARCH_ARM64
2550 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2551#else
2552 uint32_t const cbMaxInstrBuf = _2M;
2553#endif
2554 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2555
2556 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2557 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2558
2559#ifdef VBOX_STRICT
2560 pReNative->offInstrBufChecked = off + cInstrReq;
2561#endif
2562 pReNative->cInstrBufAlloc = cNew;
2563 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2564}
2565
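/*
 * Worked example of the doubling above (illustrative numbers, assuming 4-byte
 * instruction units on ARM64): the initial 64 KB buffer holds 16K entries; a
 * request needing off + cInstrReq = 40K entries doubles it twice to 64K
 * entries (256 KB), and growth is capped at 1 MB on ARM64 (branch range)
 * resp. 2 MB elsewhere.
 */
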
2566#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2567
2568/**
2569 * Grows the static debug info array used during recompilation.
2570 *
2571 * @returns Pointer to the new debug info block; throws VBox status code on
2572 * failure, so no need to check the return value.
2573 */
2574DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2575{
2576 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2577 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2578 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2579 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2580 pReNative->pDbgInfo = pDbgInfo;
2581 pReNative->cDbgInfoAlloc = cNew;
2582 return pDbgInfo;
2583}
2584
2585
2586/**
2587 * Adds a new, uninitialized debug info entry, returning a pointer to it.
2588 */
2589DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2590{
2591 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2592 { /* likely */ }
2593 else
2594 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2595 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2596}
2597
2598
2599/**
2600 * Debug Info: Adds a native offset record, if necessary.
2601 */
2602DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2603{
2604 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2605
2606 /*
2607 * Do we need this one?
2608 */
2609 uint32_t const offPrev = pDbgInfo->offNativeLast;
2610 if (offPrev == off)
2611 return;
2612 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2613
2614 /*
2615 * Add it.
2616 */
2617 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2618 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2619 pEntry->NativeOffset.offNative = off;
2620 pDbgInfo->offNativeLast = off;
2621}
2622
2623
2624/**
2625 * Debug Info: Record info about a label.
2626 */
2627static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2628{
2629 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2630 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2631 pEntry->Label.uUnused = 0;
2632 pEntry->Label.enmLabel = (uint8_t)enmType;
2633 pEntry->Label.uData = uData;
2634}
2635
2636
2637/**
2638 * Debug Info: Record info about a threaded call.
2639 */
2640static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2641{
2642 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2643 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2644 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2645 pEntry->ThreadedCall.uUnused = 0;
2646 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2647}
2648
2649
2650/**
2651 * Debug Info: Record info about a new guest instruction.
2652 */
2653static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2654{
2655 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2656 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2657 pEntry->GuestInstruction.uUnused = 0;
2658 pEntry->GuestInstruction.fExec = fExec;
2659}
2660
2661
2662/**
2663 * Debug Info: Record info about guest register shadowing.
2664 */
2665DECL_HIDDEN_THROW(void)
2666iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2667 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2668{
2669 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2670 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2671 pEntry->GuestRegShadowing.uUnused = 0;
2672 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2673 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2674 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2675# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2676 Assert( idxHstReg != UINT8_MAX
2677 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2678# endif
2679}
2680
2681
2682/**
2683 * Debug Info: Record info about guest register shadowing.
2684 */
2685DECL_HIDDEN_THROW(void)
2686iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2687 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2688{
2689 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2690 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2691 pEntry->GuestSimdRegShadowing.uUnused = 0;
2692 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2693 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2694 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2695}
2696
2697
2698# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2699/**
2700 * Debug Info: Record info about delayed RIP updates.
2701 */
2702DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint64_t offPc, uint32_t cInstrSkipped)
2703{
2704 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2705 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2706 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2707 pEntry->DelayedPcUpdate.offPc = offPc; /** @todo support larger values */
2708}
2709# endif
2710
2711
2712/**
2713 * Debug Info: Record info about a dirty guest register.
2714 */
2715DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2716 uint8_t idxGstReg, uint8_t idxHstReg)
2717{
2718 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2719 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2720 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2721 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2722 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2723}
2724
2725
2726/**
2727 * Debug Info: Record info about a dirty guest register writeback operation.
2728 */
2729DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2730{
2731 unsigned const cBitsGstRegMask = 25;
2732 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2733
2734 /* The first block of 25 bits: */
2735 if (fGstReg & fGstRegMask)
2736 {
2737 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2738 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2739 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2740 pEntry->GuestRegWriteback.cShift = 0;
2741 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2742 fGstReg &= ~(uint64_t)fGstRegMask;
2743 if (!fGstReg)
2744 return;
2745 }
2746
2747 /* The second block of 25 bits: */
2748 fGstReg >>= cBitsGstRegMask;
2749 if (fGstReg & fGstRegMask)
2750 {
2751 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2752 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2753 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2754        pEntry->GuestRegWriteback.cShift        = 1;
2755 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2756 fGstReg &= ~(uint64_t)fGstRegMask;
2757 if (!fGstReg)
2758 return;
2759 }
2760
2761 /* The last block with 14 bits: */
2762 fGstReg >>= cBitsGstRegMask;
2763 Assert(fGstReg & fGstRegMask);
2764 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2765 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2766 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2767 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2768 pEntry->GuestRegWriteback.cShift = 2;
2769 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2770}
2771
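/*
 * Example of the chunking above (illustrative): a mask with bits 3 and 40 set
 * yields two entries - one with cShift=0 carrying bit 3, and one with cShift=1
 * carrying bit 40 shifted down by 25 (i.e. bit 15); a third entry with
 * cShift=2 is only added when any of the final 14 bits (50..63) are set.
 */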
2772
2773# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2774/**
2775 * Debug Info: Record info about emitting a postponed EFLAGS calculation.
2776 */
2777DECL_HIDDEN_THROW(void)
2778iemNativeDbgInfoAddPostponedEFlagsCalc(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVE_POSTPONED_EFL_OP_T enmOp,
2779 uint8_t cOpBits, uint8_t idxEmit)
2780{
2781 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2782 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2783 pEntry->PostponedEflCalc.uType = kIemTbDbgEntryType_PostponedEFlagsCalc;
2784 pEntry->PostponedEflCalc.enmOp = (unsigned)enmOp;
2785 pEntry->PostponedEflCalc.cOpBits = cOpBits;
2786 pEntry->PostponedEflCalc.idxEmit = idxEmit;
2787 pEntry->PostponedEflCalc.uUnused = 0;
2788}
2789# endif /* IEMNATIVE_WITH_EFLAGS_POSTPONING */
2790
2791#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2792
2793
2794/*********************************************************************************************************************************
2795* Register Allocator *
2796*********************************************************************************************************************************/
2797
2798/**
2799 * Register parameter indexes (indexed by argument number).
2800 */
2801DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2802{
2803 IEMNATIVE_CALL_ARG0_GREG,
2804 IEMNATIVE_CALL_ARG1_GREG,
2805 IEMNATIVE_CALL_ARG2_GREG,
2806 IEMNATIVE_CALL_ARG3_GREG,
2807#if defined(IEMNATIVE_CALL_ARG4_GREG)
2808 IEMNATIVE_CALL_ARG4_GREG,
2809# if defined(IEMNATIVE_CALL_ARG5_GREG)
2810 IEMNATIVE_CALL_ARG5_GREG,
2811# if defined(IEMNATIVE_CALL_ARG6_GREG)
2812 IEMNATIVE_CALL_ARG6_GREG,
2813# if defined(IEMNATIVE_CALL_ARG7_GREG)
2814 IEMNATIVE_CALL_ARG7_GREG,
2815# endif
2816# endif
2817# endif
2818#endif
2819};
2820AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2821
2822/**
2823 * Call register masks indexed by argument count.
2824 */
2825DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2826{
2827 0,
2828 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2829 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2830 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2831 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2832 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2833#if defined(IEMNATIVE_CALL_ARG4_GREG)
2834 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2835 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2836# if defined(IEMNATIVE_CALL_ARG5_GREG)
2837 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2838 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2839# if defined(IEMNATIVE_CALL_ARG6_GREG)
2840 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2841 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2842 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2843# if defined(IEMNATIVE_CALL_ARG7_GREG)
2844 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2845 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2846 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2847# endif
2848# endif
2849# endif
2850#endif
2851};
2852
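/*
 * Example of how these two tables relate (illustrative): for a helper taking
 * three arguments, g_aidxIemNativeCallRegs[0..2] name the individual argument
 * registers, while g_afIemNativeCallRegs[3] is the mask of all three, which is
 * handy for checking or flushing the argument registers in one go.
 */
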
2853#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2854/**
2855 * BP offset of the stack argument slots.
2856 *
2857 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2858 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2859 */
2860DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2861{
2862 IEMNATIVE_FP_OFF_STACK_ARG0,
2863# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2864 IEMNATIVE_FP_OFF_STACK_ARG1,
2865# endif
2866# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2867 IEMNATIVE_FP_OFF_STACK_ARG2,
2868# endif
2869# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2870 IEMNATIVE_FP_OFF_STACK_ARG3,
2871# endif
2872};
2873AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2874#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2875
2876/**
2877 * Info about shadowed guest register values.
2878 * @see IEMNATIVEGSTREG
2879 */
2880DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2881{
2882#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2883 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2884 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2885 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2886 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2887 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2888 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2889 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2890 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2891 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2892 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2893 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2894 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2895 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2896 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2897 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2898 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2899 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2900 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2901 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2902 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2903 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2904 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2905 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2906 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2907 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2908 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2909 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2910 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2911 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2912 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2913 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2914 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2915 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2916 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2917 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2918 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2919 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2920 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2921 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2922 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2923 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2924 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2925 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2926 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2927 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2928 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2929 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2930 /* [kIemNativeGstReg_EFlags.Cf] = */ { UINT32_MAX, 0, "efl.cf", },
2931 /* [kIemNativeGstReg_EFlags.Pf] = */ { UINT32_MAX, 0, "efl.pf", },
2932 /* [kIemNativeGstReg_EFlags.Af] = */ { UINT32_MAX, 0, "efl.af", },
2933 /* [kIemNativeGstReg_EFlags.Zf] = */ { UINT32_MAX, 0, "efl.zf", },
2934 /* [kIemNativeGstReg_EFlags.Sf] = */ { UINT32_MAX, 0, "efl.sf", },
2935 /* [kIemNativeGstReg_EFlags.Of] = */ { UINT32_MAX, 0, "efl.of", },
2936 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2937#undef CPUMCTX_OFF_AND_SIZE
2938};
2939AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2940
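/*
 * Illustrative lookup in the table above (the local variable names are just
 * for this example): each entry supplies the VMCPU offset, the access size
 * and a name used for logging.
 *
 * @code
 *      unsigned const idxGstReg = kIemNativeGstReg_GprFirst + X86_GREG_xAX;
 *      uint32_t const offVCpu   = g_aGstShadowInfo[idxGstReg].off;      // offset of cpum.GstCtx.rax in VMCPU
 *      unsigned const cbValue   = g_aGstShadowInfo[idxGstReg].cb;       // sizeof(uint64_t)
 *      Log(("%s\n", g_aGstShadowInfo[idxGstReg].pszName));              // "rax"
 * @endcode
 *
 * The eflags sub-flag entries have no context offset (UINT32_MAX); they are
 * pseudo-registers, presumably only used by the postponed EFLAGS handling.
 */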
2941
2942/** Host CPU general purpose register names. */
2943DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2944{
2945#ifdef RT_ARCH_AMD64
2946 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2947#elif RT_ARCH_ARM64
2948 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2949 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2950#else
2951# error "port me"
2952#endif
2953};
2954
2955
2956#if 0 /* unused */
2957/**
2958 * Tries to locate a suitable register in the given register mask.
2959 *
2960 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2961 * failed.
2962 *
2963 * @returns Host register number on success, returns UINT8_MAX on failure.
2964 */
2965static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2966{
2967 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2968 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2969 if (fRegs)
2970 {
2971 /** @todo pick better here: */
2972 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2973
2974 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2975 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2976 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2977 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2978
2979 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2980 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2981 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2982 return idxReg;
2983 }
2984 return UINT8_MAX;
2985}
2986#endif /* unused */
2987
2988#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2989
2990/**
2991 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2992 *
2993 * @returns New code buffer offset on success, UINT32_MAX on failure.
2994 * @param pReNative The native recompile state.
2995 * @param off The current code buffer position.
2996 * @param enmGstReg The guest register to store to.
2997 * @param idxHstReg The host register to store from.
2998 */
2999DECL_FORCE_INLINE_THROW(uint32_t)
3000iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
3001{
3002 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3003 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3004
3005 switch (g_aGstShadowInfo[enmGstReg].cb)
3006 {
3007 case sizeof(uint64_t):
3008 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3009 case sizeof(uint32_t):
3010 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3011 case sizeof(uint16_t):
3012 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3013# if 0 /* not present in the table. */
3014 case sizeof(uint8_t):
3015 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3016# endif
3017 default:
3018 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3019 }
3020}
3021
3022
3023/**
3024 * Emits code to flush a pending write of the given guest register,
3025 * version with alternative core state.
3026 *
3027 * @returns New code buffer offset.
3028 * @param pReNative The native recompile state.
3029 * @param off Current code buffer position.
3030 * @param pCore Alternative core state.
3031 * @param enmGstReg The guest register to flush.
3032 */
3033DECL_HIDDEN_THROW(uint32_t)
3034iemNativeRegFlushPendingWriteEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVECORESTATE pCore, IEMNATIVEGSTREG enmGstReg)
3035{
3036 uint8_t const idxHstReg = pCore->aidxGstRegShadows[enmGstReg];
3037
3038 AssertCompile(kIemNativeGstReg_GprFirst == 0);
3039 Assert( enmGstReg <= kIemNativeGstReg_GprLast
3040 || enmGstReg == kIemNativeGstReg_MxCsr);
3041 Assert( idxHstReg != UINT8_MAX
3042 && pCore->bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3043 Log12(("iemNativeRegFlushPendingWriteEx: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3044 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3045
3046 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3047
3048 pCore->bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3049 return off;
3050}
3051
3052
3053/**
3054 * Emits code to flush a pending write of the given guest register.
3055 *
3056 * @returns New code buffer offset.
3057 * @param pReNative The native recompile state.
3058 * @param off Current code buffer position.
3059 * @param enmGstReg The guest register to flush.
3060 */
3061DECL_HIDDEN_THROW(uint32_t)
3062iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3063{
3064 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3065
3066 AssertCompile(kIemNativeGstReg_GprFirst == 0);
3067 Assert( enmGstReg <= kIemNativeGstReg_GprLast
3068 || enmGstReg == kIemNativeGstReg_MxCsr);
3069 Assert( idxHstReg != UINT8_MAX
3070 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3071 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3072 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3073
3074 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3075
3076 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3077 return off;
3078}
3079
3080
3081/**
3082 * Flush the given set of guest registers if marked as dirty.
3083 *
3084 * @returns New code buffer offset.
3085 * @param pReNative The native recompile state.
3086 * @param off Current code buffer position.
3087 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3088 * @note Must not modify the host status flags!
3089 */
3090DECL_HIDDEN_THROW(uint32_t)
3091iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3092{
3093 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3094 if (bmGstRegShadowDirty)
3095 {
3096# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3097 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3098 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3099# endif
3100 do
3101 {
3102 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3103 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3104 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3105 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3106 } while (bmGstRegShadowDirty);
3107 }
3108
3109 return off;
3110}
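/* Illustrative sketch (assumption, not taken from a caller in this file): the mask parameter can
   be used to restrict the flush to a subset of shadows, e.g. just the GPRs. Since
   kIemNativeGstReg_GprFirst is 0 (see the AssertCompile above), a GPR-only mask can be formed as:
       off = iemNativeRegFlushDirtyGuest(pReNative, off, RT_BIT_64(kIemNativeGstReg_GprLast + 1) - 1);
   Omitting the third argument keeps the UINT64_MAX default and flushes every dirty shadow. */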
3111
3112
3113/**
3114 * Flush all shadowed guest registers marked as dirty for the given host register.
3115 *
3116 * @returns New code buffer offset.
3117 * @param pReNative The native recompile state.
3118 * @param off Current code buffer position.
3119 * @param idxHstReg The host register.
3120 *
3121 * @note This doesn't do any unshadowing of guest registers from the host register.
3122 *
3123 * @note Must not modify the host status flags!
3124 */
3125DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3126{
3127 /* We need to flush any pending guest register writes this host register shadows. */
3128 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3129 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3130 {
3131# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3132 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3133 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3134# endif
3135 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3136 do
3137 {
3138 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3139 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3140 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3141 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3142 } while (bmGstRegShadowDirty);
3143 }
3144
3145 return off;
3146}
3147
3148#endif /* IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK */
3149
3150
3151/**
3152 * Locate a register, possibly freeing one up.
3153 *
3154 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3155 * failed.
3156 *
3157 * @returns Host register number on success. Returns UINT8_MAX if no registers
3158 * found, the caller is supposed to deal with this and raise an
3159 * allocation type specific status code (if desired).
3160 *
3161 * @throws VBox status code if we run into trouble spilling a variable or
3162 * recording debug info. Does NOT throw anything if we're out of
3163 * registers, though.
3164 *
3165 * @note Must not modify the host status flags!
3166 */
3167static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3168 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3169{
3170 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3171 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3172 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3173
3174 /*
3175 * Try a free register that's shadowing a guest register.
3176 */
3177 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3178 if (fRegs)
3179 {
3180 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3181
3182#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3183 /*
3184 * When we have liveness information, we use it to kick out all shadowed
3185 * guest registers that will not be needed any more in this TB. If we're
3186 * lucky, this may prevent us from ending up here again.
3187 *
3188 * Note! We must consider the previous entry here so we don't free
3189 * anything that the current threaded function requires (current
3190 * entry is produced by the next threaded function).
3191 */
3192 uint32_t const idxCurCall = pReNative->idxCurCall;
3193 if (idxCurCall > 0)
3194 {
3195 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3196 uint64_t fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
3197
3198 /* Merge EFLAGS. */
3199 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3200 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3201 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3202 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3203 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
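            /* In other words (going by the bit layout implied by the comments above, with Other, CF,
               PF, AF, ZF, SF and OF occupying seven consecutive bits starting at
               kIemNativeGstReg_EFlags): the folding leaves the kIemNativeGstReg_EFlags bit set only
               if all seven per-flag liveness bits say the flag can be freed, and the masking then
               drops the individual flag bits (and any higher-numbered bits) from the free mask, so
               EFLAGS is only ever unshadowed as a whole. */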
3204
3205 /* If it matches any shadowed registers. */
3206 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3207 {
3208#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3209 /* Writeback any dirty shadow registers we are about to unshadow. */
3210 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3211#endif
3212
3213 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3214 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3215 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3216
3217 /* See if we've got any unshadowed registers we can return now. */
3218 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3219 if (fUnshadowedRegs)
3220 {
3221 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3222 return (fPreferVolatile
3223 ? ASMBitFirstSetU32(fUnshadowedRegs)
3224 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3225 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3226 - 1;
3227 }
3228 }
3229 }
3230#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3231
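        /* Pick the victim register: when volatile registers are preferred we simply take the lowest
           set bit; otherwise we take the highest set bit, restricting the search to the non-volatile
           subset of fRegs whenever that subset is non-empty. */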
3232 unsigned const idxReg = (fPreferVolatile
3233 ? ASMBitFirstSetU32(fRegs)
3234 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3235 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3236 - 1;
3237
3238 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3239 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3240 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3241 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3242
3243#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3244 /* We need to flush any pending guest register writes this host register shadows. */
3245 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3246#endif
3247
3248 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3249 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3250 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3251 return idxReg;
3252 }
3253
3254 /*
3255 * Try to free up a variable that's in a register.
3256 *
3257 * We do two rounds here, first evacuating variables we don't need to be
3258 * saved on the stack, then in the second round move things to the stack.
3259 */
3260 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3261 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3262 {
3263 uint32_t fVars = pReNative->Core.bmVars;
3264 while (fVars)
3265 {
3266 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3267 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* (this is the GPR allocator) */
3268 {
3269 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3270 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3271 && (RT_BIT_32(idxReg) & fRegMask)
3272 && ( iLoop == 0
3273 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3274 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3275 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3276 {
3277 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3278 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3279 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3280 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3281 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3282 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3283#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3284 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3285#endif
3286
3287 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3288 {
3289 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3290 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3291 }
3292
3293 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3294 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3295
3296 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3297 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3298 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3299 return idxReg;
3300 }
3301 }
3302 fVars &= ~RT_BIT_32(idxVar);
3303 }
3304 }
3305
3306 return UINT8_MAX;
3307}
3308
3309
3310/**
3311 * Reassigns a variable to a different register specified by the caller.
3312 *
3313 * @returns The new code buffer position.
3314 * @param pReNative The native recompile state.
3315 * @param off The current code buffer position.
3316 * @param idxVar The variable index.
3317 * @param idxRegOld The old host register number.
3318 * @param idxRegNew The new host register number.
3319 * @param pszCaller The caller for logging.
3320 */
3321static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3322 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3323{
3324 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3325 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3326 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3327 RT_NOREF(pszCaller);
3328
3329#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3330 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3331#endif
3332 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3333
3334 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3335#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3336 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3337#endif
3338 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3339 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3340 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3341
3342 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3343 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3344 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3345 if (fGstRegShadows)
3346 {
3347 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3348 | RT_BIT_32(idxRegNew);
3349 while (fGstRegShadows)
3350 {
3351 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3352 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3353
3354 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3355 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3356 }
3357 }
3358
3359 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3360 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3361 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3362 return off;
3363}
3364
3365
3366/**
3367 * Moves a variable to a different register or spills it onto the stack.
3368 *
3369 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3370 * kinds can easily be recreated if needed later.
3371 *
3372 * @returns The new code buffer position.
3373 * @param pReNative The native recompile state.
3374 * @param off The current code buffer position.
3375 * @param idxVar The variable index.
3376 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3377 * call-volatile registers.
3378 */
3379DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3380 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3381{
3382 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3383 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3384 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3385 Assert(!pVar->fRegAcquired);
3386
3387 uint8_t const idxRegOld = pVar->idxReg;
3388 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3389 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3390 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3391 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3392 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3393 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3394 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3395 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3396#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3397 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3398#endif
3399
3400
3401 /** @todo Add statistics on this.*/
3402 /** @todo Implement basic variable liveness analysis (python) so variables
3403 * can be freed immediately once no longer used. Without it we risk
3404 * trashing registers and stack for dead variables.
3405 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3406
3407 /*
3408 * First try move it to a different register, as that's cheaper.
3409 */
3410 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3411 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3412 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3413 if (fRegs)
3414 {
3415 /* Avoid using shadow registers, if possible. */
3416 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3417 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3418 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3419 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3420 }
3421
3422 /*
3423 * Otherwise we must spill the register onto the stack.
3424 */
3425 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3426 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3427 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3428 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3429
3430 pVar->idxReg = UINT8_MAX;
3431 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3432 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3433 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3434 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3435 return off;
3436}
3437
3438
3439/**
3440 * Allocates a temporary host general purpose register.
3441 *
3442 * This may emit code to save register content onto the stack in order to free
3443 * up a register.
3444 *
3445 * @returns The host register number; throws VBox status code on failure,
3446 * so no need to check the return value.
3447 * @param pReNative The native recompile state.
3448 * @param poff Pointer to the variable with the code buffer
3449 * position. This will be updated if we need to move
3450 * a variable from register to stack in order to
3451 * satisfy the request.
3452 * @tparam a_fPreferVolatile Whether to prefer volatile over non-volatile
3453 * registers (@c true, default) or the other way
3454 * around (@c false, for
3455 * iemNativeRegAllocTmpForGuestReg()).
3456 *
3457 * @note Must not modify the host status flags!
3458 */
3459template<bool const a_fPreferVolatile>
3460DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3461{
3462 /*
3463 * Try find a completely unused register, preferably a call-volatile one.
3464 */
3465 uint8_t idxReg;
3466 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3467 & ~pReNative->Core.bmHstRegsWithGstShadow
3468 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3469 if (fRegs)
3470 {
3471 if (a_fPreferVolatile)
3472 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3473 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3474 else
3475 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3476 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3477 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3478 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3479 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3480 }
3481 else
3482 {
3483 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile);
3484 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3485 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3486 }
3487 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3488}
3489
3490
3491/** See iemNativeRegAllocTmpInt for details. */
3492DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3493{
3494 return iemNativeRegAllocTmpInt<true>(pReNative, poff);
3495}
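/* Illustrative usage sketch (assumption, not taken from a caller in this file): allocate a scratch
   GPR, emit code that uses it, then hand it back so later allocations can reuse it. The immediate
   value loaded here is just an example.
       uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
       off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0xdeadbeef));
       ...
       iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */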
3496
3497
3498/** See iemNativeRegAllocTmpInt for details. */
3499DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpPreferNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3500{
3501 return iemNativeRegAllocTmpInt<false>(pReNative, poff);
3502}
3503
3504
3505/**
3506 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3507 * registers.
3508 *
3509 * @returns The host register number; throws VBox status code on failure,
3510 * so no need to check the return value.
3511 * @param pReNative The native recompile state.
3512 * @param poff Pointer to the variable with the code buffer
3513 * position. This will be updated if we need to move
3514 * a variable from register to stack in order to
3515 * satisfy the request.
3516 * @param fRegMask Mask of acceptable registers.
3517 * @tparam a_fPreferVolatile Whether to prefer volatile over non-volatile
3518 * registers (@c true, default) or the other way
3519 * around (@c false, for
3520 * iemNativeRegAllocTmpForGuestReg()).
3521 */
3522template<bool const a_fPreferVolatile>
3523DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpExInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3524{
3525 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3526 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3527
3528 /*
3529 * Try find a completely unused register, preferably a call-volatile one.
3530 */
3531 uint8_t idxReg;
3532 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3533 & ~pReNative->Core.bmHstRegsWithGstShadow
3534 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3535 & fRegMask;
3536 if (fRegs)
3537 {
3538 if RT_CONSTEXPR_IF(a_fPreferVolatile)
3539 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3540 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3541 else
3542 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3543 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3544 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3545 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3546 Log12(("iemNativeRegAllocTmpExInt: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3547 }
3548 else
3549 {
3550 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile, fRegMask);
3551 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3552 Log12(("iemNativeRegAllocTmpExInt: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3553 }
3554 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3555}
3556
3557
3558/** See iemNativeRegAllocTmpExInt for details. */
3559DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3560{
3561 return iemNativeRegAllocTmpExInt<true>(pReNative, poff, fRegMask);
3562}
3563
3564
3565/** See iemNativeRegAllocTmpExInt for details. */
3566DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpExPreferNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3567{
3568 return iemNativeRegAllocTmpExInt<false>(pReNative, poff, fRegMask);
3569}
3570
3571
3572/** Internal templated variation of iemNativeRegAllocTmpEx. */
3573template<uint32_t const a_fRegMask, bool const a_fPreferVolatile>
3574DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpExInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3575{
3576 AssertCompile(!(a_fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3577 AssertCompile(!(a_fRegMask & IEMNATIVE_REG_FIXED_MASK));
3578
3579 /*
3580 * Try find a completely unused register, preferably a call-volatile one.
3581 */
3582 uint8_t idxReg;
3583 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3584 & ~pReNative->Core.bmHstRegsWithGstShadow
3585 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3586 & a_fRegMask;
3587 if (fRegs)
3588 {
3589 if RT_CONSTEXPR_IF(a_fPreferVolatile)
3590 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3591 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3592 else
3593 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3594 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3595 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3596 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3597 Log12(("iemNativeRegAllocTmpExInt: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3598 }
3599 else
3600 {
3601 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile, a_fRegMask);
3602 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3603 Log12(("iemNativeRegAllocTmpExInt: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3604 }
3605 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3606}
3607
3608
3609/**
3610 * Allocates a temporary register for loading an immediate value into.
3611 *
3612 * This will emit code to load the immediate, unless there happens to be an
3613 * unused register with the value already loaded.
3614 *
3615 * The caller will not modify the returned register, it must be considered
3616 * read-only. Free using iemNativeRegFreeTmpImm.
3617 *
3618 * @returns The host register number; throws VBox status code on failure, so no
3619 * need to check the return value.
3620 * @param pReNative The native recompile state.
3621 * @param poff Pointer to the variable with the code buffer position.
3622 * @param uImm The immediate value that the register must hold upon
3623 * return.
3624 * @note Prefers volatile registers.
3625 * @note Reusing immediate values has not been implemented yet.
3626 */
3627DECL_HIDDEN_THROW(uint8_t)
3628iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm)
3629{
3630 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff);
3631 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3632 return idxReg;
3633}
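/* Sketch of the expected pattern (assumption): since the register returned above must be treated
   as read-only, it is typically used purely as a source operand and released right away:
       uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x1000));
       ... emit code that only reads idxImmReg ...
       iemNativeRegFreeTmpImm(pReNative, idxImmReg);
 */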
3634
3635
3636/**
3637 * Common worker for iemNativeRegAllocTmpForGuestReg() and
3638 * iemNativeRegAllocTmpForGuestEFlags().
3639 *
3640 * See iemNativeRegAllocTmpForGuestRegInt() for details.
3641 */
3642template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, uint32_t const a_fRegMask>
3643static uint8_t iemNativeRegAllocTmpForGuestRegCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3644{
3645 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3646#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3647 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3648#endif
3649
3650 /*
3651 * First check if the guest register value is already in a host register.
3652 */
3653 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3654 {
3655 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3656 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3657 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3658 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3659
3660 /* It's not supposed to be allocated... */
3661 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3662 {
3663 /*
3664 * If the register will trash the guest shadow copy, try find a
3665 * completely unused register we can use instead. If that fails,
3666 * we need to disassociate the host reg from the guest reg.
3667 */
3668 /** @todo would be nice to know if preserving the register is in any way helpful. */
3669 /* If the purpose is calculations, try to duplicate the register value as
3670 we'll be clobbering the shadow. */
3671 if ( a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3672 && ( ~pReNative->Core.bmHstRegs
3673 & ~pReNative->Core.bmHstRegsWithGstShadow
3674 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3675 {
3676 uint8_t const idxRegNew = iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff);
3677
3678 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3679
3680 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3681 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3682 g_apszIemNativeHstRegNames[idxRegNew]));
3683 idxReg = idxRegNew;
3684 }
3685 /* If the current register matches the restrictions, go ahead and allocate
3686 it for the caller. */
3687 else if (a_fRegMask & RT_BIT_32(idxReg))
3688 {
3689 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3690 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3691 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3692 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3693 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n", g_apszIemNativeHstRegNames[idxReg],
3694 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3695 else
3696 {
3697 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3698 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3699 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3700 }
3701 }
3702 /* Otherwise, allocate a register that satisfies the caller and transfer
3703 the shadowing if compatible with the intended use. (This basically
3704 means the call wants a non-volatile register (RSP push/pop scenario).) */
3705 else
3706 {
3707 Assert(!(a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3708 uint8_t const idxRegNew = (a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3709 && a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3710 ? iemNativeRegAllocTmpEx(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg))
3711 : iemNativeRegAllocTmpExPreferNonVolatile(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg));
3712 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3713 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3714 {
3715 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3716 Log12(("iemNativeRegAllocTmpForGuestReg: Transfering %s to %s for guest %s %s\n",
3717 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3718 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3719 }
3720 else
3721 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3722 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3723 g_apszIemNativeHstRegNames[idxRegNew]));
3724 idxReg = idxRegNew;
3725 }
3726 }
3727 else
3728 {
3729 /*
3730 * Oops. Shadowed guest register already allocated!
3731 *
3732 * Allocate a new register, copy the value and, if updating, the
3733 * guest shadow copy assignment to the new register.
3734 */
3735 AssertMsg( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3736 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3737 ("This shouldn't happen: idxReg=%d enmGstReg=%d a_enmIntendedUse=%s\n",
3738 idxReg, enmGstReg, s_pszIntendedUse[a_enmIntendedUse]));
3739
3740 /** @todo share register for readonly access. */
3741 uint8_t const idxRegNew = a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3742 ? iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff)
3743 : iemNativeRegAllocTmpExInt<a_fRegMask, false>(pReNative, poff);
3744
3745 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3746 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3747
3748 if RT_CONSTEXPR_IF( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3749 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3750 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3751 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3752 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3753 else
3754 {
3755 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3756 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3757 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3758 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3759 }
3760 idxReg = idxRegNew;
3761 }
3762 Assert(RT_BIT_32(idxReg) & a_fRegMask); /* See assumption in a_fNonVolatileRegs docs. */
3763
3764#ifdef VBOX_STRICT
3765 /* Strict builds: Check that the value is correct. */
3766 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3767#endif
3768
3769#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3770 /** @todo r=aeichner Implement for registers other than GPR as well. */
3771 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3772 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3773 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3774 && enmGstReg <= kIemNativeGstReg_GprLast)
3775 || enmGstReg == kIemNativeGstReg_MxCsr)
3776 {
3777# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3778 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3779 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3780# endif
3781 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3782 }
3783#endif
3784
3785 return idxReg;
3786 }
3787
3788 /*
3789 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3790 */
3791 uint8_t const idxRegNew = a_enmIntendedUse != kIemNativeGstRegUse_Calculation
3792 ? iemNativeRegAllocTmpExInt<a_fRegMask, false>(pReNative, poff)
3793 : iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff);
3794
3795 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3796 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3797
3798 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3799 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3800 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3801 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3802
3803#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3804 /** @todo r=aeichner Implement for registers other than GPR as well. */
3805 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3806 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3807 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3808 && enmGstReg <= kIemNativeGstReg_GprLast)
3809 || enmGstReg == kIemNativeGstReg_MxCsr)
3810 {
3811# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3812 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3813 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3814# endif
3815 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3816 }
3817#endif
3818
3819 return idxRegNew;
3820}
3821
3822
3823/**
3824 * Allocates a temporary host general purpose register for keeping a guest
3825 * register value.
3826 *
3827 * Since we may already have a register holding the guest register value,
3828 * code will be emitted to do the loading if that's not the case. Code may also
3829 * be emitted if we have to free up a register to satisfy the request.
3830 *
3831 * @returns The host register number; throws VBox status code on failure, so no
3832 * need to check the return value.
3833 * @param pReNative The native recompile state.
3834 * @param poff Pointer to the variable with the code buffer
3835 * position. This will be updated if we need to move
3836 * a variable from register to stack in order to
3837 * satisfy the request.
3838 * @param enmGstReg The guest register that is to be updated.
3839 * @tparam a_enmIntendedUse How the caller will be using the host register.
3840 * @tparam a_fNonVolatileRegs Set if no volatile register allowed, clear if
3841 * any register is okay (default).
3842 * The ASSUMPTION here is that the caller has
3843 * already flushed all volatile registers,
3844 * so this is only applied if we allocate a new
3845 * register.
3846 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3847 */
3848template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, bool const a_fNonVolatileRegs>
3849DECL_FORCE_INLINE_THROW(uint8_t)
3850iemNativeRegAllocTmpForGuestRegInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3851{
3852#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3853 AssertMsg( pReNative->idxCurCall == 0
3854 || enmGstReg == kIemNativeGstReg_Pc
3855 || (a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3856 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3857 : a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3858 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3859 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3860 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3861#endif
3862
3863 if RT_CONSTEXPR_IF(!a_fNonVolatileRegs)
3864 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3865 IEMNATIVE_HST_GREG_MASK
3866 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, enmGstReg);
3867 else /* keep else, is required by MSC */
3868 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3869 IEMNATIVE_HST_GREG_MASK
3870 & ~IEMNATIVE_REG_FIXED_MASK
3871 & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK>(pReNative, poff, enmGstReg);
3872}
3873
3874/* Variants including volatile registers: */
3875
3876DECL_HIDDEN_THROW(uint8_t)
3877iemNativeRegAllocTmpForGuestRegReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3878{
3879 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, false>(pReNative, poff, enmGstReg);
3880}
3881
3882DECL_HIDDEN_THROW(uint8_t)
3883iemNativeRegAllocTmpForGuestRegUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3884{
3885 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, false>(pReNative, poff, enmGstReg);
3886}
3887
3888DECL_HIDDEN_THROW(uint8_t)
3889iemNativeRegAllocTmpForGuestRegFullWrite(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3890{
3891 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, false>(pReNative, poff, enmGstReg);
3892}
3893
3894DECL_HIDDEN_THROW(uint8_t)
3895iemNativeRegAllocTmpForGuestRegCalculation(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3896{
3897 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, false>(pReNative, poff, enmGstReg);
3898}
3899
3900/* Variants excluding any volatile registers: */
3901
3902DECL_HIDDEN_THROW(uint8_t)
3903iemNativeRegAllocTmpForGuestRegReadOnlyNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3904{
3905 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, true>(pReNative, poff, enmGstReg);
3906}
3907
3908DECL_HIDDEN_THROW(uint8_t)
3909iemNativeRegAllocTmpForGuestRegUpdateNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3910{
3911 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, true>(pReNative, poff, enmGstReg);
3912}
3913
3914DECL_HIDDEN_THROW(uint8_t)
3915iemNativeRegAllocTmpForGuestRegFullWriteNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3916{
3917 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, true>(pReNative, poff, enmGstReg);
3918}
3919
3920DECL_HIDDEN_THROW(uint8_t)
3921iemNativeRegAllocTmpForGuestRegCalculationNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3922{
3923 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, true>(pReNative, poff, enmGstReg);
3924}
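/* Illustrative sketch (assumption, not from the source): a typical statement fetching a guest GPR
   for reading goes through one of the wrappers above, e.g.:
       uint8_t const idxGstGprReg = iemNativeRegAllocTmpForGuestRegReadOnly(pReNative, &off,
                                                                            IEMNATIVEGSTREG_GPR(X86_GREG_xAX));
       ... emit code that reads idxGstGprReg ...
       iemNativeRegFreeTmp(pReNative, idxGstGprReg);
   IEMNATIVEGSTREG_GPR() and X86_GREG_xAX are assumed helpers/constants from the IEM/IPRT headers;
   any value in the kIemNativeGstReg_GprFirst..kIemNativeGstReg_GprLast range works the same way. */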
3925
3926
3927
3928#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
3929/**
3930 * Specialized version of iemNativeRegAllocTmpForGuestReg for EFLAGS.
3931 *
3932 * This takes additional arguments for covering liveness assertions in strict
3933 * builds; it's otherwise the same as iemNativeRegAllocTmpForGuestReg() with
3934 * kIemNativeGstReg_EFlags as argument.
3935 */
3936template<IEMNATIVEGSTREGUSE const a_enmIntendedUse>
3937DECL_FORCE_INLINE_THROW(uint8_t)
3938iemNativeRegAllocTmpForGuestEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
3939 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3940{
3941 if (pReNative->idxCurCall != 0 && (fRead || fWrite /*|| fPotentialCall*/))
3942 {
3943 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3944 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3945 Assert(!(fPotentialCall & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3946 uint64_t const fAll = fRead | fWrite /*| fPotentialCall*/;
3947 uint32_t fState;
3948# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
3949 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
3950 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
3951 || ( fRead & RT_BIT_64(a_enmGstEfl) \
3952 ? fWrite & RT_BIT_64(a_enmGstEfl) \
3953 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
3954 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
3955 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
3956 ) \
3957 , ("%s - %u\n", #a_enmGstEfl, fState))
3958 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
3959 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
3960 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
3961 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
3962 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
3963 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
3964 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
3965# undef MY_ASSERT_ONE_EFL
3966 }
3967 RT_NOREF(fPotentialCall);
3968
3969 AssertCompile(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate);
3970 if RT_CONSTEXPR_IF(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly)
3971 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ReadOnly,
3972 IEMNATIVE_HST_GREG_MASK
3973 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3974 else /* keep else, is required by MSC */
3975 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ForUpdate,
3976 IEMNATIVE_HST_GREG_MASK
3977 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3978}
3979
3980
3981DECL_HIDDEN_THROW(uint8_t)
3982iemNativeRegAllocTmpForGuestEFlagsReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
3983 uint64_t fRead, uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3984{
3985 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ReadOnly>(pReNative, poff, fRead, fWrite, fPotentialCall);
3986}
3987
3988DECL_HIDDEN_THROW(uint8_t)
3989iemNativeRegAllocTmpForGuestEFlagsForUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
3990 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3991{
3992 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ForUpdate>(pReNative, poff, fRead, fWrite, fPotentialCall);
3993}
3994
3995#endif
3996
3997
3998
3999/**
4000 * Common worker for iemNativeRegAllocTmpForGuestRegIfAlreadyPresent and
4001 * iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent.
4002 *
4003 * See iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() for details.
4004 */
4005DECL_FORCE_INLINE(uint8_t)
4006iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4007{
4008 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4009
4010 /*
4011 * First check if the guest register value is already in a host register.
4012 */
4013 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4014 {
4015 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4016 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4017 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4018 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4019
4020 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4021 {
4022 /*
4023 * We only do readonly use here, so easy compared to the other
4024 * variant of this code.
4025 */
4026 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4027 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4028 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4029 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4030 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4031
4032#ifdef VBOX_STRICT
4033 /* Strict builds: Check that the value is correct. */
4034 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4035#else
4036 RT_NOREF(poff);
4037#endif
4038 return idxReg;
4039 }
4040 }
4041
4042 return UINT8_MAX;
4043}
4044
4045
4046/**
4047 * Allocates a temporary host general purpose register that already holds the
4048 * given guest register value.
4049 *
4050 * The use case for this function is places where the shadowing state cannot be
4051 * modified due to branching and such. This will fail if we don't have a
4052 * current shadow copy handy or if it's incompatible. The only code that will
4053 * be emitted here is value checking code in strict builds.
4054 *
4055 * The intended use can only be readonly!
4056 *
4057 * @returns The host register number, UINT8_MAX if not present.
4058 * @param pReNative The native recompile state.
4059 * @param poff Pointer to the instruction buffer offset.
4060 * Will be updated in strict builds if a register is
4061 * found.
4062 * @param enmGstReg The guest register that is to be updated.
4063 * @note In strict builds, this may throw instruction buffer growth failures.
4064 * Non-strict builds will not throw anything.
4065 * @sa iemNativeRegAllocTmpForGuestReg
4066 */
4067DECL_HIDDEN_THROW(uint8_t)
4068iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4069{
4070#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4071 AssertMsg( pReNative->idxCurCall == 0
4072 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4073 || enmGstReg == kIemNativeGstReg_Pc
4074 , ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4075#endif
4076 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, enmGstReg);
4077}
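/* Sketch of the expected calling pattern (assumption, enmGstReg being whatever the caller needs):
   the caller must handle the UINT8_MAX case itself, e.g.:
       uint8_t const idxHstReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, enmGstReg);
       if (idxHstReg != UINT8_MAX)
       {
           ... use the already-shadowed value ...
           iemNativeRegFreeTmp(pReNative, idxHstReg);
       }
       else
       {
           ... fall back, e.g. load the value from CPUMCTX without touching the shadowing state ...
       }
 */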
4078
4079
4080#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
4081/**
4082 * Specialized version of iemNativeRegAllocTmpForGuestRegIfAlreadyPresent for
4083 * EFLAGS.
4084 *
4085 * This takes additional arguments for covering liveness assertions in strict
4086 * builds; it's otherwise the same as
4087 * iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() with
4088 * kIemNativeGstReg_EFlags as argument.
4089 *
4090 * @note The @a fWrite parameter is necessary to complete the liveness picture,
4091 * as iemNativeEmitFetchEFlags() may fetch flags in prep for a later
4092 * commit. If the operation clobbers all the flags, @a fRead will be
4093 * zero, so better verify the whole picture while we're here.
4094 */
4095DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
4096 uint64_t fRead, uint64_t fWrite /*=0*/)
4097{
4098 if (pReNative->idxCurCall != 0)
4099 {
4100 Assert(fRead | fWrite);
4101 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4102 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4103 uint64_t const fAll = fRead | fWrite;
4104 uint32_t fState;
4105# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
4106 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
4107 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
4108 || ( fRead & RT_BIT_64(a_enmGstEfl) \
4109 ? fWrite & RT_BIT_64(a_enmGstEfl) \
4110 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
4111 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
4112 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
4113 ) \
4114 , ("%s - %u\n", #a_enmGstEfl, fState))
4115 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
4116 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
4117 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
4118 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
4119 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
4120 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
4121 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
4122# undef MY_ASSERT_ONE_EFL
4123 }
4124 RT_NOREF(fRead);
4125 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, kIemNativeGstReg_EFlags);
4126}
4127#endif
4128
4129
4130/**
4131 * Allocates argument registers for a function call.
4132 *
4133 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4134 * need to check the return value.
4135 * @param pReNative The native recompile state.
4136 * @param off The current code buffer offset.
4137 * @param cArgs The number of arguments the function call takes.
4138 */
4139DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4140{
4141 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4142 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4143 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4144 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4145
4146 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4147 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4148 else if (cArgs == 0)
4149 return off;
4150
4151 /*
4152 * Do we get lucky and all registers are free and not shadowing anything?
4153 */
4154 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4155 for (uint32_t i = 0; i < cArgs; i++)
4156 {
4157 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4158 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4159 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4160 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4161 }
4162 /*
4163 * Okay, not lucky so we have to free up the registers.
4164 */
4165 else
4166 for (uint32_t i = 0; i < cArgs; i++)
4167 {
4168 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4169 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4170 {
4171 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4172 {
4173 case kIemNativeWhat_Var:
4174 {
4175 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4176 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4177 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4178 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4179 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4180 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4181
4182 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4183 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4184 else
4185 {
4186 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4187 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4188 }
4189 break;
4190 }
4191
4192 case kIemNativeWhat_Tmp:
4193 case kIemNativeWhat_Arg:
4194 case kIemNativeWhat_rc:
4195 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4196 default:
4197 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4198 }
4199
4200 }
4201 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4202 {
4203 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4204 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4205 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4206#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4207 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4208#endif
4209 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4210 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4211 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4212 }
4213 else
4214 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4215 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4216 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4217 }
4218 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4219 return off;
4220}
4221
4222
4223DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4224
4225
4226#if 0
4227/**
4228 * Frees a register assignment of any type.
4229 *
4230 * @param pReNative The native recompile state.
4231 * @param idxHstReg The register to free.
4232 *
4233 * @note Does not update variables.
4234 */
4235DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4236{
4237 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4238 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4239 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4240 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4241 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4242 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4243 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4244 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4245 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4246 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4247 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4248 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4249 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4250 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4251
4252 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4253 /* no flushing, right:
4254 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4255 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4256 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4257 */
4258}
4259#endif
4260
4261
4262/**
4263 * Frees a temporary register.
4264 *
4265 * Any shadow copies of guest registers assigned to the host register will not
4266 * be flushed by this operation.
4267 */
4268DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4269{
4270 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4271 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4272 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4273 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4274 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4275}
4276
4277
4278/**
4279 * Frees a temporary immediate register.
4280 *
4281 * It is assumed that the call has not modified the register, so it still holds
4282 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4283 */
4284DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4285{
4286 iemNativeRegFreeTmp(pReNative, idxHstReg);
4287}
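
/*
 * A minimal usage sketch of the immediate-register pairing above.  The middle
 * step is a placeholder and the (pReNative, &off, uImm) form of
 * iemNativeRegAllocTmpImm is assumed from its use elsewhere in this file.
 *
 * @code
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, 0x20);
 *      // ... emit instructions that only read idxRegImm ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);   // value must be unmodified
 * @endcode
 */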
4288
4289
4290/**
4291 * Frees a register assigned to a variable.
4292 *
4293 * The register will be disassociated from the variable.
4294 */
4295DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4296{
4297 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4298 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4299 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4300 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4301 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4302 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4303
4304 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4305 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4306 if (!fFlushShadows)
4307 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4308 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4309 else
4310 {
4311 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4312 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4313#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4314 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4315#endif
4316 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4317 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4318 uint64_t fGstRegShadows = fGstRegShadowsOld;
4319 while (fGstRegShadows)
4320 {
4321 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4322 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4323
4324 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4325 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4326 }
4327 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4328 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4329 }
4330}
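
/*
 * Sketch of how the disassociation above is typically driven; idxHstReg is
 * illustrative and would come from the variable's idxReg field.
 *
 * @code
 *      // Keep the guest shadows so the cached values can still be reused ...
 *      iemNativeRegFreeVar(pReNative, idxHstReg, false /*fFlushShadows*/);
 *      // ... or drop them as well when the register content is about to die:
 *      iemNativeRegFreeVar(pReNative, idxHstReg, true /*fFlushShadows*/);
 * @endcode
 */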
4331
4332
4333#if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4334/** Host CPU SIMD register names. */
4335DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4336{
4337# ifdef RT_ARCH_AMD64
4338 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4339# elif defined(RT_ARCH_ARM64)
4340 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4341 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4342# else
4343# error "port me"
4344# endif
4345};
4346#endif
4347
4348
4349/**
4350 * Frees a SIMD register assigned to a variable.
4351 *
4352 * The register will be disassociated from the variable.
4353 */
4354DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4355{
4356 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4357 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4358 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4359 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4360 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4361 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4362
4363 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4364 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4365 if (!fFlushShadows)
4366 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4367 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4368 else
4369 {
4370 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4371 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4372 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4373 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4374 uint64_t fGstRegShadows = fGstRegShadowsOld;
4375 while (fGstRegShadows)
4376 {
4377 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4378 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4379
4380 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4381 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4382 }
4383 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4384 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4385 }
4386}
4387
4388
4389/**
4390 * Reassigns a variable to a different SIMD register specified by the caller.
4391 *
4392 * @returns The new code buffer position.
4393 * @param pReNative The native recompile state.
4394 * @param off The current code buffer position.
4395 * @param idxVar The variable index.
4396 * @param idxRegOld The old host register number.
4397 * @param idxRegNew The new host register number.
4398 * @param pszCaller The caller for logging.
4399 */
4400static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4401 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4402{
4403 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4404 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4405 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4406 RT_NOREF(pszCaller);
4407
4408 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4409 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4410 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4411
4412 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4413 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4414 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4415
4416 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4417 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4418
4419
4420 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4421 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4422 else
4423 {
4424 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4425 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4426 }
4427
4428 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4429 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4430 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4431 if (fGstRegShadows)
4432 {
4433 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4434 | RT_BIT_32(idxRegNew);
4435 while (fGstRegShadows)
4436 {
4437 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4438 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4439
4440 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4441 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4442 }
4443 }
4444
4445 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4446 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4447 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4448 return off;
4449}
4450
4451
4452/**
4453 * Moves a variable to a different register or spills it onto the stack.
4454 *
4455 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4456 * kinds can easily be recreated if needed later.
4457 *
4458 * @returns The new code buffer position.
4459 * @param pReNative The native recompile state.
4460 * @param off The current code buffer position.
4461 * @param idxVar The variable index.
4462 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4463 * call-volatile registers.
4464 */
4465DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4466 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4467{
4468 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4469 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4470 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4471 Assert(!pVar->fRegAcquired);
4472 Assert(!pVar->fSimdReg);
4473
4474 uint8_t const idxRegOld = pVar->idxReg;
4475 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4476 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4477 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4478 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4479 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4480 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4481 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4482 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4483 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4484 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4485
4486 /** @todo Add statistics on this.*/
4487 /** @todo Implement basic variable liveness analysis (python) so variables
4488 * can be freed immediately once no longer used. Without it we risk
4489 * trashing registers and stack space for dead variables.
4490 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4491
4492 /*
4493 * First try move it to a different register, as that's cheaper.
4494 */
4495 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4496 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4497 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4498 if (fRegs)
4499 {
4500 /* Avoid using shadow registers, if possible. */
4501 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4502 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4503 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4504 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4505 }
4506
4507 /*
4508 * Otherwise we must spill the register onto the stack.
4509 */
4510 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4511 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4512 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4513
4514 if (pVar->cbVar == sizeof(RTUINT128U))
4515 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4516 else
4517 {
4518 Assert(pVar->cbVar == sizeof(RTUINT256U));
4519 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4520 }
4521
4522 pVar->idxReg = UINT8_MAX;
4523 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4524 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4525 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4526 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4527 return off;
4528}
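
/*
 * Sketch: evacuating a stack variable from the call-volatile SIMD registers
 * right before emitting a helper call, which is how the function above is
 * driven by iemNativeSimdRegMoveAndFreeAndFlushAtCall below (idxVar is
 * illustrative).
 *
 * @code
 *      // Default fForbiddenRegs is IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, so the
 *      // variable ends up in a non-volatile SIMD register or in its stack slot.
 *      off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
 * @endcode
 */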
4529
4530
4531/**
4532 * Called right before emitting a call instruction to move anything important
4533 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4534 * optionally freeing argument variables.
4535 *
4536 * @returns New code buffer offset, UINT32_MAX on failure.
4537 * @param pReNative The native recompile state.
4538 * @param off The code buffer offset.
4539 * @param cArgs The number of arguments the function call takes.
4540 * It is presumed that the host register part of these has
4541 * been allocated as such already and won't need moving,
4542 * just freeing.
4543 * @param fKeepVars Mask of variables that should keep their register
4544 * assignments. Caller must take care to handle these.
4545 */
4546DECL_HIDDEN_THROW(uint32_t)
4547iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4548{
4549 Assert(!cArgs); RT_NOREF(cArgs);
4550
4551 /* fKeepVars will reduce this mask. */
4552 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4553
4554 /*
4555 * Move anything important out of volatile registers.
4556 */
4557 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4558#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4559 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4560#endif
4561 ;
4562
4563 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4564 if (!fSimdRegsToMove)
4565 { /* likely */ }
4566 else
4567 {
4568 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4569 while (fSimdRegsToMove != 0)
4570 {
4571 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4572 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4573
4574 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4575 {
4576 case kIemNativeWhat_Var:
4577 {
4578 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4579 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4580 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4581 Assert(pVar->idxReg == idxSimdReg);
4582 Assert(pVar->fSimdReg);
4583 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4584 {
4585 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4586 idxVar, pVar->enmKind, pVar->idxReg));
4587 if (pVar->enmKind != kIemNativeVarKind_Stack)
4588 pVar->idxReg = UINT8_MAX;
4589 else
4590 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4591 }
4592 else
4593 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4594 continue;
4595 }
4596
4597 case kIemNativeWhat_Arg:
4598 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4599 continue;
4600
4601 case kIemNativeWhat_rc:
4602 case kIemNativeWhat_Tmp:
4603 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4604 continue;
4605
4606 case kIemNativeWhat_FixedReserved:
4607#ifdef RT_ARCH_ARM64
4608 continue; /* On ARM the upper half of the virtual 256-bit register. */
4609#endif
4610
4611 case kIemNativeWhat_FixedTmp:
4612 case kIemNativeWhat_pVCpuFixed:
4613 case kIemNativeWhat_pCtxFixed:
4614 case kIemNativeWhat_PcShadow:
4615 case kIemNativeWhat_Invalid:
4616 case kIemNativeWhat_End:
4617 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4618 }
4619 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4620 }
4621 }
4622
4623 /*
4624 * Do the actual freeing.
4625 */
4626 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4627 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4628 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4629 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4630
4631 /* If there are guest register shadows in any call-volatile register, we
4632 have to clear the corresponding guest register masks for each register. */
4633 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4634 if (fHstSimdRegsWithGstShadow)
4635 {
4636 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4637 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4638 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4639 do
4640 {
4641 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4642 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4643
4644 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4645
4646#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4647 /*
4648 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4649 * to call volatile registers).
4650 */
4651 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4652 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4653 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4654#endif
4655 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4656 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4657
4658 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4659 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4660 } while (fHstSimdRegsWithGstShadow != 0);
4661 }
4662
4663 return off;
4664}
4665
4666
4667/**
4668 * Called right before emitting a call instruction to move anything important
4669 * out of call-volatile registers, free and flush the call-volatile registers,
4670 * optionally freeing argument variables.
4671 *
4672 * @returns New code buffer offset, UINT32_MAX on failure.
4673 * @param pReNative The native recompile state.
4674 * @param off The code buffer offset.
4675 * @param cArgs The number of arguments the function call takes.
4676 * It is presumed that the host register part of these has
4677 * been allocated as such already and won't need moving,
4678 * just freeing.
4679 * @param fKeepVars Mask of variables that should keep their register
4680 * assignments. Caller must take care to handle these.
4681 */
4682DECL_HIDDEN_THROW(uint32_t)
4683iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4684{
4685 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4686
4687 /* fKeepVars will reduce this mask. */
4688 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK;
4689
4690#ifdef RT_ARCH_ARM64
4691AssertCompile(IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK == UINT32_C(0x37fff));
4692#endif
4693
4694 /*
4695 * Move anything important out of volatile registers.
4696 */
4697 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4698 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4699 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK
4700#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4701 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4702#endif
4703 & ~g_afIemNativeCallRegs[cArgs];
4704
4705 fRegsToMove &= pReNative->Core.bmHstRegs;
4706 if (!fRegsToMove)
4707 { /* likely */ }
4708 else
4709 {
4710 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4711 while (fRegsToMove != 0)
4712 {
4713 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4714 fRegsToMove &= ~RT_BIT_32(idxReg);
4715
4716 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4717 {
4718 case kIemNativeWhat_Var:
4719 {
4720 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4721 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4722 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4723 Assert(pVar->idxReg == idxReg);
4724 Assert(!pVar->fSimdReg);
4725 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4726 {
4727 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4728 idxVar, pVar->enmKind, pVar->idxReg));
4729 if (pVar->enmKind != kIemNativeVarKind_Stack)
4730 pVar->idxReg = UINT8_MAX;
4731 else
4732 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4733 }
4734 else
4735 fRegsToFree &= ~RT_BIT_32(idxReg);
4736 continue;
4737 }
4738
4739 case kIemNativeWhat_Arg:
4740 AssertMsgFailed(("What?!?: %u\n", idxReg));
4741 continue;
4742
4743 case kIemNativeWhat_rc:
4744 case kIemNativeWhat_Tmp:
4745 AssertMsgFailed(("Missing free: %u\n", idxReg));
4746 continue;
4747
4748 case kIemNativeWhat_FixedTmp:
4749 case kIemNativeWhat_pVCpuFixed:
4750 case kIemNativeWhat_pCtxFixed:
4751 case kIemNativeWhat_PcShadow:
4752 case kIemNativeWhat_FixedReserved:
4753 case kIemNativeWhat_Invalid:
4754 case kIemNativeWhat_End:
4755 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4756 }
4757 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4758 }
4759 }
4760
4761 /*
4762 * Do the actual freeing.
4763 */
4764 if (pReNative->Core.bmHstRegs & fRegsToFree)
4765 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4766 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4767 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4768
4769 /* If there are guest register shadows in any call-volatile register, we
4770 have to clear the corresponding guest register masks for each register. */
4771 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4772 if (fHstRegsWithGstShadow)
4773 {
4774 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4775 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
4776 fHstRegsWithGstShadow));
4777 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4778 do
4779 {
4780 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4781 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4782
4783 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4784
4785#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4786 /*
4787 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4788 * to call volatile registers).
4789 */
4790 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4791 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4792 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4793#endif
4794
4795 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4796 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4797 } while (fHstRegsWithGstShadow != 0);
4798 }
4799
4800 /*
4801 * Now for the SIMD registers, no argument support for now.
4802 */
4803 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4804
4805 return off;
4806}
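
/*
 * Sketch of the call-emission sequence built around the function above; the
 * argument loading and the actual call emission are placeholders.
 *
 * @code
 *      // Evacuate/flush everything that must not live in call-volatile registers.
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/);
 *      // ... load the two argument registers and emit the call itself ...
 * @endcode
 */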
4807
4808
4809/**
4810 * Flushes a set of guest register shadow copies.
4811 *
4812 * This is usually done after calling a threaded function or a C-implementation
4813 * of an instruction.
4814 *
4815 * @param pReNative The native recompile state.
4816 * @param fGstRegs Set of guest registers to flush.
4817 */
4818DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4819{
4820 /*
4821 * Reduce the mask by what's currently shadowed
4822 */
4823 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4824 fGstRegs &= bmGstRegShadowsOld;
4825 if (fGstRegs)
4826 {
4827 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4828 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4829 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4830 if (bmGstRegShadowsNew)
4831 {
4832 /*
4833 * Partial.
4834 */
4835 do
4836 {
4837 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4838 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4839 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4840 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4841 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4842#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4843 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4844#endif
4845
4846 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4847 fGstRegs &= ~fInThisHstReg;
4848 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4849 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4850 if (!fGstRegShadowsNew)
4851 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4852 } while (fGstRegs != 0);
4853 }
4854 else
4855 {
4856 /*
4857 * Clear all.
4858 */
4859 do
4860 {
4861 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4862 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4863 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4864 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4865 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4866#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4867 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4868#endif
4869
4870 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4871 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4872 } while (fGstRegs != 0);
4873 pReNative->Core.bmHstRegsWithGstShadow = 0;
4874 }
4875 }
4876}
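
/*
 * Sketch: after emitting a call to a threaded function or C-implementation,
 * the corresponding shadow copies are dropped so the values get reloaded from
 * CPUMCTX on next use.  fGstRegsClobbered is an illustrative mask built from
 * RT_BIT_64() of the affected IEMNATIVEGSTREG values.
 *
 * @code
 *      iemNativeRegFlushGuestShadows(pReNative, fGstRegsClobbered);
 *      // Passing UINT64_MAX drops every guest register shadow at once.
 * @endcode
 */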
4877
4878
4879/**
4880 * Flushes guest register shadow copies held by a set of host registers.
4881 *
4882 * This is used with the TLB lookup code for ensuring that we don't carry on
4883 * with any guest shadows in volatile registers, as these will get corrupted by
4884 * a TLB miss.
4885 *
4886 * @param pReNative The native recompile state.
4887 * @param fHstRegs Set of host registers to flush guest shadows for.
4888 */
4889DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4890{
4891 /*
4892 * Reduce the mask by what's currently shadowed.
4893 */
4894 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4895 fHstRegs &= bmHstRegsWithGstShadowOld;
4896 if (fHstRegs)
4897 {
4898 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4899 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4900 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4901 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4902 if (bmHstRegsWithGstShadowNew)
4903 {
4904 /*
4905 * Partial (likely).
4906 */
4907 uint64_t fGstShadows = 0;
4908 do
4909 {
4910 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4911 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4912 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4913 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4914#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4915 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4916#endif
4917
4918 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4919 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4920 fHstRegs &= ~RT_BIT_32(idxHstReg);
4921 } while (fHstRegs != 0);
4922 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4923 }
4924 else
4925 {
4926 /*
4927 * Clear all.
4928 */
4929 do
4930 {
4931 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4932 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4933 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4934 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4935#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4936 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4937#endif
4938
4939 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4940 fHstRegs &= ~RT_BIT_32(idxHstReg);
4941 } while (fHstRegs != 0);
4942 pReNative->Core.bmGstRegShadows = 0;
4943 }
4944 }
4945}
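
/*
 * Sketch: dropping guest shadows from all call-volatile GPRs ahead of a TLB
 * lookup, matching the use case described in the function comment above.
 *
 * @code
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 * @endcode
 */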
4946
4947
4948/**
4949 * Restores guest shadow copies in volatile registers.
4950 *
4951 * This is used after calling a helper function (think TLB miss) to restore the
4952 * register state of volatile registers.
4953 *
4954 * @param pReNative The native recompile state.
4955 * @param off The code buffer offset.
4956 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4957 * be active (allocated) w/o asserting. Hack.
4958 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4959 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4960 */
4961DECL_HIDDEN_THROW(uint32_t)
4962iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4963{
4964 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4965 if (fHstRegs)
4966 {
4967 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4968 do
4969 {
4970 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4971
4972 /* It's not fatal if a register is active holding a variable that
4973 is shadowing a guest register, ASSUMING all pending guest register
4974 writes were flushed prior to the helper call. However, we'll be
4975 emitting duplicate restores, so it wastes code space. */
4976 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4977 RT_NOREF(fHstRegsActiveShadows);
4978
4979 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4980#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4981 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4982#endif
4983 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4984 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4985 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4986
4987 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4988 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4989
4990 fHstRegs &= ~RT_BIT_32(idxHstReg);
4991 } while (fHstRegs != 0);
4992 }
4993 return off;
4994}
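
/*
 * Sketch of the helper-call bracket this function closes (see the @see tag
 * above); the save/restore helpers are only named here as their exact
 * parameters are not shown in this part of the file.
 *
 * @code
 *      // 1. iemNativeVarSaveVolatileRegsPreHlpCall() stashes variable values
 *      //    living in call-volatile registers.
 *      // 2. The helper call itself is emitted.
 *      // 3. iemNativeVarRestoreVolatileRegsPostHlpCall() restores those variables.
 *      // 4. Finally the guest shadows are reloaded into the volatile registers:
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
 * @endcode
 */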
4995
4996
4997
4998
4999/*********************************************************************************************************************************
5000* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5001*********************************************************************************************************************************/
5002
5003/**
5004 * Info about shadowed guest SIMD register values.
5005 * @see IEMNATIVEGSTSIMDREG
5006 */
5007static struct
5008{
5009 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5010 uint32_t offXmm;
5011 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5012 uint32_t offYmm;
5013 /** Name (for logging). */
5014 const char *pszName;
5015} const g_aGstSimdShadowInfo[] =
5016{
5017#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5018 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5019 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5020 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5021 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5022 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5023 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5024 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5025 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5026 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5027 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5028 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5029 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5030 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5031 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5032 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5033 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5034 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5035#undef CPUMCTX_OFF_AND_SIZE
5036};
5037AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
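
/*
 * Sketch: how the table above is consumed when spilling a shadowed guest SIMD
 * register back to CPUMCTX (see iemNativeSimdRegFlushPendingWrite below);
 * enmGstSimdReg and idxHstSimdReg are illustrative.
 *
 * @code
 *      uint32_t const offXmm = g_aGstSimdShadowInfo[enmGstSimdReg].offXmm;  // low 128 bits
 *      uint32_t const offYmm = g_aGstSimdShadowInfo[enmGstSimdReg].offYmm;  // high 128 bits
 *      off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, offXmm);
 *      off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, offYmm);
 * @endcode
 */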
5038
5039
5040/**
5041 * Frees a temporary SIMD register.
5042 *
5043 * Any shadow copies of guest registers assigned to the host register will not
5044 * be flushed by this operation.
5045 */
5046DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5047{
5048 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5049 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5050 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5051 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5052 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5053}
5054
5055
5056/**
5057 * Emits code to flush a pending write of the given guest SIMD register, if any, clearing its dirty state (the guest to host SIMD register association is left intact).
5058 *
5059 * @returns New code buffer offset.
5060 * @param pReNative The native recompile state.
5061 * @param off Current code buffer position.
5062 * @param enmGstSimdReg The guest SIMD register to flush.
5063 */
5064DECL_HIDDEN_THROW(uint32_t)
5065iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5066{
5067 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5068
5069 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5070 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5071 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5072 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5073
5074 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5075 {
5076 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5077 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5078 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5079 }
5080
5081 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5082 {
5083 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5084 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5085 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5086 }
5087
5088 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5089 return off;
5090}
5091
5092
5093/**
5094 * Flush the given set of guest SIMD registers if marked as dirty.
5095 *
5096 * @returns New code buffer offset.
5097 * @param pReNative The native recompile state.
5098 * @param off Current code buffer position.
5099 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
5100 */
5101DECL_HIDDEN_THROW(uint32_t)
5102iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
5103{
5104 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5105 & fFlushGstSimdReg;
5106 if (bmGstSimdRegShadowDirty)
5107 {
5108# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5109 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5110 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5111# endif
5112
5113 do
5114 {
5115 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5116 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5117 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5118 } while (bmGstSimdRegShadowDirty);
5119 }
5120
5121 return off;
5122}
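
/*
 * Sketch: flushing every dirty guest SIMD register shadow before control
 * leaves the recompiled code; the default fFlushGstSimdReg of UINT64_MAX
 * covers the whole set.
 *
 * @code
 *      off = iemNativeSimdRegFlushDirtyGuest(pReNative, off);
 * @endcode
 */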
5123
5124
5125#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5126/**
5127 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
5128 *
5129 * @returns New code buffer offset.
5130 * @param pReNative The native recompile state.
5131 * @param off Current code buffer position.
5132 * @param idxHstSimdReg The host SIMD register.
5133 *
5134 * @note This doesn't do any unshadowing of guest registers from the host register.
5135 */
5136DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
5137{
5138 /* We need to flush any pending guest register writes this host register shadows. */
5139 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5140 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5141 if (bmGstSimdRegShadowDirty)
5142 {
5143# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5144 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5145 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5146# endif
5147
5148 do
5149 {
5150 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5151 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5152 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5153 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5154 } while (bmGstSimdRegShadowDirty);
5155 }
5156
5157 return off;
5158}
5159#endif
5160
5161
5162/**
5163 * Locate a register, possibly freeing one up.
5164 *
5165 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5166 * failed.
5167 *
5168 * @returns Host register number on success. Returns UINT8_MAX if no registers
5169 * found, the caller is supposed to deal with this and raise an
5170 * allocation type specific status code (if desired).
5171 *
5172 * @throws VBox status code if we run into trouble spilling a variable or
5173 * recording debug info. Does NOT throw anything if we're out of
5174 * registers, though.
5175 */
5176static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5177 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5178{
5179 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5180 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5181 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5182
5183 /*
5184 * Try a freed register that's shadowing a guest register.
5185 */
5186 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5187 if (fRegs)
5188 {
5189 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5190
5191#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5192 /*
5193 * When we have liveness information, we use it to kick out all shadowed
5194 * guest registers that will not be needed any more in this TB. If we're
5195 * lucky, this may prevent us from ending up here again.
5196 *
5197 * Note! We must consider the previous entry here so we don't free
5198 * anything that the current threaded function requires (current
5199 * entry is produced by the next threaded function).
5200 */
5201 uint32_t const idxCurCall = pReNative->idxCurCall;
5202 if (idxCurCall > 0)
5203 {
5204 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5205 uint64_t const fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
5206
5207 /* If it matches any shadowed registers. */
5208 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5209 {
5210 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5211 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5212 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5213
5214 /* See if we've got any unshadowed registers we can return now. */
5215 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5216 if (fUnshadowedRegs)
5217 {
5218 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5219 return (fPreferVolatile
5220 ? ASMBitFirstSetU32(fUnshadowedRegs)
5221 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5222 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5223 - 1;
5224 }
5225 }
5226 }
5227#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5228
5229 unsigned const idxReg = (fPreferVolatile
5230 ? ASMBitFirstSetU32(fRegs)
5231 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5232 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5233 - 1;
5234
5235 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5236 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5237 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5238 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5239
5240 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5241 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
5242
5243 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5244 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5245 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5246 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5247 return idxReg;
5248 }
5249
5250 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5251
5252 /*
5253 * Try free up a variable that's in a register.
5254 *
5255 * We do two rounds here, first evacuating variables we don't need to be
5256 * saved on the stack, then in the second round move things to the stack.
5257 */
5258 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5259 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5260 {
5261 uint32_t fVars = pReNative->Core.bmVars;
5262 while (fVars)
5263 {
5264 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5265 if (pReNative->Core.aVars[idxVar].fSimdReg) /* (this is the SIMD allocator) */
5266 {
5267 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5268 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5269 && (RT_BIT_32(idxReg) & fRegMask)
5270 && ( iLoop == 0
5271 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5272 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5273 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5274 {
5275 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5276 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5277 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5278 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5279 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5280 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5281
5282 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5283 {
5284 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5285 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5286 }
5287
5288 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5289 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5290
5291 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5292 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5293 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5294 return idxReg;
5295 }
5296 }
5297 fVars &= ~RT_BIT_32(idxVar);
5298 }
5299 }
5300
5301 AssertFailed();
5302 return UINT8_MAX;
5303}
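
/*
 * Worked example of the fPreferVolatile selection above: with fRegs = 0x0c
 * (registers 2 and 3 free) and, say, only register 2 in
 * IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, ASMBitFirstSetU32(fRegs) - 1 picks
 * register 2 when volatile registers are preferred, while the
 * ASMBitLastSetU32 path masks out the volatile bits first and therefore picks
 * register 3.  The concrete mask values are architecture dependent.
 */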
5304
5305
5306/**
5307 * Flushes a set of guest register shadow copies.
5308 *
5309 * This is usually done after calling a threaded function or a C-implementation
5310 * of an instruction.
5311 *
5312 * @param pReNative The native recompile state.
5313 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5314 */
5315DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5316{
5317 /*
5318 * Reduce the mask by what's currently shadowed
5319 */
5320 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5321 fGstSimdRegs &= bmGstSimdRegShadows;
5322 if (fGstSimdRegs)
5323 {
5324 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5325 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5326 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5327 if (bmGstSimdRegShadowsNew)
5328 {
5329 /*
5330 * Partial.
5331 */
5332 do
5333 {
5334 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5335 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5336 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5337 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5338 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5339 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5340
5341 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5342 fGstSimdRegs &= ~fInThisHstReg;
5343 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5344 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5345 if (!fGstRegShadowsNew)
5346 {
5347 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5348 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5349 }
5350 } while (fGstSimdRegs != 0);
5351 }
5352 else
5353 {
5354 /*
5355 * Clear all.
5356 */
5357 do
5358 {
5359 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5360 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5361 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5362 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5363 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5364 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5365
5366 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5367 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5368 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5369 } while (fGstSimdRegs != 0);
5370 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5371 }
5372 }
5373}
5374
5375
5376/**
5377 * Allocates a temporary host SIMD register.
5378 *
5379 * This may emit code to save register content onto the stack in order to free
5380 * up a register.
5381 *
5382 * @returns The host register number; throws VBox status code on failure,
5383 * so no need to check the return value.
5384 * @param pReNative The native recompile state.
5385 * @param poff Pointer to the variable with the code buffer position.
5386 * This will be updated if we need to move a variable from
5387 * register to stack in order to satisfy the request.
5388 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5389 * registers (@c true, default) or the other way around
5390 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5391 */
5392DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5393{
5394 /*
5395 * Try find a completely unused register, preferably a call-volatile one.
5396 */
5397 uint8_t idxSimdReg;
5398 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5399 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5400 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5401 if (fRegs)
5402 {
5403 if (fPreferVolatile)
5404 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5405 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5406 else
5407 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5408 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5409 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5410 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5411
5412 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5413 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5414 }
5415 else
5416 {
5417 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5418 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5419 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5420 }
5421
5422 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5423 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5424}
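
/*
 * Sketch of the temporary SIMD register alloc/free pairing; the emit step in
 * the middle is a placeholder.
 *
 * @code
 *      uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... emit vector instructions that scribble on idxSimdTmp ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 * @endcode
 */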
5425
5426
5427/**
5428 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5429 * registers.
5430 *
5431 * @returns The host register number; throws VBox status code on failure,
5432 * so no need to check the return value.
5433 * @param pReNative The native recompile state.
5434 * @param poff Pointer to the variable with the code buffer position.
5435 * This will be updated if we need to move a variable from
5436 * register to stack in order to satisfy the request.
5437 * @param fRegMask Mask of acceptable registers.
5438 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5439 * registers (@c true, default) or the other way around
5440 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5441 */
5442DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5443 bool fPreferVolatile /*= true*/)
5444{
5445 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5446 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5447
5448 /*
5449 * Try find a completely unused register, preferably a call-volatile one.
5450 */
5451 uint8_t idxSimdReg;
5452 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5453 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5454 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5455 & fRegMask;
5456 if (fRegs)
5457 {
5458 if (fPreferVolatile)
5459 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5460 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5461 else
5462 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5463 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5464 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5465 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5466
5467 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5468 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5469 }
5470 else
5471 {
5472 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5473 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5474 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5475 }
5476
5477 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5478 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5479}
5480
5481
5482/**
5483 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5484 *
5485 * @param pReNative The native recompile state.
5486 * @param idxHstSimdReg The host SIMD register to update the state for.
5487 * @param enmLoadSz The load size to set.
5488 */
5489DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5490 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5491{
5492 /* Everything valid already? -> nothing to do. */
5493 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5494 return;
5495
5496 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5497 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5498 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5499 {
5500 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5501 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5502 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5503 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5504 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5505 }
5506}
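
/*
 * Sketch of the load-size transitions the helper above implements;
 * idxHstSimdReg is illustrative.
 *
 * @code
 *      // Invalid -> Low128 (only the XMM part is valid now):
 *      iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
 *      // Low128 + High128 -> 256 (both halves valid):
 *      iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
 * @endcode
 */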
5507
5508
5509static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5510 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5511{
5512 /* Easy case first: either the destination wants the same range the source has already loaded, or the source has loaded everything. */
5513 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5514 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5515 {
5516#ifdef RT_ARCH_ARM64
5517 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5518 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5519#endif
5520
5521 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5522 {
5523 switch (enmLoadSzDst)
5524 {
5525 case kIemNativeGstSimdRegLdStSz_256:
5526 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5527 break;
5528 case kIemNativeGstSimdRegLdStSz_Low128:
5529 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5530 break;
5531 case kIemNativeGstSimdRegLdStSz_High128:
5532 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5533 break;
5534 default:
5535 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5536 }
5537
5538 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5539 }
5540 }
5541 else
5542 {
5543 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5544 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5545 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5546 }
5547
5548 return off;
5549}
5550
5551
5552/**
5553 * Allocates a temporary host SIMD register for keeping a guest
5554 * SIMD register value.
5555 *
5556 * Since we may already have a register holding the guest register value,
5557 * code will be emitted to do the loading if that's not the case. Code may also
5558 * be emitted if we have to free up a register to satisfy the request.
5559 *
5560 * @returns The host register number; throws VBox status code on failure, so no
5561 * need to check the return value.
5562 * @param pReNative The native recompile state.
5563 * @param poff Pointer to the variable with the code buffer
5564 * position. This will be updated if we need to move a
5565 * variable from register to stack in order to satisfy
5566 * the request.
5567 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5568 * @param enmLoadSz Load/store size.
5569 * @param enmIntendedUse How the caller will be using the host register.
5570 * @param fNoVolatileRegs Set if no volatile register is allowed, clear if any
5571 * register is okay (default). The ASSUMPTION here is
5572 * that the caller has already flushed all volatile
5573 * registers, so this is only applied if we allocate a
5574 * new register.
5575 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5576 */
5577DECL_HIDDEN_THROW(uint8_t)
5578iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5579 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz,
5580 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5581 bool fNoVolatileRegs /*= false*/)
5582{
5583 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5584#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5585 AssertMsg( pReNative->idxCurCall == 0
5586 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5587 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5588 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5589 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5590 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5591 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5592#endif
5593#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5594 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5595#endif
5596 uint32_t const fRegMask = !fNoVolatileRegs
5597 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5598 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5599
5600 /*
5601 * First check if the guest register value is already in a host register.
5602 */
5603 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5604 {
5605 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5606 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5607 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5608 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5609
5610 /* It's not supposed to be allocated... */
5611 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5612 {
5613 /*
5614 * If the register will trash the guest shadow copy, try to find a
5615 * completely unused register we can use instead. If that fails,
5616 * we need to disassociate the host reg from the guest reg.
5617 */
5618 /** @todo would be nice to know if preserving the register is in any way helpful. */
5619 /* If the purpose is calculations, try to duplicate the register value as
5620 we'll be clobbering the shadow. */
5621 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5622 && ( ~pReNative->Core.bmHstSimdRegs
5623 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5624 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5625 {
5626 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5627
5628 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5629
5630 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5631 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5632 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5633 idxSimdReg = idxRegNew;
5634 }
5635 /* If the current register matches the restrictions, go ahead and allocate
5636 it for the caller. */
5637 else if (fRegMask & RT_BIT_32(idxSimdReg))
5638 {
5639 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5640 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5641 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5642 {
5643 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5644 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5645 else
5646 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5647 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5648 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5649 }
5650 else
5651 {
5652 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5653 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5654 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5655 }
5656 }
5657 /* Otherwise, allocate a register that satisfies the caller and transfer
5658 the shadowing if compatible with the intended use. (This basically
5659 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5660 else
5661 {
5662 Assert(fNoVolatileRegs);
5663 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5664 !fNoVolatileRegs
5665 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5666 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5667 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5668 {
5669 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5670 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5671 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5672 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5673 }
5674 else
5675 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5676 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5677 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5678 idxSimdReg = idxRegNew;
5679 }
5680 }
5681 else
5682 {
5683 /*
5684 * Oops. Shadowed guest register already allocated!
5685 *
5686 * Allocate a new register, copy the value and, if updating, the
5687 * guest shadow copy assignment to the new register.
5688 */
5689 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5690 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5691 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5692 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5693
5694 /** @todo share register for readonly access. */
5695 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5696 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5697
5698 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5699 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5700 else
5701 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5702
5703 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5704 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5705 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5706 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5707 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5708 else
5709 {
5710 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5711 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5712 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5713 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5714 }
5715 idxSimdReg = idxRegNew;
5716 }
5717 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5718
5719#ifdef VBOX_STRICT
5720 /* Strict builds: Check that the value is correct. */
5721 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5722 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5723#endif
5724
5725 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5726 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5727 {
5728#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5729 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5730 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5731#endif
5732
5733 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5734 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5735 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5736 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5737 else
5738 {
5739 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5740 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5741 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5742 }
5743 }
5744
5745 return idxSimdReg;
5746 }
5747
5748 /*
5749 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5750 */
5751 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5752
5753 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5754 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5755 else
5756 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5757
5758 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5759 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5760
5761 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5762 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5763 {
5764#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5765 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5766 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5767#endif
5768
5769 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5770 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5771 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5772 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5773 else
5774 {
5775 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5776 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5777 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5778 }
5779 }
5780
5781 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5782 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5783
5784 return idxRegNew;
5785}
5786
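/*
 * Illustrative usage sketch (not part of the recompiler proper; the guest SIMD
 * register selector and the iemNativeSimdRegFreeTmp helper are assumed to follow
 * the patterns used elsewhere in this file):
 *      uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, enmGstSimdReg,
 *                                                                         kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                         kIemNativeGstRegUse_ForUpdate);
 *      ... emit SIMD code operating on idxSimdReg; the low 128 bits are marked dirty ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
 */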
5787
5788/**
5789 * Flushes guest SIMD register shadow copies held by a set of host registers.
5790 *
5791 * This is used when calling an external helper, to ensure that we don't carry on
5792 * with any guest shadows in volatile registers, as these will get clobbered by the call.
5793 *
5794 * @param pReNative The native recompile state.
5795 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5796 */
5797DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5798{
5799 /*
5800 * Reduce the mask by what's currently shadowed.
5801 */
5802 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5803 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5804 if (fHstSimdRegs)
5805 {
5806 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5807 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5808 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5809 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5810 if (bmHstSimdRegsWithGstShadowNew)
5811 {
5812 /*
5813 * Partial (likely).
5814 */
5815 uint64_t fGstShadows = 0;
5816 do
5817 {
5818 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5819 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5820 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5821 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5822 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5823 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5824
5825 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5826 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5827 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5828 } while (fHstSimdRegs != 0);
5829 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5830 }
5831 else
5832 {
5833 /*
5834 * Clear all.
5835 */
5836 do
5837 {
5838 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5839 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5840 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5841 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5842 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5843 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5844
5845 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5846 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5847 } while (fHstSimdRegs != 0);
5848 pReNative->Core.bmGstSimdRegShadows = 0;
5849 }
5850 }
5851}
5852
5853
5854
5855/*********************************************************************************************************************************
5856* Code emitters for flushing pending guest register writes and sanity checks *
5857*********************************************************************************************************************************/
5858
5859#ifdef VBOX_STRICT
5860/**
5861 * Does internal register allocator sanity checks.
5862 */
5863DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5864{
5865 /*
5866 * Iterate host registers building a guest shadowing set.
5867 */
5868 uint64_t bmGstRegShadows = 0;
5869 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5870 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5871 while (bmHstRegsWithGstShadow)
5872 {
5873 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5874 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5875 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5876
5877 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5878 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5879 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5880 bmGstRegShadows |= fThisGstRegShadows;
5881 while (fThisGstRegShadows)
5882 {
5883 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5884 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5885 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5886 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5887 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5888 }
5889 }
5890 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5891 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5892 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5893
5894 /*
5895 * Now the other way around, checking the guest to host index array.
5896 */
5897 bmHstRegsWithGstShadow = 0;
5898 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5899 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5900 while (bmGstRegShadows)
5901 {
5902 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5903 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5904 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5905
5906 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5907 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5908 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5909 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5910 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5911 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5912 }
5913 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5914 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5915 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5916}
5917#endif /* VBOX_STRICT */
5918
5919
5920/**
5921 * Flushes any delayed guest register writes.
5922 *
5923 * This must be called prior to calling CImpl functions and any helpers that use
5924 * the guest state (like raising exceptions).
5925 *
5926 * @note This function does not flush any shadowing information for guest registers;
5927 * the caller must do that itself if desired.
5928 */
5929DECL_HIDDEN_THROW(uint32_t)
5930iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5931{
5932#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5933 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5934 off = iemNativeEmitPcWriteback(pReNative, off);
5935#else
5936 RT_NOREF(pReNative, fGstShwExcept);
5937#endif
5938
5939#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5940 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5941#endif
5942
5943 return iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5944}
5945
5946#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5947
5948# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5949
5950/**
5951 * Checks if the value in @a idxPcReg matches IEMCPU::uPcUpdatingDebug.
5952 */
5953DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheckWithReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxPcReg)
5954{
5955 Assert(idxPcReg != IEMNATIVE_REG_FIXED_TMP0);
5956 Assert(pReNative->Core.fDebugPcInitialized);
5957
5958 /* cmp [pVCpu->iem.s.uPcUpdatingDebug], pcreg */
5959# ifdef RT_ARCH_AMD64
5960 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5961 pCodeBuf[off++] = X86_OP_REX_W | (idxPcReg >= 8 ? X86_OP_REX_R : 0);
5962 pCodeBuf[off++] = 0x3b;
5963 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxPcReg & 7, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5964# else
5965 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5966 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5967 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, idxPcReg);
5968# endif
5969
5970 uint32_t offFixup = off;
5971 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, kIemNativeInstrCond_e);
5972 off = iemNativeEmitBrkEx(pCodeBuf, off, UINT32_C(0x2200));
5973 iemNativeFixupFixedJump(pReNative, offFixup, off);
5974
5975 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5976 return off;
5977}
5978
5979
5980/**
5981 * Checks that the current RIP+offPc matches IEMCPU::uPcUpdatingDebug.
5982 */
5983DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5984{
5985 if (pReNative->Core.fDebugPcInitialized)
5986 {
5987 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
5988 if (pReNative->Core.offPc)
5989 {
5990 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5991 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 8);
5992 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxTmpReg, idxPcReg, pReNative->Core.offPc);
5993 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5994 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxTmpReg);
5995 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5996 }
5997 else
5998 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5999 iemNativeRegFreeTmp(pReNative, idxPcReg);
6000 }
6001 return off;
6002}
6003
6004# endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG */
6005
6006/**
6007 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
6008 */
6009DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6010{
6011 Assert(pReNative->Core.offPc);
6012# if !defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && !defined(VBOX_WITH_STATISTICS)
6013 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x\n", pReNative->Core.offPc, off));
6014# else
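    /* Figure out the current instruction number, walking backwards past calls with a
       zero idxInstr, so we can tell how many instructions had their PC update folded
       into this writeback (used for the statistics counter and the debug info below). */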
6015 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
6016 uint8_t idxCurCall = pReNative->idxCurCall;
6017 uint8_t idxInstr = pReNative->pTbOrg->Thrd.paCalls[idxCurCall].idxInstr; /* unreliable */
6018 while (idxInstr == 0 && idxInstr + 1 < idxOldInstrPlusOne && idxCurCall > 0)
6019 idxInstr = pReNative->pTbOrg->Thrd.paCalls[--idxCurCall].idxInstr;
6020 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
6021 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
6022 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u\n",
6023 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped));
6024
6025 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
6026
6027# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6028 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6029 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
6030# endif
6031# endif
6032
6033# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6034 /* Allocate a temporary PC register. */
6035 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6036
6037 /* Perform the addition and store the result. */
6038 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6039 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
6040# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6041 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
6042# endif
6043
6044 /* Free but don't flush the PC register. */
6045 iemNativeRegFreeTmp(pReNative, idxPcReg);
6046# else
6047 /* Compare the shadow with the context value, they should match. */
6048 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6049 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6050# endif
6051
6052 pReNative->Core.offPc = 0;
6053
6054 return off;
6055}
6056
6057#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6058
6059
6060/*********************************************************************************************************************************
6061* Code Emitters (larger snippets) *
6062*********************************************************************************************************************************/
6063
6064/**
6065 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6066 * extending to 64-bit width.
6067 *
6068 * @returns New code buffer offset on success, UINT32_MAX on failure.
6069 * @param pReNative The native recompile state.
6070 * @param off The current code buffer position.
6071 * @param idxHstReg The host register to load the guest register value into.
6072 * @param enmGstReg The guest register to load.
6073 *
6074 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg;
6075 * that is something the caller needs to do if applicable.
6076 */
6077DECL_HIDDEN_THROW(uint32_t)
6078iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6079{
6080 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6081 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6082
6083 switch (g_aGstShadowInfo[enmGstReg].cb)
6084 {
6085 case sizeof(uint64_t):
6086 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6087 case sizeof(uint32_t):
6088 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6089 case sizeof(uint16_t):
6090 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6091#if 0 /* not present in the table. */
6092 case sizeof(uint8_t):
6093 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6094#endif
6095 default:
6096 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6097 }
6098}
6099
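/*
 * Minimal usage sketch (illustrative only; the temporary register is assumed to be
 * either a fixed temporary or one obtained from iemNativeRegAllocTmp):
 *      off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
 */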
6100
6101/**
6102 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6103 * extending to 64-bit width, extended version.
6104 *
6105 * @returns New code buffer offset on success, UINT32_MAX on failure.
6106 * @param pCodeBuf The code buffer.
6107 * @param off The current code buffer position.
6108 * @param idxHstReg The host register to load the guest register value into.
6109 * @param enmGstReg The guest register to load.
6110 *
6111 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg;
6112 * that is something the caller needs to do if applicable.
6113 */
6114DECL_HIDDEN_THROW(uint32_t)
6115iemNativeEmitLoadGprWithGstShadowRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6116{
6117 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6118 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6119
6120 switch (g_aGstShadowInfo[enmGstReg].cb)
6121 {
6122 case sizeof(uint64_t):
6123 return iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6124 case sizeof(uint32_t):
6125 return iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6126 case sizeof(uint16_t):
6127 return iemNativeEmitLoadGprFromVCpuU16Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6128#if 0 /* not present in the table. */
6129 case sizeof(uint8_t):
6130 return iemNativeEmitLoadGprFromVCpuU8Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6131#endif
6132 default:
6133#ifdef IEM_WITH_THROW_CATCH
6134 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6135#else
6136 AssertReleaseFailedReturn(off);
6137#endif
6138 }
6139}
6140
6141
6142/**
6143 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6144 *
6145 * @returns New code buffer offset on success, UINT32_MAX on failure.
6146 * @param pReNative The recompiler state.
6147 * @param off The current code buffer position.
6148 * @param idxHstSimdReg The host register to load the guest register value into.
6149 * @param enmGstSimdReg The guest register to load.
6150 * @param enmLoadSz The load size of the register.
6151 *
6152 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg;
6153 * that is something the caller needs to do if applicable.
6154 */
6155DECL_HIDDEN_THROW(uint32_t)
6156iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6157 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6158{
6159 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6160
6161 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6162 switch (enmLoadSz)
6163 {
6164 case kIemNativeGstSimdRegLdStSz_256:
6165 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6166 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6167 case kIemNativeGstSimdRegLdStSz_Low128:
6168 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6169 case kIemNativeGstSimdRegLdStSz_High128:
6170 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6171 default:
6172 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6173 }
6174}
6175
6176#ifdef VBOX_STRICT
6177
6178/**
6179 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6180 *
6181 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6182 * Trashes EFLAGS on AMD64.
6183 */
6184DECL_FORCE_INLINE(uint32_t)
6185iemNativeEmitTop32BitsClearCheckEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxReg)
6186{
6187# ifdef RT_ARCH_AMD64
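    /* Strategy: rotate the upper 32 bits into the low half, test them, trap with an
       int3 if any are set, then rotate back to restore the original register value. */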
6188 /* rol reg64, 32 */
6189 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6190 pCodeBuf[off++] = 0xc1;
6191 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6192 pCodeBuf[off++] = 32;
6193
6194 /* test reg32, ffffffffh */
6195 if (idxReg >= 8)
6196 pCodeBuf[off++] = X86_OP_REX_B;
6197 pCodeBuf[off++] = 0xf7;
6198 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6199 pCodeBuf[off++] = 0xff;
6200 pCodeBuf[off++] = 0xff;
6201 pCodeBuf[off++] = 0xff;
6202 pCodeBuf[off++] = 0xff;
6203
6204 /* je/jz +1 */
6205 pCodeBuf[off++] = 0x74;
6206 pCodeBuf[off++] = 0x01;
6207
6208 /* int3 */
6209 pCodeBuf[off++] = 0xcc;
6210
6211 /* rol reg64, 32 */
6212 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6213 pCodeBuf[off++] = 0xc1;
6214 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6215 pCodeBuf[off++] = 32;
6216
6217# elif defined(RT_ARCH_ARM64)
6218 /* lsr tmp0, reg64, #32 */
6219 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6220 /* cbz tmp0, +1 */
6221 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6222 /* brk #0x1100 */
6223 pCodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6224
6225# else
6226# error "Port me!"
6227# endif
6228 return off;
6229}
6230
6231
6232/**
6233 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6234 *
6235 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6236 * Trashes EFLAGS on AMD64.
6237 */
6238DECL_HIDDEN_THROW(uint32_t)
6239iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6240{
6241# ifdef RT_ARCH_AMD64
6242 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6243# elif defined(RT_ARCH_ARM64)
6244 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6245# else
6246# error "Port me!"
6247# endif
6248 off = iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6250 return off;
6251}
6252
6253
6254/**
6255 * Emitting code that checks that the content of register @a idxReg is the same
6256 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6257 * instruction if that's not the case.
6258 *
6259 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6260 * Trashes EFLAGS on AMD64.
6261 */
6262DECL_HIDDEN_THROW(uint32_t) iemNativeEmitGuestRegValueCheckEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf,
6263 uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6264{
6265#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6266 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6267 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6268 return off;
6269#endif
6270
6271# ifdef RT_ARCH_AMD64
6272 /* cmp reg, [mem] */
6273 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6274 {
6275 if (idxReg >= 8)
6276 pCodeBuf[off++] = X86_OP_REX_R;
6277 pCodeBuf[off++] = 0x38;
6278 }
6279 else
6280 {
6281 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6282 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6283 else
6284 {
6285 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6286 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6287 else
6288 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6289 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6290 if (idxReg >= 8)
6291 pCodeBuf[off++] = X86_OP_REX_R;
6292 }
6293 pCodeBuf[off++] = 0x39;
6294 }
6295 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6296
6297 /* je/jz +1 */
6298 pCodeBuf[off++] = 0x74;
6299 pCodeBuf[off++] = 0x01;
6300
6301 /* int3 */
6302 pCodeBuf[off++] = 0xcc;
6303
6304 /* For values smaller than the register size, we must check that the rest
6305 of the register is all zeros. */
6306 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6307 {
6308 /* test reg64, imm32 */
6309 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6310 pCodeBuf[off++] = 0xf7;
6311 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6312 pCodeBuf[off++] = 0;
6313 pCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6314 pCodeBuf[off++] = 0xff;
6315 pCodeBuf[off++] = 0xff;
6316
6317 /* je/jz +1 */
6318 pCodeBuf[off++] = 0x74;
6319 pCodeBuf[off++] = 0x01;
6320
6321 /* int3 */
6322 pCodeBuf[off++] = 0xcc;
6323 }
6324 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6325 off = iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6326 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6327
6328# elif defined(RT_ARCH_ARM64)
6329 /* mov TMP0, [gstreg] */
6330 off = iemNativeEmitLoadGprWithGstShadowRegEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6331
6332 /* sub tmp0, tmp0, idxReg */
6333 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6334 /* cbz tmp0, +2 */
6335 pCodeBuf[off++] = Armv8A64MkInstrCbz(2, IEMNATIVE_REG_FIXED_TMP0);
6336 /* brk #0x1000+enmGstReg */
6337 pCodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6338 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6339
6340# else
6341# error "Port me!"
6342# endif
6343 return off;
6344}
6345
6346
6347/**
6348 * Emitting code that checks that the content of register @a idxReg is the same
6349 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6350 * instruction if that's not the case.
6351 *
6352 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6353 * Trashes EFLAGS on AMD64.
6354 */
6355DECL_HIDDEN_THROW(uint32_t)
6356iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6357{
6358#ifdef RT_ARCH_AMD64
6359 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6360#elif defined(RT_ARCH_ARM64)
6361 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6362# else
6363# error "Port me!"
6364# endif
6365 return iemNativeEmitGuestRegValueCheckEx(pReNative, pCodeBuf, off, idxReg, enmGstReg);
6366}
6367
6368# ifdef RT_ARCH_AMD64
6369/**
6370 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
6371 */
6372DECL_FORCE_INLINE_THROW(uint32_t)
6373iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6374{
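    /* Strategy: pcmpeqq leaves all-ones in each 64-bit lane that matches the CPUMCTX
       copy; extract each lane and trap with an int3 if it isn't all ones. */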
6375 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6376 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6377 if (idxSimdReg >= 8)
6378 pbCodeBuf[off++] = X86_OP_REX_R;
6379 pbCodeBuf[off++] = 0x0f;
6380 pbCodeBuf[off++] = 0x38;
6381 pbCodeBuf[off++] = 0x29;
6382 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6383
6384 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6385 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6386 pbCodeBuf[off++] = X86_OP_REX_W
6387 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6388 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6389 pbCodeBuf[off++] = 0x0f;
6390 pbCodeBuf[off++] = 0x3a;
6391 pbCodeBuf[off++] = 0x16;
6392 pbCodeBuf[off++] = 0xeb;
6393 pbCodeBuf[off++] = 0x00;
6394
6395 /* cmp tmp0, 0xffffffffffffffff. */
6396 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6397 pbCodeBuf[off++] = 0x83;
6398 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6399 pbCodeBuf[off++] = 0xff;
6400
6401 /* je/jz +1 */
6402 pbCodeBuf[off++] = 0x74;
6403 pbCodeBuf[off++] = 0x01;
6404
6405 /* int3 */
6406 pbCodeBuf[off++] = 0xcc;
6407
6408 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6409 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6410 pbCodeBuf[off++] = X86_OP_REX_W
6411 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6412 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6413 pbCodeBuf[off++] = 0x0f;
6414 pbCodeBuf[off++] = 0x3a;
6415 pbCodeBuf[off++] = 0x16;
6416 pbCodeBuf[off++] = 0xeb;
6417 pbCodeBuf[off++] = 0x01;
6418
6419 /* cmp tmp0, 0xffffffffffffffff. */
6420 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6421 pbCodeBuf[off++] = 0x83;
6422 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6423 pbCodeBuf[off++] = 0xff;
6424
6425 /* je/jz +1 */
6426 pbCodeBuf[off++] = 0x74;
6427 pbCodeBuf[off++] = 0x01;
6428
6429 /* int3 */
6430 pbCodeBuf[off++] = 0xcc;
6431
6432 return off;
6433}
6434# endif /* RT_ARCH_AMD64 */
6435
6436
6437/**
6438 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6439 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6440 * instruction if that's not the case.
6441 *
6442 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6443 * Trashes EFLAGS on AMD64.
6444 */
6445DECL_HIDDEN_THROW(uint32_t)
6446iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6447 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6448{
6449 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6450 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6451 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6452 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6453 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6454 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6455 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6456 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6457 return off;
6458
6459# ifdef RT_ARCH_AMD64
6460 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6461 {
6462 /* movdqa vectmp0, idxSimdReg */
6463 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6464
6465 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6466
6467 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6468 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6469 }
6470
6471 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6472 {
6473 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6474 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6475
6476 /* vextracti128 vectmp0, idxSimdReg, 1 */
6477 pbCodeBuf[off++] = X86_OP_VEX3;
6478 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6479 | X86_OP_VEX3_BYTE1_X
6480 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6481 | 0x03; /* Opcode map */
6482 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6483 pbCodeBuf[off++] = 0x39;
6484 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6485 pbCodeBuf[off++] = 0x01;
6486
6487 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6488 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6489 }
6490
6491# elif defined(RT_ARCH_ARM64)
6492 /* mov vectmp0, [gstreg] */
6493 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6494
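    /* Strategy: XOR the host copy with the value just loaded from CPUMCTX; a horizontal
       add (uaddlv) of the result is zero only if all 128 bits match, otherwise trap. */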
6495 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6496 {
6497 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6498 /* eor vectmp0, vectmp0, idxSimdReg */
6499 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6500 /* uaddlv vectmp0, vectmp0.16B */
6501 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6502 /* umov tmp0, vectmp0.H[0] */
6503 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6504 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6505 /* cbz tmp0, +1 */
6506 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6507 /* brk #0x1000+enmGstReg */
6508 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6509 }
6510
6511 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6512 {
6513 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6514 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6515 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6516 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6517 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6518 /* umov tmp0, (vectmp0 + 1).H[0] */
6519 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6520 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6521 /* cbz tmp0, +1 */
6522 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6523 /* brk #0x1000+enmGstReg */
6524 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6525 }
6526
6527# else
6528# error "Port me!"
6529# endif
6530
6531 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6532 return off;
6533}
6534
6535
6536/**
6537 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6538 * important bits.
6539 *
6540 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6541 * Trashes EFLAGS on AMD64.
6542 */
6543DECL_HIDDEN_THROW(uint32_t)
6544iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6545{
6546 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6547 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6548 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6549 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6550
6551# ifdef RT_ARCH_AMD64
6552 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6553
6554 /* je/jz +1 */
6555 pbCodeBuf[off++] = 0x74;
6556 pbCodeBuf[off++] = 0x01;
6557
6558 /* int3 */
6559 pbCodeBuf[off++] = 0xcc;
6560
6561# elif defined(RT_ARCH_ARM64)
6562 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6563
6564 /* b.eq +1 */
6565 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6566 /* brk #0x2000 */
6567 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6568
6569# else
6570# error "Port me!"
6571# endif
6572 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6573
6574 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6575 return off;
6576}
6577
6578#endif /* VBOX_STRICT */
6579
6580
6581#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6582/**
6583 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6584 */
6585DECL_HIDDEN_THROW(uint32_t)
6586iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6587{
6588 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6589
6590 fEflNeeded &= X86_EFL_STATUS_BITS;
6591 if (fEflNeeded)
6592 {
6593# ifdef RT_ARCH_AMD64
6594 /* test dword [pVCpu + offVCpu], imm32 */
6595 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
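        /* Use the byte-sized test encoding when all the needed status flags fit into the low 8 bits. */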
6596 if (fEflNeeded <= 0xff)
6597 {
6598 pCodeBuf[off++] = 0xf6;
6599 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6600 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6601 }
6602 else
6603 {
6604 pCodeBuf[off++] = 0xf7;
6605 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6606 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6607 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6608 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6609 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6610 }
6611
6612 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6613 pCodeBuf[off++] = 0xcc;
6614
6615 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6616
6617# else
6618 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6619 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6620 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6621# ifdef RT_ARCH_ARM64
6622 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6623 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6624# else
6625# error "Port me!"
6626# endif
6627 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6628# endif
6629 }
6630 return off;
6631}
6632#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6633
6634
6635/**
6636 * Emits code for checking the return code of a call and rcPassUp, returning
6637 * from the code if either is non-zero.
6638 */
6639DECL_HIDDEN_THROW(uint32_t)
6640iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6641{
6642#ifdef RT_ARCH_AMD64
6643 /*
6644 * AMD64: eax = call status code.
6645 */
6646
6647 /* edx = rcPassUp */
6648 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6649# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6650 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6651# endif
6652
6653 /* edx = eax | rcPassUp */
6654 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6655 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6656 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6657 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6658
6659 /* Jump to non-zero status return path. */
6660 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, off);
6661
6662 /* done. */
6663
6664#elif RT_ARCH_ARM64
6665 /*
6666 * ARM64: w0 = call status code.
6667 */
6668 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+3+3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
6669
6670# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6671 AssertCompile(ARMV8_A64_REG_X2 == IEMNATIVE_CALL_ARG2_GREG);
6672 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, ARMV8_A64_REG_X2, idxInstr);
6673# endif
6674 off = iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6675
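    /* w4 = w0 (call status) | w3 (rcPassUp) */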
6676 pCodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6677
6678 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, pCodeBuf, off,
6679 ARMV8_A64_REG_X4, true /*f64Bit*/);
6680
6681#else
6682# error "port me"
6683#endif
6684 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6685 RT_NOREF_PV(idxInstr);
6686 return off;
6687}
6688
6689
6690/**
6691 * Emits a call to a CImpl function or something similar.
6692 */
6693DECL_HIDDEN_THROW(uint32_t)
6694iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6695 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6696{
6697 /* Writeback everything. */
6698 off = iemNativeRegFlushPendingWrites(pReNative, off);
6699
6700 /*
6701 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6702 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6703 */
6704 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6705 fGstShwFlush
6706 | RT_BIT_64(kIemNativeGstReg_Pc)
6707 | RT_BIT_64(kIemNativeGstReg_EFlags));
6708 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6709
6710 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6711
6712 /*
6713 * Load the parameters.
6714 */
6715#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_ARCH_AMD64)
6716 /* Special handling for the hidden VBOXSTRICTRC pointer. */
6717 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6718 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6719 if (cAddParams > 0)
6720 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6721 if (cAddParams > 1)
6722 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6723 if (cAddParams > 2)
6724 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6725 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6726
6727#else
6728 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6729 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6730 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6731 if (cAddParams > 0)
6732 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6733 if (cAddParams > 1)
6734 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6735 if (cAddParams > 2)
6736# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6737 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6738# else
6739 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6740# endif
6741#endif
6742
6743 /*
6744 * Make the call.
6745 */
6746 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6747
6748#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6749 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6750#endif
6751
6752#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6753 pReNative->Core.fDebugPcInitialized = false;
6754 Log4(("fDebugPcInitialized=false cimpl off=%#x (v2)\n", off));
6755#endif
6756
6757 /*
6758 * Check the status code.
6759 */
6760 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6761}
6762
6763
6764/**
6765 * Emits a call to a threaded worker function.
6766 */
6767DECL_HIDDEN_THROW(uint32_t)
6768iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6769{
6770 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
6771 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6772
6773 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6774 off = iemNativeRegFlushPendingWrites(pReNative, off);
6775
6776 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6777 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6778
6779#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6780 /* The threaded function may throw / long jmp, so set the current instruction
6781 number if we're counting. */
6782 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6783#endif
6784
6785 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6786
6787#ifdef RT_ARCH_AMD64
6788 /* Load the parameters and emit the call. */
6789# ifdef RT_OS_WINDOWS
6790# ifndef VBOXSTRICTRC_STRICT_ENABLED
6791 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6792 if (cParams > 0)
6793 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6794 if (cParams > 1)
6795 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6796 if (cParams > 2)
6797 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6798# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6799 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6800 if (cParams > 0)
6801 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6802 if (cParams > 1)
6803 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6804 if (cParams > 2)
6805 {
6806 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6807 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6808 }
6809 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6810# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6811# else
6812 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6813 if (cParams > 0)
6814 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6815 if (cParams > 1)
6816 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6817 if (cParams > 2)
6818 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6819# endif
6820
6821 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6822
6823# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6824 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6825# endif
6826
6827#elif RT_ARCH_ARM64
6828 /*
6829 * ARM64:
6830 */
6831 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6832 if (cParams > 0)
6833 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6834 if (cParams > 1)
6835 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6836 if (cParams > 2)
6837 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6838
6839 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6840
6841#else
6842# error "port me"
6843#endif
6844
6845#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6846 pReNative->Core.fDebugPcInitialized = false;
6847 Log4(("fDebugPcInitialized=false todo off=%#x (v2)\n", off));
6848#endif
6849
6850 /*
6851 * Check the status code.
6852 */
6853 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6854
6855 return off;
6856}
6857
6858
6859/**
6860 * The default liveness function, matching iemNativeEmitThreadedCall.
6861 */
6862IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_ThreadedCall)
6863{
6864 IEM_LIVENESS_RAW_INIT_WITH_CALL(pOutgoing, pIncoming);
6865 RT_NOREF(pCallEntry);
6866}
6867
6868#ifdef VBOX_WITH_STATISTICS
6869
6870/**
6871 * Emits code to update the thread call statistics.
6872 */
6873DECL_INLINE_THROW(uint32_t)
6874iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6875{
6876 /*
6877 * Update threaded function stats.
6878 */
6879 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6880 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6881# if defined(RT_ARCH_ARM64)
6882 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6883 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6884 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6885 iemNativeRegFreeTmp(pReNative, idxTmp1);
6886 iemNativeRegFreeTmp(pReNative, idxTmp2);
6887# else
6888 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6889# endif
6890 return off;
6891}
6892
6893
6894/**
6895 * Emits code to update the TB exit reason statistics.
6896 */
6897DECL_INLINE_THROW(uint32_t)
6898iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6899{
6900 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6901 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6902 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6903 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6904 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6905
6906 return off;
6907}
6908
6909#endif /* VBOX_WITH_STATISTICS */
6910
6911/**
6912 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6913 */
6914static uint32_t
6915iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6916{
6917 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6918 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6919
6920 /* Jump to ReturnBreak if the return register is NULL. */
6921 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6922 true /*f64Bit*/, offReturnBreak);
6923
6924 /* Okay, continue executing the next TB. */
6925 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6926 return off;
6927}
6928
6929
6930/**
6931 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6932 */
6933static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6934{
6935 /* set the return status */
6936 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6937}
6938
6939
6940/**
6941 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6942 */
6943static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6944{
6945 /* set the return status */
6946 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6947}
6948
6949
6950/**
6951 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6952 */
6953static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6954{
6955 /* set the return status */
6956 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6957}
6958
6959
6960/**
6961 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6962 */
6963static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6964{
6965 /*
6966 * Generate the rc + rcPassUp fiddling code.
6967 */
6968 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6969#ifdef RT_ARCH_AMD64
6970# ifdef RT_OS_WINDOWS
6971# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6972 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6973# endif
6974 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6975 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6976# else
6977 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6978 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6979# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6980 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6981# endif
6982# endif
6983# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6984 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6985# endif
6986
6987#else
6988 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6989 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6990 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6991#endif
6992
6993 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6994 return off;
6995}
6996
6997
6998/**
6999 * Emits a standard epilog.
7000 */
7001static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7002{
7003 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
7004
7005 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
7006 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7007
7008 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
7009 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7010
7011 /*
7012 * Restore registers and return.
7013 */
7014#ifdef RT_ARCH_AMD64
7015 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7016
7017 /* Reposition rsp at the r15 restore point. */
7018 pbCodeBuf[off++] = X86_OP_REX_W;
7019 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7020 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7021 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7022
7023 /* Pop non-volatile registers and return */
7024 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7025 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7026 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7027 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7028 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7029 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7030 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7031 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7032# ifdef RT_OS_WINDOWS
7033 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7034 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7035# endif
7036 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7037 pbCodeBuf[off++] = 0xc9; /* leave */
7038 pbCodeBuf[off++] = 0xc3; /* ret */
7039 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7040
7041#elif RT_ARCH_ARM64
7042 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7043
7044 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
7045 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7046 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7047 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7048 IEMNATIVE_FRAME_VAR_SIZE / 8);
7049 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7050 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7051 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7052 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7053 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7054 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7055 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7056 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7057 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7058 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7059 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7060 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7061
7062 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7063 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7064 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7065 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7066
7067 /* retab / ret */
7068# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7069 if (1)
7070 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7071 else
7072# endif
7073 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7074
7075#else
7076# error "port me"
7077#endif
7078 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7079
7080 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
7081 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7082
7083 return off;
7084}
7085
7086
7087
7088/*********************************************************************************************************************************
7089* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7090*********************************************************************************************************************************/
7091
7092/**
7093 * Internal work that allocates a variable with kind set to
7094 * kIemNativeVarKind_Invalid and no current stack allocation.
7095 *
7096 * The kind will either be set by the caller or later when the variable is first
7097 * assigned a value.
7098 *
7099 * @returns Unpacked index.
7100 * @internal
7101 */
7102DECL_INLINE_THROW(uint8_t) iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7103{
7104 Assert(cbType > 0 && cbType <= 64);
7105 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7106 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7107
7108 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7109
7110 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[idxVar]; /* VS 2019 gets a bit weird on us otherwise. */
7111#if 0
7112 pVar->cbVar = cbType;
7113 pVar->enmKind = kIemNativeVarKind_Invalid;
7114 pVar->fRegAcquired = false;
7115 pVar->fSimdReg = false;
7116 pVar->idxReg = UINT8_MAX;
7117 pVar->uArgNo = UINT8_MAX;
7118 pVar->idxStackSlot = UINT8_MAX;
7119 pVar->idxReferrerVar = UINT8_MAX;
7120 pVar->u.uValue = 0;
7121#else
7122 /* Neither clang 15 nor VC++ 2019 is able to generate this from the above. */
7123 AssertCompileMemberOffset(IEMNATIVEVAR, cbVar, 1);
7124 AssertCompile((int)kIemNativeVarKind_Invalid == 0);
7125 pVar->u32Init0 = (uint32_t)cbType << 8;
7126 pVar->u32Init1 = UINT32_MAX;
7127 pVar->u.uValue = 0;
7128#endif
7129 return idxVar;
7130}
7131
7132
7133/**
7134 * Internal work that allocates an argument variable w/o setting enmKind.
7135 *
7136 * @returns Unpacked index.
7137 * @internal
7138 */
7139static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7140{
7141 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7142 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7143 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7144
7145 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7146 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7147 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7148 return idxVar;
7149}
7150
7151
7152/**
7153 * Gets the stack slot for a stack variable, allocating one if necessary.
7154 *
7155 * Calling this function implies that the stack slot will contain a valid
7156 * variable value. The caller deals with any register currently assigned to the
7157 * variable, typically by spilling it into the stack slot.
7158 *
7159 * @returns The stack slot number.
7160 * @param pReNative The recompiler state.
7161 * @param idxVar The variable.
7162 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7163 */
7164DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7165{
7166 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7167 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7168 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7169
7170 /* Already got a slot? */
7171 uint8_t const idxStackSlot = pVar->idxStackSlot;
7172 if (idxStackSlot != UINT8_MAX)
7173 {
7174 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7175 return idxStackSlot;
7176 }
7177
7178 /*
7179 * A single slot is easy to allocate.
7180 * Allocate them from the top end, closest to BP, to reduce the displacement.
7181 */
7182 if (pVar->cbVar <= sizeof(uint64_t))
7183 {
7184 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7185 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7186 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7187 pVar->idxStackSlot = (uint8_t)iSlot;
7188 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7189 return (uint8_t)iSlot;
7190 }
7191
7192 /*
7193 * We need more than one stack slot.
7194 *
7195 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7196 */
7197 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7198 Assert(pVar->cbVar <= 64);
7199 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7200 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7201 uint32_t bmStack = pReNative->Core.bmStack;
7202 while (bmStack != UINT32_MAX)
7203 {
7204 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7205 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7206 iSlot = (iSlot - 1) & ~fBitAlignMask;
7207 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7208 {
7209 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7210 pVar->idxStackSlot = (uint8_t)iSlot;
7211 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7212 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7213 return (uint8_t)iSlot;
7214 }
7215
7216 bmStack |= (fBitAllocMask << iSlot);
7217 }
7218 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7219}
7220
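/*
 * Illustrative note (not part of the original source): a worked example of the
 * multi-slot path in iemNativeVarGetStackSlot above.  For a hypothetical 32-byte
 * variable (e.g. an RTUINT256U local):
 *
 *      fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1  =  RT_BIT_32(2) - 1  =  0x3
 *      fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1             =  RT_BIT_32(4) - 1  =  0xf
 *
 * so the search only probes slot indexes that are multiples of four and, on success,
 * marks four consecutive bits in Core.bmStack, matching the
 * "cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7" table in the function comment.
 */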
7221
7222/**
7223 * Changes the variable to a stack variable.
7224 *
7225 * Currently this is only possible to do the first time the variable is used;
7226 * switching later can be implemented but is not done.
7227 *
7228 * @param pReNative The recompiler state.
7229 * @param idxVar The variable.
7230 * @throws VERR_IEM_VAR_IPE_2
7231 */
7232DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7233{
7234 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7235 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7236 if (pVar->enmKind != kIemNativeVarKind_Stack)
7237 {
7238 /* We could in theory transition from immediate to stack as well, but it
7239 would involve the caller doing work storing the value on the stack. So,
7240 till that's required we only allow transition from invalid. */
7241 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7242 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7243 pVar->enmKind = kIemNativeVarKind_Stack;
7244
7245 /* Note! We don't allocate a stack slot here, that's only done when a
7246 slot is actually needed to hold a variable value. */
7247 }
7248}
7249
7250
7251/**
7252 * Sets it to a variable with a constant value.
7253 *
7254 * This does not require stack storage as we know the value and can always
7255 * reload it, unless of course it's referenced.
7256 *
7257 * @param pReNative The recompiler state.
7258 * @param idxVar The variable.
7259 * @param uValue The immediate value.
7260 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7261 */
7262DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7263{
7264 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7265 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7266 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7267 {
7268 /* Only simple transitions for now. */
7269 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7270 pVar->enmKind = kIemNativeVarKind_Immediate;
7271 }
7272 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7273
7274 pVar->u.uValue = uValue;
7275 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7276 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7277 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7278}
7279
7280
7281/**
7282 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7283 *
7284 * This does not require stack storage as we know the value and can always
7285 * reload it. Loading is postponed till needed.
7286 *
7287 * @param pReNative The recompiler state.
7288 * @param idxVar The variable. Unpacked.
7289 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7290 *
7291 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7292 * @internal
7293 */
7294static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7295{
7296 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7297 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7298
7299 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7300 {
7301 /* Only simple transitions for now. */
7302 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7303 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7304 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7305 }
7306 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7307
7308 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7309
7310 /* Update the other variable, ensure it's a stack variable. */
7311 /** @todo handle variables with const values... that'll go boom now. */
7312 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7313 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7314}
7315
7316
7317/**
7318 * Sets the variable to a reference (pointer) to a guest register reference.
7319 *
7320 * This does not require stack storage as we know the value and can always
7321 * reload it. Loading is postponed till needed.
7322 *
7323 * @param pReNative The recompiler state.
7324 * @param idxVar The variable.
7325 * @param enmRegClass The class of guest registers to reference.
7326 * @param idxReg The register within @a enmRegClass to reference.
7327 *
7328 * @throws VERR_IEM_VAR_IPE_2
7329 */
7330DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7331 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7332{
7333 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7334 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7335
7336 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7337 {
7338 /* Only simple transitions for now. */
7339 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7340 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7341 }
7342 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7343
7344 pVar->u.GstRegRef.enmClass = enmRegClass;
7345 pVar->u.GstRegRef.idx = idxReg;
7346}
7347
7348
7349DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7350{
7351 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7352}
7353
7354
7355DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7356{
7357 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7358
7359 /* Since we're using a generic uint64_t value type, we must truncate it if
7360 the variable is smaller, otherwise we may end up with an overly large value when
7361 scaling up an imm8 w/ sign-extension.
7362
7363 This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7364 in the BIOS, bx=1) when running on arm, because clang expects 16-bit
7365 register parameters to have bits 16 and up set to zero. Instead of
7366 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7367 CF value in the result. */
7368 switch (cbType)
7369 {
7370 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7371 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7372 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7373 }
7374 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7375 return idxVar;
7376}
7377
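/*
 * Illustrative note (hypothetical values, not part of the original source): for the
 * "add bx, 0xffff" case mentioned above, the threaded-call parameter arrives
 * sign-extended to 64 bits, so with cbType == sizeof(uint16_t) the masking does:
 *
 *      uValue  = UINT64_C(0xffffffffffffffff);    sign-extended immediate
 *      uValue &= UINT64_C(0xffff);                ->  0x000000000000ffff
 *
 * which is the zero-extended 16-bit value the helper expects in its argument register.
 */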
7378
7379DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7380{
7381 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7382 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7383 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7384 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7385 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7386 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7387
7388 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7389 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7390 return idxArgVar;
7391}
7392
7393
7394DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7395{
7396 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7397 /* Don't set to stack now, leave that to the first use as for instance
7398 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7399 return idxVar;
7400}
7401
7402
7403DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7404{
7405 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7406
7407 /* Since we're using a generic uint64_t value type, we must truncate it if
7408 the variable is smaller, otherwise we may end up with an overly large value when
7409 scaling up an imm8 w/ sign-extension. */
7410 switch (cbType)
7411 {
7412 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7413 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7414 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7415 }
7416 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7417 return idxVar;
7418}
7419
7420
7421DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
7422 uint8_t cbType, uint8_t idxVarOther)
7423{
7424 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7425 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7426
7427 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquireInited(pReNative, idxVarOther, poff);
7428 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7429
7430/** @todo combine MOV and AND using MOVZX/similar. */
7431 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7432
7433 /* Truncate the value to this variables size. */
7434 switch (cbType)
7435 {
7436 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7437 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7438 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7439 }
7440
7441 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7442 iemNativeVarRegisterRelease(pReNative, idxVar);
7443 return idxVar;
7444}
7445
7446
7447/**
7448 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7449 * fixed till we call iemNativeVarRegisterRelease.
7450 *
7451 * @returns The host register number.
7452 * @param pReNative The recompiler state.
7453 * @param idxVar The variable.
7454 * @param poff Pointer to the instruction buffer offset.
7455 * In case a register needs to be freed up or the value
7456 * loaded off the stack.
7457 * @param idxRegPref Preferred register number or UINT8_MAX.
7458 *
7459 * @tparam a_fInitialized Set if the variable must already have been
7460 * initialized. Will throw VERR_IEM_VAR_NOT_INITIALIZED
7461 * if this is not the case.
7462 * @tparam a_fWithRegPref If idxRegPref is valid.
7463 *
7464 * @note Must not modify the host status flags!
7465 */
7466template<bool const a_fInitialized, bool const a_fWithRegPref>
7467DECL_FORCE_INLINE_THROW(uint8_t)
7468iemNativeVarRegisterAcquireSlowInt(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7469{
7470 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7471 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7472 Assert(pVar->cbVar <= 8);
7473 Assert(!pVar->fRegAcquired);
7474 Assert(!a_fWithRegPref || idxRegPref < RT_ELEMENTS(pReNative->Core.aHstRegs));
7475
7476 /* This slow code path only handles the case where no register has been
7477 allocated for the variable yet. */
7478 Assert(pVar->idxReg == UINT8_MAX);
7479
7480 /*
7481 * If the kind of variable has not yet been set, default to 'stack'.
7482 */
7483 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7484 && pVar->enmKind < kIemNativeVarKind_End);
7485 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7486 iemNativeVarSetKindToStack(pReNative, idxVar);
7487
7488 /*
7489 * We have to allocate a register for the variable, even if it's a stack one,
7490 * as we don't know if there are modifications being made to it before it's
7491 * finalized (todo: analyze and insert hints about that?).
7492 *
7493 * If we can, we try to get the correct register for argument variables. This
7494 * assumes that most argument variables are fetched as close as possible
7495 * to the actual call, so that there aren't any interfering hidden calls
7496 * (memory accesses, etc.) in between.
7497 *
7498 * If we cannot, or it's a non-argument variable, we make sure no argument registers
7499 * that will be used by this MC block will be allocated here, and we always
7500 * prefer non-volatile registers to avoid needing to spill stuff for internal
7501 * calls.
7502 */
7503 /** @todo Detect too early argument value fetches and warn about hidden
7504 * calls causing less optimal code to be generated in the python script. */
7505
7506 uint8_t idxReg;
7507 uint8_t const uArgNo = pVar->uArgNo;
7508 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7509 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7510 {
7511 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7512
7513#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7514 /* Writeback any dirty shadow registers we are about to unshadow. */
7515 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7516#endif
7517
7518 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7519 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7520 }
7521 else if ( !a_fWithRegPref
7522 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7523 {
7524 /** @todo there must be a better way for this and boot cArgsX? */
7525 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7526 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7527 & ~pReNative->Core.bmHstRegsWithGstShadow
7528 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7529 & fNotArgsMask;
7530 if (fRegs)
7531 {
7532 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7533 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7534 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7535 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7536 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7537 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7538 }
7539 else
7540 {
7541 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7542 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7543 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7544 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7545 }
7546 }
7547 else
7548 {
7549 idxReg = idxRegPref;
7550 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7551 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7552 }
7553 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7554 pVar->idxReg = idxReg;
7555 pVar->fSimdReg = false;
7556
7557 /*
7558 * Load it off the stack if we've got a stack slot.
7559 */
7560 uint8_t const idxStackSlot = pVar->idxStackSlot;
7561 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7562 {
7563 Assert(a_fInitialized);
7564 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7565 switch (pVar->cbVar)
7566 {
7567 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7568 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7569 case 3: AssertFailed(); RT_FALL_THRU();
7570 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7571 default: AssertFailed(); RT_FALL_THRU();
7572 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7573 }
7574 }
7575 else
7576 {
7577 Assert(idxStackSlot == UINT8_MAX);
7578 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7579 AssertStmt(!a_fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7580 else
7581 {
7582 /*
7583 * Convert from immediate to stack/register. This is currently only
7584 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7585 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7586 */
7587 AssertStmt(a_fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7588 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7589 idxVar, idxReg, pVar->u.uValue));
7590 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7591 pVar->enmKind = kIemNativeVarKind_Stack;
7592 }
7593 }
7594
7595 pVar->fRegAcquired = true;
7596 return idxReg;
7597}
7598
7599
7600/** See iemNativeVarRegisterAcquireSlowInt for details. */
7601DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquireSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
7602{
7603 /* very likely */
7604 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 0]);
7605 return iemNativeVarRegisterAcquireSlowInt<false, false>(pReNative, idxVar, poff, UINT8_MAX);
7606}
7607
7608
7609/** See iemNativeVarRegisterAcquireSlowInt for details. */
7610DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquireInitedSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
7611{
7612 /* even more likely */
7613 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 2]);
7614 return iemNativeVarRegisterAcquireSlowInt<true, false>(pReNative, idxVar, poff, UINT8_MAX);
7615}
7616
7617
7618/** See iemNativeVarRegisterAcquireSlowInt for details. */
7619DECL_HIDDEN_THROW(uint8_t)
7620iemNativeVarRegisterAcquireWithPrefSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7621{
7622 /* unused */
7623 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 4]);
7624 return iemNativeVarRegisterAcquireSlowInt<false, true>(pReNative, idxVar, poff, idxRegPref);
7625}
7626
7627
7628/** See iemNativeVarRegisterAcquireSlowInt for details. */
7629DECL_HIDDEN_THROW(uint8_t)
7630iemNativeVarRegisterAcquireInitedWithPrefSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7631{
7632 /* very very likely */
7633 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 6]);
7634 return iemNativeVarRegisterAcquireSlowInt<true, true>(pReNative, idxVar, poff, idxRegPref);
7635}
7636
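/*
 * Illustrative usage sketch (not taken from the code base; iemNativeEmitSomethingWithGpr
 * is a placeholder name): a typical emitter pins the variable to a host register, works
 * on that register, and then releases it again:
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
 *      off = iemNativeEmitSomethingWithGpr(pReNative, off, idxVarReg);
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 *
 * The inlined fast path covers the common "register already assigned" case; the *Slow
 * workers above are only entered when a register must first be allocated or the value
 * loaded off the stack.
 */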
7637
7638/**
7639 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7640 * fixed till we call iemNativeVarRegisterRelease.
7641 *
7642 * @returns The host register number.
7643 * @param pReNative The recompiler state.
7644 * @param idxVar The variable.
7645 * @param poff Pointer to the instruction buffer offset.
7646 * In case a register needs to be freed up or the value
7647 * loaded off the stack.
7648 * @param fInitialized Set if the variable must already have been initialized.
7649 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7650 * the case.
7651 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7652 */
7653/** @todo Create variants for the last two params like we've done for the
7654 * GPR variant? */
7655DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7656 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7657{
7658 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7659 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7660 Assert( pVar->cbVar == sizeof(RTUINT128U)
7661 || pVar->cbVar == sizeof(RTUINT256U));
7662 Assert(!pVar->fRegAcquired);
7663
7664/** @todo inline this bit? */
7665 uint8_t idxReg = pVar->idxReg;
7666 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7667 {
7668 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7669 && pVar->enmKind < kIemNativeVarKind_End);
7670 pVar->fRegAcquired = true;
7671 return idxReg;
7672 }
7673
7674 /*
7675 * If the kind of variable has not yet been set, default to 'stack'.
7676 */
7677 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7678 && pVar->enmKind < kIemNativeVarKind_End);
7679 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7680 iemNativeVarSetKindToStack(pReNative, idxVar);
7681
7682 /*
7683 * We have to allocate a register for the variable, even if it's a stack one,
7684 * as we don't know if there are modifications being made to it before it's
7685 * finalized (todo: analyze and insert hints about that?).
7686 *
7687 * If we can, we try to get the correct register for argument variables. This
7688 * assumes that most argument variables are fetched as close as possible
7689 * to the actual call, so that there aren't any interfering hidden calls
7690 * (memory accesses, etc.) in between.
7691 *
7692 * If we cannot, or it's a non-argument variable, we make sure no argument registers
7693 * that will be used by this MC block will be allocated here, and we always
7694 * prefer non-volatile registers to avoid needing to spill stuff for internal
7695 * calls.
7696 */
7697 /** @todo Detect too early argument value fetches and warn about hidden
7698 * calls causing less optimal code to be generated in the python script. */
7699
7700 uint8_t const uArgNo = pVar->uArgNo;
7701 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7702
7703 /* SIMD is a bit simpler for now because there is no support for arguments. */
7704 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7705 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7706 {
7707 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7708 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7709 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7710 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7711 & fNotArgsMask;
7712 if (fRegs)
7713 {
7714 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7715 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7716 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7717 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7718 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7719 }
7720 else
7721 {
7722 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7723 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7724 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7725 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7726 }
7727 }
7728 else
7729 {
7730 idxReg = idxRegPref;
7731 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7732 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7733 }
7734 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7735 pVar->idxReg = idxReg;
7736 pVar->fSimdReg = true;
7737
7738 /*
7739 * Load it off the stack if we've got a stack slot.
7740 */
7741 uint8_t const idxStackSlot = pVar->idxStackSlot;
7742 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7743 {
7744 Assert(fInitialized);
7745 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7746 switch (pVar->cbVar)
7747 {
7748 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7749 default: AssertFailed(); RT_FALL_THRU();
7750 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7751 }
7752 }
7753 else
7754 {
7755 Assert(idxStackSlot == UINT8_MAX);
7756 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7757 }
7758 pVar->fRegAcquired = true;
7759 return idxReg;
7760}
7761
7762
7763/**
7764 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7765 * guest register.
7766 *
7767 * This function makes sure there is a register for it and sets it to be the
7768 * current shadow copy of @a enmGstReg.
7769 *
7770 * @returns The host register number.
7771 * @param pReNative The recompiler state.
7772 * @param idxVar The variable.
7773 * @param enmGstReg The guest register this variable will be written to
7774 * after this call.
7775 * @param poff Pointer to the instruction buffer offset.
7776 * In case a register needs to be freed up or if the
7777 * variable content needs to be loaded off the stack.
7778 *
7779 * @note We DO NOT expect @a idxVar to be an argument variable,
7780 * because we can only be in the commit stage of an instruction when this
7781 * function is used.
7782 */
7783DECL_HIDDEN_THROW(uint8_t)
7784iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7785{
7786 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7787 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7788 Assert(!pVar->fRegAcquired);
7789 AssertMsgStmt( pVar->cbVar <= 8
7790 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7791 || pVar->enmKind == kIemNativeVarKind_Stack),
7792 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7793 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7794 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7795
7796 /*
7797 * This shouldn't ever be used for arguments, unless it's in a weird else
7798 * branch that doesn't do any calling and even then it's questionable.
7799 *
7800 * However, in case someone writes crazy wrong MC code and does register
7801 * updates before making calls, just use the regular register allocator to
7802 * ensure we get a register suitable for the intended argument number.
7803 */
7804 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7805
7806 /*
7807 * If there is already a register for the variable, we transfer/set the
7808 * guest shadow copy assignment to it.
7809 */
7810 uint8_t idxReg = pVar->idxReg;
7811 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7812 {
7813#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7814 AssertCompile(kIemNativeGstReg_GprFirst == 0);
7815 if (enmGstReg <= kIemNativeGstReg_GprLast)
7816 {
7817# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7818 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7819 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7820# endif
7821 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7822 }
7823#endif
7824
7825 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7826 {
7827 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7828 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7829 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7830 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7831 }
7832 else
7833 {
7834 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7835 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7836 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7837 }
7838 pVar->fRegAcquired = true;
7839 return idxReg;
7840 }
7841 Assert(pVar->uArgNo == UINT8_MAX);
7842
7843 /*
7844 * Because this is supposed to be the commit stage, we just tag along with the
7845 * temporary register allocator and upgrade it to a variable register.
7846 */
7847 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7848 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7849 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7850 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7851 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7852 pVar->idxReg = idxReg;
7853
7854 /*
7855 * Now we need to load the register value.
7856 */
7857 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7858 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7859 else
7860 {
7861 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7862 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7863 switch (pVar->cbVar)
7864 {
7865 case sizeof(uint64_t):
7866 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7867 break;
7868 case sizeof(uint32_t):
7869 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7870 break;
7871 case sizeof(uint16_t):
7872 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7873 break;
7874 case sizeof(uint8_t):
7875 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7876 break;
7877 default:
7878 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7879 }
7880 }
7881
7882 pVar->fRegAcquired = true;
7883 return idxReg;
7884}
7885
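/*
 * Illustrative sketch (hedged; the emitter in the middle is a placeholder name): in the
 * commit stage of an instruction the function above is typically used like
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVar, enmGstReg, &off);
 *      off = iemNativeEmitSomethingThatComputesTheFinalValue(pReNative, off, idxVarReg);
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 *
 * where enmGstReg is the guest register the MC block is about to write in full, so that
 * idxVarReg ends up as its current shadow copy.
 */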
7886
7887/**
7888 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7889 *
7890 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7891 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7892 * requirement of flushing anything in volatile host registers when making a
7893 * call.
7894 *
7895 * @returns New @a off value.
7896 * @param pReNative The recompiler state.
7897 * @param off The code buffer position.
7898 * @param fHstGprNotToSave Set of GPRs not to save & restore.
7899 */
7900DECL_HIDDEN_THROW(uint32_t)
7901iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstGprNotToSave)
7902{
7903 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstGprNotToSave;
7904 if (fHstRegs)
7905 {
7906 do
7907 {
7908 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7909 fHstRegs &= ~RT_BIT_32(idxHstReg);
7910
7911 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7912 {
7913 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7914 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7915 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7916 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7917 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7918 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7919 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7920 {
7921 case kIemNativeVarKind_Stack:
7922 {
7923 /* Temporarily spill the variable register. */
7924 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7925 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7926 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7927 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7928 continue;
7929 }
7930
7931 case kIemNativeVarKind_Immediate:
7932 case kIemNativeVarKind_VarRef:
7933 case kIemNativeVarKind_GstRegRef:
7934 /* It is weird to have any of these loaded at this point. */
7935 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7936 continue;
7937
7938 case kIemNativeVarKind_End:
7939 case kIemNativeVarKind_Invalid:
7940 break;
7941 }
7942 AssertFailed();
7943 }
7944 else
7945 {
7946 /*
7947 * Allocate a temporary stack slot and spill the register to it.
7948 */
7949 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7950 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7951 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7952 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7953 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7954 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7955 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7956 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7957 }
7958 } while (fHstRegs);
7959 }
7960
7961 /*
7962 * Guest register shadows are flushed to CPUMCTX at the moment and don't need allocating a stack slot
7963 * which would be more difficult due to spanning multiple stack slots and different sizes
7964 * (besides we only have a limited amount of slots at the moment).
7965 *
7966 * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
7967 * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7968 */
7969 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7970
7971 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
7972 if (fHstRegs)
7973 {
7974 do
7975 {
7976 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7977 fHstRegs &= ~RT_BIT_32(idxHstReg);
7978
7979 /* Fixed reserved and temporary registers don't need saving. */
7980 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved
7981 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp); included below */
7982 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7983
7984 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7985 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7986 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7987 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7988 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7989 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7990 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7991 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7992 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7993 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7994 {
7995 case kIemNativeVarKind_Stack:
7996 {
7997 /* Temporarily spill the variable register. */
7998 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7999 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8000 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8001 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8002 if (cbVar == sizeof(RTUINT128U))
8003 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8004 else
8005 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8006 continue;
8007 }
8008
8009 case kIemNativeVarKind_Immediate:
8010 case kIemNativeVarKind_VarRef:
8011 case kIemNativeVarKind_GstRegRef:
8012 /* It is weird to have any of these loaded at this point. */
8013 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8014 continue;
8015
8016 case kIemNativeVarKind_End:
8017 case kIemNativeVarKind_Invalid:
8018 break;
8019 }
8020 AssertFailed();
8021 } while (fHstRegs);
8022 }
8023 return off;
8024}
8025
8026
8027/**
8028 * Emit code to restore volatile registers after a call to a helper.
8029 *
8030 * @returns New @a off value.
8031 * @param pReNative The recompiler state.
8032 * @param off The code buffer position.
8033 * @param fHstGprNotToSave Set of registers not to save & restore.
8034 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8035 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8036 */
8037DECL_HIDDEN_THROW(uint32_t)
8038iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstGprNotToSave)
8039{
8040 /*
8041 * GPRs
8042 */
8043 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstGprNotToSave;
8044 if (fHstRegs)
8045 {
8046 do
8047 {
8048 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8049 fHstRegs &= ~RT_BIT_32(idxHstReg);
8050
8051 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8052 {
8053 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8054 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8055 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8056 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8057 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8058 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8059 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8060 {
8061 case kIemNativeVarKind_Stack:
8062 {
8063 /* Unspill the variable register. */
8064 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8065 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8066 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8067 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8068 continue;
8069 }
8070
8071 case kIemNativeVarKind_Immediate:
8072 case kIemNativeVarKind_VarRef:
8073 case kIemNativeVarKind_GstRegRef:
8074 /* It is weird to have any of these loaded at this point. */
8075 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8076 continue;
8077
8078 case kIemNativeVarKind_End:
8079 case kIemNativeVarKind_Invalid:
8080 break;
8081 }
8082 AssertFailed();
8083 }
8084 else
8085 {
8086 /*
8087 * Restore from temporary stack slot.
8088 */
8089 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8090 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8091 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8092 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8093
8094 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8095 }
8096 } while (fHstRegs);
8097 }
8098
8099 /*
8100 * SIMD registers.
8101 */
8102 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
8103 if (fHstRegs)
8104 {
8105 do
8106 {
8107 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8108 fHstRegs &= ~RT_BIT_32(idxHstReg);
8109
8110 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp
8111 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved); - included below. */
8112 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8113
8114 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8115 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8116 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8117 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8118 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8119 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8120 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8121 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8122 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8123 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8124 {
8125 case kIemNativeVarKind_Stack:
8126 {
8127 /* Unspill the variable register. */
8128 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8129 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8130 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8131 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8132
8133 if (cbVar == sizeof(RTUINT128U))
8134 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8135 else
8136 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8137 continue;
8138 }
8139
8140 case kIemNativeVarKind_Immediate:
8141 case kIemNativeVarKind_VarRef:
8142 case kIemNativeVarKind_GstRegRef:
8143 /* It is weird to have any of these loaded at this point. */
8144 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8145 continue;
8146
8147 case kIemNativeVarKind_End:
8148 case kIemNativeVarKind_Invalid:
8149 break;
8150 }
8151 AssertFailed();
8152 } while (fHstRegs);
8153 }
8154 return off;
8155}
8156
8157
8158/**
8159 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
8160 *
8161 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8162 *
8163 * ASSUMES that @a idxVar is valid and unpacked.
8164 */
8165DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8166{
8167 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8168 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8169 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8170 {
8171 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8172 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8173 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8174 Assert(cSlots > 0);
8175 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8176 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8177 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8178 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8179 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8180 }
8181 else
8182 Assert(idxStackSlot == UINT8_MAX);
8183}
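
/*
 * Worked illustration of the slot math above (8-byte slots; values picked as an example):
 *      cbVar = sizeof(RTUINT256U) = 32   =>  cSlots     = (32 + 8 - 1) / 8 = 4
 *                                            fAllocMask = RT_BIT_32(4) - 1 = 0x0000000f
 *      idxStackSlot = 8 (example)        =>  bmStack   &= ~(0x0000000f << 8)
 * So a 256-bit variable occupies four consecutive 64-bit stack slots, and all four
 * allocation bits are cleared in one operation.
 */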
8184
8185
8186/**
8187 * Worker that frees a single variable.
8188 *
8189 * ASSUMES that @a idxVar is valid and unpacked.
8190 */
8191DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8192{
8193 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8194 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8195 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8196
8197 /* Free the host register first if any assigned. */
8198 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8199 if (idxHstReg != UINT8_MAX)
8200 {
8201 if (!pReNative->Core.aVars[idxVar].fSimdReg)
8202 {
8203 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
8204 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8205 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8206 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8207 }
8208 else
8209 {
8210 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8211 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8212 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8213 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8214 }
8215 }
8216
8217 /* Free argument mapping. */
8218 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8219 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8220 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8221
8222 /* Free the stack slots. */
8223 iemNativeVarFreeStackSlots(pReNative, idxVar);
8224
8225 /* Free the actual variable. */
8226 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8227 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8228}
8229
8230
8231/**
8232 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8233 */
8234DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8235{
8236 while (bmVars != 0)
8237 {
8238 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8239 bmVars &= ~RT_BIT_32(idxVar);
8240
8241#if 1 /** @todo optimize by simplifying this later... */
8242 iemNativeVarFreeOneWorker(pReNative, idxVar);
8243#else
8244 /* Only need to free the host register, the rest is done as bulk updates below. */
8245 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8246 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8247 {
8248 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8249 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8250 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8251 }
8252#endif
8253 }
8254#if 0 /** @todo optimize by simplifying this later... */
8255 pReNative->Core.bmVars = 0;
8256 pReNative->Core.bmStack = 0;
8257 pReNative->Core.u64ArgVars = UINT64_MAX;
8258#endif
8259}
8260
8261
8262
8263/*********************************************************************************************************************************
8264* Emitters for IEM_MC_CALL_CIMPL_XXX *
8265*********************************************************************************************************************************/
8266
8267/**
8268 * Emits code to load a reference to the given guest register into @a idxGprDst.
8269 */
8270DECL_HIDDEN_THROW(uint32_t)
8271iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8272 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8273{
8274#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8275    /** @todo If we are ever going to allow referencing the RIP register we need to update the guest value here. */
8276#endif
8277
8278 /*
8279 * Get the offset relative to the CPUMCTX structure.
8280 */
8281 uint32_t offCpumCtx;
8282 switch (enmClass)
8283 {
8284 case kIemNativeGstRegRef_Gpr:
8285 Assert(idxRegInClass < 16);
8286 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8287 break;
8288
8289 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8290 Assert(idxRegInClass < 4);
8291 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8292 break;
8293
8294 case kIemNativeGstRegRef_EFlags:
8295 Assert(idxRegInClass == 0);
8296 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8297 break;
8298
8299 case kIemNativeGstRegRef_MxCsr:
8300 Assert(idxRegInClass == 0);
8301 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8302 break;
8303
8304 case kIemNativeGstRegRef_FpuReg:
8305 Assert(idxRegInClass < 8);
8306 AssertFailed(); /** @todo what kind of indexing? */
8307 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8308 break;
8309
8310 case kIemNativeGstRegRef_MReg:
8311 Assert(idxRegInClass < 8);
8312 AssertFailed(); /** @todo what kind of indexing? */
8313 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8314 break;
8315
8316 case kIemNativeGstRegRef_XReg:
8317 Assert(idxRegInClass < 16);
8318 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8319 break;
8320
8321 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8322 Assert(idxRegInClass == 0);
8323 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8324 break;
8325
8326 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8327 Assert(idxRegInClass == 0);
8328 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8329 break;
8330
8331 default:
8332 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8333 }
8334
8335 /*
8336 * Load the value into the destination register.
8337 */
8338#ifdef RT_ARCH_AMD64
8339 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8340
8341#elif defined(RT_ARCH_ARM64)
8342 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8343 Assert(offCpumCtx < 4096);
8344 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8345
8346#else
8347# error "Port me!"
8348#endif
8349
8350 return off;
8351}
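
/*
 * Rough sketch of what this expands to (example values; the exact encoding is up to
 * the emitter helpers): referencing guest RDX (kIemNativeGstRegRef_Gpr, idxRegInClass=2)
 * gives offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[2]) and then roughly
 *      amd64:  lea  rDst, [pVCpu + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + offCpumCtx]
 *      arm64:  add  xDst, x27, #offCpumCtx     ; x27 = IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx < 4096
 * leaving &pVCpu->cpum.GstCtx.aGRegs[X86_GREG_xDX] in the destination register.
 */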
8352
8353
8354/**
8355 * Common code for CIMPL and AIMPL calls.
8356 *
8357 * These are calls that use argument variables and such. They should not be
8358 * confused with internal calls required to implement an MC operation,
8359 * such as a TLB load and the like.
8360 *
8361 * Upon return all that is left to do is to load any hidden arguments and
8362 * perform the call. All argument variables are freed.
8363 *
8364 * @returns New code buffer offset; throws VBox status code on error.
8365 * @param pReNative The native recompile state.
8366 * @param off The code buffer offset.
8367 * @param cArgs               The total number of arguments (includes hidden
8368 * count).
8369 * @param cHiddenArgs The number of hidden arguments. The hidden
8370 * arguments must not have any variable declared for
8371 * them, whereas all the regular arguments must
8372 * (tstIEMCheckMc ensures this).
8373 * @param fFlushPendingWrites Whether to flush pending writes (default true).
8374 *                             Pending writes in call volatile registers are still flushed even when false.
8375 */
8376DECL_HIDDEN_THROW(uint32_t)
8377iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8378 bool fFlushPendingWrites /*= true*/)
8379{
8380#ifdef VBOX_STRICT
8381 /*
8382 * Assert sanity.
8383 */
8384 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8385 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8386 for (unsigned i = 0; i < cHiddenArgs; i++)
8387 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8388 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8389 {
8390 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8391 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8392 }
8393 iemNativeRegAssertSanity(pReNative);
8394#endif
8395
8396 /* We don't know what the called function makes use of, so flush any pending register writes. */
8397 RT_NOREF(fFlushPendingWrites);
8398#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8399 if (fFlushPendingWrites)
8400#endif
8401 off = iemNativeRegFlushPendingWrites(pReNative, off);
8402
8403 /*
8404 * Before we do anything else, go over variables that are referenced and
8405 * make sure they are not in a register.
8406 */
8407 uint32_t bmVars = pReNative->Core.bmVars;
8408 if (bmVars)
8409 {
8410 do
8411 {
8412 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8413 bmVars &= ~RT_BIT_32(idxVar);
8414
8415 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8416 {
8417 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8418 if (idxRegOld != UINT8_MAX)
8419 {
8420 if (!pReNative->Core.aVars[idxVar].fSimdReg)
8421 {
8422 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
8423
8424 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8425 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8426 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8427 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8428 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8429
8430 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8431 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8432 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8433 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8434 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8435 }
8436 else
8437 {
8438 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8439 Assert( pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U)
8440 || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8441
8442 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8443 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8444 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8445 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8446 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8447 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off,
8448 iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8449 else
8450 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off,
8451 iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8452
8453 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8454 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8455
8456 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8457 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8458 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8459 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8460 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8461 }
8462 }
8463 }
8464 } while (bmVars != 0);
8465#if 0 //def VBOX_STRICT
8466 iemNativeRegAssertSanity(pReNative);
8467#endif
8468 }
8469
8470 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8471
8472#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8473 /*
8474     * At the very first step, go over the host registers that will be used for arguments
8475     * and make sure they don't shadow anything which needs writing back first.
8476 */
8477 for (uint32_t i = 0; i < cRegArgs; i++)
8478 {
8479 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8480
8481 /* Writeback any dirty guest shadows before using this register. */
8482 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8483 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8484 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8485 }
8486#endif
8487
8488 /*
8489 * First, go over the host registers that will be used for arguments and make
8490 * sure they either hold the desired argument or are free.
8491 */
8492 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8493 {
8494 for (uint32_t i = 0; i < cRegArgs; i++)
8495 {
8496 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8497 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8498 {
8499 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8500 {
8501 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8502 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8503 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8504 Assert(pVar->idxReg == idxArgReg);
8505 uint8_t const uArgNo = pVar->uArgNo;
8506 if (uArgNo == i)
8507                        { /* perfect */ }
8508 /* The variable allocator logic should make sure this is impossible,
8509 except for when the return register is used as a parameter (ARM,
8510 but not x86). */
8511#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8512 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8513 {
8514# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8515# error "Implement this"
8516# endif
8517 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8518 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8519 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8520 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8521 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8522 }
8523#endif
8524 else
8525 {
8526 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8527
8528 if (pVar->enmKind == kIemNativeVarKind_Stack)
8529 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8530 else
8531 {
8532 /* just free it, can be reloaded if used again */
8533 pVar->idxReg = UINT8_MAX;
8534 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8535 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8536 }
8537 }
8538 }
8539 else
8540 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8541 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8542 }
8543 }
8544#if 0 //def VBOX_STRICT
8545 iemNativeRegAssertSanity(pReNative);
8546#endif
8547 }
8548
8549 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8550
8551#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8552 /*
8553 * If there are any stack arguments, make sure they are in their place as well.
8554 *
8555     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or the
8556     * caller) will be loading it later and it must be free (see the first loop).
8557 */
8558 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8559 {
8560 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8561 {
8562 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8563 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8564 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8565 {
8566 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8567 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8568 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8569 pVar->idxReg = UINT8_MAX;
8570 }
8571 else
8572 {
8573 /* Use ARG0 as temp for stuff we need registers for. */
8574 switch (pVar->enmKind)
8575 {
8576 case kIemNativeVarKind_Stack:
8577 {
8578 uint8_t const idxStackSlot = pVar->idxStackSlot;
8579 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8580 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8581 iemNativeStackCalcBpDisp(idxStackSlot));
8582 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8583 continue;
8584 }
8585
8586 case kIemNativeVarKind_Immediate:
8587 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8588 continue;
8589
8590 case kIemNativeVarKind_VarRef:
8591 {
8592 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8593 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8594 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8595 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8596 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8597 if (idxRegOther != UINT8_MAX)
8598 {
8599 if (!pReNative->Core.aVars[idxOtherVar].fSimdReg)
8600 {
8601 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs));
8602 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8603 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8604 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8605 }
8606 else
8607 {
8608 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8609 if (pReNative->Core.aVars[idxOtherVar].cbVar == sizeof(RTUINT128U))
8610 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8611 else
8612 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8613 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8614 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8615 }
8616 }
8617 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8618 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8619 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8620 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8621 continue;
8622 }
8623
8624 case kIemNativeVarKind_GstRegRef:
8625 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8626 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8627 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8628 continue;
8629
8630 case kIemNativeVarKind_Invalid:
8631 case kIemNativeVarKind_End:
8632 break;
8633 }
8634 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8635 }
8636 }
8637# if 0 //def VBOX_STRICT
8638 iemNativeRegAssertSanity(pReNative);
8639# endif
8640 }
8641#else
8642 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8643#endif
8644
8645 /*
8646 * Make sure the argument variables are loaded into their respective registers.
8647 *
8648 * We can optimize this by ASSUMING that any register allocations are for
8649     * registers that have already been loaded and are ready. The previous step
8650 * saw to that.
8651 */
8652 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8653 {
8654 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8655 {
8656 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8657 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8658 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8659 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8660 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8661 else
8662 {
8663 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8664 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8665 {
8666 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8667 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8668 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8669 | RT_BIT_32(idxArgReg);
8670 pVar->idxReg = idxArgReg;
8671 }
8672 else
8673 {
8674 /* Use ARG0 as temp for stuff we need registers for. */
8675 switch (pVar->enmKind)
8676 {
8677 case kIemNativeVarKind_Stack:
8678 {
8679 uint8_t const idxStackSlot = pVar->idxStackSlot;
8680 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8681 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8682 continue;
8683 }
8684
8685 case kIemNativeVarKind_Immediate:
8686 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8687 continue;
8688
8689 case kIemNativeVarKind_VarRef:
8690 {
8691 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8692 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8693 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8694 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8695 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8696 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8697 if (idxRegOther != UINT8_MAX)
8698 {
8699 if (!pReNative->Core.aVars[idxOtherVar].fSimdReg)
8700 {
8701 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs));
8702 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8703 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8704 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8705 }
8706 else
8707 {
8708 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8709 if (pReNative->Core.aVars[idxOtherVar].cbVar == sizeof(RTUINT128U))
8710 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8711 else
8712 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8713 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8714 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8715 }
8716 }
8717 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8718 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8719 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8720 continue;
8721 }
8722
8723 case kIemNativeVarKind_GstRegRef:
8724 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8725 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8726 continue;
8727
8728 case kIemNativeVarKind_Invalid:
8729 case kIemNativeVarKind_End:
8730 break;
8731 }
8732 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8733 }
8734 }
8735 }
8736#if 0 //def VBOX_STRICT
8737 iemNativeRegAssertSanity(pReNative);
8738#endif
8739 }
8740#ifdef VBOX_STRICT
8741 else
8742 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8743 {
8744 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8745 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8746 }
8747#endif
8748
8749 /*
8750 * Free all argument variables (simplified).
8751 * Their lifetime always expires with the call they are for.
8752 */
8753 /** @todo Make the python script check that arguments aren't used after
8754 * IEM_MC_CALL_XXXX. */
8755    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8756     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8757     *        an argument value. There is also some FPU stuff. */
8758 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8759 {
8760 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8761 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8762
8763 /* no need to free registers: */
8764 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8765 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8766 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8767 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8768 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8769 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8770
8771 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8772 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8773 iemNativeVarFreeStackSlots(pReNative, idxVar);
8774 }
8775 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8776
8777 /*
8778 * Flush volatile registers as we make the call.
8779 */
8780 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8781
8782 return off;
8783}
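
/*
 * A rough sketch of how a caller typically drives this function (condensed; the
 * helper name iemNativeEmitCallImm and the hidden pVCpu argument are assumptions
 * here, the real CIMPL/AIMPL emitters may differ in detail):
 *      off = iemNativeEmitCallCommon(pReNative, off, cTotalArgs, cHiddenArgs);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
 *                                        IEMNATIVE_REG_FIXED_PVMCPU);    // hidden arg #0
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);  // assumed name
 * I.e. after this function returns, only the hidden arguments and the actual call
 * instruction remain to be emitted.
 */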
8784
8785
8786
8787/*********************************************************************************************************************************
8788* TLB Lookup. *
8789*********************************************************************************************************************************/
8790
8791/**
8792 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8793 */
8794DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8795{
8796 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8797 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8798 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8799 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
8800 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8801
8802 /* Do the lookup manually. */
8803 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8804 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8805 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
8806 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8807 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8808 {
8809 /*
8810 * Check TLB page table level access flags.
8811 */
8812 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8813 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8814 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8815 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8816 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8817 | IEMTLBE_F_PG_UNASSIGNED
8818 | IEMTLBE_F_PT_NO_ACCESSED
8819 | fNoWriteNoDirty | fNoUser);
8820 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8821 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8822 {
8823 /*
8824 * Return the address.
8825 */
8826 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8827 if ((uintptr_t)pbAddr == uResult)
8828 return;
8829 RT_NOREF(cbMem);
8830 AssertFailed();
8831 }
8832 else
8833 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8834 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8835 }
8836 else
8837 AssertFailed();
8838 RT_BREAKPOINT();
8839}
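
/*
 * Layout of the packed uSegAndSizeAndAccessAndDisp parameter as decoded above:
 *      bits  0..7   iSegReg  (UINT8_MAX when the address is already flat)
 *      bits  8..15  cbMem
 *      bits 16..31  fAccess  (IEM_ACCESS_XXX)
 *      bits 32..39  offDisp
 * Sketch of the packing on the emitter side (illustration only, not the authoritative
 * encoder): a 4 byte write through ES with a +2 displacement would come in roughly as
 *      (UINT64_C(2) << 32) | ((uint64_t)fAccess << 16) | (4 << 8) | X86_SREG_ES
 */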
8840
8841/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8842
8843
8844
8845/*********************************************************************************************************************************
8846* Recompiler Core. *
8847*********************************************************************************************************************************/
8848
8849/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8850static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8851{
8852 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8853 pDis->cbCachedInstr += cbMaxRead;
8854 RT_NOREF(cbMinRead);
8855 return VERR_NO_DATA;
8856}
8857
8858
8859DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8860{
8861 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8862 {
8863#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8864 ENTRY(fLocalForcedActions),
8865 ENTRY(iem.s.rcPassUp),
8866 ENTRY(iem.s.fExec),
8867 ENTRY(iem.s.pbInstrBuf),
8868 ENTRY(iem.s.uInstrBufPc),
8869 ENTRY(iem.s.GCPhysInstrBuf),
8870 ENTRY(iem.s.cbInstrBufTotal),
8871 ENTRY(iem.s.idxTbCurInstr),
8872 ENTRY(iem.s.fSkippingEFlags),
8873#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
8874 ENTRY(iem.s.uPcUpdatingDebug),
8875#endif
8876#ifdef VBOX_WITH_STATISTICS
8877 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8878 ENTRY(iem.s.StatNativeTlbHitsForStore),
8879 ENTRY(iem.s.StatNativeTlbHitsForStack),
8880 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8881 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8882 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8883 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8884 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8885#endif
8886 ENTRY(iem.s.DataTlb.uTlbRevision),
8887 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8888 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8889 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8890 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8891 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8892 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8893 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8894 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8895 ENTRY(iem.s.DataTlb.aEntries),
8896 ENTRY(iem.s.CodeTlb.uTlbRevision),
8897 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8898 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8899 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8900 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8901 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8902 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8903 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8904 ENTRY(iem.s.CodeTlb.aEntries),
8905 ENTRY(pVMR3),
8906 ENTRY(cpum.GstCtx.rax),
8907 ENTRY(cpum.GstCtx.ah),
8908 ENTRY(cpum.GstCtx.rcx),
8909 ENTRY(cpum.GstCtx.ch),
8910 ENTRY(cpum.GstCtx.rdx),
8911 ENTRY(cpum.GstCtx.dh),
8912 ENTRY(cpum.GstCtx.rbx),
8913 ENTRY(cpum.GstCtx.bh),
8914 ENTRY(cpum.GstCtx.rsp),
8915 ENTRY(cpum.GstCtx.rbp),
8916 ENTRY(cpum.GstCtx.rsi),
8917 ENTRY(cpum.GstCtx.rdi),
8918 ENTRY(cpum.GstCtx.r8),
8919 ENTRY(cpum.GstCtx.r9),
8920 ENTRY(cpum.GstCtx.r10),
8921 ENTRY(cpum.GstCtx.r11),
8922 ENTRY(cpum.GstCtx.r12),
8923 ENTRY(cpum.GstCtx.r13),
8924 ENTRY(cpum.GstCtx.r14),
8925 ENTRY(cpum.GstCtx.r15),
8926 ENTRY(cpum.GstCtx.es.Sel),
8927 ENTRY(cpum.GstCtx.es.u64Base),
8928 ENTRY(cpum.GstCtx.es.u32Limit),
8929 ENTRY(cpum.GstCtx.es.Attr),
8930 ENTRY(cpum.GstCtx.cs.Sel),
8931 ENTRY(cpum.GstCtx.cs.u64Base),
8932 ENTRY(cpum.GstCtx.cs.u32Limit),
8933 ENTRY(cpum.GstCtx.cs.Attr),
8934 ENTRY(cpum.GstCtx.ss.Sel),
8935 ENTRY(cpum.GstCtx.ss.u64Base),
8936 ENTRY(cpum.GstCtx.ss.u32Limit),
8937 ENTRY(cpum.GstCtx.ss.Attr),
8938 ENTRY(cpum.GstCtx.ds.Sel),
8939 ENTRY(cpum.GstCtx.ds.u64Base),
8940 ENTRY(cpum.GstCtx.ds.u32Limit),
8941 ENTRY(cpum.GstCtx.ds.Attr),
8942 ENTRY(cpum.GstCtx.fs.Sel),
8943 ENTRY(cpum.GstCtx.fs.u64Base),
8944 ENTRY(cpum.GstCtx.fs.u32Limit),
8945 ENTRY(cpum.GstCtx.fs.Attr),
8946 ENTRY(cpum.GstCtx.gs.Sel),
8947 ENTRY(cpum.GstCtx.gs.u64Base),
8948 ENTRY(cpum.GstCtx.gs.u32Limit),
8949 ENTRY(cpum.GstCtx.gs.Attr),
8950 ENTRY(cpum.GstCtx.rip),
8951 ENTRY(cpum.GstCtx.eflags),
8952 ENTRY(cpum.GstCtx.uRipInhibitInt),
8953 ENTRY(cpum.GstCtx.cr0),
8954 ENTRY(cpum.GstCtx.cr4),
8955 ENTRY(cpum.GstCtx.aXcr[0]),
8956 ENTRY(cpum.GstCtx.aXcr[1]),
8957 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8958 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8959 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8960 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8961 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8962 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8963 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8964 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8965 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8966 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8967 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8968 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8969 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8970 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8971 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8972 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8973 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8974 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8975 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8976 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8977 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8978 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8979 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8980 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8981 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8982 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8983 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8984 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8985 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8986 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8987 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8988 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8989 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8990#undef ENTRY
8991 };
8992#ifdef VBOX_STRICT
8993 static bool s_fOrderChecked = false;
8994 if (!s_fOrderChecked)
8995 {
8996 s_fOrderChecked = true;
8997 uint32_t offPrev = s_aMembers[0].off;
8998 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8999 {
9000 Assert(s_aMembers[i].off > offPrev);
9001 offPrev = s_aMembers[i].off;
9002 }
9003 }
9004#endif
9005
9006 /*
9007 * Binary lookup.
9008 */
9009 unsigned iStart = 0;
9010 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9011 for (;;)
9012 {
9013 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9014 uint32_t const offCur = s_aMembers[iCur].off;
9015 if (off < offCur)
9016 {
9017 if (iCur != iStart)
9018 iEnd = iCur;
9019 else
9020 break;
9021 }
9022 else if (off > offCur)
9023 {
9024 if (iCur + 1 < iEnd)
9025 iStart = iCur + 1;
9026 else
9027 break;
9028 }
9029 else
9030 return s_aMembers[iCur].pszName;
9031 }
9032#ifdef VBOX_WITH_STATISTICS
9033 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9034 return "iem.s.acThreadedFuncStats[iFn]";
9035#endif
9036 return NULL;
9037}
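
/*
 * Illustration: iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip))
 * returns "cpum.GstCtx.rip", while an offset that has no entry in the (offset sorted)
 * table above yields NULL.  This is what turns "[x28, #imm]" style operands into
 * readable annotations in the disassembly output further down.
 */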
9038
9039
9040/**
9041 * Translates a label to a name.
9042 */
9043static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
9044{
9045 switch (enmLabel)
9046 {
9047#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
9048 STR_CASE_CMN(Invalid);
9049 STR_CASE_CMN(RaiseDe);
9050 STR_CASE_CMN(RaiseUd);
9051 STR_CASE_CMN(RaiseSseRelated);
9052 STR_CASE_CMN(RaiseAvxRelated);
9053 STR_CASE_CMN(RaiseSseAvxFpRelated);
9054 STR_CASE_CMN(RaiseNm);
9055 STR_CASE_CMN(RaiseGp0);
9056 STR_CASE_CMN(RaiseMf);
9057 STR_CASE_CMN(RaiseXf);
9058 STR_CASE_CMN(ObsoleteTb);
9059 STR_CASE_CMN(NeedCsLimChecking);
9060 STR_CASE_CMN(CheckBranchMiss);
9061 STR_CASE_CMN(ReturnSuccess);
9062 STR_CASE_CMN(ReturnBreak);
9063 STR_CASE_CMN(ReturnBreakFF);
9064 STR_CASE_CMN(ReturnWithFlags);
9065 STR_CASE_CMN(ReturnBreakViaLookup);
9066 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
9067 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
9068 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
9069 STR_CASE_CMN(NonZeroRetOrPassUp);
9070#undef STR_CASE_CMN
9071#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
9072 STR_CASE_LBL(LoopJumpTarget);
9073 STR_CASE_LBL(If);
9074 STR_CASE_LBL(Else);
9075 STR_CASE_LBL(Endif);
9076 STR_CASE_LBL(CheckIrq);
9077 STR_CASE_LBL(TlbLookup);
9078 STR_CASE_LBL(TlbMiss);
9079 STR_CASE_LBL(TlbDone);
9080 case kIemNativeLabelType_End: break;
9081 }
9082 return NULL;
9083}
9084
9085
9086/** Info for the symbol resolver used when disassembling. */
9087typedef struct IEMNATIVDISASMSYMCTX
9088{
9089 PVMCPU pVCpu;
9090 PCIEMTB pTb;
9091 PCIEMNATIVEPERCHUNKCTX pCtx;
9092#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9093 PCIEMTBDBG pDbgInfo;
9094#endif
9095} IEMNATIVDISASMSYMCTX;
9096typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
9097
9098
9099/**
9100 * Resolve address to symbol, if we can.
9101 */
9102static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress, char *pszBuf, size_t cbBuf)
9103{
9104 PCIEMTB const pTb = pSymCtx->pTb;
9105 uintptr_t const offNative = (uAddress - (uintptr_t)pTb->Native.paInstructions) / sizeof(IEMNATIVEINSTR);
9106 if (offNative <= pTb->Native.cInstructions)
9107 {
9108#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9109 /*
9110 * Scan debug info for a matching label.
9111 * Since the debug info should be 100% linear, we can do a binary search here.
9112 */
9113 PCIEMTBDBG const pDbgInfo = pSymCtx->pDbgInfo;
9114 if (pDbgInfo)
9115 {
9116 uint32_t const cEntries = pDbgInfo->cEntries;
9117 uint32_t idxEnd = cEntries;
9118 uint32_t idxStart = 0;
9119 for (;;)
9120 {
9121 /* Find a NativeOffset record close to the midpoint. */
9122 uint32_t idx = idxStart + (idxEnd - idxStart) / 2;
9123 while (idx > idxStart && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9124 idx--;
9125 if (pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9126 {
9127 idx = idxStart + (idxEnd - idxStart) / 2 + 1;
9128 while (idx < idxEnd && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9129 idx++;
9130 if (idx >= idxEnd)
9131 break;
9132 }
9133
9134 /* Do the binary searching thing. */
9135 if (offNative < pDbgInfo->aEntries[idx].NativeOffset.offNative)
9136 {
9137 if (idx > idxStart)
9138 idxEnd = idx;
9139 else
9140 break;
9141 }
9142 else if (offNative > pDbgInfo->aEntries[idx].NativeOffset.offNative)
9143 {
9144 idx += 1;
9145 if (idx < idxEnd)
9146 idxStart = idx;
9147 else
9148 break;
9149 }
9150 else
9151 {
9152 /* Got a matching offset, scan forward till we hit a label, but
9153 stop when the native offset changes. */
9154 while (++idx < cEntries)
9155 switch (pDbgInfo->aEntries[idx].Gen.uType)
9156 {
9157 case kIemTbDbgEntryType_Label:
9158 {
9159 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)pDbgInfo->aEntries[idx].Label.enmLabel;
9160 const char * const pszName = iemNativeGetLabelName(enmLabel);
9161 if (enmLabel < kIemNativeLabelType_FirstWithMultipleInstances)
9162 return pszName;
9163 RTStrPrintf(pszBuf, cbBuf, "%s_%u", pszName, pDbgInfo->aEntries[idx].Label.uData);
9164 return pszBuf;
9165 }
9166
9167 case kIemTbDbgEntryType_NativeOffset:
9168 if (pDbgInfo->aEntries[idx].NativeOffset.offNative != offNative)
9169 return NULL;
9170 break;
9171 }
9172 break;
9173 }
9174 }
9175 }
9176#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9177 }
9178 else
9179 {
9180 PCIEMNATIVEPERCHUNKCTX const pChunkCtx = pSymCtx->pCtx;
9181 if (pChunkCtx)
9182 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
9183 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
9184 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
9185 }
9186 RT_NOREF(pszBuf, cbBuf);
9187 return NULL;
9188}
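
/*
 * The lookup above assumes the debug info is a linear stream in which ascending
 * kIemTbDbgEntryType_NativeOffset records are interleaved with the entries they
 * describe, e.g. (sketch):
 *      [0] NativeOffset(0), [1] GuestInstruction, [2] NativeOffset(5), [3] Label(TlbLookup), ...
 * Locating the NativeOffset record for the given code offset and then scanning
 * forward until a label is hit (stopping if the native offset changes) yields the
 * symbol name, if any.
 */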
9189
9190#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9191
9192/**
9193 * @callback_method_impl{FNDISGETSYMBOL}
9194 */
9195static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
9196 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
9197{
9198 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress, pszBuf, cchBuf);
9199 if (pszSym)
9200 {
9201 *poff = 0;
9202 if (pszSym != pszBuf)
9203 return RTStrCopy(pszBuf, cchBuf, pszSym);
9204 return VINF_SUCCESS;
9205 }
9206 RT_NOREF(pDis, u32Sel);
9207 return VERR_SYMBOL_NOT_FOUND;
9208}
9209
9210#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9211
9212/**
9213 * Annotates an instruction decoded by the capstone disassembler.
9214 */
9215static const char *
9216iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9217{
9218# if defined(RT_ARCH_ARM64)
9219 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9220 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9221 {
9222        /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9223 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9224 char const *psz = strchr(pInstr->op_str, '[');
9225 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9226 {
9227 uint32_t const offVCpu = psz[3] == '8'? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9228 int32_t off = -1;
9229 psz += 4;
9230 if (*psz == ']')
9231 off = 0;
9232 else if (*psz == ',')
9233 {
9234 psz = RTStrStripL(psz + 1);
9235 if (*psz == '#')
9236 off = RTStrToInt32(&psz[1]);
9237 /** @todo deal with index registers and LSL as well... */
9238 }
9239 if (off >= 0)
9240 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9241 }
9242 }
9243 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9244 {
9245 const char *pszAddr = strchr(pInstr->op_str, '#');
9246 if (pszAddr)
9247 {
9248 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9249 if (uAddr != 0)
9250 return iemNativeDisasmGetSymbol(pSymCtx, uAddr, pszBuf, cchBuf);
9251 }
9252 }
9253# endif
9254 RT_NOREF(pSymCtx, pInstr, pszBuf, cchBuf);
9255 return NULL;
9256}
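
/*
 * Example of the annotation flow above (sketch): capstone formats a load as, say,
 *      ldr w9, [x28, #0x123]
 * The code recognizes the x28 (pVCpu) / x27 (pCpumCtx) base register, parses the '#'
 * displacement, rebases it to a VMCPU offset and hands it to iemNativeDbgVCpuOffsetToName,
 * so the caller can append a "; cpum.GstCtx.xxx" style annotation to the printed line.
 */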
9257#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9258
9259
9260DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9261{
9262 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9263#if defined(RT_ARCH_AMD64)
9264 static const char * const a_apszMarkers[] =
9265 {
9266 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9267 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9268 };
9269#endif
9270
9271 char szDisBuf[512];
9272 DISSTATE Dis;
9273 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9274 uint32_t const cNative = pTb->Native.cInstructions;
9275 uint32_t offNative = 0;
9276#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9277 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9278#endif
9279 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9280 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9281 : DISCPUMODE_64BIT;
9282#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9283 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb), pDbgInfo };
9284#else
9285 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9286#endif
9287#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9288 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9289#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9290 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9291#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9292# error "Port me"
9293#else
9294 csh hDisasm = ~(size_t)0;
9295# if defined(RT_ARCH_AMD64)
9296 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9297# elif defined(RT_ARCH_ARM64)
9298 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9299# else
9300# error "Port me"
9301# endif
9302 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9303
9304 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9305 //Assert(rcCs == CS_ERR_OK);
9306#endif
9307
9308 /*
9309 * Print TB info.
9310 */
9311 pHlp->pfnPrintf(pHlp,
9312 "pTb=%p: GCPhysPc=%RGp (%%%RGv) cInstructions=%u LB %#x cRanges=%u\n"
9313 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9314 pTb, pTb->GCPhysPc,
9315#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9316 pTb->pDbgInfo ? pTb->pDbgInfo->FlatPc : RTGCPTR_MAX,
9317#else
9318 pTb->FlatPc,
9319#endif
9320 pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9321 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9322#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9323 if (pDbgInfo && pDbgInfo->cEntries > 1)
9324 {
9325 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9326
9327 /*
9328 * This disassembly is driven by the debug info which follows the native
9329         * code and indicates where the next guest instruction starts, where the
9330         * labels are, and similar things.
9331 */
9332 uint32_t idxThreadedCall = 0;
9333 uint32_t idxGuestInstr = 0;
9334 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9335 uint8_t idxRange = UINT8_MAX;
9336 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9337 uint32_t offRange = 0;
9338 uint32_t offOpcodes = 0;
9339 uint32_t const cbOpcodes = pTb->cbOpcodes;
9340 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9341 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9342 uint32_t iDbgEntry = 1;
9343 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9344
9345 while (offNative < cNative)
9346 {
9347 /* If we're at or have passed the point where the next chunk of debug
9348 info starts, process it. */
9349 if (offDbgNativeNext <= offNative)
9350 {
9351 offDbgNativeNext = UINT32_MAX;
9352 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9353 {
9354 switch ((IEMTBDBGENTRYTYPE)pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9355 {
9356 case kIemTbDbgEntryType_GuestInstruction:
9357 {
9358 /* Did the exec flag change? */
9359 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9360 {
9361 pHlp->pfnPrintf(pHlp,
9362 " fExec change %#08x -> %#08x %s\n",
9363 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9364 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9365 szDisBuf, sizeof(szDisBuf)));
9366 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9367 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9368 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9369 : DISCPUMODE_64BIT;
9370 }
9371
9372                            /* New opcode range? We need to fend off a spurious debug info entry here for cases
9373 where the compilation was aborted before the opcode was recorded and the actual
9374 instruction was translated to a threaded call. This may happen when we run out
9375 of ranges, or when some complicated interrupts/FFs are found to be pending or
9376 similar. So, we just deal with it here rather than in the compiler code as it
9377 is a lot simpler to do here. */
9378 if ( idxRange == UINT8_MAX
9379 || idxRange >= cRanges
9380 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9381 {
9382 idxRange += 1;
9383 if (idxRange < cRanges)
9384 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9385 else
9386 continue;
9387 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9388 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9389 + (pTb->aRanges[idxRange].idxPhysPage == 0
9390 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9391 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9392 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9393 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9394 pTb->aRanges[idxRange].idxPhysPage);
9395 GCPhysPc += offRange;
9396 }
9397
9398 /* Disassemble the instruction. */
9399 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9400 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9401 uint32_t cbInstr = 1;
9402 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9403 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9404 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9405 if (RT_SUCCESS(rc))
9406 {
9407 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9408 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9409 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9410 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9411
9412 static unsigned const s_offMarker = 55;
9413 static char const s_szMarker[] = " ; <--- guest";
9414 if (cch < s_offMarker)
9415 {
9416 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9417 cch = s_offMarker;
9418 }
9419 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9420 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9421
9422 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %s #%u\n", GCPhysPc, szDisBuf, idxGuestInstr);
9423 }
9424 else
9425 {
9426 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9427 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9428 cbInstr = 1;
9429 }
9430 idxGuestInstr++;
9431 GCPhysPc += cbInstr;
9432 offOpcodes += cbInstr;
9433 offRange += cbInstr;
9434 continue;
9435 }
9436
9437 case kIemTbDbgEntryType_ThreadedCall:
9438 pHlp->pfnPrintf(pHlp,
9439 " Call #%u to %s (%u args) - %s\n",
9440 idxThreadedCall,
9441 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9442 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9443 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9444 idxThreadedCall++;
9445 continue;
9446
9447 case kIemTbDbgEntryType_GuestRegShadowing:
9448 {
9449 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9450 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9451 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9452 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9453 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9454 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9455 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9456 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9457 else
9458 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9459 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9460 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9461 continue;
9462 }
9463
9464 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9465 {
9466 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9467 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9468 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9469 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9470 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9471 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9472 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9473 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9474 else
9475 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9476 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9477 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9478 continue;
9479 }
9480
9481 case kIemTbDbgEntryType_Label:
9482 {
9483 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9484 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9485 {
9486 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9487 ? " ; regs state restored pre-if-block" : "";
9488 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9489 }
9490 else
9491 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9492 continue;
9493 }
9494
9495 case kIemTbDbgEntryType_NativeOffset:
9496 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9497 Assert(offDbgNativeNext >= offNative);
9498 break;
9499
9500# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9501 case kIemTbDbgEntryType_DelayedPcUpdate:
9502 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9503 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9504 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9505 continue;
9506# endif
9507
9508# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9509 case kIemTbDbgEntryType_GuestRegDirty:
9510 {
9511 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9512 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9513 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9514 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9515 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9516 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9517 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9518 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9519 pszGstReg, pszHstReg);
9520 continue;
9521 }
9522
9523 case kIemTbDbgEntryType_GuestRegWriteback:
9524 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX32)\n",
9525 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9526 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9527 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9528 continue;
9529# endif
9530
9531# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9532 case kIemTbDbgEntryType_PostponedEFlagsCalc:
9533 {
9534 const char *pszOp = "!unknown!";
9535 switch ((IEMNATIVE_POSTPONED_EFL_OP_T)pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.enmOp)
9536 {
9537 case kIemNativePostponedEflOp_Logical: pszOp = "logical"; break;
9538 case kIemNativePostponedEflOp_Invalid: break;
9539 case kIemNativePostponedEflOp_End: break;
9540 }
9541 pHlp->pfnPrintf(pHlp, " Postponed EFLAGS calc #%u: %s %u bits\n",
9542 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.idxEmit, pszOp,
9543 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.cOpBits);
9544 continue;
9545 }
9546# endif
9547 default:
9548 AssertFailed();
9549 continue;
9550 }
9551 /* Break out of the loop at kIemTbDbgEntryType_NativeOffset. */
9552 iDbgEntry++;
9553 break;
9554 }
9555 }
9556
9557 /*
9558 * Disassemble the next native instruction.
9559 */
9560 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9561# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9562 uint32_t cbInstr = sizeof(paNative[0]);
9563 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9564 if (RT_SUCCESS(rc))
9565 {
9566# if defined(RT_ARCH_AMD64)
9567 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9568 {
9569 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9570 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9571 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9572 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9573 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9574 uInfo & 0x8000 ? "recompiled" : "todo");
9575 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9576 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9577 else
9578 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9579 }
9580 else
9581# endif
9582 {
9583 const char *pszAnnotation = NULL;
9584# ifdef RT_ARCH_AMD64
9585 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9586 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9587 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9588 iemNativeDisasmGetSymbolCb, &SymCtx);
9589 PCDISOPPARAM pMemOp;
9590 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[0].fUse))
9591 pMemOp = &Dis.aParams[0];
9592 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[1].fUse))
9593 pMemOp = &Dis.aParams[1];
9594 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[2].fUse))
9595 pMemOp = &Dis.aParams[2];
9596 else
9597 pMemOp = NULL;
9598 if ( pMemOp
9599 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9600 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9601 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9602 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9603
9604# elif defined(RT_ARCH_ARM64)
9605 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9606 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9607 iemNativeDisasmGetSymbolCb, &SymCtx);
9608# else
9609# error "Port me"
9610# endif
9611 if (pszAnnotation)
9612 {
9613 static unsigned const s_offAnnotation = 55;
9614 size_t const cchAnnotation = strlen(pszAnnotation);
9615 size_t cchDis = strlen(szDisBuf);
9616 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9617 {
9618 if (cchDis < s_offAnnotation)
9619 {
9620 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9621 cchDis = s_offAnnotation;
9622 }
9623 szDisBuf[cchDis++] = ' ';
9624 szDisBuf[cchDis++] = ';';
9625 szDisBuf[cchDis++] = ' ';
9626 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9627 }
9628 }
9629 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9630 }
9631 }
9632 else
9633 {
9634# if defined(RT_ARCH_AMD64)
9635 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9636 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9637# elif defined(RT_ARCH_ARM64)
9638 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9639# else
9640# error "Port me"
9641# endif
9642 cbInstr = sizeof(paNative[0]);
9643 }
9644 offNative += cbInstr / sizeof(paNative[0]);
9645
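/* Illustrative sketch (standalone, not the code above): how the disassembly text is
   padded out to column 55 before "; <annotation>" is appended.  The buffer contents
   and the annotation string are made up. */
#if 0
# include <stdio.h>
# include <string.h>

static void exampleAppendAnnotation(char *pszBuf, size_t cbBuf, const char *pszAnnotation)
{
    size_t const offColumn     = 55;                    /* same column as s_offAnnotation above */
    size_t const cchAnnotation = strlen(pszAnnotation);
    size_t       cch           = strlen(pszBuf);
    if ((cch > offColumn ? cch : offColumn) + sizeof(" ; ") + cchAnnotation <= cbBuf)
    {
        if (cch < offColumn)
        {
            memset(&pszBuf[cch], ' ', offColumn - cch); /* pad with blanks up to the column */
            cch = offColumn;
        }
        pszBuf[cch++] = ' ';
        pszBuf[cch++] = ';';
        pszBuf[cch++] = ' ';
        memcpy(&pszBuf[cch], pszAnnotation, cchAnnotation + 1); /* copies the terminator too */
    }
}

int main(void)
{
    char szBuf[128] = "mov rax, qword [rbx+0x123]";
    exampleAppendAnnotation(szBuf, sizeof(szBuf), "some.vcpu.field");
    printf("%s\n", szBuf);
    return 0;
}
#endif
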
9646# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9647 cs_insn *pInstr;
9648 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9649 (uintptr_t)pNativeCur, 1, &pInstr);
9650 if (cInstrs > 0)
9651 {
9652 Assert(cInstrs == 1);
9653 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9654 size_t const cchOp = strlen(pInstr->op_str);
9655# if defined(RT_ARCH_AMD64)
9656 if (pszAnnotation)
9657 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9658 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9659 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9660 else
9661 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9662 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9663
9664# else
9665 if (pszAnnotation)
9666 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9667 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9668 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9669 else
9670 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9671 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9672# endif
9673 offNative += pInstr->size / sizeof(*pNativeCur);
9674 cs_free(pInstr, cInstrs);
9675 }
9676 else
9677 {
9678# if defined(RT_ARCH_AMD64)
9679 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9680 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9681# else
9682 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9683# endif
9684 offNative++;
9685 }
9686# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
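
/* Illustrative sketch (standalone): the capstone path above disassembles one
   instruction per iteration with cs_disasm(..., 1, &pInstr) and frees each result.
   This minimal loop does the same for a few made-up x86-64 bytes; the include path
   and architecture/mode are assumptions that depend on the build setup. */
#if 0
# include <capstone/capstone.h>
# include <stdio.h>

int main(void)
{
    static const uint8_t s_abCode[] = { 0x48, 0x89, 0xc8, 0xc3 }; /* mov rax, rcx; ret */

    csh hDisasm;
    if (cs_open(CS_ARCH_X86, CS_MODE_64, &hDisasm) != CS_ERR_OK)
        return 1;

    size_t off = 0;
    while (off < sizeof(s_abCode))
    {
        cs_insn *pInstr;
        size_t const cInstrs = cs_disasm(hDisasm, &s_abCode[off], sizeof(s_abCode) - off,
                                         /*address=*/off, /*count=*/1, &pInstr);
        if (cInstrs != 1)
        {
            printf("%#zx: disassembly failure %d\n", off, cs_errno(hDisasm));
            break;
        }
        printf("%#zx: %-7s %s\n", off, pInstr->mnemonic, pInstr->op_str);
        off += pInstr->size;
        cs_free(pInstr, cInstrs);
    }

    cs_close(&hDisasm);
    return 0;
}
#endif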
9687 }
9688 }
9689 else
9690#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9691 {
9692 /*
9693 * No debug info, just disassemble the x86 code and then the native code.
9694 *
9695 * First the guest code:
9696 */
9697 for (unsigned i = 0; i < pTb->cRanges; i++)
9698 {
9699 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9700 + (pTb->aRanges[i].idxPhysPage == 0
9701 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9702 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9703 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9704 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9705 unsigned off = pTb->aRanges[i].offOpcodes;
9706 /** @todo this ain't working when crossing pages! */
9707 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9708 while (off < cbOpcodes)
9709 {
9710 uint32_t cbInstr = 1;
9711 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9712 &pTb->pabOpcodes[off], cbOpcodes - off,
9713 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9714 if (RT_SUCCESS(rc))
9715 {
9716 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9717 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9718 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9719 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9720 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9721 GCPhysPc += cbInstr;
9722 off += cbInstr;
9723 }
9724 else
9725 {
9726 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - disassembly failure %Rrc\n",
9727 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9728 break;
9729 }
9730 }
9731 }
9732
9733 /*
9734 * Then the native code:
9735 */
9736 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9737 while (offNative < cNative)
9738 {
9739 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9740#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9741 uint32_t cbInstr = sizeof(paNative[0]);
9742 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9743 if (RT_SUCCESS(rc))
9744 {
9745# if defined(RT_ARCH_AMD64)
9746 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9747 {
9748 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9749 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9750 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9751 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9752 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9753 uInfo & 0x8000 ? "recompiled" : "todo");
9754 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9755 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9756 else
9757 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9758 }
9759 else
9760# endif
9761 {
9762# ifdef RT_ARCH_AMD64
9763 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9764 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9765 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9766 iemNativeDisasmGetSymbolCb, &SymCtx);
9767# elif defined(RT_ARCH_ARM64)
9768 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9769 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9770 iemNativeDisasmGetSymbolCb, &SymCtx);
9771# else
9772# error "Port me"
9773# endif
9774 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9775 }
9776 }
9777 else
9778 {
9779# if defined(RT_ARCH_AMD64)
9780 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9781 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9782# else
9783 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9784# endif
9785 cbInstr = sizeof(paNative[0]);
9786 }
9787 offNative += cbInstr / sizeof(paNative[0]);
9788
9789#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9790 cs_insn *pInstr;
9791 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9792 (uintptr_t)pNativeCur, 1, &pInstr);
9793 if (cInstrs > 0)
9794 {
9795 Assert(cInstrs == 1);
9796 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9797 size_t const cchOp = strlen(pInstr->op_str);
9798# if defined(RT_ARCH_AMD64)
9799 if (pszAnnotation)
9800 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9801 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9802 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9803 else
9804 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9805 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9806
9807# else
9808 if (pszAnnotation)
9809 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9810 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9811 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9812 else
9813 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9814 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9815# endif
9816 offNative += pInstr->size / sizeof(*pNativeCur);
9817 cs_free(pInstr, cInstrs);
9818 }
9819 else
9820 {
9821# if defined(RT_ARCH_AMD64)
9822 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9823 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9824# else
9825 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9826# endif
9827 offNative++;
9828 }
9829#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9830 }
9831 }
9832
9833#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9834 /* Cleanup. */
9835 cs_close(&hDisasm);
9836#endif
9837}
9838
9839
9840/** Emit alignment padding between labels / functions. */
9841DECL_INLINE_THROW(uint32_t)
9842iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9843{
9844 if (off & fAlignMask)
9845 {
9846 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9847 while (off & fAlignMask)
9848#if defined(RT_ARCH_AMD64)
9849 pCodeBuf[off++] = 0xcc;
9850#elif defined(RT_ARCH_ARM64)
9851 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9852#else
9853# error "port me"
9854#endif
9855 }
9856 return off;
9857}
9858
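/* Illustrative sketch (standalone): the padding arithmetic used by the helper above.
   fAlignMask is "alignment in IEMNATIVEINSTR units minus one", i.e. 15 for 16 single
   byte units on AMD64 and 31/4 = 7 for 8 four-byte instructions (32 bytes) on ARM64;
   each loop iteration stands in for one emitted int3/brk filler. */
#if 0
# include <stdint.h>
# include <stdio.h>

int main(void)
{
    uint32_t const fAlignMask = 7;          /* 8-unit alignment, as on ARM64 */
    for (uint32_t off = 0; off <= 10; off++)
    {
        uint32_t offPadded = off;
        uint32_t cFillers  = 0;
        while (offPadded & fAlignMask)      /* same loop condition as the helper */
        {
            offPadded++;                    /* one filler instruction per iteration */
            cFillers++;
        }
        printf("off=%2u -> padded=%2u (%u fillers)\n", off, offPadded, cFillers);
    }
    return 0;
}
#endif
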
9859
9860/**
9861 * Called when a new chunk is allocated, to emit the common per-chunk code.
9862 *
9863 * Allocates a per-chunk context directly from the chunk itself and places the
9864 * common code there.
9865 *
9866 * @returns VBox status code.
9867 * @param pVCpu The cross context virtual CPU structure of the calling
9868 * thread.
9869 * @param idxChunk The index of the chunk being added and requiring a
9870 * common code context.
9871 * @param ppCtx Where to return the pointer to the chunk context start.
9872 */
9873DECLHIDDEN(int) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk, PCIEMNATIVEPERCHUNKCTX *ppCtx)
9874{
9875 *ppCtx = NULL;
9876
9877 /*
9878 * Allocate a new recompiler state (since we're likely to be called while
9879 * the default one is fully loaded already with a recompiled TB).
9880 *
9881 * This is a bit of overkill, but this isn't a frequently used code path.
9882 */
9883 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9884 AssertReturn(pReNative, VERR_NO_MEMORY);
9885
9886#if defined(RT_ARCH_AMD64)
9887 uint32_t const fAlignMask = 15;
9888#elif defined(RT_ARCH_ARM64)
9889 uint32_t const fAlignMask = 31 / 4;
9890#else
9891# error "port me"
9892#endif
9893 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
9894 int rc = VINF_SUCCESS;
9895 uint32_t off = 0;
9896
9897 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9898 {
9899 /*
9900 * Emit the epilog code.
9901 */
9902 aoffLabels[kIemNativeLabelType_ReturnSuccess] = off;
9903 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
9904 uint32_t const offReturnWithStatus = off;
9905 off = iemNativeEmitCoreEpilog(pReNative, off);
9906
9907 /*
9908 * Generate special jump labels. All of these get a copy of the epilog code.
9909 */
9910 static struct
9911 {
9912 IEMNATIVELABELTYPE enmExitReason;
9913 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9914 } const s_aSpecialWithEpilogs[] =
9915 {
9916 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9917 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9918 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9919 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9920 };
9921 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9922 {
9923 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9924 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9925 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9926 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9927 off = iemNativeEmitCoreEpilog(pReNative, off);
9928 }
9929
9930 /*
9931 * Do what iemNativeEmitReturnBreakViaLookup does.
9932 */
9933 static struct
9934 {
9935 IEMNATIVELABELTYPE enmExitReason;
9936 uintptr_t pfnHelper;
9937 } const s_aViaLookup[] =
9938 {
9939 { kIemNativeLabelType_ReturnBreakViaLookup,
9940 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9941 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9942 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9943 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9944 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9945 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9946 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9947 };
9948 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9949 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9950 {
9951 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9952 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9953 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9954 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9955 }
9956
9957 /*
9958 * Generate simple TB tail labels that just call a helper with a pVCpu
9959 * arg and either return or longjmp/throw a non-zero status.
9960 */
9961 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9962 static struct
9963 {
9964 IEMNATIVELABELTYPE enmExitReason;
9965 bool fWithEpilog;
9966 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9967 } const s_aSimpleTailLabels[] =
9968 {
9969 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
9970 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
9971 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
9972 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
9973 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
9974 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
9975 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
9976 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
9977 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
9978 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
9979 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
9980 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
9981 };
9982 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
9983 {
9984 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9985 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
9986 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
9987
9988 /* int pfnCallback(PVMCPUCC pVCpu) */
9989 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9990 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
9991
9992 /* If the callback is supposed to return with a status code we inline the epilog
9993 sequence for better speed. Otherwise, if the callback shouldn't return because
9994 it throws/longjmps, we just jump to the return sequence to be on the safe side. */
9995 if (s_aSimpleTailLabels[i].fWithEpilog)
9996 off = iemNativeEmitCoreEpilog(pReNative, off);
9997 else
9998 {
9999#ifdef VBOX_STRICT
10000 off = iemNativeEmitBrk(pReNative, off, 0x2201);
10001#endif
10002 off = iemNativeEmitJmpToFixed(pReNative, off, offReturnWithStatus);
10003 }
10004 }
10005
10006
10007#ifdef VBOX_STRICT
10008 /* Make sure we've generated code for all labels. */
10009 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
10010 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10011#endif
10012 }
10013 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10014 {
10015 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
10016 iemNativeTerm(pReNative);
10017 return rc;
10018 }
10019 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10020
10021 /*
10022 * Allocate memory for the context (first) and the common code (last).
10023 */
10024 PIEMNATIVEPERCHUNKCTX pCtx;
10025 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
10026 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
10027 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
10028 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
10029 AssertLogRelMsgReturnStmt(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk),
10030 iemNativeTerm(pReNative), VERR_OUT_OF_RESOURCES);
10031
10032 /*
10033 * Copy over the generated code.
10034 * There should be no fixups or labels defined here.
10035 */
10036 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
10037 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
10038
10039 Assert(pReNative->cFixups == 0);
10040 Assert(pReNative->cLabels == 0);
10041
10042 /*
10043 * Initialize the context.
10044 */
10045 AssertCompile(kIemNativeLabelType_Invalid == 0);
10046 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
10047 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
10048 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
10049 {
10050 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10051 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
10052 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
10053 }
10054
10055 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
10056
10057 iemNativeTerm(pReNative);
10058 *ppCtx = pCtx;
10059 return VINF_SUCCESS;
10060}
10061
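/* Illustrative sketch (standalone, hypothetical numbers): the per-chunk layout the
   function above creates - the context occupies the first 64-byte aligned piece of
   the allocation, the common code is copied right after it via the RW mapping, and
   the exit label pointers end up as RX addresses (code base + instruction offset). */
#if 0
# include <stdint.h>
# include <stdio.h>

# define EXAMPLE_ALIGN(a_cb, a_cbAlign)  ( ((a_cb) + (a_cbAlign) - 1) & ~(size_t)((a_cbAlign) - 1) )

int main(void)
{
    size_t const cbCtx     = EXAMPLE_ALIGN(200 /* pretend sizeof(ctx) */, 64);  /* -> 256 */
    size_t const cbInstr   = 4;                         /* e.g. ARM64 */
    size_t const cbCode    = 800 * cbInstr;             /* pretend amount of common code */

    uintptr_t const uAllocRw = 0x100000;                /* writable mapping   (made up) */
    uintptr_t const uAllocRx = 0x200000;                /* executable mapping (made up) */

    uintptr_t const uCtxRw   = uAllocRw;                /* context at the start */
    uintptr_t const uCodeRw  = uAllocRw + cbCtx;        /* code copied right after it... */
    uintptr_t const uCodeRx  = uAllocRx + cbCtx;        /* ...but executed via the RX alias */

    size_t const offLabel    = 128;                     /* instruction offset of some exit label */
    uintptr_t const uLabelRx = uCodeRx + offLabel * cbInstr;  /* what apExitLabels[] would hold */

    printf("ctx rw=%#zx code rw=%#zx rx=%#zx label rx=%#zx total=%#zx bytes\n",
           (size_t)uCtxRw, (size_t)uCodeRw, (size_t)uCodeRx, (size_t)uLabelRx, cbCtx + cbCode);
    return 0;
}
#endif
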
10062
10063/**
10064 * Recompiles the given threaded TB into a native one.
10065 *
10066 * In case of failure the translation block will be returned as-is.
10067 *
10068 * @returns pTb.
10069 * @param pVCpu The cross context virtual CPU structure of the calling
10070 * thread.
10071 * @param pTb The threaded translation block to recompile to native.
10072 */
10073IEM_DECL_MSC_GUARD_IGNORE DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10074{
10075#if 0 /* For profiling the native recompiler code. */
10076l_profile_again:
10077#endif
10078 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10079
10080 /*
10081 * The first time thru, we allocate the recompiler state and save it;
10082 * all the other times we'll just reuse the saved one after a quick reset.
10083 */
10084 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10085 if (RT_LIKELY(pReNative))
10086 iemNativeReInit(pReNative, pTb);
10087 else
10088 {
10089 pReNative = iemNativeInit(pVCpu, pTb);
10090 AssertReturn(pReNative, pTb);
10091 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
10092 }
10093
10094#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10095 /*
10096 * First do liveness analysis. This is done backwards.
10097 */
10098 {
10099 uint32_t idxCall = pTb->Thrd.cCalls;
10100 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10101 { /* likely */ }
10102 else
10103 {
10104 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10105 while (idxCall > cAlloc)
10106 cAlloc *= 2;
10107 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10108 AssertReturn(pvNew, pTb);
10109 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10110 pReNative->cLivenessEntriesAlloc = cAlloc;
10111 }
10112 AssertReturn(idxCall > 0, pTb);
10113 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10114
10115 /* The initial (final) entry. */
10116 idxCall--;
10117 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10118
10119 /* Loop backwards thru the calls and fill in the other entries. */
10120 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10121 while (idxCall > 0)
10122 {
10123 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10124 Assert(pfnLiveness);
10125 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10126 pCallEntry--;
10127 idxCall--;
10128 }
10129 }
10130#endif
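
/* Illustrative sketch (standalone, grossly simplified): backward liveness over a call
   list, boiled down to one "may still be read later" mask per call instead of the
   real IEMLIVENESSENTRY state; the read/write masks are made up. */
#if 0
# include <stdint.h>
# include <stdio.h>

typedef struct EXAMPLECALL
{
    uint64_t fReads;    /* guest registers this call reads  */
    uint64_t fWrites;   /* guest registers this call writes */
} EXAMPLECALL;

int main(void)
{
    EXAMPLECALL const aCalls[3] =
    {
        { /* fReads: */ 0x1, /* fWrites: */ 0x2 },
        { /* fReads: */ 0x2, /* fWrites: */ 0x4 },
        { /* fReads: */ 0x6, /* fWrites: */ 0x1 },
    };

    /* aLive[i] = registers whose current value may still be needed when entering
       call #i.  As in the loop above: seed the final entry, then walk backwards. */
    uint64_t aLive[3 + 1];
    aLive[3] = 0;                               /* nothing is needed after the last call */
    for (unsigned i = 3; i > 0; i--)
        aLive[i - 1] = (aLive[i] & ~aCalls[i - 1].fWrites) | aCalls[i - 1].fReads;

    for (unsigned i = 0; i <= 3; i++)
        printf("live before call #%u: %#llx\n", i, (unsigned long long)aLive[i]);
    return 0;
}
#endif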
10131
10132 /*
10133 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10134 * for aborting if an error happens.
10135 */
10136 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10137#ifdef LOG_ENABLED
10138 uint32_t const cCallsOrg = cCallsLeft;
10139#endif
10140 uint32_t off = 0;
10141 int rc = VINF_SUCCESS;
10142 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10143 {
10144 /*
10145 * Convert the calls to native code.
10146 */
10147#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10148 int32_t iGstInstr = -1;
10149#endif
10150#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10151 uint32_t cThreadedCalls = 0;
10152 uint32_t cRecompiledCalls = 0;
10153#endif
10154#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10155 uint32_t idxCurCall = 0;
10156#endif
10157 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10158 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10159 while (cCallsLeft-- > 0)
10160 {
10161 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10162#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10163 pReNative->idxCurCall = idxCurCall;
10164#endif
10165
10166#ifdef IEM_WITH_INTRA_TB_JUMPS
10167 /*
10168 * Define label for jump targets (currently only the first entry).
10169 */
10170 if (!(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET))
10171 { /* likely */ }
10172 else
10173 {
10174 iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
10175 Assert(idxCurCall == 0); /** @todo when jumping elsewhere, we have to save the register state. */
10176 }
10177#endif
10178
10179 /*
10180 * Debug info, assembly markup and statistics.
10181 */
10182#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10183 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10184 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10185#endif
10186#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10187 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10188 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10189 {
10190 if (iGstInstr < (int32_t)pTb->cInstructions)
10191 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10192 else
10193 Assert(iGstInstr == pTb->cInstructions);
10194 iGstInstr = pCallEntry->idxInstr;
10195 }
10196 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10197#endif
10198#if defined(VBOX_STRICT)
10199 off = iemNativeEmitMarker(pReNative, off,
10200 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10201#endif
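
/* Illustrative sketch (standalone): how the strict-build marker payload emitted above
   is packed and how the disassembler earlier in this file unpacks it - call index in
   the low 15 bits, "recompiled" in bit 15, threaded function number in the high word.
   The concrete values are made up. */
#if 0
# include <stdint.h>
# include <stdio.h>

int main(void)
{
    uint32_t const idxCall     = 67;
    uint32_t const fRecompiled = 1;
    uint32_t const enmFunction = 123;    /* stand-in for a kIemThreadedFunc_* value */
    uint32_t const uInfo       = (idxCall & 0x7fff) | (fRecompiled ? 0x8000 : 0) | (enmFunction << 16);

    printf("call #%u to function %u - %s\n",
           uInfo & 0x7fff, uInfo >> 16, uInfo & 0x8000 ? "recompiled" : "todo");
    return 0;
}
#endif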
10202#if defined(VBOX_STRICT)
10203 iemNativeRegAssertSanity(pReNative);
10204#endif
10205#ifdef VBOX_WITH_STATISTICS
10206 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10207#endif
10208
10209#if 0
10210 if ( pTb->GCPhysPc == 0x00000000000c1240
10211 && idxCurCall == 67)
10212 off = iemNativeEmitBrk(pReNative, off, 0xf000);
10213#endif
10214
10215 /*
10216 * Actual work.
10217 */
10218 Log2(("%u[%u]: %s%s (off=%#x)\n", idxCurCall, pCallEntry->idxInstr,
10219 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)", off));
10220 if (pfnRecom) /** @todo stats on this. */
10221 {
10222 off = pfnRecom(pReNative, off, pCallEntry);
10223 STAM_REL_STATS({cRecompiledCalls++;});
10224 }
10225 else
10226 {
10227 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10228 STAM_REL_STATS({cThreadedCalls++;});
10229 }
10230 Assert(off <= pReNative->cInstrBufAlloc);
10231 Assert(pReNative->cCondDepth == 0);
10232
10233#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10234 if (LogIs2Enabled())
10235 {
10236 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10237# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10238 static const char s_achState[] = "CUXI";
10239# else
10240 /* 0123 4567 89ab cdef */
10241 /* CCCC CCCC */
10242 /* WWWW WWWW */
10243 /* RR RR RR RR */
10244 /* P P P P P P P P */
10245 static const char s_achState[] = "UxRr" "WwMm" "CcQq" "KkNn";
10246# endif
10247
10248 char szGpr[17];
10249 for (unsigned i = 0; i < 16; i++)
10250 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10251 szGpr[16] = '\0';
10252
10253 char szSegBase[X86_SREG_COUNT + 1];
10254 char szSegLimit[X86_SREG_COUNT + 1];
10255 char szSegAttrib[X86_SREG_COUNT + 1];
10256 char szSegSel[X86_SREG_COUNT + 1];
10257 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10258 {
10259 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10260 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10261 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10262 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10263 }
10264 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10265 = szSegSel[X86_SREG_COUNT] = '\0';
10266
10267 char szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT + 1];
10268 for (unsigned i = 0; i < IEMLIVENESSBIT_IDX_EFL_COUNT; i++)
10269 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10270 szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT] = '\0';
10271
10272 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10273 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10274 }
10275#endif
10276
10277 /*
10278 * Advance.
10279 */
10280 pCallEntry++;
10281#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10282 idxCurCall++;
10283#endif
10284 }
10285
10286 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10287 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10288 if (!cThreadedCalls)
10289 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10290
10291 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, UINT32_MAX);
10292
10293#ifdef VBOX_WITH_STATISTICS
10294 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10295#endif
10296
10297 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10298 off = iemNativeRegFlushPendingWrites(pReNative, off);
10299
10300 /*
10301 * Jump to the common per-chunk epilog code.
10302 */
10303 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10304 off = iemNativeEmitTbExit<kIemNativeLabelType_ReturnSuccess, true, false>(pReNative, off);
10305
10306 /*
10307 * Generate tail labels with jumps to the common per-chunk code on non-x86 hosts.
10308 */
10309#ifndef RT_ARCH_AMD64
10310 Assert(!(pReNative->bmLabelTypes & ( RT_BIT_64(kIemNativeLabelType_ReturnSuccess)
10311 | RT_BIT_64(kIemNativeLabelType_Invalid) )));
10312 AssertCompile(kIemNativeLabelType_Invalid == 0);
10313 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10314 if (fTailLabels)
10315 {
10316 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, kIemNativeLabelType_LastTbExit + 1);
10317 do
10318 {
10319 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10320 fTailLabels &= ~RT_BIT_64(enmLabel);
10321
10322 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10323 AssertContinue(idxLabel != UINT32_MAX);
10324 iemNativeLabelDefine(pReNative, idxLabel, off);
10325
10326 iemNativeAddTbExitFixup(pReNative, off, enmLabel);
10327# ifdef RT_ARCH_ARM64
10328 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
10329# else
10330# error "port me"
10331# endif
10332 } while (fTailLabels);
10333 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10334 }
10335#else
10336 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10337#endif
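
/* Illustrative sketch (standalone): the lowest-bit-first iteration pattern used by the
   tail-label loop above, with a portable stand-in for ASMBitFirstSetU64 (1-based index
   of the lowest set bit, 0 when the value is zero).  The bitmap value is made up. */
#if 0
# include <stdint.h>
# include <stdio.h>

static unsigned exampleBitFirstSetU64(uint64_t u64)
{
    if (!u64)
        return 0;
    unsigned iBit = 1;
    while (!(u64 & 1))
    {
        u64 >>= 1;
        iBit++;
    }
    return iBit;
}

int main(void)
{
    uint64_t fTailLabels = UINT64_C(0x5a10);                 /* pretend label-type bitmap */
    while (fTailLabels)
    {
        unsigned const enmLabel = exampleBitFirstSetU64(fTailLabels) - 1;
        fTailLabels &= ~(UINT64_C(1) << enmLabel);
        printf("define + fix up tail label type %u\n", enmLabel);
    }
    return 0;
}
#endif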
10338 }
10339 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10340 {
10341 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10342 return pTb;
10343 }
10344 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10345 Assert(off <= pReNative->cInstrBufAlloc);
10346
10347 /*
10348 * Make sure all labels have been defined.
10349 */
10350 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10351#ifdef VBOX_STRICT
10352 uint32_t const cLabels = pReNative->cLabels;
10353 for (uint32_t i = 0; i < cLabels; i++)
10354 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10355#endif
10356
10357#if 0 /* For profiling the native recompiler code. */
10358 if (pTb->Thrd.cCalls >= 136)
10359 {
10360 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10361 goto l_profile_again;
10362 }
10363#endif
10364
10365 /*
10366 * Allocate executable memory, copy over the code we've generated.
10367 */
10368 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10369 if (pTbAllocator->pDelayedFreeHead)
10370 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10371
10372 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10373 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10374 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10375 &paFinalInstrBufRx, &pCtx);
10376
10377 AssertReturn(paFinalInstrBuf, pTb);
10378 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10379
10380 /*
10381 * Apply fixups.
10382 */
10383 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10384 uint32_t const cFixups = pReNative->cFixups;
10385 for (uint32_t i = 0; i < cFixups; i++)
10386 {
10387 Assert(paFixups[i].off < off);
10388 Assert(paFixups[i].idxLabel < cLabels);
10389 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10390 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10391 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10392 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10393 switch (paFixups[i].enmType)
10394 {
10395#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10396 case kIemNativeFixupType_Rel32:
10397 Assert(paFixups[i].off + 4 <= off);
10398 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10399 continue;
10400
10401#elif defined(RT_ARCH_ARM64)
10402 case kIemNativeFixupType_RelImm26At0:
10403 {
10404 Assert(paFixups[i].off < off);
10405 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10406 Assert(offDisp >= -33554432 && offDisp < 33554432);
10407 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10408 continue;
10409 }
10410
10411 case kIemNativeFixupType_RelImm19At5:
10412 {
10413 Assert(paFixups[i].off < off);
10414 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10415 Assert(offDisp >= -262144 && offDisp < 262144);
10416 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10417 continue;
10418 }
10419
10420 case kIemNativeFixupType_RelImm14At5:
10421 {
10422 Assert(paFixups[i].off < off);
10423 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10424 Assert(offDisp >= -8192 && offDisp < 8192);
10425 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10426 continue;
10427 }
10428
10429#endif
10430 case kIemNativeFixupType_Invalid:
10431 case kIemNativeFixupType_End:
10432 break;
10433 }
10434 AssertFailed();
10435 }
10436
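/* Illustrative sketch (standalone): the ARM64 kIemNativeFixupType_RelImm26At0 case
   above patches a signed instruction-count displacement into the low 26 bits of an
   already emitted branch.  0x14000000 (unconditional B, displacement 0) and the
   displacement value are assumptions used only for this example. */
#if 0
# include <stdint.h>
# include <stdio.h>

int main(void)
{
    uint32_t      uInstr  = UINT32_C(0x14000000);   /* B with displacement 0 */
    int32_t const offDisp = -3;                     /* target: 3 instructions back */

    /* Same range check and bit surgery as the fixup loop above. */
    if (offDisp >= -33554432 && offDisp < 33554432)
        uInstr = (uInstr & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));

    printf("patched instruction: %#010x\n", uInstr);  /* -> 0x17fffffd */
    return 0;
}
#endif
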
10437 /*
10438 * Apply TB exit fixups.
10439 */
10440 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10441 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10442 for (uint32_t i = 0; i < cTbExitFixups; i++)
10443 {
10444 Assert(paTbExitFixups[i].off < off);
10445 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10446 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10447
10448#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10449 Assert(paTbExitFixups[i].off + 4 <= off);
10450 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10451 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10452 *Ptr.pi32 = (int32_t)offDisp;
10453
10454#elif defined(RT_ARCH_ARM64)
10455 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10456 Assert(offDisp >= -33554432 && offDisp < 33554432);
10457 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10458
10459#else
10460# error "Port me!"
10461#endif
10462 }
10463
10464 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10465 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10466
10467 /*
10468 * Convert the translation block.
10469 */
10470 RTMemFree(pTb->Thrd.paCalls);
10471 pTb->Native.paInstructions = paFinalInstrBufRx;
10472 pTb->Native.cInstructions = off;
10473 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10474#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10475 pReNative->pDbgInfo->FlatPc = pTb->FlatPc;
10476 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10477 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10478#endif
10479
10480 Assert(pTbAllocator->cThreadedTbs > 0);
10481 pTbAllocator->cThreadedTbs -= 1;
10482 pTbAllocator->cNativeTbs += 1;
10483 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10484
10485#ifdef LOG_ENABLED
10486 /*
10487 * Disassemble to the log if enabled.
10488 */
10489 if (LogIs3Enabled())
10490 {
10491 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10492 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10493# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10494 RTLogFlush(NULL);
10495# endif
10496 }
10497#endif
10498 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10499
10500 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10501 return pTb;
10502}
10503