VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@104188

Last change on this file since 104188 was 104151, checked in by vboxsync, 11 months ago

VMM/IEM: Deal with iemNativeDbgInfoAddGuestRegWriteback assertion. bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 409.2 KB
1/* $Id: IEMAllN8veRecompiler.cpp 104151 2024-04-04 09:29:59Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/mem.h>
62#include <iprt/string.h>
63#if defined(RT_ARCH_AMD64)
64# include <iprt/x86.h>
65#elif defined(RT_ARCH_ARM64)
66# include <iprt/armv8.h>
67#endif
68
69#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
70# include "/opt/local/include/capstone/capstone.h"
71#endif
72
73#include "IEMInline.h"
74#include "IEMThreadedFunctions.h"
75#include "IEMN8veRecompiler.h"
76#include "IEMN8veRecompilerEmit.h"
77#include "IEMN8veRecompilerTlbLookup.h"
78#include "IEMNativeFunctions.h"
79
80
81/*
82 * Narrow down configs here to avoid wasting time on unused configs.
83 * Note! Same checks in IEMAllThrdRecompiler.cpp.
84 */
85
86#ifndef IEM_WITH_CODE_TLB
87# error The code TLB must be enabled for the recompiler.
88#endif
89
90#ifndef IEM_WITH_DATA_TLB
91# error The data TLB must be enabled for the recompiler.
92#endif
93
94#ifndef IEM_WITH_SETJMP
95# error The setjmp approach must be enabled for the recompiler.
96#endif
97
98/** @todo eliminate this clang build hack. */
99#if RT_CLANG_PREREQ(4, 0)
100# pragma GCC diagnostic ignored "-Wunused-function"
101#endif
102
103
104/*********************************************************************************************************************************
105* Internal Functions *
106*********************************************************************************************************************************/
107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
108static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
109#endif
110DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
112 IEMNATIVEGSTREG enmGstReg, uint32_t off);
113DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
114
115
116
117/*********************************************************************************************************************************
118* Native Recompilation *
119*********************************************************************************************************************************/
120
121
122/**
123 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
124 */
125IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
126{
127 pVCpu->iem.s.cInstructions += idxInstr;
128 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
129}
130
131
132/**
133 * Used by TB code when it wants to raise a \#DE.
134 */
135IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
136{
137 iemRaiseDivideErrorJmp(pVCpu);
138#ifndef _MSC_VER
139 return VINF_IEM_RAISED_XCPT; /* not reached */
140#endif
141}
142
143
144/**
145 * Used by TB code when it wants to raise a \#UD.
146 */
147IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
148{
149 iemRaiseUndefinedOpcodeJmp(pVCpu);
150#ifndef _MSC_VER
151 return VINF_IEM_RAISED_XCPT; /* not reached */
152#endif
153}
154
155
156/**
157 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
158 *
159 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
160 */
161IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
162{
163 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
164 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
165 iemRaiseUndefinedOpcodeJmp(pVCpu);
166 else
167 iemRaiseDeviceNotAvailableJmp(pVCpu);
168#ifndef _MSC_VER
169 return VINF_IEM_RAISED_XCPT; /* not reached */
170#endif
171}
172
173
174/**
175 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
176 *
177 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
178 */
179IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
180{
181 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
182 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
183 iemRaiseUndefinedOpcodeJmp(pVCpu);
184 else
185 iemRaiseDeviceNotAvailableJmp(pVCpu);
186#ifndef _MSC_VER
187 return VINF_IEM_RAISED_XCPT; /* not reached */
188#endif
189}
190
191
192/**
193 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
194 *
195 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
196 */
197IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
198{
199 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
200 iemRaiseSimdFpExceptionJmp(pVCpu);
201 else
202 iemRaiseUndefinedOpcodeJmp(pVCpu);
203#ifndef _MSC_VER
204 return VINF_IEM_RAISED_XCPT; /* not reached */
205#endif
206}
207
208
209/**
210 * Used by TB code when it wants to raise a \#NM.
211 */
212IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
213{
214 iemRaiseDeviceNotAvailableJmp(pVCpu);
215#ifndef _MSC_VER
216 return VINF_IEM_RAISED_XCPT; /* not reached */
217#endif
218}
219
220
221/**
222 * Used by TB code when it wants to raise a \#GP(0).
223 */
224IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
225{
226 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
227#ifndef _MSC_VER
228 return VINF_IEM_RAISED_XCPT; /* not reached */
229#endif
230}
231
232
233/**
234 * Used by TB code when it wants to raise a \#MF.
235 */
236IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
237{
238 iemRaiseMathFaultJmp(pVCpu);
239#ifndef _MSC_VER
240 return VINF_IEM_RAISED_XCPT; /* not reached */
241#endif
242}
243
244
245/**
246 * Used by TB code when it wants to raise a \#XF.
247 */
248IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
249{
250 iemRaiseSimdFpExceptionJmp(pVCpu);
251#ifndef _MSC_VER
252 return VINF_IEM_RAISED_XCPT; /* not reached */
253#endif
254}
255
256
257/**
258 * Used by TB code when detecting opcode changes.
259 * @see iemThreadeFuncWorkerObsoleteTb
260 */
261IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
262{
263 /* We set fSafeToFree to false because we're being called in the context
264 of a TB callback function, which for native TBs means we cannot release
265 the executable memory till we've returned our way back to iemTbExec, as
266 that return path goes via the native code generated for the TB. */
267 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
268 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
269 return VINF_IEM_REEXEC_BREAK;
270}
271
272
273/**
274 * Used by TB code when we need to switch to a TB with CS.LIM checking.
275 */
276IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
277{
278 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
279 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
280 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
281 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
282 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
283 return VINF_IEM_REEXEC_BREAK;
284}
285
286
287/**
288 * Used by TB code when we missed a PC check after a branch.
289 */
290IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
291{
292 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
293 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
294 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
295 pVCpu->iem.s.pbInstrBuf));
296 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
297 return VINF_IEM_REEXEC_BREAK;
298}
299
300
301
302/*********************************************************************************************************************************
303* Helpers: Segmented memory fetches and stores. *
304*********************************************************************************************************************************/
305
306/**
307 * Used by TB code to load unsigned 8-bit data w/ segmentation.
308 */
309IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
310{
311#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
312 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
313#else
314 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
315#endif
316}
317
318
319/**
320 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
321 * to 16 bits.
322 */
323IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
324{
325#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
326 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
327#else
328 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
329#endif
330}
331
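/*
 * Editorial note (not part of the original source): the cast chain above first
 * sign-extends the byte to 16 bits and then zero-extends the 16-bit result to
 * 64 bits, e.g. 0x80 -> (int8_t)-128 -> (int16_t)0xff80 -> (uint16_t)0xff80
 * -> (uint64_t)0x000000000000ff80.  The other _Sx_ helpers below follow the
 * same pattern for their respective widths.  A minimal standalone sketch of
 * the conversion (hypothetical helper name, illustration only):
 */
#if 0 /* editorial illustration, not in the original file */
static uint64_t iemEditorialSignExtendU8ToU16Example(uint8_t bValue)
{
    int16_t  const i16Signed   = (int8_t)bValue;       /* sign-extend 8 -> 16 bits */
    uint16_t const u16Unsigned = (uint16_t)i16Signed;   /* reinterpret as unsigned 16-bit */
    return (uint64_t)u16Unsigned;                        /* zero-extend 16 -> 64 bits */
}
#endif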
332
333/**
334 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
335 * to 32 bits.
336 */
337IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
338{
339#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
340 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
341#else
342 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
343#endif
344}
345
346/**
347 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
348 * to 64 bits.
349 */
350IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
351{
352#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
353 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
354#else
355 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
356#endif
357}
358
359
360/**
361 * Used by TB code to load unsigned 16-bit data w/ segmentation.
362 */
363IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
364{
365#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
366 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
367#else
368 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
369#endif
370}
371
372
373/**
374 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
375 * to 32 bits.
376 */
377IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
378{
379#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
380 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
381#else
382 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
383#endif
384}
385
386
387/**
388 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
389 * to 64 bits.
390 */
391IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
392{
393#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
394 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
395#else
396 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
397#endif
398}
399
400
401/**
402 * Used by TB code to load unsigned 32-bit data w/ segmentation.
403 */
404IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
405{
406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
407 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
408#else
409 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
410#endif
411}
412
413
414/**
415 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
416 * to 64 bits.
417 */
418IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
419{
420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
421 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
422#else
423 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
424#endif
425}
426
427
428/**
429 * Used by TB code to load unsigned 64-bit data w/ segmentation.
430 */
431IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
432{
433#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
434 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
435#else
436 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
437#endif
438}
439
440
441#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
442/**
443 * Used by TB code to load 128-bit data w/ segmentation.
444 */
445IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
446{
447#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
448 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
449#else
450 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
451#endif
452}
453
454
455/**
456 * Used by TB code to load 128-bit data w/ segmentation, enforcing SSE alignment checks.
457 */
458IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
459{
460#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
461 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
462#else
463 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
464#endif
465}
466
467
468/**
469 * Used by TB code to load 128-bit data w/ segmentation, without alignment checks.
470 */
471IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
472{
473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
474 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
475#else
476 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
477#endif
478}
479
480
481/**
482 * Used by TB code to load 256-bit data w/ segmentation, without alignment checks.
483 */
484IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
485{
486#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
487 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
488#else
489 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
490#endif
491}
492
493
494/**
495 * Used by TB code to load 256-bit data w/ segmentation, enforcing AVX alignment checks.
496 */
497IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
498{
499#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
500 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
501#else
502 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
503#endif
504}
505#endif
506
507
508/**
509 * Used by TB code to store unsigned 8-bit data w/ segmentation.
510 */
511IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
512{
513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
514 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
515#else
516 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
517#endif
518}
519
520
521/**
522 * Used by TB code to store unsigned 16-bit data w/ segmentation.
523 */
524IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
525{
526#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
527 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
528#else
529 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
530#endif
531}
532
533
534/**
535 * Used by TB code to store unsigned 32-bit data w/ segmentation.
536 */
537IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
538{
539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
540 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
541#else
542 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
543#endif
544}
545
546
547/**
548 * Used by TB code to store unsigned 64-bit data w/ segmentation.
549 */
550IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
551{
552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
553 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
554#else
555 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
556#endif
557}
558
559
560#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
561/**
562 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing SSE alignment checks.
563 */
564IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
565{
566#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
567 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
568#else
569 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
570#endif
571}
572
573
574/**
575 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checks.
576 */
577IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
578{
579#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
580 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
581#else
582 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
583#endif
584}
585
586
587/**
588 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checks.
589 */
590IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
591{
592#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
593 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
594#else
595 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
596#endif
597}
598
599
600/**
601 * Used by TB code to store unsigned 256-bit data w/ segmentation, enforcing AVX alignment checks.
602 */
603IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
604{
605#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
606 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
607#else
608 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
609#endif
610}
611#endif
612
613
614
615/**
616 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
617 */
618IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
619{
620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
621 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
622#else
623 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
624#endif
625}
626
627
628/**
629 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
630 */
631IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
632{
633#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
634 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
635#else
636 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
637#endif
638}
639
640
641/**
642 * Used by TB code to store a 32-bit selector value onto a generic stack.
643 *
644 * Intel CPUs don't write a whole dword, thus the special function.
645 */
646IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
647{
648#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
649 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
650#else
651 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
652#endif
653}
654
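/*
 * Editorial note (not part of the original source): as the comment above says, pushing a
 * segment selector with a 32-bit operand size on the Intel CPUs being emulated only writes
 * the low 16 bits of the stack slot, leaving the upper 16 bits of the dword unchanged.
 * A hypothetical sketch of the difference between the two store helpers (illustration only):
 */
#if 0 /* editorial illustration, not in the original file */
static void iemEditorialSRegStoreExample(PVMCPUCC pVCpu, RTGCPTR GCPtrStackSlot)
{
    iemNativeHlpStackStoreU32(pVCpu, GCPtrStackSlot, 0x00000008);     /* writes all four bytes of the slot */
    iemNativeHlpStackStoreU32SReg(pVCpu, GCPtrStackSlot, 0x00000008); /* only the low word (0x0008) is written */
}
#endif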
655
656/**
657 * Used by TB code to push unsigned 64-bit value onto a generic stack.
658 */
659IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
660{
661#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
662 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
663#else
664 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
665#endif
666}
667
668
669/**
670 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
671 */
672IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
673{
674#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
675 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
676#else
677 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
678#endif
679}
680
681
682/**
683 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
684 */
685IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
686{
687#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
688 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
689#else
690 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
691#endif
692}
693
694
695/**
696 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
697 */
698IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
699{
700#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
701 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
702#else
703 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
704#endif
705}
706
707
708
709/*********************************************************************************************************************************
710* Helpers: Flat memory fetches and stores. *
711*********************************************************************************************************************************/
712
713/**
714 * Used by TB code to load unsigned 8-bit data w/ flat address.
715 * @note Zero extending the value to 64-bit to simplify assembly.
716 */
717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
718{
719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
720 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
721#else
722 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
723#endif
724}
725
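/*
 * Editorial note (not part of the original source): the flat helpers in this section pass
 * UINT8_MAX as the segment register index to the common Safe fetch/store workers, which the
 * IEM memory code treats as "no segment / flat addressing".  A hypothetical sketch of the
 * intended equivalence, assuming a guest with a zero-based flat DS (e.g. typical 64-bit mode):
 */
#if 0 /* editorial illustration, not in the original file */
static void iemEditorialFlatVsSegmentedExample(PVMCPUCC pVCpu, RTGCPTR GCPtrMem)
{
    uint64_t const uSegmented = iemNativeHlpMemFetchDataU8(pVCpu, GCPtrMem, X86_SREG_DS);
    uint64_t const uFlat      = iemNativeHlpMemFlatFetchDataU8(pVCpu, GCPtrMem);
    Assert(uSegmented == uFlat); /* both read the same guest byte under the stated assumptions */
}
#endif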
726
727/**
728 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
729 * to 16 bits.
730 * @note Zero extending the value to 64-bit to simplify assembly.
731 */
732IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
733{
734#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
735 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
736#else
737 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
738#endif
739}
740
741
742/**
743 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
744 * to 32 bits.
745 * @note Zero extending the value to 64-bit to simplify assembly.
746 */
747IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
748{
749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
750 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
751#else
752 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
753#endif
754}
755
756
757/**
758 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
759 * to 64 bits.
760 */
761IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
762{
763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
764 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
765#else
766 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
767#endif
768}
769
770
771/**
772 * Used by TB code to load unsigned 16-bit data w/ flat address.
773 * @note Zero extending the value to 64-bit to simplify assembly.
774 */
775IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
776{
777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
778 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
779#else
780 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
781#endif
782}
783
784
785/**
786 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
787 * to 32 bits.
788 * @note Zero extending the value to 64-bit to simplify assembly.
789 */
790IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
791{
792#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
793 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
794#else
795 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
796#endif
797}
798
799
800/**
801 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
802 * to 64 bits.
803 * @note Zero extending the value to 64-bit to simplify assembly.
804 */
805IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
806{
807#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
808 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
809#else
810 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
811#endif
812}
813
814
815/**
816 * Used by TB code to load unsigned 32-bit data w/ flat address.
817 * @note Zero extending the value to 64-bit to simplify assembly.
818 */
819IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
820{
821#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
822 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
823#else
824 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
825#endif
826}
827
828
829/**
830 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
831 * to 64 bits.
832 * @note Zero extending the value to 64-bit to simplify assembly.
833 */
834IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
835{
836#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
837 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
838#else
839 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
840#endif
841}
842
843
844/**
845 * Used by TB code to load unsigned 64-bit data w/ flat address.
846 */
847IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
848{
849#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
850 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
851#else
852 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
853#endif
854}
855
856
857#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
858/**
859 * Used by TB code to load unsigned 128-bit data w/ flat address.
860 */
861IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
862{
863#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
864 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
865#else
866 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
867#endif
868}
869
870
871/**
872 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing SSE alignment checks.
873 */
874IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
875{
876#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
877 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
878#else
879 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
880#endif
881}
882
883
884/**
885 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checks.
886 */
887IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
888{
889#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
890 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
891#else
892 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
893#endif
894}
895
896
897/**
898 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checks.
899 */
900IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
901{
902#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
903 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
904#else
905 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
906#endif
907}
908
909
910/**
911 * Used by TB code to load unsigned 256-bit data w/ flat address, enforcing AVX alignment checks.
912 */
913IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
914{
915#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
916 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
917#else
918 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
919#endif
920}
921#endif
922
923
924/**
925 * Used by TB code to store unsigned 8-bit data w/ flat address.
926 */
927IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
928{
929#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
930 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
931#else
932 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
933#endif
934}
935
936
937/**
938 * Used by TB code to store unsigned 16-bit data w/ flat address.
939 */
940IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
941{
942#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
943 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
944#else
945 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
946#endif
947}
948
949
950/**
951 * Used by TB code to store unsigned 32-bit data w/ flat address.
952 */
953IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
954{
955#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
956 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
957#else
958 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
959#endif
960}
961
962
963/**
964 * Used by TB code to store unsigned 64-bit data w/ flat address.
965 */
966IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
967{
968#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
969 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
970#else
971 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
972#endif
973}
974
975
976#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
977/**
978 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing SSE alignment checks.
979 */
980IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
981{
982#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
983 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
984#else
985 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
986#endif
987}
988
989
990/**
991 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checks.
992 */
993IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
994{
995#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
996 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
997#else
998 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
999#endif
1000}
1001
1002
1003/**
1004 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checks.
1005 */
1006IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1007{
1008#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1009 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1010#else
1011 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1012#endif
1013}
1014
1015
1016/**
1017 * Used by TB code to store unsigned 256-bit data w/ flat address, enforcing AVX alignment checks.
1018 */
1019IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1020{
1021#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1022 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1023#else
1024 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1025#endif
1026}
1027#endif
1028
1029
1030
1031/**
1032 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1033 */
1034IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1035{
1036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1037 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1038#else
1039 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1040#endif
1041}
1042
1043
1044/**
1045 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1046 */
1047IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1048{
1049#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1050 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1051#else
1052 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1053#endif
1054}
1055
1056
1057/**
1058 * Used by TB code to store a segment selector value onto a flat stack.
1059 *
1060 * Intel CPUs don't write a whole dword, thus the special function.
1061 */
1062IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1063{
1064#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1065 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1066#else
1067 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1068#endif
1069}
1070
1071
1072/**
1073 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1074 */
1075IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1076{
1077#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1078 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1079#else
1080 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1081#endif
1082}
1083
1084
1085/**
1086 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1087 */
1088IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1089{
1090#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1091 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1092#else
1093 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1094#endif
1095}
1096
1097
1098/**
1099 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1100 */
1101IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1102{
1103#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1104 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1105#else
1106 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1107#endif
1108}
1109
1110
1111/**
1112 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1113 */
1114IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1115{
1116#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1117 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1118#else
1119 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1120#endif
1121}
1122
1123
1124
1125/*********************************************************************************************************************************
1126* Helpers: Segmented memory mapping. *
1127*********************************************************************************************************************************/
1128
1129/**
1130 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1131 * segmentation.
1132 */
1133IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1134 RTGCPTR GCPtrMem, uint8_t iSegReg))
1135{
1136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1137 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1138#else
1139 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1140#endif
1141}
1142
1143
1144/**
1145 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1146 */
1147IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1148 RTGCPTR GCPtrMem, uint8_t iSegReg))
1149{
1150#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1151 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1152#else
1153 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1154#endif
1155}
1156
1157
1158/**
1159 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1160 */
1161IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1162 RTGCPTR GCPtrMem, uint8_t iSegReg))
1163{
1164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1165 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1166#else
1167 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1168#endif
1169}
1170
1171
1172/**
1173 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1174 */
1175IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1176 RTGCPTR GCPtrMem, uint8_t iSegReg))
1177{
1178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1179 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1180#else
1181 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1182#endif
1183}
1184
1185
1186/**
1187 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1188 * segmentation.
1189 */
1190IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1191 RTGCPTR GCPtrMem, uint8_t iSegReg))
1192{
1193#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1194 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1195#else
1196 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1197#endif
1198}
1199
1200
1201/**
1202 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1203 */
1204IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1205 RTGCPTR GCPtrMem, uint8_t iSegReg))
1206{
1207#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1208 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1209#else
1210 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1211#endif
1212}
1213
1214
1215/**
1216 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1217 */
1218IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1219 RTGCPTR GCPtrMem, uint8_t iSegReg))
1220{
1221#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1222 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1223#else
1224 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1225#endif
1226}
1227
1228
1229/**
1230 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1231 */
1232IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1233 RTGCPTR GCPtrMem, uint8_t iSegReg))
1234{
1235#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1236 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1237#else
1238 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1239#endif
1240}
1241
1242
1243/**
1244 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1245 * segmentation.
1246 */
1247IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1248 RTGCPTR GCPtrMem, uint8_t iSegReg))
1249{
1250#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1251 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1252#else
1253 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1254#endif
1255}
1256
1257
1258/**
1259 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1260 */
1261IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1262 RTGCPTR GCPtrMem, uint8_t iSegReg))
1263{
1264#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1265 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1266#else
1267 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1268#endif
1269}
1270
1271
1272/**
1273 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1274 */
1275IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1276 RTGCPTR GCPtrMem, uint8_t iSegReg))
1277{
1278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1279 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1280#else
1281 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1282#endif
1283}
1284
1285
1286/**
1287 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1288 */
1289IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1290 RTGCPTR GCPtrMem, uint8_t iSegReg))
1291{
1292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1293 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1294#else
1295 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1296#endif
1297}
1298
1299
1300/**
1301 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1302 * segmentation.
1303 */
1304IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1305 RTGCPTR GCPtrMem, uint8_t iSegReg))
1306{
1307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1308 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1309#else
1310 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1311#endif
1312}
1313
1314
1315/**
1316 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1317 */
1318IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1319 RTGCPTR GCPtrMem, uint8_t iSegReg))
1320{
1321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1322 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1323#else
1324 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1325#endif
1326}
1327
1328
1329/**
1330 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1331 */
1332IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1333 RTGCPTR GCPtrMem, uint8_t iSegReg))
1334{
1335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1336 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1337#else
1338 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1339#endif
1340}
1341
1342
1343/**
1344 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1345 */
1346IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1347 RTGCPTR GCPtrMem, uint8_t iSegReg))
1348{
1349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1350 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1351#else
1352 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1353#endif
1354}
1355
1356
1357/**
1358 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1359 */
1360IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1361 RTGCPTR GCPtrMem, uint8_t iSegReg))
1362{
1363#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1364 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1365#else
1366 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1367#endif
1368}
1369
1370
1371/**
1372 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1373 */
1374IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1375 RTGCPTR GCPtrMem, uint8_t iSegReg))
1376{
1377#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1378 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1379#else
1380 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1381#endif
1382}
1383
1384
1385/**
1386 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1387 * segmentation.
1388 */
1389IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1390 RTGCPTR GCPtrMem, uint8_t iSegReg))
1391{
1392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1393 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1394#else
1395 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1396#endif
1397}
1398
1399
1400/**
1401 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1402 */
1403IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1404 RTGCPTR GCPtrMem, uint8_t iSegReg))
1405{
1406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1407 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1408#else
1409 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1410#endif
1411}
1412
1413
1414/**
1415 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1416 */
1417IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1418 RTGCPTR GCPtrMem, uint8_t iSegReg))
1419{
1420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1421 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1422#else
1423 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1424#endif
1425}
1426
1427
1428/**
1429 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1430 */
1431IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1432 RTGCPTR GCPtrMem, uint8_t iSegReg))
1433{
1434#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1435 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1436#else
1437 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1438#endif
1439}
1440
1441
1442/*********************************************************************************************************************************
1443* Helpers: Flat memory mapping. *
1444*********************************************************************************************************************************/
1445
1446/**
1447 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1448 * address.
1449 */
1450IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1451{
1452#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1453 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1454#else
1455 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1456#endif
1457}
1458
1459
1460/**
1461 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1462 */
1463IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1464{
1465#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1466 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1467#else
1468 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1469#endif
1470}
1471
1472
1473/**
1474 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1475 */
1476IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1477{
1478#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1479 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1480#else
1481 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1482#endif
1483}
1484
1485
1486/**
1487 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1488 */
1489IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1490{
1491#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1492 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1493#else
1494 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1495#endif
1496}
1497
1498
1499/**
1500 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1501 * address.
1502 */
1503IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1504{
1505#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1506 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1507#else
1508 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1509#endif
1510}
1511
1512
1513/**
1514 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1515 */
1516IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1517{
1518#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1519 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1520#else
1521 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1522#endif
1523}
1524
1525
1526/**
1527 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1528 */
1529IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1530{
1531#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1532 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1533#else
1534 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1535#endif
1536}
1537
1538
1539/**
1540 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1541 */
1542IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1543{
1544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1545 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1546#else
1547 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1548#endif
1549}
1550
1551
1552/**
1553 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1554 * address.
1555 */
1556IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1557{
1558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1559 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1560#else
1561 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1562#endif
1563}
1564
1565
1566/**
1567 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1568 */
1569IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1570{
1571#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1572 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1573#else
1574 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1575#endif
1576}
1577
1578
1579/**
1580 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1581 */
1582IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1583{
1584#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1585 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1586#else
1587 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1588#endif
1589}
1590
1591
1592/**
1593 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1594 */
1595IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1596{
1597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1598 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1599#else
1600 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1607 * address.
1608 */
1609IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1610{
1611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1612 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1613#else
1614 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1615#endif
1616}
1617
1618
1619/**
1620 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1621 */
1622IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1623{
1624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1625 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1626#else
1627 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1628#endif
1629}
1630
1631
1632/**
1633 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1634 */
1635IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1636{
1637#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1638 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1639#else
1640 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1641#endif
1642}
1643
1644
1645/**
1646 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1647 */
1648IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1649{
1650#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1651 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1652#else
1653 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1654#endif
1655}
1656
1657
1658/**
1659 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1662{
1663#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1664 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1665#else
1666 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1667#endif
1668}
1669
1670
1671/**
1672 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1675{
1676#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1677 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1678#else
1679 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1680#endif
1681}
1682
1683
1684/**
1685 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1686 * address.
1687 */
1688IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1689{
1690#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1691 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1692#else
1693 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1694#endif
1695}
1696
1697
1698/**
1699 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1700 */
1701IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1702{
1703#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1704 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1705#else
1706 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1707#endif
1708}
1709
1710
1711/**
1712 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1713 */
1714IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1715{
1716#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1717 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1718#else
1719 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1720#endif
1721}
1722
1723
1724/**
1725 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1726 */
1727IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1728{
1729#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1730 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1731#else
1732 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1733#endif
1734}
1735
1736
1737/*********************************************************************************************************************************
1738* Helpers: Commit, rollback & unmap *
1739*********************************************************************************************************************************/
1740
1741/**
1742 * Used by TB code to commit and unmap an atomic read-write memory mapping.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1745{
1746 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
1747}
1748
1749
1750/**
1751 * Used by TB code to commit and unmap a read-write memory mapping.
1752 */
1753IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1754{
1755 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
1756}
1757
1758
1759/**
1760 * Used by TB code to commit and unmap a write-only memory mapping.
1761 */
1762IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1763{
1764 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
1765}
1766
1767
1768/**
1769 * Used by TB code to commit and unmap a read-only memory mapping.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
1772{
1773 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
1774}
1775
1776
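/*
 * Illustrative sketch (not built, not part of the recompiler): how the flat
 * map helpers and the commit-and-unmap helpers above pair up when invoked
 * from TB code.  Only the helper names come from this file; the wrapping
 * function and variable names are made up for the example.
 */
#if 0
static void iemNativeExampleMapAndCommit(PVMCPUCC pVCpu, RTGCPTR GCPtrMem)
{
    uint8_t   bUnmapInfo = 0;
    uint32_t *pu32       = iemNativeHlpMemFlatMapDataU32Rw(pVCpu, &bUnmapInfo, GCPtrMem);
    *pu32 += 1;                                         /* access guest memory through the mapping */
    iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo); /* commit the write and release the mapping */
}
#endif

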
1777/**
1778 * Reinitializes the native recompiler state.
1779 *
1780 * Called before starting a new recompile job.
1781 */
1782static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1783{
1784 pReNative->cLabels = 0;
1785 pReNative->bmLabelTypes = 0;
1786 pReNative->cFixups = 0;
1787#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1788 pReNative->pDbgInfo->cEntries = 0;
1789 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
1790#endif
1791 pReNative->pTbOrg = pTb;
1792 pReNative->cCondDepth = 0;
1793 pReNative->uCondSeqNo = 0;
1794 pReNative->uCheckIrqSeqNo = 0;
1795 pReNative->uTlbSeqNo = 0;
1796
1797#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1798 pReNative->Core.offPc = 0;
1799 pReNative->Core.cInstrPcUpdateSkipped = 0;
1800#endif
1801#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1802 pReNative->fSimdRaiseXcptChecksEmitted = 0;
1803#endif
1804 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1805#if IEMNATIVE_HST_GREG_COUNT < 32
1806 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1807#endif
1808 ;
1809 pReNative->Core.bmHstRegsWithGstShadow = 0;
1810 pReNative->Core.bmGstRegShadows = 0;
1811#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1812 pReNative->Core.bmGstRegShadowDirty = 0;
1813#endif
1814 pReNative->Core.bmVars = 0;
1815 pReNative->Core.bmStack = 0;
1816 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
1817 pReNative->Core.u64ArgVars = UINT64_MAX;
1818
1819 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
1820 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
1821 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
1822 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
1823 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
1824 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
1825 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
1826 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
1827 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
1828 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
1829 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
1830 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
1831 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
1832 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
1833 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
1834 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
1835 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
1836 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
1837
1838 /* Full host register reinit: */
1839 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
1840 {
1841 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
1842 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1843 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
1844 }
1845
1846 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1847 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1848#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1849 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1850#endif
1851#ifdef IEMNATIVE_REG_FIXED_TMP0
1852 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1853#endif
1854#ifdef IEMNATIVE_REG_FIXED_TMP1
1855 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
1856#endif
1857#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1858 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
1859#endif
1860 );
1861 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1862 {
1863 fRegs &= ~RT_BIT_32(idxReg);
1864 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1865 }
1866
1867 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1868#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1869 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1870#endif
1871#ifdef IEMNATIVE_REG_FIXED_TMP0
1872 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1873#endif
1874#ifdef IEMNATIVE_REG_FIXED_TMP1
1875 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
1876#endif
1877#ifdef IEMNATIVE_REG_FIXED_PC_DBG
1878 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
1879#endif
1880
1881#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1882 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
1883# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
1884 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
1885# endif
1886 ;
1887 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
1888 pReNative->Core.bmGstSimdRegShadows = 0;
1889 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
1890 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
1891
1892 /* Full host register reinit: */
1893 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
1894 {
1895 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
1896 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
1897 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
1898 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
1899 }
1900
1901 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
1902 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1903 {
1904 fRegs &= ~RT_BIT_32(idxReg);
1905 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1906 }
1907
1908#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
1909 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1910#endif
1911
1912#endif
1913
1914 return pReNative;
1915}
1916
1917
1918/**
1919 * Allocates and initializes the native recompiler state.
1920 *
1921 * This is called the first time an EMT wants to recompile something.
1922 *
1923 * @returns Pointer to the new recompiler state.
1924 * @param pVCpu The cross context virtual CPU structure of the calling
1925 * thread.
1926 * @param pTb The TB that's about to be recompiled.
1927 * @thread EMT(pVCpu)
1928 */
1929static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1930{
1931 VMCPU_ASSERT_EMT(pVCpu);
1932
1933 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1934 AssertReturn(pReNative, NULL);
1935
1936 /*
1937 * Try allocate all the buffers and stuff we need.
1938 */
1939 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1940 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1941 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1942#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1943 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1944#endif
1945 if (RT_LIKELY( pReNative->pInstrBuf
1946 && pReNative->paLabels
1947 && pReNative->paFixups)
1948#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1949 && pReNative->pDbgInfo
1950#endif
1951 )
1952 {
1953 /*
1954 * Set the buffer & array sizes on success.
1955 */
1956 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1957 pReNative->cLabelsAlloc = _8K;
1958 pReNative->cFixupsAlloc = _16K;
1959#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1960 pReNative->cDbgInfoAlloc = _16K;
1961#endif
1962
1963 /* Other constant stuff: */
1964 pReNative->pVCpu = pVCpu;
1965
1966 /*
1967 * Done, just need to save it and reinit it.
1968 */
1969 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1970 return iemNativeReInit(pReNative, pTb);
1971 }
1972
1973 /*
1974 * Failed. Cleanup and return.
1975 */
1976 AssertFailed();
1977 RTMemFree(pReNative->pInstrBuf);
1978 RTMemFree(pReNative->paLabels);
1979 RTMemFree(pReNative->paFixups);
1980#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1981 RTMemFree(pReNative->pDbgInfo);
1982#endif
1983 RTMemFree(pReNative);
1984 return NULL;
1985}
1986
1987
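/*
 * Illustrative sketch (not built): how the two functions above are typically
 * combined at the start of a recompile job - reuse the per-EMT state if it
 * has been allocated already, otherwise allocate and initialize it.  The
 * wrapping function name is made up for the example.
 */
#if 0
static PIEMRECOMPILERSTATE iemNativeExampleGetState(PVMCPUCC pVCpu, PCIEMTB pTb)
{
    PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    if (RT_LIKELY(pReNative))
        return iemNativeReInit(pReNative, pTb);
    return iemNativeInit(pVCpu, pTb);
}
#endif

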
1988/**
1989 * Creates a label.
1990 *
1991 * If the label does not yet have a defined position,
1992 * call iemNativeLabelDefine() later to set it.
1993 *
1994 * @returns Label ID. Throws VBox status code on failure, so no need to check
1995 * the return value.
1996 * @param pReNative The native recompile state.
1997 * @param enmType The label type.
1998 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1999 * label is not yet defined (default).
2000 * @param uData Data associated with the label. Only applicable to
2001 * certain types of labels. Default is zero.
2002 */
2003DECL_HIDDEN_THROW(uint32_t)
2004iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2005 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2006{
2007 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2008
2009 /*
2010 * Locate existing label definition.
2011 *
2012 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2013 * and uData is zero.
2014 */
2015 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2016 uint32_t const cLabels = pReNative->cLabels;
2017 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2018#ifndef VBOX_STRICT
2019 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2020 && offWhere == UINT32_MAX
2021 && uData == 0
2022#endif
2023 )
2024 {
2025#ifndef VBOX_STRICT
2026 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2027 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2028 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2029 if (idxLabel < pReNative->cLabels)
2030 return idxLabel;
2031#else
2032 for (uint32_t i = 0; i < cLabels; i++)
2033 if ( paLabels[i].enmType == enmType
2034 && paLabels[i].uData == uData)
2035 {
2036 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2037 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2038 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2039 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2040 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2041 return i;
2042 }
2043 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2044 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2045#endif
2046 }
2047
2048 /*
2049 * Make sure we've got room for another label.
2050 */
2051 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2052 { /* likely */ }
2053 else
2054 {
2055 uint32_t cNew = pReNative->cLabelsAlloc;
2056 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2057 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2058 cNew *= 2;
2059 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* The IEMNATIVEFIXUP::idxLabel type restricts this. */
2060 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2061 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2062 pReNative->paLabels = paLabels;
2063 pReNative->cLabelsAlloc = cNew;
2064 }
2065
2066 /*
2067 * Define a new label.
2068 */
2069 paLabels[cLabels].off = offWhere;
2070 paLabels[cLabels].enmType = enmType;
2071 paLabels[cLabels].uData = uData;
2072 pReNative->cLabels = cLabels + 1;
2073
2074 Assert((unsigned)enmType < 64);
2075 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2076
2077 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2078 {
2079 Assert(uData == 0);
2080 pReNative->aidxUniqueLabels[enmType] = cLabels;
2081 }
2082
2083 if (offWhere != UINT32_MAX)
2084 {
2085#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2086 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2087 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2088#endif
2089 }
2090 return cLabels;
2091}
2092
2093
2094/**
2095 * Defines the location of an existing label.
2096 *
2097 * @param pReNative The native recompile state.
2098 * @param idxLabel The label to define.
2099 * @param offWhere The position.
2100 */
2101DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2102{
2103 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2104 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2105 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2106 pLabel->off = offWhere;
2107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2108 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2109 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2110#endif
2111}
2112
2113
2114/**
2115 * Looks up a label.
2116 *
2117 * @returns Label ID if found, UINT32_MAX if not.
2118 */
2119static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2120 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
2121{
2122 Assert((unsigned)enmType < 64);
2123 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2124 {
2125 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2126 return pReNative->aidxUniqueLabels[enmType];
2127
2128 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2129 uint32_t const cLabels = pReNative->cLabels;
2130 for (uint32_t i = 0; i < cLabels; i++)
2131 if ( paLabels[i].enmType == enmType
2132 && paLabels[i].uData == uData
2133 && ( paLabels[i].off == offWhere
2134 || offWhere == UINT32_MAX
2135 || paLabels[i].off == UINT32_MAX))
2136 return i;
2137 }
2138 return UINT32_MAX;
2139}
2140
2141
2142/**
2143 * Adds a fixup.
2144 *
2145 * @throws VBox status code (int) on failure.
2146 * @param pReNative The native recompile state.
2147 * @param offWhere The instruction offset of the fixup location.
2148 * @param idxLabel The target label ID for the fixup.
2149 * @param enmType The fixup type.
2150 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2151 */
2152DECL_HIDDEN_THROW(void)
2153iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2154 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2155{
2156 Assert(idxLabel <= UINT16_MAX);
2157 Assert((unsigned)enmType <= UINT8_MAX);
2158#ifdef RT_ARCH_ARM64
2159 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2160 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2161 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2162#endif
2163
2164 /*
2165 * Make sure we've room.
2166 */
2167 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2168 uint32_t const cFixups = pReNative->cFixups;
2169 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2170 { /* likely */ }
2171 else
2172 {
2173 uint32_t cNew = pReNative->cFixupsAlloc;
2174 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2175 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2176 cNew *= 2;
2177 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2178 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2179 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2180 pReNative->paFixups = paFixups;
2181 pReNative->cFixupsAlloc = cNew;
2182 }
2183
2184 /*
2185 * Add the fixup.
2186 */
2187 paFixups[cFixups].off = offWhere;
2188 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2189 paFixups[cFixups].enmType = enmType;
2190 paFixups[cFixups].offAddend = offAddend;
2191 pReNative->cFixups = cFixups + 1;
2192}
2193
2194
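/*
 * Illustrative sketch (not built): the usual lifecycle of a forward-referenced
 * label using the functions above.  The branch emitter named here is the one
 * from IEMN8veRecompilerEmit.h and registers the fixup internally via
 * iemNativeAddFixup(); the label type and the wrapping function are picked
 * arbitrarily for the example.
 */
#if 0
static uint32_t iemNativeExampleForwardBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Create the label without a position (offWhere = UINT32_MAX = not yet defined). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX, 0);

    /* Emit a branch to it; this adds a fixup record referencing the label. */
    off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel);

    /* ... emit the code being jumped over ... */

    /* Define the label once the target position is known, so the fixup can be
       resolved when the TB is finalized. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
    return off;
}
#endif

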
2195/**
2196 * Slow code path for iemNativeInstrBufEnsure.
2197 */
2198DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2199{
2200 /* Double the buffer size till we meet the request. */
2201 uint32_t cNew = pReNative->cInstrBufAlloc;
2202 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2203 do
2204 cNew *= 2;
2205 while (cNew < off + cInstrReq);
2206
2207 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2208#ifdef RT_ARCH_ARM64
2209 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2210#else
2211 uint32_t const cbMaxInstrBuf = _2M;
2212#endif
2213 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2214
2215 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2216 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2217
2218#ifdef VBOX_STRICT
2219 pReNative->offInstrBufChecked = off + cInstrReq;
2220#endif
2221 pReNative->cInstrBufAlloc = cNew;
2222 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2223}
2224
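/*
 * Illustrative sketch (not built): emitters normally do not call the slow path
 * above directly but go through the inline iemNativeInstrBufEnsure() wrapper
 * from IEMN8veRecompiler.h, which only falls back to the slow path when the
 * buffer is too small.  The emitted bytes and the wrapping function are made
 * up for the example.
 */
#if 0
static uint32_t iemNativeExampleEmitNop(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1); /* ensure room for one unit */
# ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0x90;                 /* nop */
# elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = UINT32_C(0xd503201f); /* nop */
# endif
    return off;
}
#endif
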
2225#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2226
2227/**
2228 * Grows the static debug info array used during recompilation.
2229 *
2230 * @returns Pointer to the new debug info block; throws VBox status code on
2231 * failure, so no need to check the return value.
2232 */
2233DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2234{
2235 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2236 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2237 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2238 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2239 pReNative->pDbgInfo = pDbgInfo;
2240 pReNative->cDbgInfoAlloc = cNew;
2241 return pDbgInfo;
2242}
2243
2244
2245/**
2246 * Adds a new debug info uninitialized entry, returning the pointer to it.
2247 */
2248DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2249{
2250 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2251 { /* likely */ }
2252 else
2253 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2254 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2255}
2256
2257
2258/**
2259 * Debug Info: Adds a native offset record, if necessary.
2260 */
2261DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2262{
2263 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2264
2265 /*
2266 * Do we need this one?
2267 */
2268 uint32_t const offPrev = pDbgInfo->offNativeLast;
2269 if (offPrev == off)
2270 return;
2271 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2272
2273 /*
2274 * Add it.
2275 */
2276 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2277 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2278 pEntry->NativeOffset.offNative = off;
2279 pDbgInfo->offNativeLast = off;
2280}
2281
2282
2283/**
2284 * Debug Info: Record info about a label.
2285 */
2286static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2287{
2288 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2289 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2290 pEntry->Label.uUnused = 0;
2291 pEntry->Label.enmLabel = (uint8_t)enmType;
2292 pEntry->Label.uData = uData;
2293}
2294
2295
2296/**
2297 * Debug Info: Record info about a threaded call.
2298 */
2299static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2300{
2301 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2302 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2303 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2304 pEntry->ThreadedCall.uUnused = 0;
2305 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2306}
2307
2308
2309/**
2310 * Debug Info: Record info about a new guest instruction.
2311 */
2312static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2313{
2314 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2315 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2316 pEntry->GuestInstruction.uUnused = 0;
2317 pEntry->GuestInstruction.fExec = fExec;
2318}
2319
2320
2321/**
2322 * Debug Info: Record info about guest register shadowing.
2323 */
2324DECL_HIDDEN_THROW(void)
2325iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2326 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2327{
2328 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2329 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2330 pEntry->GuestRegShadowing.uUnused = 0;
2331 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2332 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2333 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2334#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2335 Assert( idxHstReg != UINT8_MAX
2336 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2337#endif
2338}
2339
2340
2341# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2342/**
2343 * Debug Info: Record info about guest SIMD register shadowing.
2344 */
2345DECL_HIDDEN_THROW(void)
2346iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2347 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2348{
2349 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2350 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2351 pEntry->GuestSimdRegShadowing.uUnused = 0;
2352 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2353 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2354 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2355}
2356# endif
2357
2358
2359# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2360/**
2361 * Debug Info: Record info about delayed RIP updates.
2362 */
2363DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
2364{
2365 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2366 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2367 pEntry->DelayedPcUpdate.offPc = offPc;
2368 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2369}
2370# endif
2371
2372# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2373
2374/**
2375 * Debug Info: Record info about a dirty guest register.
2376 */
2377DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2378 uint8_t idxGstReg, uint8_t idxHstReg)
2379{
2380 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2381 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2382 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2383 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2384 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2385}
2386
2387
2388/**
2389 * Debug Info: Record info about a dirty guest register writeback operation.
2390 */
2391DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2392{
2393 unsigned const cBitsGstRegMask = 25;
2394 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2395
2396 /* The first block of 25 bits: */
2397 if (fGstReg & fGstRegMask)
2398 {
2399 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2400 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2401 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2402 pEntry->GuestRegWriteback.cShift = 0;
2403 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2404 fGstReg &= ~(uint64_t)fGstRegMask;
2405 if (!fGstReg)
2406 return;
2407 }
2408
2409 /* The second block of 25 bits: */
2410 fGstReg >>= cBitsGstRegMask;
2411 if (fGstReg & fGstRegMask)
2412 {
2413 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2414 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2415 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2416 pEntry->GuestRegWriteback.cShift = 1;
2417 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2418 fGstReg &= ~(uint64_t)fGstRegMask;
2419 if (!fGstReg)
2420 return;
2421 }
2422
2423 /* The last block with 14 bits: */
2424 fGstReg >>= cBitsGstRegMask;
2425 Assert(fGstReg & fGstRegMask);
2426 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2427 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2428 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2429 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2430 pEntry->GuestRegWriteback.cShift = 2;
2431 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2432}
2433
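/*
 * Illustrative sketch (not built): how a consumer could reassemble the 64-bit
 * dirty-register mask from the GuestRegWriteback entries produced above,
 * assuming cShift counts the number of 25-bit blocks the stored fragment was
 * shifted down by.  The function name is made up for the example.
 */
#  if 0
static uint64_t iemNativeExampleReassembleWritebackMask(IEMTBDBGENTRY const *paEntries, uint32_t cEntries)
{
    uint64_t fGstReg = 0;
    for (uint32_t i = 0; i < cEntries; i++)
        if (paEntries[i].GuestRegWriteback.uType == kIemTbDbgEntryType_GuestRegWriteback)
            fGstReg |= (uint64_t)paEntries[i].GuestRegWriteback.fGstReg << (paEntries[i].GuestRegWriteback.cShift * 25);
    return fGstReg;
}
#  endif
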
2434# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2435
2436#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2437
2438
2439/*********************************************************************************************************************************
2440* Register Allocator *
2441*********************************************************************************************************************************/
2442
2443/**
2444 * Register parameter indexes (indexed by argument number).
2445 */
2446DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2447{
2448 IEMNATIVE_CALL_ARG0_GREG,
2449 IEMNATIVE_CALL_ARG1_GREG,
2450 IEMNATIVE_CALL_ARG2_GREG,
2451 IEMNATIVE_CALL_ARG3_GREG,
2452#if defined(IEMNATIVE_CALL_ARG4_GREG)
2453 IEMNATIVE_CALL_ARG4_GREG,
2454# if defined(IEMNATIVE_CALL_ARG5_GREG)
2455 IEMNATIVE_CALL_ARG5_GREG,
2456# if defined(IEMNATIVE_CALL_ARG6_GREG)
2457 IEMNATIVE_CALL_ARG6_GREG,
2458# if defined(IEMNATIVE_CALL_ARG7_GREG)
2459 IEMNATIVE_CALL_ARG7_GREG,
2460# endif
2461# endif
2462# endif
2463#endif
2464};
2465AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2466
2467/**
2468 * Call register masks indexed by argument count.
2469 */
2470DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2471{
2472 0,
2473 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2474 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2475 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2476 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2477 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2478#if defined(IEMNATIVE_CALL_ARG4_GREG)
2479 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2480 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2481# if defined(IEMNATIVE_CALL_ARG5_GREG)
2482 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2483 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2484# if defined(IEMNATIVE_CALL_ARG6_GREG)
2485 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2486 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2487 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2488# if defined(IEMNATIVE_CALL_ARG7_GREG)
2489 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2490 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2491 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2492# endif
2493# endif
2494# endif
2495#endif
2496};
2497
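/*
 * Illustrative sketch (not built): how the two tables above are typically
 * consulted when marshalling arguments for a helper call - the first maps an
 * argument number to its host register, the second gives the register mask
 * occupied by the first N arguments.  The immediate-load emitter is the one
 * from IEMN8veRecompilerEmit.h; the wrapping function is made up.
 */
#if 0
static uint32_t iemNativeExampleLoadCallArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                             uint64_t const *pauArgs, uint8_t cArgs)
{
    Assert(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT);
    for (uint8_t iArg = 0; iArg < cArgs; iArg++)
        off = iemNativeEmitLoadGprImm64(pReNative, off, g_aidxIemNativeCallRegs[iArg], pauArgs[iArg]);
    uint32_t const fArgRegs = g_afIemNativeCallRegs[cArgs]; /* mask of registers now holding live arguments */
    RT_NOREF(fArgRegs);
    return off;
}
#endif
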
2498#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2499/**
2500 * BP offset of the stack argument slots.
2501 *
2502 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2503 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2504 */
2505DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2506{
2507 IEMNATIVE_FP_OFF_STACK_ARG0,
2508# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2509 IEMNATIVE_FP_OFF_STACK_ARG1,
2510# endif
2511# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2512 IEMNATIVE_FP_OFF_STACK_ARG2,
2513# endif
2514# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2515 IEMNATIVE_FP_OFF_STACK_ARG3,
2516# endif
2517};
2518AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2519#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2520
2521/**
2522 * Info about shadowed guest register values.
2523 * @see IEMNATIVEGSTREG
2524 */
2525DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2526{
2527#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2528 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2529 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2530 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2531 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2532 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2533 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2534 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2535 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2536 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2537 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2538 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2539 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2540 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2541 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2542 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2543 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2544 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2545 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2546 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2547 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2548 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2549 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2550 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2551 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2552 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2553 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2554 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2555 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2556 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2557 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2558 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2559 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2560 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2561 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2562 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2563 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2564 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2565 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2566 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2567 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2568 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2569 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2570 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2571 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2572 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2573 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2574 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2575 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2576#undef CPUMCTX_OFF_AND_SIZE
2577};
2578AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2579
2580
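/*
 * Illustrative sketch (not built): using the table above to read a shadowed
 * guest register value straight out of the VMCPU structure, e.g. for a debug
 * dump.  The function name is made up for the example.
 */
#if 0
static uint64_t iemNativeExampleReadGstReg(PVMCPUCC pVCpu, IEMNATIVEGSTREG enmGstReg)
{
    Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
    uint8_t const *pb = (uint8_t const *)pVCpu + g_aGstShadowInfo[enmGstReg].off;
    switch (g_aGstShadowInfo[enmGstReg].cb)
    {
        case sizeof(uint64_t): return *(uint64_t const *)pb;
        case sizeof(uint32_t): return *(uint32_t const *)pb;
        case sizeof(uint16_t): return *(uint16_t const *)pb;
        default: AssertFailedReturn(0);
    }
}
#endif

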
2581/** Host CPU general purpose register names. */
2582DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2583{
2584#ifdef RT_ARCH_AMD64
2585 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2586#elif defined(RT_ARCH_ARM64)
2587 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2588 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2589#else
2590# error "port me"
2591#endif
2592};
2593
2594
2595#if 0 /* unused */
2596/**
2597 * Tries to locate a suitable register in the given register mask.
2598 *
2599 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2600 * failed.
2601 *
2602 * @returns Host register number on success, returns UINT8_MAX on failure.
2603 */
2604static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2605{
2606 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2607 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2608 if (fRegs)
2609 {
2610 /** @todo pick better here: */
2611 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2612
2613 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2614 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2615 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2616 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2617
2618 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2619 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2620 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2621 return idxReg;
2622 }
2623 return UINT8_MAX;
2624}
2625#endif /* unused */
2626
2627
2628#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2629/**
2630 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2631 *
2632 * @returns New code buffer offset; throws VBox status code on failure.
2633 * @param pReNative The native recompile state.
2634 * @param off The current code buffer position.
2635 * @param enmGstReg The guest register to store to.
2636 * @param idxHstReg The host register to store from.
2637 */
2638DECL_FORCE_INLINE_THROW(uint32_t)
2639iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2640{
2641 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2642 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2643
2644 switch (g_aGstShadowInfo[enmGstReg].cb)
2645 {
2646 case sizeof(uint64_t):
2647 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2648 case sizeof(uint32_t):
2649 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2650 case sizeof(uint16_t):
2651 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2652#if 0 /* not present in the table. */
2653 case sizeof(uint8_t):
2654 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2655#endif
2656 default:
2657 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
2658 }
2659}
2660
2661
2662/**
2663 * Emits code to flush a pending write of the given guest register if any.
2664 *
2665 * @returns New code buffer offset.
2666 * @param pReNative The native recompile state.
2667 * @param off Current code buffer position.
2668 * @param enmGstReg The guest register to flush.
2669 */
2670DECL_HIDDEN_THROW(uint32_t)
2671iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
2672{
2673 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
2674
2675 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
2676 && enmGstReg <= kIemNativeGstReg_GprLast)
2677 || enmGstReg == kIemNativeGstReg_MxCsr);
2678 Assert( idxHstReg != UINT8_MAX
2679 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
2680 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
2681 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
2682
2683 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
2684
2685 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
2686 return off;
2687}
2688
2689
2690/**
2691 * Flush the given set of guest registers if marked as dirty.
2692 *
2693 * @returns New code buffer offset.
2694 * @param pReNative The native recompile state.
2695 * @param off Current code buffer position.
2696 * @param fFlushGstReg The guest register set to flush (default is flush everything).
2697 */
2698DECL_HIDDEN_THROW(uint32_t)
2699iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
2700{
2701 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
2702 if (bmGstRegShadowDirty)
2703 {
2704# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2705 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2706 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
2707# endif
2708 do
2709 {
2710 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2711 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2712 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2713 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2714 } while (bmGstRegShadowDirty);
2715 }
2716
2717 return off;
2718}
2719
2720
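/*
 * Illustrative sketch (not built): typical invocations of the flush helper
 * above - flush everything that is dirty (default mask) before calling code
 * that may inspect the guest context, or flush a single register by its enum
 * value.  The wrapping function is made up for the example.
 */
# if 0
static uint32_t iemNativeExampleFlushBeforeCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeRegFlushDirtyGuest(pReNative, off);                                 /* all dirty shadow copies */
    off = iemNativeRegFlushDirtyGuest(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc)); /* or: just a dirty RIP shadow */
    return off;
}
# endif

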
2721/**
2722 * Flush all shadowed guest registers marked as dirty for the given host register.
2723 *
2724 * @returns New code buffer offset.
2725 * @param pReNative The native recompile state.
2726 * @param off Current code buffer position.
2727 * @param idxHstReg The host register.
2728 *
2729 * @note This doesn't do any unshadowing of guest registers from the host register.
2730 */
2731DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
2732{
2733 /* We need to flush any pending guest register writes this host register shadows. */
2734 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
2735 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
2736 {
2737# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2738 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2739 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
2740# endif
2741 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
2742 * likely to only have a single bit set. It'll be in the 0..15 range,
2743 * but still it's 15 unnecessary loops for the last guest register. */
2744
2745 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
2746 do
2747 {
2748 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
2749 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
2750 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
2751 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
2752 } while (bmGstRegShadowDirty);
2753 }
2754
2755 return off;
2756}
2757#endif
2758
2759
2760/**
2761 * Locate a register, possibly freeing one up.
2762 *
2763 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2764 * failed.
2765 *
2766 * @returns Host register number on success. Returns UINT8_MAX if no registers
2767 * found, in which case the caller is supposed to deal with it and raise an
2768 * allocation type specific status code (if desired).
2769 *
2770 * @throws VBox status code if we run into trouble spilling a variable or
2771 * recording debug info. Does NOT throw anything if we're out of
2772 * registers, though.
2773 */
2774static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
2775 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
2776{
2777 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
2778 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2779 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
2780
2781 /*
2782 * Try a freed register that's shadowing a guest register.
2783 */
2784 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2785 if (fRegs)
2786 {
2787 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
2788
2789#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
2790 /*
2791 * When we have liveness information, we use it to kick out all shadowed
2792 * guest registers that will not be needed any more in this TB. If we're
2793 * lucky, this may prevent us from ending up here again.
2794 *
2795 * Note! We must consider the previous entry here so we don't free
2796 * anything that the current threaded function requires (current
2797 * entry is produced by the next threaded function).
2798 */
2799 uint32_t const idxCurCall = pReNative->idxCurCall;
2800 if (idxCurCall > 0)
2801 {
2802 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
2803
2804# ifndef IEMLIVENESS_EXTENDED_LAYOUT
2805 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
2806 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
2807 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
2808#else
2809 /* Construct a mask of the registers not in the read or write state.
2810 Note! We could skip writes, if they aren't from us, as this is just
2811 a hack to prevent trashing registers that have just been written
2812 or will be written when we retire the current instruction. */
2813 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
2814 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
2815 & IEMLIVENESSBIT_MASK;
2816#endif
2817 /* Merge EFLAGS. */
2818 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
2819 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
2820 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
2821 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
2822 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
2823
2824 /* If it matches any shadowed registers. */
2825 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
2826 {
2827#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2828 /* Writeback any dirty shadow registers we are about to unshadow. */
2829 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
2830#endif
2831
2832 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
2833 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
2834 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
2835
2836 /* See if we've got any unshadowed registers we can return now. */
2837 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
2838 if (fUnshadowedRegs)
2839 {
2840 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
2841 return (fPreferVolatile
2842 ? ASMBitFirstSetU32(fUnshadowedRegs)
2843 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2844 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
2845 - 1;
2846 }
2847 }
2848 }
2849#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
2850
2851 unsigned const idxReg = (fPreferVolatile
2852 ? ASMBitFirstSetU32(fRegs)
2853 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2854 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
2855 - 1;
2856
2857 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2858 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2859 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2860 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2861
2862#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2863 /* We need to flush any pending guest register writes this host register shadows. */
2864 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
2865#endif
2866
2867 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2868 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2869 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2870 return idxReg;
2871 }
2872
2873 /*
2874 * Try free up a variable that's in a register.
2875 *
2876 * We do two rounds here: in the first we evacuate variables that don't need
2877 * to be saved on the stack, in the second we move the remaining ones to the stack.
2878 */
2879 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
2880 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2881 {
2882 uint32_t fVars = pReNative->Core.bmVars;
2883 while (fVars)
2884 {
2885 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2886 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
2887#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2888 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
2889 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the variable bit first, otherwise this would loop forever on a SIMD variable. */
2890#endif
2891
2892 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
2893 && (RT_BIT_32(idxReg) & fRegMask)
2894 && ( iLoop == 0
2895 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2896 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2897 && !pReNative->Core.aVars[idxVar].fRegAcquired)
2898 {
2899 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
2900 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
2901 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2902 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
2903 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2904 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
2905#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2906 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2907#endif
2908
2909 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2910 {
2911 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
2912 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
2913 }
2914
2915 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2916 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
2917
2918 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2919 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2920 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2921 return idxReg;
2922 }
2923 fVars &= ~RT_BIT_32(idxVar);
2924 }
2925 }
2926
2927 return UINT8_MAX;
2928}
2929
2930
2931/**
2932 * Reassigns a variable to a different register specified by the caller.
2933 *
2934 * @returns The new code buffer position.
2935 * @param pReNative The native recompile state.
2936 * @param off The current code buffer position.
2937 * @param idxVar The variable index.
2938 * @param idxRegOld The old host register number.
2939 * @param idxRegNew The new host register number.
2940 * @param pszCaller The caller for logging.
2941 */
2942static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2943 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
2944{
2945 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2946 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
2947#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2948 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
2949#endif
2950 RT_NOREF(pszCaller);
2951
2952#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2953 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2954#endif
2955 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
2956
2957 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
2958#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2959 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
2960#endif
2961 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
2962 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
2963 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
2964
2965 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2966 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2967 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
2968 if (fGstRegShadows)
2969 {
2970 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
2971 | RT_BIT_32(idxRegNew);
2972 while (fGstRegShadows)
2973 {
2974 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2975 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2976
2977 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
2978 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
2979 }
2980 }
2981
2982 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
2983 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
2984 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
2985 return off;
2986}
2987
2988
2989/**
2990 * Moves a variable to a different register or spills it onto the stack.
2991 *
2992 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2993 * kinds can easily be recreated if needed later.
2994 *
2995 * @returns The new code buffer position.
2996 * @param pReNative The native recompile state.
2997 * @param off The current code buffer position.
2998 * @param idxVar The variable index.
2999 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3000 * call-volatile registers.
3001 */
3002DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3003 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3004{
3005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3006 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3007 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3008 Assert(!pVar->fRegAcquired);
3009
3010 uint8_t const idxRegOld = pVar->idxReg;
3011 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3012 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3013 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3014 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3015 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3016 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3017 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3018 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3019#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3020 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3021#endif
3022
3023
3024 /** @todo Add statistics on this. */
3025 /** @todo Implement basic variable liveness analysis (python) so variables
3026 * can be freed immediately once no longer used. Without it we risk
3027 * trashing registers and stack for dead variables.
3028 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3029
3030 /*
3031 * First try move it to a different register, as that's cheaper.
3032 */
3033 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3034 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3035 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3036 if (fRegs)
3037 {
3038 /* Avoid using shadow registers, if possible. */
3039 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3040 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3041 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3042 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3043 }
3044
3045 /*
3046 * Otherwise we must spill the register onto the stack.
3047 */
3048 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3049 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3050 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3051 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3052
3053 pVar->idxReg = UINT8_MAX;
3054 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3055 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3056 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3057 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3058 return off;
3059}
3060
3061
3062/**
3063 * Allocates a temporary host general purpose register.
3064 *
3065 * This may emit code to save register content onto the stack in order to free
3066 * up a register.
3067 *
3068 * @returns The host register number; throws VBox status code on failure,
3069 * so no need to check the return value.
3070 * @param pReNative The native recompile state.
3071 * @param poff Pointer to the variable with the code buffer position.
3072 * This will be update if we need to move a variable from
3073 * This will be updated if we need to move a variable from
3074 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3075 * registers (@c true, default) or the other way around
3076 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3077 */
3078DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3079{
3080 /*
3081 * Try find a completely unused register, preferably a call-volatile one.
3082 */
3083 uint8_t idxReg;
3084 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3085 & ~pReNative->Core.bmHstRegsWithGstShadow
3086 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3087 if (fRegs)
3088 {
3089 if (fPreferVolatile)
3090 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3091 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3092 else
3093 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3094 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3095 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3096 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3097 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3098 }
3099 else
3100 {
3101 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3102 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3103 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3104 }
3105 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3106}
3107
3108
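/*
 * Illustrative sketch, not part of the recompiler: the basic pairing of
 * iemNativeRegAllocTmp with iemNativeRegFreeTmp around a short scratch
 * computation.  The function name and the emitted operation are made up for
 * the example; kept under '#if 0' like the other non-compiled code in this
 * file.
 */
#if 0
static uint32_t iemNativeExampleTmpRegUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Grab any free temporary GPR; this may spill a stack variable to make room. */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);

    /* ... emit whatever needs the scratch register, e.g. loading a constant ... */
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x42));

    /* Hand the register back; any guest shadows it holds are left alone. */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    return off;
}
#endif
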
3109/**
3110 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3111 * registers.
3112 *
3113 * @returns The host register number; throws VBox status code on failure,
3114 * so no need to check the return value.
3115 * @param pReNative The native recompile state.
3116 * @param poff Pointer to the variable with the code buffer position.
3117 * This will be updated if we need to move a variable from
3118 * register to stack in order to satisfy the request.
3119 * @param fRegMask Mask of acceptable registers.
3120 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3121 * registers (@c true, default) or the other way around
3122 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3123 */
3124DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3125 bool fPreferVolatile /*= true*/)
3126{
3127 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3128 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3129
3130 /*
3131 * Try find a completely unused register, preferably a call-volatile one.
3132 */
3133 uint8_t idxReg;
3134 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3135 & ~pReNative->Core.bmHstRegsWithGstShadow
3136 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3137 & fRegMask;
3138 if (fRegs)
3139 {
3140 if (fPreferVolatile)
3141 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3142 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3143 else
3144 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3145 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3146 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3147 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3148 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3149 }
3150 else
3151 {
3152 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3153 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3154 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3155 }
3156 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3157}
3158
3159
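/*
 * Illustrative sketch, not part of the recompiler: using the mask variant to
 * get a non-volatile temporary that survives a helper call.  The function
 * name is made up; the mask mirrors the one composed by
 * iemNativeRegAllocTmpForGuestReg below.
 */
#if 0
static uint32_t iemNativeExampleTmpRegExUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxReg = iemNativeRegAllocTmpEx(pReNative, &off,
                                                  IEMNATIVE_HST_GREG_MASK
                                                  & ~IEMNATIVE_REG_FIXED_MASK
                                                  & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
                                                  false /*fPreferVolatile*/);
    /* ... use idxReg across the call ... */
    iemNativeRegFreeTmp(pReNative, idxReg);
    return off;
}
#endif
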
3160/**
3161 * Allocates a temporary register for loading an immediate value into.
3162 *
3163 * This will emit code to load the immediate, unless there happens to be an
3164 * unused register with the value already loaded.
3165 *
3166 * The caller will not modify the returned register, it must be considered
3167 * read-only. Free using iemNativeRegFreeTmpImm.
3168 *
3169 * @returns The host register number; throws VBox status code on failure, so no
3170 * need to check the return value.
3171 * @param pReNative The native recompile state.
3172 * @param poff Pointer to the variable with the code buffer position.
3173 * @param uImm The immediate value that the register must hold upon
3174 * return.
3175 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3176 * registers (@c true, default) or the other way around
3177 * (@c false).
3178 *
3179 * @note Reusing immediate values has not been implemented yet.
3180 */
3181DECL_HIDDEN_THROW(uint8_t)
3182iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3183{
3184 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3185 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3186 return idxReg;
3187}
3188
3189
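/*
 * Illustrative sketch, not part of the recompiler: an immediate register is
 * read-only for the caller and is returned via iemNativeRegFreeTmpImm, which
 * would allow constant reuse once that is implemented.  The function name is
 * made up.
 */
#if 0
static uint32_t iemNativeExampleTmpImmUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegMask = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff), true /*fPreferVolatile*/);
    /* ... emit code that only reads idxRegMask ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegMask);
    return off;
}
#endif
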
3190/**
3191 * Allocates a temporary host general purpose register for keeping a guest
3192 * register value.
3193 *
3194 * Since we may already have a register holding the guest register value,
3195 * code will be emitted to do the loading if that's not the case. Code may also
3196 * be emitted if we have to free up a register to satisfy the request.
3197 *
3198 * @returns The host register number; throws VBox status code on failure, so no
3199 * need to check the return value.
3200 * @param pReNative The native recompile state.
3201 * @param poff Pointer to the variable with the code buffer
3202 * position. This will be updated if we need to move a
3203 * variable from register to stack in order to satisfy
3204 * the request.
3205 * @param enmGstReg The guest register that is to be updated.
3206 * @param enmIntendedUse How the caller will be using the host register.
3207 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3208 * register is okay (default). The ASSUMPTION here is
3209 * that the caller has already flushed all volatile
3210 * registers, so this is only applied if we allocate a
3211 * new register.
3212 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3213 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3214 */
3215DECL_HIDDEN_THROW(uint8_t)
3216iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3217 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3218 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3219{
3220 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3221#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3222 AssertMsg( fSkipLivenessAssert
3223 || pReNative->idxCurCall == 0
3224 || enmGstReg == kIemNativeGstReg_Pc
3225 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3226 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3227 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3228 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3229 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3230 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3231#endif
3232 RT_NOREF(fSkipLivenessAssert);
3233#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3234 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3235#endif
3236 uint32_t const fRegMask = !fNoVolatileRegs
3237 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3238 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3239
3240 /*
3241 * First check if the guest register value is already in a host register.
3242 */
3243 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3244 {
3245 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3246 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3247 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3248 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3249
3250 /* It's not supposed to be allocated... */
3251 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3252 {
3253 /*
3254 * If the register will trash the guest shadow copy, try find a
3255 * completely unused register we can use instead. If that fails,
3256 * we need to disassociate the host reg from the guest reg.
3257 */
3258 /** @todo would be nice to know if preserving the register is in any way helpful. */
3259 /* If the purpose is calculations, try duplicate the register value as
3260 we'll be clobbering the shadow. */
3261 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3262 && ( ~pReNative->Core.bmHstRegs
3263 & ~pReNative->Core.bmHstRegsWithGstShadow
3264 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3265 {
3266 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3267
3268 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3269
3270 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3271 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3272 g_apszIemNativeHstRegNames[idxRegNew]));
3273 idxReg = idxRegNew;
3274 }
3275 /* If the current register matches the restrictions, go ahead and allocate
3276 it for the caller. */
3277 else if (fRegMask & RT_BIT_32(idxReg))
3278 {
3279 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3280 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3281 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3282 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3283 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3284 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3285 else
3286 {
3287 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3288 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3289 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3290 }
3291 }
3292 /* Otherwise, allocate a register that satisfies the caller and transfer
3293 the shadowing if compatible with the intended use. (This basically
3294 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3295 else
3296 {
3297 Assert(fNoVolatileRegs);
3298 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3299 !fNoVolatileRegs
3300 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3301 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3302 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3303 {
3304 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3305 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3306 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3307 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3308 }
3309 else
3310 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3311 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3312 g_apszIemNativeHstRegNames[idxRegNew]));
3313 idxReg = idxRegNew;
3314 }
3315 }
3316 else
3317 {
3318 /*
3319 * Oops. Shadowed guest register already allocated!
3320 *
3321 * Allocate a new register, copy the value and, if updating, the
3322 * guest shadow copy assignment to the new register.
3323 */
3324 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3325 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3326 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3327 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3328
3329 /** @todo share register for readonly access. */
3330 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3331 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3332
3333 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3334 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3335
3336 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3337 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3338 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3339 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3340 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3341 else
3342 {
3343 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3344 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3345 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3346 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3347 }
3348 idxReg = idxRegNew;
3349 }
3350 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3351
3352#ifdef VBOX_STRICT
3353 /* Strict builds: Check that the value is correct. */
3354 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3355#endif
3356
3357#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3358 /** @todo r=aeichner Implement for registers other than GPR as well. */
3359 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3360 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3361 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3362 && enmGstReg <= kIemNativeGstReg_GprLast)
3363 || enmGstReg == kIemNativeGstReg_MxCsr))
3364 {
3365# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3366 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3367 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3368# endif
3369 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3370 }
3371#endif
3372
3373 return idxReg;
3374 }
3375
3376 /*
3377 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3378 */
3379 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3380
3381 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3382 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3383
3384 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3385 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3386 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3387 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3388
3389#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3390 /** @todo r=aeichner Implement for registers other than GPR as well. */
3391 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3392 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3393 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3394 && enmGstReg <= kIemNativeGstReg_GprLast)
3395 || enmGstReg == kIemNativeGstReg_MxCsr))
3396 {
3397# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3398 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3399 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3400# endif
3401 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3402 }
3403#endif
3404
3405 return idxRegNew;
3406}
3407
3408
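/*
 * Illustrative sketch, not part of the recompiler: fetching a guest register
 * shadow for read-only use vs. for update.  The function name is made up and
 * the GPR index arithmetic on kIemNativeGstReg_GprFirst is an assumption
 * based on the range checks above.
 */
#if 0
static uint32_t iemNativeExampleGuestRegUsage(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc)
{
    /* Read-only: reuses an existing shadow copy when possible, otherwise emits the load. */
    uint8_t const idxRegSrc = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                              (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + iGprSrc),
                                                              kIemNativeGstRegUse_ReadOnly);
    /* For update: keeps the shadow association and, with delayed writeback, marks it dirty. */
    uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);
    /* ... emit the actual operation using idxRegSrc and idxRegPc ... */
    iemNativeRegFreeTmp(pReNative, idxRegSrc);
    iemNativeRegFreeTmp(pReNative, idxRegPc);
    return off;
}
#endif
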
3409/**
3410 * Allocates a temporary host general purpose register that already holds the
3411 * given guest register value.
3412 *
3413 * The use case for this function is places where the shadowing state cannot be
3414 * modified due to branching and such. This will fail if we don't have a
3415 * current shadow copy handy or if it's incompatible. The only code that will
3416 * be emitted here is value checking code in strict builds.
3417 *
3418 * The intended use can only be readonly!
3419 *
3420 * @returns The host register number, UINT8_MAX if not present.
3421 * @param pReNative The native recompile state.
3422 * @param poff Pointer to the instruction buffer offset.
3423 * Will be updated in strict builds if a register is
3424 * found.
3425 * @param enmGstReg The guest register that is to be used (read-only).
3426 * @note In strict builds, this may throw instruction buffer growth failures.
3427 * Non-strict builds will not throw anything.
3428 * @sa iemNativeRegAllocTmpForGuestReg
3429 */
3430DECL_HIDDEN_THROW(uint8_t)
3431iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3432{
3433 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3434#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3435 AssertMsg( pReNative->idxCurCall == 0
3436 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3437 || enmGstReg == kIemNativeGstReg_Pc,
3438 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3439#endif
3440
3441 /*
3442 * First check if the guest register value is already in a host register.
3443 */
3444 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3445 {
3446 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3447 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3448 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3449 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3450
3451 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3452 {
3453 /*
3454 * We only do readonly use here, so easy compared to the other
3455 * variant of this code.
3456 */
3457 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3458 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3459 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3460 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3461 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3462
3463#ifdef VBOX_STRICT
3464 /* Strict builds: Check that the value is correct. */
3465 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3466#else
3467 RT_NOREF(poff);
3468#endif
3469 return idxReg;
3470 }
3471 }
3472
3473 return UINT8_MAX;
3474}
3475
3476
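/*
 * Illustrative sketch, not part of the recompiler: this variant returns
 * UINT8_MAX when no compatible shadow copy exists, so callers need a fallback
 * that leaves the shadowing state untouched.  The function name is made up.
 */
#if 0
static uint32_t iemNativeExampleIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxRegPc != UINT8_MAX)
    {
        /* ... emit code using the shadow copy ... */
        iemNativeRegFreeTmp(pReNative, idxRegPc);
    }
    else
    {
        /* ... fall back to reading the value from CPUMCTX without touching the shadows ... */
    }
    return off;
}
#endif
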
3477/**
3478 * Allocates argument registers for a function call.
3479 *
3480 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3481 * need to check the return value.
3482 * @param pReNative The native recompile state.
3483 * @param off The current code buffer offset.
3484 * @param cArgs The number of arguments the function call takes.
3485 */
3486DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3487{
3488 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3489 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3490 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3491 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3492
3493 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3494 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3495 else if (cArgs == 0)
3496 return off;
3497
3498 /*
3499 * Do we get lucky and all registers are free and not shadowing anything?
3500 */
3501 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3502 for (uint32_t i = 0; i < cArgs; i++)
3503 {
3504 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3505 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3506 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3507 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3508 }
3509 /*
3510 * Okay, not lucky so we have to free up the registers.
3511 */
3512 else
3513 for (uint32_t i = 0; i < cArgs; i++)
3514 {
3515 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3516 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3517 {
3518 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3519 {
3520 case kIemNativeWhat_Var:
3521 {
3522 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3523 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3524 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3525 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3526 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3527#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3528 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3529#endif
3530
3531 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3532 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3533 else
3534 {
3535 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3536 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3537 }
3538 break;
3539 }
3540
3541 case kIemNativeWhat_Tmp:
3542 case kIemNativeWhat_Arg:
3543 case kIemNativeWhat_rc:
3544 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3545 default:
3546 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3547 }
3548
3549 }
3550 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3551 {
3552 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3553 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3554 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3555#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3556 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3557#endif
3558 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3559 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3560 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3561 }
3562 else
3563 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3564 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3565 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3566 }
3567 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3568 return off;
3569}
3570
3571
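/*
 * Illustrative sketch, not part of the recompiler: freeing up the first
 * argument registers (g_aidxIemNativeCallRegs[0..1]) ahead of loading them
 * with actual values for a helper call.  The function name is made up and the
 * call emission is elided.
 */
#if 0
static uint32_t iemNativeExampleArgRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Make the first two argument registers available; variables in them get moved or spilled. */
    off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);
    /* ... load the argument registers and emit the call ... */
    return off;
}
#endif
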
3572DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3573
3574
3575#if 0
3576/**
3577 * Frees a register assignment of any type.
3578 *
3579 * @param pReNative The native recompile state.
3580 * @param idxHstReg The register to free.
3581 *
3582 * @note Does not update variables.
3583 */
3584DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3585{
3586 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3587 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3588 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3589 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3590 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3591 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3592 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3593 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3594 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3595 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3596 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3597 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3598 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3599 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3600
3601 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3602 /* no flushing, right:
3603 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3604 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3605 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3606 */
3607}
3608#endif
3609
3610
3611/**
3612 * Frees a temporary register.
3613 *
3614 * Any shadow copies of guest registers assigned to the host register will not
3615 * be flushed by this operation.
3616 */
3617DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3618{
3619 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3620 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
3621 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3622 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
3623 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3624}
3625
3626
3627/**
3628 * Frees a temporary immediate register.
3629 *
3630 * It is assumed that the caller has not modified the register, so it still holds
3631 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
3632 */
3633DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3634{
3635 iemNativeRegFreeTmp(pReNative, idxHstReg);
3636}
3637
3638
3639/**
3640 * Frees a register assigned to a variable.
3641 *
3642 * The register will be disassociated from the variable.
3643 */
3644DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3645{
3646 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3647 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3648 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
3649 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3650 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3651#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3652 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3653#endif
3654
3655 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3656 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3657 if (!fFlushShadows)
3658 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3659 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
3660 else
3661 {
3662 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3663 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3664#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3665 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
3666#endif
3667 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3668 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
3669 uint64_t fGstRegShadows = fGstRegShadowsOld;
3670 while (fGstRegShadows)
3671 {
3672 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3673 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3674
3675 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
3676 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
3677 }
3678 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3679 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3680 }
3681}
3682
3683
3684#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3685# ifdef LOG_ENABLED
3686/** Host CPU SIMD register names. */
3687DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
3688{
3689# ifdef RT_ARCH_AMD64
3690 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
3691# elif defined(RT_ARCH_ARM64)
3692 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
3693 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
3694# else
3695# error "port me"
3696# endif
3697};
3698# endif
3699
3700
3701/**
3702 * Frees a SIMD register assigned to a variable.
3703 *
3704 * The register will be disassociated from the variable.
3705 */
3706DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
3707{
3708 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
3709 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
3710 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
3711 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3712 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
3713 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3714
3715 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3716 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
3717 if (!fFlushShadows)
3718 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
3719 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
3720 else
3721 {
3722 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3723 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
3724 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
3725 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
3726 uint64_t fGstRegShadows = fGstRegShadowsOld;
3727 while (fGstRegShadows)
3728 {
3729 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3730 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3731
3732 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
3733 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
3734 }
3735 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
3736 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
3737 }
3738}
3739
3740
3741/**
3742 * Reassigns a variable to a different SIMD register specified by the caller.
3743 *
3744 * @returns The new code buffer position.
3745 * @param pReNative The native recompile state.
3746 * @param off The current code buffer position.
3747 * @param idxVar The variable index.
3748 * @param idxRegOld The old host register number.
3749 * @param idxRegNew The new host register number.
3750 * @param pszCaller The caller for logging.
3751 */
3752static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3753 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3754{
3755 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3756 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3757 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3758 RT_NOREF(pszCaller);
3759
3760 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3761 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
3762 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
3763
3764 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3765 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3766 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3767
3768 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3769 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
3771
3772 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
3773 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
3774 else
3775 {
3776 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
3777 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
3778 }
3779
3780 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3781 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3782 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
3783 if (fGstRegShadows)
3784 {
3785 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3786 | RT_BIT_32(idxRegNew);
3787 while (fGstRegShadows)
3788 {
3789 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3790 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3791
3792 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
3793 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
3794 }
3795 }
3796
3797 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3798 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3799 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
3800 return off;
3801}
3802
3803
3804/**
3805 * Moves a variable to a different register or spills it onto the stack.
3806 *
3807 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3808 * kinds can easily be recreated if needed later.
3809 *
3810 * @returns The new code buffer position.
3811 * @param pReNative The native recompile state.
3812 * @param off The current code buffer position.
3813 * @param idxVar The variable index.
3814 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3815 * call-volatile registers.
3816 */
3817DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3818 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
3819{
3820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3821 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3822 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3823 Assert(!pVar->fRegAcquired);
3824 Assert(pVar->fSimdReg);
3825
3826 uint8_t const idxRegOld = pVar->idxReg;
3827 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
3828 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
3829 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3830 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
3831 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
3832 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3833 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
3834 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3835 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
3836 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
3837
3838 /** @todo Add statistics on this. */
3839 /** @todo Implement basic variable liveness analysis (python) so variables
3840 * can be freed immediately once no longer used. Without it we risk
3841 * trashing registers and stack for dead variables.
3842 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3843
3844 /*
3845 * First try move it to a different register, as that's cheaper.
3846 */
3847 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3848 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
3849 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
3850 if (fRegs)
3851 {
3852 /* Avoid using shadow registers, if possible. */
3853 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
3854 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
3855 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3856 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
3857 }
3858
3859 /*
3860 * Otherwise we must spill the register onto the stack.
3861 */
3862 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3863 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3864 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3865
3866 if (pVar->cbVar == sizeof(RTUINT128U))
3867 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3868 else
3869 {
3870 Assert(pVar->cbVar == sizeof(RTUINT256U));
3871 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3872 }
3873
3874 pVar->idxReg = UINT8_MAX;
3875 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3876 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
3877 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
3878 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
3879 return off;
3880}
3881
3882
3883/**
3884 * Called right before emitting a call instruction to move anything important
3885 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
3886 * optionally freeing argument variables.
3887 *
3888 * @returns New code buffer offset, UINT32_MAX on failure.
3889 * @param pReNative The native recompile state.
3890 * @param off The code buffer offset.
3891 * @param cArgs The number of arguments the function call takes.
3892 * It is presumed that the host register part of these have
3893 * been allocated as such already and won't need moving,
3894 * just freeing.
3895 * @param fKeepVars Mask of variables that should keep their register
3896 * assignments. Caller must take care to handle these.
3897 */
3898DECL_HIDDEN_THROW(uint32_t)
3899iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
3900{
3901 Assert(!cArgs); RT_NOREF(cArgs);
3902
3903 /* fKeepVars will reduce this mask. */
3904 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
3905
3906 /*
3907 * Move anything important out of volatile registers.
3908 */
3909 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
3910#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3911 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
3912#endif
3913 ;
3914
3915 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
3916 if (!fSimdRegsToMove)
3917 { /* likely */ }
3918 else
3919 {
3920 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
3921 while (fSimdRegsToMove != 0)
3922 {
3923 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
3924 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
3925
3926 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
3927 {
3928 case kIemNativeWhat_Var:
3929 {
3930 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
3931 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3932 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3933 Assert(pVar->idxReg == idxSimdReg);
3934 Assert(pVar->fSimdReg);
3935 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
3936 {
3937 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
3938 idxVar, pVar->enmKind, pVar->idxReg));
3939 if (pVar->enmKind != kIemNativeVarKind_Stack)
3940 pVar->idxReg = UINT8_MAX;
3941 else
3942 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
3943 }
3944 else
3945 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
3946 continue;
3947 }
3948
3949 case kIemNativeWhat_Arg:
3950 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
3951 continue;
3952
3953 case kIemNativeWhat_rc:
3954 case kIemNativeWhat_Tmp:
3955 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
3956 continue;
3957
3958 case kIemNativeWhat_FixedReserved:
3959#ifdef RT_ARCH_ARM64
3960 continue; /* On ARM the upper half of the virtual 256-bit register. */
3961#endif
3962
3963 case kIemNativeWhat_FixedTmp:
3964 case kIemNativeWhat_pVCpuFixed:
3965 case kIemNativeWhat_pCtxFixed:
3966 case kIemNativeWhat_PcShadow:
3967 case kIemNativeWhat_Invalid:
3968 case kIemNativeWhat_End:
3969 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
3970 }
3971 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
3972 }
3973 }
3974
3975 /*
3976 * Do the actual freeing.
3977 */
3978 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
3979 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
3980 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
3981 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
3982
3983 /* If there are guest register shadows in any call-volatile register, we
3984 have to clear the corresponding guest register masks for each register. */
3985 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
3986 if (fHstSimdRegsWithGstShadow)
3987 {
3988 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
3989 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
3990 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
3991 do
3992 {
3993 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
3994 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
3995
3996 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
3997
3998#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3999 /*
4000 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4001 * to call volatile registers).
4002 */
4003 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4004 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4005 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4006#endif
4007 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4008 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4009
4010 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4011 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4012 } while (fHstSimdRegsWithGstShadow != 0);
4013 }
4014
4015 return off;
4016}
4017#endif
4018
4019
4020/**
4021 * Called right before emitting a call instruction to move anything important
4022 * out of call-volatile registers, free and flush the call-volatile registers,
4023 * optionally freeing argument variables.
4024 *
4025 * @returns New code buffer offset, UINT32_MAX on failure.
4026 * @param pReNative The native recompile state.
4027 * @param off The code buffer offset.
4028 * @param cArgs The number of arguments the function call takes.
4029 * It is presumed that the host register part of these have
4030 * been allocated as such already and won't need moving,
4031 * just freeing.
4032 * @param fKeepVars Mask of variables that should keep their register
4033 * assignments. Caller must take care to handle these.
4034 */
4035DECL_HIDDEN_THROW(uint32_t)
4036iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4037{
4038 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4039
4040 /* fKeepVars will reduce this mask. */
4041 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4042
4043 /*
4044 * Move anything important out of volatile registers.
4045 */
4046 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4047 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4048 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4049#ifdef IEMNATIVE_REG_FIXED_TMP0
4050 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4051#endif
4052#ifdef IEMNATIVE_REG_FIXED_TMP1
4053 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4054#endif
4055#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4056 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4057#endif
4058 & ~g_afIemNativeCallRegs[cArgs];
4059
4060 fRegsToMove &= pReNative->Core.bmHstRegs;
4061 if (!fRegsToMove)
4062 { /* likely */ }
4063 else
4064 {
4065 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4066 while (fRegsToMove != 0)
4067 {
4068 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4069 fRegsToMove &= ~RT_BIT_32(idxReg);
4070
4071 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4072 {
4073 case kIemNativeWhat_Var:
4074 {
4075 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4076 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4077 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4078 Assert(pVar->idxReg == idxReg);
4079#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4080 Assert(!pVar->fSimdReg);
4081#endif
4082 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4083 {
4084 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4085 idxVar, pVar->enmKind, pVar->idxReg));
4086 if (pVar->enmKind != kIemNativeVarKind_Stack)
4087 pVar->idxReg = UINT8_MAX;
4088 else
4089 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4090 }
4091 else
4092 fRegsToFree &= ~RT_BIT_32(idxReg);
4093 continue;
4094 }
4095
4096 case kIemNativeWhat_Arg:
4097 AssertMsgFailed(("What?!?: %u\n", idxReg));
4098 continue;
4099
4100 case kIemNativeWhat_rc:
4101 case kIemNativeWhat_Tmp:
4102 AssertMsgFailed(("Missing free: %u\n", idxReg));
4103 continue;
4104
4105 case kIemNativeWhat_FixedTmp:
4106 case kIemNativeWhat_pVCpuFixed:
4107 case kIemNativeWhat_pCtxFixed:
4108 case kIemNativeWhat_PcShadow:
4109 case kIemNativeWhat_FixedReserved:
4110 case kIemNativeWhat_Invalid:
4111 case kIemNativeWhat_End:
4112 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4113 }
4114 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4115 }
4116 }
4117
4118 /*
4119 * Do the actual freeing.
4120 */
4121 if (pReNative->Core.bmHstRegs & fRegsToFree)
4122 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4123 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4124 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4125
4126 /* If there are guest register shadows in any call-volatile register, we
4127 have to clear the corresponding guest register masks for each register. */
4128 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4129 if (fHstRegsWithGstShadow)
4130 {
4131 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4132 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4133 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4134 do
4135 {
4136 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4137 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4138
4139 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4140
4141#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4142 /*
4143 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4144 * to call volatile registers).
4145 */
4146 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4147 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4148 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4149#endif
4150
4151 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4152 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4153 } while (fHstRegsWithGstShadow != 0);
4154 }
4155
4156#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4157 /* Now for the SIMD registers, no argument support for now. */
4158 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4159#endif
4160
4161 return off;
4162}
4163
4164
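/*
 * Illustrative sketch, not part of the recompiler: what a caller does just
 * before emitting a helper call.  The function name is made up and the
 * argument loading / call emission is elided.
 */
#if 0
static uint32_t iemNativeExampleBeforeHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Move or spill variables out of the call-volatile registers and drop their guest shadows. */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, 0 /*fKeepVars*/);
    /* ... load the argument registers and emit the actual call instruction here ... */
    return off;
}
#endif
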
4165/**
4166 * Flushes a set of guest register shadow copies.
4167 *
4168 * This is usually done after calling a threaded function or a C-implementation
4169 * of an instruction.
4170 *
4171 * @param pReNative The native recompile state.
4172 * @param fGstRegs Set of guest registers to flush.
4173 */
4174DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4175{
4176 /*
4177 * Reduce the mask by what's currently shadowed
4178 */
4179 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4180 fGstRegs &= bmGstRegShadowsOld;
4181 if (fGstRegs)
4182 {
4183 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4184 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4185 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4186 if (bmGstRegShadowsNew)
4187 {
4188 /*
4189 * Partial.
4190 */
4191 do
4192 {
4193 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4194 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4195 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4196 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4197 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4198#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4199 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4200#endif
4201
4202 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4203 fGstRegs &= ~fInThisHstReg;
4204 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4205 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4206 if (!fGstRegShadowsNew)
4207 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4208 } while (fGstRegs != 0);
4209 }
4210 else
4211 {
4212 /*
4213 * Clear all.
4214 */
4215 do
4216 {
4217 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4218 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4219 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4220 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4221 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4222#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4223 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4224#endif
4225
4226 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4227 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4228 } while (fGstRegs != 0);
4229 pReNative->Core.bmHstRegsWithGstShadow = 0;
4230 }
4231 }
4232}
4233
4234
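/*
 * Illustrative sketch, not part of the recompiler: after a C implementation
 * helper that may have changed the guest RIP, its shadow copy is stale and
 * must be dropped.  The function name is made up; real callers typically pass
 * wider masks.
 */
#if 0
static void iemNativeExampleFlushAfterCImpl(PIEMRECOMPILERSTATE pReNative)
{
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
}
#endif
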
4235/**
4236 * Flushes guest register shadow copies held by a set of host registers.
4237 *
4238 * This is used with the TLB lookup code for ensuring that we don't carry on
4239 * with any guest shadows in volatile registers, as these will get corrupted by
4240 * a TLB miss.
4241 *
4242 * @param pReNative The native recompile state.
4243 * @param fHstRegs Set of host registers to flush guest shadows for.
4244 */
4245DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4246{
4247 /*
4248 * Reduce the mask by what's currently shadowed.
4249 */
4250 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4251 fHstRegs &= bmHstRegsWithGstShadowOld;
4252 if (fHstRegs)
4253 {
4254 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4255 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4256 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4257 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4258 if (bmHstRegsWithGstShadowNew)
4259 {
4260 /*
4261 * Partial (likely).
4262 */
4263 uint64_t fGstShadows = 0;
4264 do
4265 {
4266 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4267 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4268 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4269 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4270#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4271 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4272#endif
4273
4274 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4275 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4276 fHstRegs &= ~RT_BIT_32(idxHstReg);
4277 } while (fHstRegs != 0);
4278 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4279 }
4280 else
4281 {
4282 /*
4283 * Clear all.
4284 */
4285 do
4286 {
4287 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4288 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4289 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4290 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4291#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4292 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4293#endif
4294
4295 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4296 fHstRegs &= ~RT_BIT_32(idxHstReg);
4297 } while (fHstRegs != 0);
4298 pReNative->Core.bmGstRegShadows = 0;
4299 }
4300 }
4301}
4302
4303
4304/**
4305 * Restores guest shadow copies in volatile registers.
4306 *
4307 * This is used after calling a helper function (think TLB miss) to restore the
4308 * register state of volatile registers.
4309 *
4310 * @param pReNative The native recompile state.
4311 * @param off The code buffer offset.
4312 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4313 * be active (allocated) w/o asserting. Hack.
4314 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4315 * iemNativeVarRestoreVolatileRegsPostHlpCall()
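 *
 * Minimal usage sketch (hypothetical; pReNative and off assumed in scope, and no
 * host registers are actively holding shadowing variables):
 * @code
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
 * @endcode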
4316 */
4317DECL_HIDDEN_THROW(uint32_t)
4318iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4319{
4320 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4321 if (fHstRegs)
4322 {
4323 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4324 do
4325 {
4326 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4327
4328 /* It's not fatal if a register is active holding a variable that is
4329 shadowing a guest register, ASSUMING all pending guest register
4330 writes were flushed prior to the helper call. However, we'll be
4331 emitting duplicate restores, so it wastes code space. */
4332 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4333 RT_NOREF(fHstRegsActiveShadows);
4334
4335 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4336#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4337 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4338#endif
4339 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4340 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4341 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4342
4343 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4344 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4345
4346 fHstRegs &= ~RT_BIT_32(idxHstReg);
4347 } while (fHstRegs != 0);
4348 }
4349 return off;
4350}
4351
4352
4353
4354
4355/*********************************************************************************************************************************
4356* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4357*********************************************************************************************************************************/
4358#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4359
4360/**
4361 * Info about shadowed guest SIMD register values.
4362 * @see IEMNATIVEGSTSIMDREG
4363 */
4364static struct
4365{
4366 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4367 uint32_t offXmm;
4368 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4369 uint32_t offYmm;
4370 /** Name (for logging). */
4371 const char *pszName;
4372} const g_aGstSimdShadowInfo[] =
4373{
4374#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4375 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4376 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4377 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4378 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4379 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4380 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4381 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4382 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4383 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4384 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4385 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4386 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4387 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4388 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4389 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4390 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4391 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4392#undef CPUMCTX_OFF_AND_SIZE
4393};
4394AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4395
4396
4397/**
4398 * Frees a temporary SIMD register.
4399 *
4400 * Any shadow copies of guest registers assigned to the host register will not
4401 * be flushed by this operation.
4402 */
4403DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4404{
4405 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4406 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4407 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4408 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4409 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4410}
4411
4412
4413/**
4414 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
4415 *
4416 * @returns New code buffer offset.
4417 * @param pReNative The native recompile state.
4418 * @param off Current code buffer position.
4419 * @param enmGstSimdReg The guest SIMD register to flush.
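 *
 * Usage sketch (hypothetical; assumes guest ymm0 is currently shadowed and dirty):
 * @code
 *      off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0) /* ymm0 */);
 * @endcode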
4420 */
4421DECL_HIDDEN_THROW(uint32_t)
4422iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4423{
4424 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4425
4426 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4427 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4428 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4429 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4430
4431 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4432 {
4433 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4434 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4435 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4436 }
4437
4438 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4439 {
4440 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4441 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4442 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4443 }
4444
4445 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4446 return off;
4447}
4448
4449
4450/**
4451 * Flush the given set of guest SIMD registers if marked as dirty.
4452 *
4453 * @returns New code buffer offset.
4454 * @param pReNative The native recompile state.
4455 * @param off Current code buffer position.
4456 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
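 *
 * Usage sketch (hypothetical): write back every dirty guest SIMD register before
 * emitting code that inspects the CPUMCTX copies.
 * @code
 *      off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, UINT64_MAX /* all */);
 * @endcode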
4457 */
4458DECL_HIDDEN_THROW(uint32_t)
4459iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4460{
4461 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4462 & fFlushGstSimdReg;
4463 if (bmGstSimdRegShadowDirty)
4464 {
4465# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4466 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4467 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4468# endif
4469
4470 do
4471 {
4472 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4473 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4474 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4475 } while (bmGstSimdRegShadowDirty);
4476 }
4477
4478 return off;
4479}
4480
4481
4482#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4483/**
4484 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4485 *
4486 * @returns New code buffer offset.
4487 * @param pReNative The native recompile state.
4488 * @param off Current code buffer position.
4489 * @param idxHstSimdReg The host SIMD register.
4490 *
4491 * @note This doesn't do any unshadowing of guest registers from the host register.
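 *
 * Usage sketch (hypothetical; idxHstSimdReg is a host SIMD register the caller is
 * about to repurpose):
 * @code
 *      off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxHstSimdReg);
 * @endcode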
4492 */
4493DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4494{
4495 /* We need to flush any pending guest register writes this host register shadows. */
4496 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4497 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4498 if (bmGstSimdRegShadowDirty)
4499 {
4500# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4501 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4502 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4503# endif
4504
4505 do
4506 {
4507 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4508 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4509 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4510 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4511 } while (bmGstSimdRegShadowDirty);
4512 }
4513
4514 return off;
4515}
4516#endif
4517
4518
4519/**
4520 * Locate a register, possibly freeing one up.
4521 *
4522 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4523 * failed.
4524 *
4525 * @returns Host register number on success. Returns UINT8_MAX if no registers
4526 * found, the caller is supposed to deal with this and raise a
4527 * allocation type specific status code (if desired).
4528 *
4529 * @throws VBox status code if we run into trouble spilling a variable or
4530 * recording debug info. Does NOT throw anything if we're out of
4531 * registers, though.
4532 */
4533static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4534 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4535{
4536 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4537 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4538 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4539
4540 /*
4541 * Try a freed register that's shadowing a guest register.
4542 */
4543 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4544 if (fRegs)
4545 {
4546 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4547
4548#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4549 /*
4550 * When we have liveness information, we use it to kick out all shadowed
4551 * guest registers that will not be needed any more in this TB. If we're
4552 * lucky, this may prevent us from ending up here again.
4553 *
4554 * Note! We must consider the previous entry here so we don't free
4555 * anything that the current threaded function requires (current
4556 * entry is produced by the next threaded function).
4557 */
4558 uint32_t const idxCurCall = pReNative->idxCurCall;
4559 if (idxCurCall > 0)
4560 {
4561 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4562
4563# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4564 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4565 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4566 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4567#else
4568 /* Construct a mask of the registers not in the read or write state.
4569 Note! We could skip writes, if they aren't from us, as this is just
4570 a hack to prevent trashing registers that have just been written
4571 or will be written when we retire the current instruction. */
4572 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4573 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4574 & IEMLIVENESSBIT_MASK;
4575#endif
4576 /* If it matches any shadowed registers. */
4577 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4578 {
4579 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4580 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4581 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4582
4583 /* See if we've got any unshadowed registers we can return now. */
4584 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4585 if (fUnshadowedRegs)
4586 {
4587 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4588 return (fPreferVolatile
4589 ? ASMBitFirstSetU32(fUnshadowedRegs)
4590 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4591 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4592 - 1;
4593 }
4594 }
4595 }
4596#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4597
4598 unsigned const idxReg = (fPreferVolatile
4599 ? ASMBitFirstSetU32(fRegs)
4600 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4601 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4602 - 1;
4603
4604 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4605 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4606 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4607 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4608
4609 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4610 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4611
4612 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4613 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4614 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4615 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4616 return idxReg;
4617 }
4618
4619 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
4620
4621 /*
4622 * Try free up a variable that's in a register.
4623 *
4624 * We do two rounds here, first evacuating variables that don't need to be
4625 * saved on the stack, then in the second round moving things to the stack.
4626 */
4627 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
4628 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4629 {
4630 uint32_t fVars = pReNative->Core.bmVars;
4631 while (fVars)
4632 {
4633 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4634 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4635 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
4636 continue;
4637
4638 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
4639 && (RT_BIT_32(idxReg) & fRegMask)
4640 && ( iLoop == 0
4641 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4642 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4643 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4644 {
4645 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
4646 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
4647 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4648 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
4649 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
4650 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
4651
4652 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4653 {
4654 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4655 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4656 }
4657
4658 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4659 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
4660
4661 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4662 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4663 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4664 return idxReg;
4665 }
4666 fVars &= ~RT_BIT_32(idxVar);
4667 }
4668 }
4669
4670 AssertFailed();
4671 return UINT8_MAX;
4672}
4673
4674
4675/**
4676 * Flushes a set of guest SIMD register shadow copies.
4677 *
4678 * This is usually done after calling a threaded function or a C-implementation
4679 * of an instruction.
4680 *
4681 * @param pReNative The native recompile state.
4682 * @param fGstSimdRegs Set of guest SIMD registers to flush.
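 *
 * Usage sketch (hypothetical): drop all guest SIMD shadowing after a call that may
 * have modified the guest registers behind our back.
 * @code
 *      iemNativeSimdRegFlushGuestShadows(pReNative, UINT64_MAX /* all guest SIMD registers */);
 * @endcode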
4683 */
4684DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
4685{
4686 /*
4687 * Reduce the mask by what's currently shadowed
4688 */
4689 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
4690 fGstSimdRegs &= bmGstSimdRegShadows;
4691 if (fGstSimdRegs)
4692 {
4693 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
4694 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
4695 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
4696 if (bmGstSimdRegShadowsNew)
4697 {
4698 /*
4699 * Partial.
4700 */
4701 do
4702 {
4703 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4704 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4705 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4706 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4707 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4708 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4709
4710 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
4711 fGstSimdRegs &= ~fInThisHstReg;
4712 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4713 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4714 if (!fGstRegShadowsNew)
4715 {
4716 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4717 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4718 }
4719 } while (fGstSimdRegs != 0);
4720 }
4721 else
4722 {
4723 /*
4724 * Clear all.
4725 */
4726 do
4727 {
4728 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
4729 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
4730 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
4731 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
4732 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4733 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
4734
4735 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4736 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4737 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4738 } while (fGstSimdRegs != 0);
4739 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
4740 }
4741 }
4742}
4743
4744
4745/**
4746 * Allocates a temporary host SIMD register.
4747 *
4748 * This may emit code to save register content onto the stack in order to free
4749 * up a register.
4750 *
4751 * @returns The host register number; throws VBox status code on failure,
4752 * so no need to check the return value.
4753 * @param pReNative The native recompile state.
4754 * @param poff Pointer to the variable with the code buffer position.
4755 * This will be updated if we need to move a variable from
4756 * register to stack in order to satisfy the request.
4757 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4758 * registers (@c true, default) or the other way around
4759 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
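 *
 * Usage sketch (hypothetical scratch register use; pReNative and off assumed in scope):
 * @code
 *      uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... emit code using idxSimdTmp as scratch ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 * @endcode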
4760 */
4761DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4762{
4763 /*
4764 * Try find a completely unused register, preferably a call-volatile one.
4765 */
4766 uint8_t idxSimdReg;
4767 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4768 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4769 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
4770 if (fRegs)
4771 {
4772 if (fPreferVolatile)
4773 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4774 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4775 else
4776 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4777 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4778 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4779 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4780
4781 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4782 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4783 }
4784 else
4785 {
4786 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
4787 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4788 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4789 }
4790
4791 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4792 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4793}
4794
4795
4796/**
4797 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
4798 * registers.
4799 *
4800 * @returns The host register number; throws VBox status code on failure,
4801 * so no need to check the return value.
4802 * @param pReNative The native recompile state.
4803 * @param poff Pointer to the variable with the code buffer position.
4804 * This will be updated if we need to move a variable from
4805 * register to stack in order to satisfy the request.
4806 * @param fRegMask Mask of acceptable registers.
4807 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4808 * registers (@c true, default) or the other way around
4809 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
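 *
 * Usage sketch (hypothetical; the mask below is simply the default "anything
 * allocatable" mask):
 * @code
 *      uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmpEx(pReNative, &off,
 *                                                            IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
 *      // ... emit code using idxSimdTmp ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 * @endcode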
4810 */
4811DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4812 bool fPreferVolatile /*= true*/)
4813{
4814 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4815 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4816
4817 /*
4818 * Try find a completely unused register, preferably a call-volatile one.
4819 */
4820 uint8_t idxSimdReg;
4821 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
4822 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4823 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
4824 & fRegMask;
4825 if (fRegs)
4826 {
4827 if (fPreferVolatile)
4828 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4829 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4830 else
4831 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4832 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
4833 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
4834 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
4835
4836 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
4837 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4838 }
4839 else
4840 {
4841 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4842 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4843 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
4844 }
4845
4846 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
4847 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
4848}
4849
4850
4851/**
4852 * Sets the indicator for which part of the given SIMD register has valid data loaded.
4853 *
4854 * @param pReNative The native recompile state.
4855 * @param idxHstSimdReg The host SIMD register to update the state for.
4856 * @param enmLoadSz The load size to set.
4857 */
4858DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
4859 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
4860{
4861 /* Everything valid already? -> nothing to do. */
4862 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4863 return;
4864
4865 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
4866 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
4867 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
4868 {
4869 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
4870 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
4871 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
4872 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
4873 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
4874 }
4875}
4876
4877
4878static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
4879 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
4880{
4881 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
4882 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
4883 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
4884 {
4885# ifdef RT_ARCH_ARM64
4886 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
4887 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
4888# endif
4889
4890 if (idxHstSimdRegDst != idxHstSimdRegSrc)
4891 {
4892 switch (enmLoadSzDst)
4893 {
4894 case kIemNativeGstSimdRegLdStSz_256:
4895 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4896 break;
4897 case kIemNativeGstSimdRegLdStSz_Low128:
4898 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4899 break;
4900 case kIemNativeGstSimdRegLdStSz_High128:
4901 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
4902 break;
4903 default:
4904 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4905 }
4906
4907 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
4908 }
4909 }
4910 else
4911 {
4912 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
4913 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
4914 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
4915 }
4916
4917 return off;
4918}
4919
4920
4921/**
4922 * Allocates a temporary host SIMD register for keeping a guest
4923 * SIMD register value.
4924 *
4925 * Since we may already have a register holding the guest register value,
4926 * code will be emitted to do the loading if that's not the case. Code may also
4927 * be emitted if we have to free up a register to satisfy the request.
4928 *
4929 * @returns The host register number; throws VBox status code on failure, so no
4930 * need to check the return value.
4931 * @param pReNative The native recompile state.
4932 * @param poff Pointer to the variable with the code buffer
4933 * position. This will be updated if we need to move a
4934 * variable from register to stack in order to satisfy
4935 * the request.
4936 * @param enmGstSimdReg The guest SIMD register that is to be updated.
 * @param enmLoadSz The load size of the register.
4937 * @param enmIntendedUse How the caller will be using the host register.
4938 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4939 * register is okay (default). The ASSUMPTION here is
4940 * that the caller has already flushed all volatile
4941 * registers, so this is only applied if we allocate a
4942 * new register.
4943 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
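 *
 * Usage sketch (hypothetical read-modify-write of the low 128 bits of guest ymm0):
 * @code
 *      uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                         IEMNATIVEGSTSIMDREG_SIMD(0),
 *                                                                         kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                         kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying the low 128 bits held in idxSimdReg ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
 * @endcode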
4944 */
4945DECL_HIDDEN_THROW(uint8_t)
4946iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
4947 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4948 bool fNoVolatileRegs /*= false*/)
4949{
4950 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
4951#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
4952 AssertMsg( pReNative->idxCurCall == 0
4953 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4954 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4955 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4956 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
4957 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
4958 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
4959#endif
4960#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4961 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4962#endif
4963 uint32_t const fRegMask = !fNoVolatileRegs
4964 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
4965 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4966
4967 /*
4968 * First check if the guest register value is already in a host register.
4969 */
4970 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
4971 {
4972 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4973 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4974 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
4975 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
4976
4977 /* It's not supposed to be allocated... */
4978 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
4979 {
4980 /*
4981 * If the register will trash the guest shadow copy, try find a
4982 * completely unused register we can use instead. If that fails,
4983 * we need to disassociate the host reg from the guest reg.
4984 */
4985 /** @todo would be nice to know if preserving the register is in any way helpful. */
4986 /* If the purpose is calculations, try duplicate the register value as
4987 we'll be clobbering the shadow. */
4988 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4989 && ( ~pReNative->Core.bmHstSimdRegs
4990 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
4991 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
4992 {
4993 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
4994
4995 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
4996
4997 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
4998 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
4999 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5000 idxSimdReg = idxRegNew;
5001 }
5002 /* If the current register matches the restrictions, go ahead and allocate
5003 it for the caller. */
5004 else if (fRegMask & RT_BIT_32(idxSimdReg))
5005 {
5006 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5007 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5008 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5009 {
5010 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5011 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5012 else
5013 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5014 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5015 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5016 }
5017 else
5018 {
5019 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5020 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5021 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5022 }
5023 }
5024 /* Otherwise, allocate a register that satisfies the caller and transfer
5025 the shadowing if compatible with the intended use. (This basically
5026 means the call wants a non-volatile register (RSP push/pop scenario).) */
5027 else
5028 {
5029 Assert(fNoVolatileRegs);
5030 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5031 !fNoVolatileRegs
5032 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5033 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5034 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5035 {
5036 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5037 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5038 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5039 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5040 }
5041 else
5042 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5043 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5044 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5045 idxSimdReg = idxRegNew;
5046 }
5047 }
5048 else
5049 {
5050 /*
5051 * Oops. Shadowed guest register already allocated!
5052 *
5053 * Allocate a new register, copy the value and, if updating, the
5054 * guest shadow copy assignment to the new register.
5055 */
5056 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5057 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5058 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5059 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5060
5061 /** @todo share register for readonly access. */
5062 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5063 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5064
5065 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5066 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5067 else
5068 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5069
5070 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5071 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5072 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5073 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5074 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5075 else
5076 {
5077 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5078 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5079 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5080 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5081 }
5082 idxSimdReg = idxRegNew;
5083 }
5084 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5085
5086#ifdef VBOX_STRICT
5087 /* Strict builds: Check that the value is correct. */
5088 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5089 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5090#endif
5091
5092 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5093 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5094 {
5095# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5096 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5097 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5098# endif
5099
5100 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5101 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5102 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5103 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5104 else
5105 {
5106 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5107 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5108 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5109 }
5110 }
5111
5112 return idxSimdReg;
5113 }
5114
5115 /*
5116 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5117 */
5118 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5119
5120 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5121 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5122 else
5123 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5124
5125 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5126 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5127
5128 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5129 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5130 {
5131# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5132 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5133 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5134# endif
5135
5136 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5137 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5138 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5139 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5140 else
5141 {
5142 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5143 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5144 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5145 }
5146 }
5147
5148 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5149 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5150
5151 return idxRegNew;
5152}
5153
5154
5155/**
5156 * Flushes guest SIMD register shadow copies held by a set of host registers.
5157 *
5158 * This is used when calling an external helper to ensure that we don't carry on
5159 * with any guest shadows in volatile registers, as these will get corrupted by the callee.
5160 *
5161 * @param pReNative The native recompile state.
5162 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
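 *
 * Illustrative usage sketch (hypothetical call site before an external helper call):
 * @code
 *      iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 * @endcode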
5163 */
5164DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5165{
5166 /*
5167 * Reduce the mask by what's currently shadowed.
5168 */
5169 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5170 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5171 if (fHstSimdRegs)
5172 {
5173 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5174 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5175 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5176 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5177 if (bmHstSimdRegsWithGstShadowNew)
5178 {
5179 /*
5180 * Partial (likely).
5181 */
5182 uint64_t fGstShadows = 0;
5183 do
5184 {
5185 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5186 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5187 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5188 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5189 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5190 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5191
5192 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5193 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5194 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5195 } while (fHstSimdRegs != 0);
5196 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5197 }
5198 else
5199 {
5200 /*
5201 * Clear all.
5202 */
5203 do
5204 {
5205 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5206 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5207 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5208 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5209 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5210 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5211
5212 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5213 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5214 } while (fHstSimdRegs != 0);
5215 pReNative->Core.bmGstSimdRegShadows = 0;
5216 }
5217 }
5218}
5219#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5220
5221
5222
5223/*********************************************************************************************************************************
5224* Code emitters for flushing pending guest register writes and sanity checks *
5225*********************************************************************************************************************************/
5226
5227#ifdef VBOX_STRICT
5228/**
5229 * Does internal register allocator sanity checks.
5230 */
5231DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5232{
5233 /*
5234 * Iterate host registers building a guest shadowing set.
5235 */
5236 uint64_t bmGstRegShadows = 0;
5237 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5238 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5239 while (bmHstRegsWithGstShadow)
5240 {
5241 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5242 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5243 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5244
5245 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5246 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5247 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5248 bmGstRegShadows |= fThisGstRegShadows;
5249 while (fThisGstRegShadows)
5250 {
5251 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5252 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5253 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5254 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5255 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5256 }
5257 }
5258 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5259 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5260 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5261
5262 /*
5263 * Now the other way around, checking the guest to host index array.
5264 */
5265 bmHstRegsWithGstShadow = 0;
5266 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5267 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5268 while (bmGstRegShadows)
5269 {
5270 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5271 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5272 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5273
5274 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5275 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5276 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5277 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5278 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5279 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5280 }
5281 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5282 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5283 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5284}
5285#endif /* VBOX_STRICT */
5286
5287
5288/**
5289 * Flushes any delayed guest register writes.
5290 *
5291 * This must be called prior to calling CImpl functions and any helpers that use
5292 * the guest state (like raising exceptions) and such.
5293 *
5294 * @note This function does not flush any shadowing information for guest registers; that
5295 * is left to the caller if needed.
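 *
 * Usage sketch (hypothetical direct call): flush everything, excepting nothing,
 * before emitting a call into code that reads the guest state.
 * @code
 *      off = iemNativeRegFlushPendingWritesSlow(pReNative, off, 0 /*fGstShwExcept*/, 0 /*fGstSimdShwExcept*/);
 * @endcode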
5296 */
5297DECL_HIDDEN_THROW(uint32_t)
5298iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5299{
5300#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5301 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5302 off = iemNativeEmitPcWriteback(pReNative, off);
5303#else
5304 RT_NOREF(pReNative, fGstShwExcept);
5305#endif
5306
5307#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5308 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5309#endif
5310
5311#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5312 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5313#endif
5314
5315 return off;
5316}
5317
5318
5319#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5320/**
5321 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5322 */
5323DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5324{
5325 Assert(pReNative->Core.offPc);
5326# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5327 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5328 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5329# endif
5330
5331# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5332 /* Allocate a temporary PC register. */
5333 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5334
5335 /* Perform the addition and store the result. */
5336 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5337 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5338
5339 /* Free but don't flush the PC register. */
5340 iemNativeRegFreeTmp(pReNative, idxPcReg);
5341# else
5342 /* Compare the shadow with the context value, they should match. */
5343 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5344 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5345# endif
5346
5347 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5348 pReNative->Core.offPc = 0;
5349 pReNative->Core.cInstrPcUpdateSkipped = 0;
5350
5351 return off;
5352}
5353#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5354
5355
5356/*********************************************************************************************************************************
5357* Code Emitters (larger snippets) *
5358*********************************************************************************************************************************/
5359
5360/**
5361 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5362 * extending to 64-bit width.
5363 *
5364 * @returns New code buffer offset on success, UINT32_MAX on failure.
5365 * @param pReNative The native recompile state.
5366 * @param off The current code buffer position.
5367 * @param idxHstReg The host register to load the guest register value into.
5368 * @param enmGstReg The guest register to load.
5369 *
5370 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5371 * that is something the caller needs to do if applicable.
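 *
 * Usage sketch (hypothetical; idxHstReg is an already allocated host register):
 * load the guest RIP without establishing shadowing.
 * @code
 *      off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, kIemNativeGstReg_Pc);
 * @endcode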
5372 */
5373DECL_HIDDEN_THROW(uint32_t)
5374iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5375{
5376 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5377 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5378
5379 switch (g_aGstShadowInfo[enmGstReg].cb)
5380 {
5381 case sizeof(uint64_t):
5382 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5383 case sizeof(uint32_t):
5384 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5385 case sizeof(uint16_t):
5386 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5387#if 0 /* not present in the table. */
5388 case sizeof(uint8_t):
5389 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5390#endif
5391 default:
5392 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5393 }
5394}
5395
5396
5397#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5398/**
5399 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5400 *
5401 * @returns New code buffer offset on success, UINT32_MAX on failure.
5402 * @param pReNative The recompiler state.
5403 * @param off The current code buffer position.
5404 * @param idxHstSimdReg The host register to load the guest register value into.
5405 * @param enmGstSimdReg The guest register to load.
5406 * @param enmLoadSz The load size of the register.
5407 *
5408 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5409 * that is something the caller needs to do if applicable.
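 *
 * Usage sketch (hypothetical; idxHstSimdReg is an already allocated host SIMD register):
 * load the low 128 bits of guest ymm0 without establishing shadowing.
 * @code
 *      off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdReg,
 *                                                         IEMNATIVEGSTSIMDREG_SIMD(0),
 *                                                         kIemNativeGstSimdRegLdStSz_Low128);
 * @endcode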
5410 */
5411DECL_HIDDEN_THROW(uint32_t)
5412iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5413 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5414{
5415 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5416
5417 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5418 switch (enmLoadSz)
5419 {
5420 case kIemNativeGstSimdRegLdStSz_256:
5421 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5422 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5423 case kIemNativeGstSimdRegLdStSz_Low128:
5424 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5425 case kIemNativeGstSimdRegLdStSz_High128:
5426 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5427 default:
5428 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5429 }
5430}
5431#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5432
5433#ifdef VBOX_STRICT
5434
5435/**
5436 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5437 *
5438 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5439 * Trashes EFLAGS on AMD64.
5440 */
5441DECL_HIDDEN_THROW(uint32_t)
5442iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5443{
5444# ifdef RT_ARCH_AMD64
5445 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5446
5447 /* rol reg64, 32 */
5448 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5449 pbCodeBuf[off++] = 0xc1;
5450 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5451 pbCodeBuf[off++] = 32;
5452
5453 /* test reg32, ffffffffh */
5454 if (idxReg >= 8)
5455 pbCodeBuf[off++] = X86_OP_REX_B;
5456 pbCodeBuf[off++] = 0xf7;
5457 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5458 pbCodeBuf[off++] = 0xff;
5459 pbCodeBuf[off++] = 0xff;
5460 pbCodeBuf[off++] = 0xff;
5461 pbCodeBuf[off++] = 0xff;
5462
5463 /* je/jz +1 */
5464 pbCodeBuf[off++] = 0x74;
5465 pbCodeBuf[off++] = 0x01;
5466
5467 /* int3 */
5468 pbCodeBuf[off++] = 0xcc;
5469
5470 /* rol reg64, 32 */
5471 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5472 pbCodeBuf[off++] = 0xc1;
5473 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5474 pbCodeBuf[off++] = 32;
5475
5476# elif defined(RT_ARCH_ARM64)
5477 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5478 /* lsr tmp0, reg64, #32 */
5479 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5480 /* cbz tmp0, +1 */
5481 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5482 /* brk #0x1100 */
5483 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5484
5485# else
5486# error "Port me!"
5487# endif
5488 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5489 return off;
5490}
5491
5492
5493/**
5494 * Emitting code that checks that the content of register @a idxReg is the same
5495 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5496 * instruction if that's not the case.
5497 *
5498 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5499 * Trashes EFLAGS on AMD64.
5500 */
5501DECL_HIDDEN_THROW(uint32_t)
5502iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5503{
5504#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5505 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5506 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5507 return off;
5508#endif
5509
5510# ifdef RT_ARCH_AMD64
5511 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5512
5513 /* cmp reg, [mem] */
5514 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5515 {
5516 if (idxReg >= 8)
5517 pbCodeBuf[off++] = X86_OP_REX_R;
5518 pbCodeBuf[off++] = 0x38;
5519 }
5520 else
5521 {
5522 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5523 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5524 else
5525 {
5526 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5527 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5528 else
5529 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5530 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5531 if (idxReg >= 8)
5532 pbCodeBuf[off++] = X86_OP_REX_R;
5533 }
5534 pbCodeBuf[off++] = 0x39;
5535 }
5536 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5537
5538 /* je/jz +1 */
5539 pbCodeBuf[off++] = 0x74;
5540 pbCodeBuf[off++] = 0x01;
5541
5542 /* int3 */
5543 pbCodeBuf[off++] = 0xcc;
5544
5545 /* For values smaller than the register size, we must check that the rest
5546 of the register is all zeros. */
5547 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5548 {
5549 /* test reg64, imm32 */
5550 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5551 pbCodeBuf[off++] = 0xf7;
5552 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5553 pbCodeBuf[off++] = 0;
5554 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5555 pbCodeBuf[off++] = 0xff;
5556 pbCodeBuf[off++] = 0xff;
5557
5558 /* je/jz +1 */
5559 pbCodeBuf[off++] = 0x74;
5560 pbCodeBuf[off++] = 0x01;
5561
5562 /* int3 */
5563 pbCodeBuf[off++] = 0xcc;
5564 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5565 }
5566 else
5567 {
5568 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5569 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5570 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5571 }
5572
5573# elif defined(RT_ARCH_ARM64)
5574 /* mov TMP0, [gstreg] */
5575 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5576
5577 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5578 /* sub tmp0, tmp0, idxReg */
5579 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5580 /* cbz tmp0, +1 */
5581 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5582 /* brk #0x1000+enmGstReg */
5583 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5584 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5585
5586# else
5587# error "Port me!"
5588# endif
5589 return off;
5590}
5591
5592
5593# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5594# ifdef RT_ARCH_AMD64
5595/**
5596 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
5597 */
5598DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
5599{
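    /* Technique (sketch): PCMPEQQ sets each 64-bit lane to all ones when it equals the
       CPUMCTX copy.  Each lane is then pulled out with PEXTRQ and compared against -1
       (the imm8 0xff below is sign-extended by the cmp); any mismatch hits the int3. */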
5600 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5601 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5602 if (idxSimdReg >= 8)
5603 pbCodeBuf[off++] = X86_OP_REX_R;
5604 pbCodeBuf[off++] = 0x0f;
5605 pbCodeBuf[off++] = 0x38;
5606 pbCodeBuf[off++] = 0x29;
5607 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
5608
5609 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5610 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5611 pbCodeBuf[off++] = X86_OP_REX_W
5612 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5613 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5614 pbCodeBuf[off++] = 0x0f;
5615 pbCodeBuf[off++] = 0x3a;
5616 pbCodeBuf[off++] = 0x16;
5617 pbCodeBuf[off++] = 0xeb;
5618 pbCodeBuf[off++] = 0x00;
5619
5620 /* cmp tmp0, 0xffffffffffffffff. */
5621 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5622 pbCodeBuf[off++] = 0x83;
5623 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5624 pbCodeBuf[off++] = 0xff;
5625
5626 /* je/jz +1 */
5627 pbCodeBuf[off++] = 0x74;
5628 pbCodeBuf[off++] = 0x01;
5629
5630 /* int3 */
5631 pbCodeBuf[off++] = 0xcc;
5632
5633 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
5634 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5635 pbCodeBuf[off++] = X86_OP_REX_W
5636 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
5637 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5638 pbCodeBuf[off++] = 0x0f;
5639 pbCodeBuf[off++] = 0x3a;
5640 pbCodeBuf[off++] = 0x16;
5641 pbCodeBuf[off++] = 0xeb;
5642 pbCodeBuf[off++] = 0x01;
5643
5644 /* cmp tmp0, 0xffffffffffffffff. */
5645 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5646 pbCodeBuf[off++] = 0x83;
5647 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
5648 pbCodeBuf[off++] = 0xff;
5649
5650 /* je/jz +1 */
5651 pbCodeBuf[off++] = 0x74;
5652 pbCodeBuf[off++] = 0x01;
5653
5654 /* int3 */
5655 pbCodeBuf[off++] = 0xcc;
5656
5657 return off;
5658}
5659# endif
5660
5661
5662/**
5663 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
5664 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5665 * instruction if that's not the case.
5666 *
5667 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5668 * Trashes EFLAGS on AMD64.
5669 */
5670DECL_HIDDEN_THROW(uint32_t)
5671iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5672 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5673{
5674    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5675 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5676 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5677 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5678 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5679 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5680 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5681 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5682 return off;
5683
5684# ifdef RT_ARCH_AMD64
5685 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5686 {
5687 /* movdqa vectmp0, idxSimdReg */
5688 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5689
5690 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5691
5692 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5693 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5694 }
5695
5696 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5697 {
5698        /* Due to the fact that CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
5699 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
5700
5701 /* vextracti128 vectmp0, idxSimdReg, 1 */
5702 pbCodeBuf[off++] = X86_OP_VEX3;
5703 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
5704 | X86_OP_VEX3_BYTE1_X
5705 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
5706 | 0x03; /* Opcode map */
5707 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
5708 pbCodeBuf[off++] = 0x39;
5709 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
5710 pbCodeBuf[off++] = 0x01;
5711
5712 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5713 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5714 }
5715# elif defined(RT_ARCH_ARM64)
5716 /* mov vectmp0, [gstreg] */
5717 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
5718
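    /* Comparison approach (sketch): XOR the temporary holding the CPUMCTX copy with the
       host register; identical vectors leave only zero bits, so the UADDLV lane sum is
       zero and the CBZ skips the BRK.  Any difference trips the breakpoint. */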
5719 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5720 {
5721 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5722 /* eor vectmp0, vectmp0, idxSimdReg */
5723 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5724 /* uaddlv vectmp0, vectmp0.16B */
5725 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
5726 /* umov tmp0, vectmp0.H[0] */
5727 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
5728 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5729 /* cbz tmp0, +1 */
5730 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5731 /* brk #0x1000+enmGstReg */
5732 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5733 }
5734
5735 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
5736 {
5737 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5738 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
5739 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
5740 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
5741 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
5742 /* umov tmp0, (vectmp0 + 1).H[0] */
5743 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
5744 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
5745 /* cbz tmp0, +1 */
5746 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5747 /* brk #0x1000+enmGstReg */
5748 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
5749 }
5750
5751# else
5752# error "Port me!"
5753# endif
5754
5755 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5756 return off;
5757}
5758# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5759
5760
5761/**
5762 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
5763 * important bits.
5764 *
5765 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5766 * Trashes EFLAGS on AMD64.
5767 */
5768DECL_HIDDEN_THROW(uint32_t)
5769iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5770{
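    /* Sketch of the emitted check:
     *      if ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) != (fExec & IEMTB_F_KEY_MASK))
     *          breakpoint (int3 / brk #0x2000);
     */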
5771 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5772 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5773 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5774 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5775
5776#ifdef RT_ARCH_AMD64
5777 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5778
5779 /* je/jz +1 */
5780 pbCodeBuf[off++] = 0x74;
5781 pbCodeBuf[off++] = 0x01;
5782
5783 /* int3 */
5784 pbCodeBuf[off++] = 0xcc;
5785
5786# elif defined(RT_ARCH_ARM64)
5787 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5788
5789 /* b.eq +1 */
5790 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5791 /* brk #0x2000 */
5792 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5793
5794# else
5795# error "Port me!"
5796# endif
5797 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5798
5799 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5800 return off;
5801}
5802
5803#endif /* VBOX_STRICT */
5804
5805
5806#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5807/**
5808 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
5809 */
5810DECL_HIDDEN_THROW(uint32_t)
5811iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
5812{
5813 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
5814
5815 fEflNeeded &= X86_EFL_STATUS_BITS;
5816 if (fEflNeeded)
5817 {
5818# ifdef RT_ARCH_AMD64
5819 /* test dword [pVCpu + offVCpu], imm32 */
5820 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5821 if (fEflNeeded <= 0xff)
5822 {
5823 pCodeBuf[off++] = 0xf6;
5824 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5825 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5826 }
5827 else
5828 {
5829 pCodeBuf[off++] = 0xf7;
5830 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
5831 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
5832 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
5833 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
5834 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
5835 }
5836 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5837
5838# else
5839 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5840 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
5841 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
5842# ifdef RT_ARCH_ARM64
5843 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
5844 off = iemNativeEmitBrk(pReNative, off, 0x7777);
5845# else
5846# error "Port me!"
5847# endif
5848 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5849# endif
5850 }
5851 return off;
5852}
5853#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
5854
5855
5856/**
5857 * Emits code for checking the return code of a call and rcPassUp, returning
5858 * from the code if either is non-zero.
5859 */
5860DECL_HIDDEN_THROW(uint32_t)
5861iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5862{
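    /* The emitted code corresponds roughly to this sketch:
     *      if ((rcStrict | pVCpu->iem.s.rcPassUp) != 0)
     *          goto NonZeroRetOrPassUp;
     * with the current instruction number kept in a register for the status fiddling
     * helper when instruction counting is enabled. */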
5863#ifdef RT_ARCH_AMD64
5864 /*
5865 * AMD64: eax = call status code.
5866 */
5867
5868 /* edx = rcPassUp */
5869 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5870# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5871 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5872# endif
5873
5874 /* edx = eax | rcPassUp */
5875 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5876 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5877 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5878 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5879
5880 /* Jump to non-zero status return path. */
5881 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5882
5883 /* done. */
5884
5885#elif RT_ARCH_ARM64
5886 /*
5887 * ARM64: w0 = call status code.
5888 */
5889# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5890 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5891# endif
5892 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5893
5894 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5895
5896 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5897
5898 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5899 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5900 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5901
5902#else
5903# error "port me"
5904#endif
5905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5906 RT_NOREF_PV(idxInstr);
5907 return off;
5908}
5909
5910
5911/**
5912 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5913 * raising a \#GP(0) if it isn't.
5914 *
5915 * @returns New code buffer offset, UINT32_MAX on failure.
5916 * @param pReNative The native recompile state.
5917 * @param off The code buffer offset.
5918 * @param idxAddrReg The host register with the address to check.
5919 * @param idxInstr The current instruction.
5920 */
5921DECL_HIDDEN_THROW(uint32_t)
5922iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5923{
5924 /*
5925 * Make sure we don't have any outstanding guest register writes as we may
5926     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5927 */
5928 off = iemNativeRegFlushPendingWrites(pReNative, off);
5929
5930#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5931 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5932#else
5933 RT_NOREF(idxInstr);
5934#endif
5935
5936#ifdef RT_ARCH_AMD64
5937 /*
5938 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5939 * return raisexcpt();
5940     * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
5941 */
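    /* Illustration: for the canonical address 0xffff800000000000 the high dword is
       0xffff8000; adding 0x8000 wraps it to zero, the final shift yields 0 and no
       exception is raised.  For the non-canonical 0x0000800000000000 the high dword is
       0x00008000; adding 0x8000 gives 0x00010000, the shift yields 1 and we branch to
       the RaiseGp0 label. */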
5942 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5943
5944 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5945 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5946 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5947 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5948 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5949
5950 iemNativeRegFreeTmp(pReNative, iTmpReg);
5951
5952#elif defined(RT_ARCH_ARM64)
5953 /*
5954 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5955 * return raisexcpt();
5956 * ----
5957 * mov x1, 0x800000000000
5958 * add x1, x0, x1
5959 * cmp xzr, x1, lsr 48
5960 * b.ne .Lraisexcpt
5961 */
5962 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5963
5964 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5965 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5966 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5967 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5968
5969 iemNativeRegFreeTmp(pReNative, iTmpReg);
5970
5971#else
5972# error "Port me"
5973#endif
5974 return off;
5975}
5976
5977
5978/**
5979 * Emits code to check that the content of @a idxAddrReg is within the limit
5980 * of CS, raising a \#GP(0) if it isn't.
5981 *
5982 * @returns New code buffer offset; throws VBox status code on error.
5983 * @param pReNative The native recompile state.
5984 * @param off The code buffer offset.
5985 * @param idxAddrReg The host register (32-bit) with the address to
5986 * check.
5987 * @param idxInstr The current instruction.
5988 */
5989DECL_HIDDEN_THROW(uint32_t)
5990iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5991 uint8_t idxAddrReg, uint8_t idxInstr)
5992{
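    /* Sketch of the emitted check: if (idxAddrReg > <CS limit from CPUMCTX>) raise #GP(0);
       the compare is unsigned and uses an 'above' branch, so an address equal to the
       limit is still accepted. */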
5993 /*
5994 * Make sure we don't have any outstanding guest register writes as we may
5995     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5996 */
5997 off = iemNativeRegFlushPendingWrites(pReNative, off);
5998
5999#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6000 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6001#else
6002 RT_NOREF(idxInstr);
6003#endif
6004
6005 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6006 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6007 kIemNativeGstRegUse_ReadOnly);
6008
6009 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6010 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6011
6012 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6013 return off;
6014}
6015
6016
6017/**
6018 * Emits a call to a CImpl function or something similar.
6019 */
6020DECL_HIDDEN_THROW(uint32_t)
6021iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6022 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6023{
6024 /* Writeback everything. */
6025 off = iemNativeRegFlushPendingWrites(pReNative, off);
6026
6027 /*
6028     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6029 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6030 */
6031 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6032 fGstShwFlush
6033 | RT_BIT_64(kIemNativeGstReg_Pc)
6034 | RT_BIT_64(kIemNativeGstReg_EFlags));
6035 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6036
6037 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6038
6039 /*
6040 * Load the parameters.
6041 */
6042#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6043    /* Special-case the hidden VBOXSTRICTRC return pointer. */
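    /* Background (for the curious): in strict builds VBOXSTRICTRC is a class, and the
       Windows x64 ABI returns such types through a hidden buffer pointer passed as the
       first argument.  Hence pVCpu is loaded into ARG1 rather than ARG0, cbInstr into
       ARG2, and so on, and rcStrict is read back from the shadow area after the call. */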
6044 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6045 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6046 if (cAddParams > 0)
6047 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6048 if (cAddParams > 1)
6049 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6050 if (cAddParams > 2)
6051 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6052 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6053
6054#else
6055 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6056 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6057 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6058 if (cAddParams > 0)
6059 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6060 if (cAddParams > 1)
6061 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6062 if (cAddParams > 2)
6063# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6064 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6065# else
6066 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6067# endif
6068#endif
6069
6070 /*
6071 * Make the call.
6072 */
6073 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6074
6075#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6076 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6077#endif
6078
6079 /*
6080 * Check the status code.
6081 */
6082 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6083}
6084
6085
6086/**
6087 * Emits a call to a threaded worker function.
6088 */
6089DECL_HIDDEN_THROW(uint32_t)
6090iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6091{
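    /* What this boils down to (sketch):
     *      rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu, auParams[0], auParams[1], auParams[2]);
     *      if (rcStrict != VINF_SUCCESS || pVCpu->iem.s.rcPassUp != VINF_SUCCESS)
     *          goto NonZeroRetOrPassUp;
     * with unused parameters simply not loaded and all dirty guest state flushed first. */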
6092 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6093
6094 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6095 off = iemNativeRegFlushPendingWrites(pReNative, off);
6096
6097 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6098 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6099
6100#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6101 /* The threaded function may throw / long jmp, so set current instruction
6102 number if we're counting. */
6103 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6104#endif
6105
6106 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6107
6108#ifdef RT_ARCH_AMD64
6109 /* Load the parameters and emit the call. */
6110# ifdef RT_OS_WINDOWS
6111# ifndef VBOXSTRICTRC_STRICT_ENABLED
6112 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6113 if (cParams > 0)
6114 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6115 if (cParams > 1)
6116 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6117 if (cParams > 2)
6118 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6119# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6120 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6121 if (cParams > 0)
6122 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6123 if (cParams > 1)
6124 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6125 if (cParams > 2)
6126 {
6127 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6128 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6129 }
6130 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6131# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6132# else
6133 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6134 if (cParams > 0)
6135 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6136 if (cParams > 1)
6137 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6138 if (cParams > 2)
6139 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6140# endif
6141
6142 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6143
6144# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6145 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6146# endif
6147
6148#elif RT_ARCH_ARM64
6149 /*
6150 * ARM64:
6151 */
6152 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6153 if (cParams > 0)
6154 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6155 if (cParams > 1)
6156 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6157 if (cParams > 2)
6158 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6159
6160 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6161
6162#else
6163# error "port me"
6164#endif
6165
6166 /*
6167 * Check the status code.
6168 */
6169 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6170
6171 return off;
6172}
6173
6174#ifdef VBOX_WITH_STATISTICS
6175/**
6176 * Emits code to update the thread call statistics.
6177 */
6178DECL_INLINE_THROW(uint32_t)
6179iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6180{
6181 /*
6182 * Update threaded function stats.
6183 */
6184 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6185 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6186# if defined(RT_ARCH_ARM64)
6187 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6188 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6189 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6190 iemNativeRegFreeTmp(pReNative, idxTmp1);
6191 iemNativeRegFreeTmp(pReNative, idxTmp2);
6192# else
6193 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6194# endif
6195 return off;
6196}
6197#endif /* VBOX_WITH_STATISTICS */
6198
6199
6200/**
6201 * Emits the code at the ReturnWithFlags label (returns
6202 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6203 */
6204static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6205{
6206 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6207 if (idxLabel != UINT32_MAX)
6208 {
6209 iemNativeLabelDefine(pReNative, idxLabel, off);
6210
6211 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6212
6213 /* jump back to the return sequence. */
6214 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6215 }
6216 return off;
6217}
6218
6219
6220/**
6221 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6222 */
6223static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6224{
6225 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6226 if (idxLabel != UINT32_MAX)
6227 {
6228 iemNativeLabelDefine(pReNative, idxLabel, off);
6229
6230 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6231
6232 /* jump back to the return sequence. */
6233 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6234 }
6235 return off;
6236}
6237
6238
6239/**
6240 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6241 */
6242static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6243{
6244 /*
6245 * Generate the rc + rcPassUp fiddling code if needed.
6246 */
6247 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6248 if (idxLabel != UINT32_MAX)
6249 {
6250 iemNativeLabelDefine(pReNative, idxLabel, off);
6251
6252 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
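        /* Note the register shuffling order on Windows/AMD64 below: the instruction number
           is still in cl at this point and must be moved to r8 before rcx is reloaded with
           pVCpu. */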
6253#ifdef RT_ARCH_AMD64
6254# ifdef RT_OS_WINDOWS
6255# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6256 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6257# endif
6258 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6259 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6260# else
6261 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6262 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6263# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6264 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6265# endif
6266# endif
6267# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6268 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6269# endif
6270
6271#else
6272 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6273 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6274 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6275#endif
6276
6277 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6278 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6279 }
6280 return off;
6281}
6282
6283
6284/**
6285 * Emits a standard epilog.
6286 */
6287static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6288{
6289 *pidxReturnLabel = UINT32_MAX;
6290
6291 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6292 off = iemNativeRegFlushPendingWrites(pReNative, off);
6293
6294 /*
6295 * Successful return, so clear the return register (eax, w0).
6296 */
6297 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
6298
6299 /*
6300 * Define label for common return point.
6301 */
6302 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6303 *pidxReturnLabel = idxReturn;
6304
6305 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6306
6307 /*
6308 * Restore registers and return.
6309 */
6310#ifdef RT_ARCH_AMD64
6311 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6312
6313    /* Reposition rsp at the r15 restore point. */
6314 pbCodeBuf[off++] = X86_OP_REX_W;
6315 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6316 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6317 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6318
6319 /* Pop non-volatile registers and return */
6320 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6321 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6322 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6323 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6324 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6325 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6326 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6327 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6328# ifdef RT_OS_WINDOWS
6329 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6330 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6331# endif
6332 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6333 pbCodeBuf[off++] = 0xc9; /* leave */
6334 pbCodeBuf[off++] = 0xc3; /* ret */
6335 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6336
6337#elif RT_ARCH_ARM64
6338 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6339
6340 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6341 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6342 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6343 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6344 IEMNATIVE_FRAME_VAR_SIZE / 8);
6345 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6346 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6347 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6348 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6349 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6350 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6351 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6352 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6353 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6354 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6355 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6356 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6357
6358 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6359 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6360 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6361 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6362
6363 /* retab / ret */
6364# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6365 if (1)
6366 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6367 else
6368# endif
6369 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6370
6371#else
6372# error "port me"
6373#endif
6374 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6375
6376 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6377}
6378
6379
6380/**
6381 * Emits a standard prolog.
6382 */
6383static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6384{
6385#ifdef RT_ARCH_AMD64
6386 /*
6387 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6388 * reserving 64 bytes for stack variables plus 4 non-register argument
6389     * slots.  Fixed register assignment: xBX = pVCpu;
6390 *
6391 * Since we always do the same register spilling, we can use the same
6392 * unwind description for all the code.
6393 */
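    /* Rough frame picture after the pushes and the sub below (a sketch, not authoritative):
     *      [xBP+8]   return address
     *      [xBP]     saved xBP
     *      [xBP-8]   saved xBX, then (Windows only) xSI+xDI, then r12 thru r15
     *      below     IEMNATIVE_FRAME_VAR_SIZE of variable slots, the stack argument slots
     *                and (Windows) the shadow argument slots down to the final xSP. */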
6394 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6395 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6396 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6397 pbCodeBuf[off++] = 0x8b;
6398 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6399 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6400 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6401# ifdef RT_OS_WINDOWS
6402 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6403 pbCodeBuf[off++] = 0x8b;
6404 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6405 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6406 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6407# else
6408 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6409 pbCodeBuf[off++] = 0x8b;
6410 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6411# endif
6412 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6413 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6414 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6415 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6416 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6417 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6418 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6419 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6420
6421# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6422 /* Save the frame pointer. */
6423 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6424# endif
6425
6426 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6427 X86_GREG_xSP,
6428 IEMNATIVE_FRAME_ALIGN_SIZE
6429 + IEMNATIVE_FRAME_VAR_SIZE
6430 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6431 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6432 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6433 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6434 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6435
6436#elif RT_ARCH_ARM64
6437 /*
6438 * We set up a stack frame exactly like on x86, only we have to push the
6439     * return address ourselves here. We save all non-volatile registers.
6440 */
6441 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6442
6443# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as I've been unable
6444 * to figure out where the BRK following AUTHB*+XPACB* stuff comes from in libunwind. It's
6445                      * definitely the dwarf stepping code, but until found it's very tedious to figure out whether it's
6446                      * in any way conditional, so just emitting these instructions now and hoping for the best... */
6447 /* pacibsp */
6448 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6449# endif
6450
6451 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6452 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6453 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6454 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6455 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6456 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6457 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6458 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6459 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6460 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6461 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6462 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6463 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6464 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6465 /* Save the BP and LR (ret address) registers at the top of the frame. */
6466 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6467 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6468 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6469 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6470 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6471 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6472
6473 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6474 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6475
6476 /* mov r28, r0 */
6477 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6478 /* mov r27, r1 */
6479 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6480
6481# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6482 /* Save the frame pointer. */
6483 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6484 ARMV8_A64_REG_X2);
6485# endif
6486
6487#else
6488# error "port me"
6489#endif
6490 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6491 return off;
6492}
6493
6494
6495/*********************************************************************************************************************************
6496* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6497*********************************************************************************************************************************/
6498
6499/**
6500 * Internal work that allocates a variable with kind set to
6501 * kIemNativeVarKind_Invalid and no current stack allocation.
6502 *
6503 * The kind will either be set by the caller or later when the variable is first
6504 * assigned a value.
6505 *
6506 * @returns Unpacked index.
6507 * @internal
6508 */
6509static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6510{
6511 Assert(cbType > 0 && cbType <= 64);
6512 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6513 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6514 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6515 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6516 pReNative->Core.aVars[idxVar].cbVar = cbType;
6517 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6518 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6519 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6520 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6521 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6522 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6523 pReNative->Core.aVars[idxVar].u.uValue = 0;
6524#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6525 pReNative->Core.aVars[idxVar].fSimdReg = false;
6526#endif
6527 return idxVar;
6528}
6529
6530
6531/**
6532 * Internal work that allocates an argument variable w/o setting enmKind.
6533 *
6534 * @returns Unpacked index.
6535 * @internal
6536 */
6537static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6538{
6539 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6540 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6541 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6542
6543 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6544 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6545 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6546 return idxVar;
6547}
6548
6549
6550/**
6551 * Gets the stack slot for a stack variable, allocating one if necessary.
6552 *
6553 * Calling this function implies that the stack slot will contain a valid
6554 * variable value. The caller deals with any register currently assigned to the
6555 * variable, typically by spilling it into the stack slot.
6556 *
6557 * @returns The stack slot number.
6558 * @param pReNative The recompiler state.
6559 * @param idxVar The variable.
6560 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6561 */
6562DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6563{
6564 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6565 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6566 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
6567
6568 /* Already got a slot? */
6569 uint8_t const idxStackSlot = pVar->idxStackSlot;
6570 if (idxStackSlot != UINT8_MAX)
6571 {
6572 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6573 return idxStackSlot;
6574 }
6575
6576 /*
6577 * A single slot is easy to allocate.
6578 * Allocate them from the top end, closest to BP, to reduce the displacement.
6579 */
6580 if (pVar->cbVar <= sizeof(uint64_t))
6581 {
6582 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6583 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6584 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6585 pVar->idxStackSlot = (uint8_t)iSlot;
6586 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
6587 return (uint8_t)iSlot;
6588 }
6589
6590 /*
6591 * We need more than one stack slot.
6592 *
6593 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6594 */
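    /* Example (sketch): a 32 byte variable needs four 8-byte slots aligned on a four-slot
       boundary, so fBitAlignMask = 3 and fBitAllocMask = 0xf; the loop below scans bmStack
       from the top for a suitably aligned gap of four free slots. */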
6595 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6596 Assert(pVar->cbVar <= 64);
6597 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
6598 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
6599 uint32_t bmStack = pReNative->Core.bmStack;
6600 while (bmStack != UINT32_MAX)
6601 {
6602 unsigned iSlot = ASMBitLastSetU32(~bmStack);
6603 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6604 iSlot = (iSlot - 1) & ~fBitAlignMask;
6605 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
6606 {
6607 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6608 pVar->idxStackSlot = (uint8_t)iSlot;
6609 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
6610 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
6611 return (uint8_t)iSlot;
6612 }
6613
6614 bmStack |= (fBitAllocMask << iSlot);
6615 }
6616 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6617}
6618
6619
6620/**
6621 * Changes the variable to a stack variable.
6622 *
6623 * Currently this is only possible to do the first time the variable is used;
6624 * switching later can be implemented but isn't done.
6625 *
6626 * @param pReNative The recompiler state.
6627 * @param idxVar The variable.
6628 * @throws VERR_IEM_VAR_IPE_2
6629 */
6630DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6631{
6632 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6633 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6634 if (pVar->enmKind != kIemNativeVarKind_Stack)
6635 {
6636 /* We could in theory transition from immediate to stack as well, but it
6637 would involve the caller doing work storing the value on the stack. So,
6638 till that's required we only allow transition from invalid. */
6639 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6640 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6641 pVar->enmKind = kIemNativeVarKind_Stack;
6642
6643 /* Note! We don't allocate a stack slot here, that's only done when a
6644 slot is actually needed to hold a variable value. */
6645 }
6646}
6647
6648
6649/**
6650 * Sets the variable to a constant (immediate) value.
6651 *
6652 * This does not require stack storage as we know the value and can always
6653 * reload it, unless of course it's referenced.
6654 *
6655 * @param pReNative The recompiler state.
6656 * @param idxVar The variable.
6657 * @param uValue The immediate value.
6658 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6659 */
6660DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
6661{
6662 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6663 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6664 if (pVar->enmKind != kIemNativeVarKind_Immediate)
6665 {
6666 /* Only simple transitions for now. */
6667 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6668 pVar->enmKind = kIemNativeVarKind_Immediate;
6669 }
6670 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6671
6672 pVar->u.uValue = uValue;
6673 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
6674 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
6675 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
6676}
6677
6678
6679/**
6680 * Sets the variable to a reference (pointer) to @a idxOtherVar.
6681 *
6682 * This does not require stack storage as we know the value and can always
6683 * reload it. Loading is postponed till needed.
6684 *
6685 * @param pReNative The recompiler state.
6686 * @param idxVar The variable. Unpacked.
6687 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
6688 *
6689 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
6690 * @internal
6691 */
6692static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
6693{
6694 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
6695 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
6696
6697 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
6698 {
6699 /* Only simple transitions for now. */
6700 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
6701 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6702 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
6703 }
6704 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6705
6706 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
6707
6708 /* Update the other variable, ensure it's a stack variable. */
6709 /** @todo handle variables with const values... that'll go boom now. */
6710 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
6711 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
6712}
6713
6714
6715/**
6716 * Sets the variable to a reference (pointer) to a guest register reference.
6717 *
6718 * This does not require stack storage as we know the value and can always
6719 * reload it. Loading is postponed till needed.
6720 *
6721 * @param pReNative The recompiler state.
6722 * @param idxVar The variable.
6723 * @param   enmRegClass     The class of guest registers to reference.
6724 * @param idxReg The register within @a enmRegClass to reference.
6725 *
6726 * @throws VERR_IEM_VAR_IPE_2
6727 */
6728DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
6729 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
6730{
6731 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6732 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6733
6734 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
6735 {
6736 /* Only simple transitions for now. */
6737 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6738 pVar->enmKind = kIemNativeVarKind_GstRegRef;
6739 }
6740 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
6741
6742 pVar->u.GstRegRef.enmClass = enmRegClass;
6743 pVar->u.GstRegRef.idx = idxReg;
6744}
6745
6746
6747DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6748{
6749 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6750}
6751
6752
6753DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
6754{
6755 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
6756
6757    /* Since we're using a generic uint64_t value type, we must truncate it if
6758       the variable is smaller, otherwise we may end up with too large a value when
6759       scaling up an imm8 w/ sign-extension.
6760
6761       This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
6762       in the bios, bx=1) when running on arm, because clang expects 16-bit
6763       register parameters to have bits 16 and up set to zero. Instead of
6764       setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
6765       CF value in the result. */
6766 switch (cbType)
6767 {
6768 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6769 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6770 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6771 }
6772 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6773 return idxVar;
6774}
6775
6776
6777DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
6778{
6779 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
6780 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
6781 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
6782 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
6783 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
6784 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6785
6786 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
6787 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
6788 return idxArgVar;
6789}
6790
6791
6792DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6793{
6794 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6795 /* Don't set to stack now, leave that to the first use as for instance
6796 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
6797 return idxVar;
6798}
6799
6800
6801DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
6802{
6803 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6804
6805    /* Since we're using a generic uint64_t value type, we must truncate it if
6806       the variable is smaller, otherwise we may end up with too large a value when
6807       scaling up an imm8 w/ sign-extension. */
6808 switch (cbType)
6809 {
6810 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
6811 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
6812 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
6813 }
6814 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
6815 return idxVar;
6816}
6817
6818
6819DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
6820{
6821 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
6822 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
6823
6824 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
6825 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
6826
6827 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
6828
6829 /* Truncate the value to this variables size. */
6830 switch (cbType)
6831 {
6832 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
6833 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
6834 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
6835 }
6836
6837 iemNativeVarRegisterRelease(pReNative, idxVarOther);
6838 iemNativeVarRegisterRelease(pReNative, idxVar);
6839 return idxVar;
6840}
6841
6842
6843/**
6844 * Makes sure variable @a idxVar has a register assigned to it and that it stays
6845 * fixed till we call iemNativeVarRegisterRelease.
6846 *
6847 * @returns The host register number.
6848 * @param pReNative The recompiler state.
6849 * @param idxVar The variable.
6850 * @param poff Pointer to the instruction buffer offset.
6851 * In case a register needs to be freed up or the value
6852 * loaded off the stack.
6853 * @param fInitialized Set if the variable must already have been initialized.
6854 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6855 * the case.
6856 * @param idxRegPref Preferred register number or UINT8_MAX.
6857 */
6858DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6859 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
6860{
6861 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6862 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6863 Assert(pVar->cbVar <= 8);
6864 Assert(!pVar->fRegAcquired);
6865
6866 uint8_t idxReg = pVar->idxReg;
6867 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
6868 {
6869 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
6870 && pVar->enmKind < kIemNativeVarKind_End);
6871 pVar->fRegAcquired = true;
6872 return idxReg;
6873 }
6874
6875 /*
6876 * If the kind of variable has not yet been set, default to 'stack'.
6877 */
6878 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
6879 && pVar->enmKind < kIemNativeVarKind_End);
6880 if (pVar->enmKind == kIemNativeVarKind_Invalid)
6881 iemNativeVarSetKindToStack(pReNative, idxVar);
6882
6883 /*
6884     * We have to allocate a register for the variable, even if it's a stack one,
6885     * as we don't know if there are modifications being made to it before it's
6886     * finalized (todo: analyze and insert hints about that?).
6887     *
6888     * If we can, we try to get the correct register for argument variables. This
6889     * is assuming that most argument variables are fetched as close as possible
6890     * to the actual call, so that there aren't any interfering hidden calls
6891     * (memory accesses, etc.) in between.
6892     *
6893     * If we cannot, or it's not an argument variable, we make sure no argument
6894     * registers that will be used by this MC block are allocated here, and we
6895     * always prefer non-volatile registers to avoid having to spill stuff for
6896     * internal calls.
6897 */
6898 /** @todo Detect too early argument value fetches and warn about hidden
6899 * calls causing less optimal code to be generated in the python script. */
6900
6901 uint8_t const uArgNo = pVar->uArgNo;
6902 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
6903 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
6904 {
6905 idxReg = g_aidxIemNativeCallRegs[uArgNo];
6906
6907#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
6908 /* Writeback any dirty shadow registers we are about to unshadow. */
6909 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
6910#endif
6911
6912 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6913 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
6914 }
6915 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
6916 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
6917 {
6918 /** @todo there must be a better way for this and boot cArgsX? */
6919 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
6920 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
6921 & ~pReNative->Core.bmHstRegsWithGstShadow
6922 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
6923 & fNotArgsMask;
6924 if (fRegs)
6925 {
6926 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
6927 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6928 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
6929 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
6930 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
6931 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6932 }
6933 else
6934 {
6935 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
6936 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
6937 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
6938 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
6939 }
6940 }
6941 else
6942 {
6943 idxReg = idxRegPref;
6944 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
6945 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
6946 }
6947 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
6948 pVar->idxReg = idxReg;
6949
6950#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6951 pVar->fSimdReg = false;
6952#endif
6953
6954 /*
6955 * Load it off the stack if we've got a stack slot.
6956 */
6957 uint8_t const idxStackSlot = pVar->idxStackSlot;
6958 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
6959 {
6960 Assert(fInitialized);
6961 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
6962 switch (pVar->cbVar)
6963 {
6964 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
6965 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
6966 case 3: AssertFailed(); RT_FALL_THRU();
6967 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
6968 default: AssertFailed(); RT_FALL_THRU();
6969 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
6970 }
6971 }
6972 else
6973 {
6974 Assert(idxStackSlot == UINT8_MAX);
6975 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
6976 }
6977 pVar->fRegAcquired = true;
6978 return idxReg;
6979}
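
// Usage sketch (mirroring iemNativeVarAllocAssign above; idxOtherReg stands in
// for some other already acquired host register):
//     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff, true /*fInitialized*/);
//     *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxOtherReg);
//     iemNativeVarRegisterRelease(pReNative, idxVar);
// While acquired (fRegAcquired is set) the host register stays fixed and will not
// be reallocated or spilled until the release call.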
6980
6981
6982#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6983/**
6984 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
6985 * fixed till we call iemNativeVarRegisterRelease.
6986 *
6987 * @returns The host register number.
6988 * @param pReNative The recompiler state.
6989 * @param idxVar The variable.
6990 * @param poff Pointer to the instruction buffer offset.
6991 * In case a register needs to be freed up or the value
6992 * loaded off the stack.
6993 * @param fInitialized Set if the variable must already have been initialized.
6994 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
6995 * the case.
6996 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
6997 */
6998DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
6999 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7000{
7001 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7002 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7003 Assert( pVar->cbVar == sizeof(RTUINT128U)
7004 || pVar->cbVar == sizeof(RTUINT256U));
7005 Assert(!pVar->fRegAcquired);
7006
7007 uint8_t idxReg = pVar->idxReg;
7008 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7009 {
7010 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7011 && pVar->enmKind < kIemNativeVarKind_End);
7012 pVar->fRegAcquired = true;
7013 return idxReg;
7014 }
7015
7016 /*
7017 * If the kind of variable has not yet been set, default to 'stack'.
7018 */
7019 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7020 && pVar->enmKind < kIemNativeVarKind_End);
7021 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7022 iemNativeVarSetKindToStack(pReNative, idxVar);
7023
7024 /*
7025     * We have to allocate a register for the variable, even if it's a stack one,
7026     * as we don't know if there are modifications being made to it before it's
7027     * finalized (todo: analyze and insert hints about that?).
7028     *
7029     * If we can, we try to get the correct register for argument variables. This
7030     * is assuming that most argument variables are fetched as close as possible
7031     * to the actual call, so that there aren't any interfering hidden calls
7032     * (memory accesses, etc.) in between.
7033     *
7034     * If we cannot, or it's not an argument variable, we make sure no argument
7035     * registers that will be used by this MC block are allocated here, and we
7036     * always prefer non-volatile registers to avoid having to spill stuff for
7037     * internal calls.
7038 */
7039 /** @todo Detect too early argument value fetches and warn about hidden
7040 * calls causing less optimal code to be generated in the python script. */
7041
7042 uint8_t const uArgNo = pVar->uArgNo;
7043 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7044
7045    /* SIMD is a bit simpler for now because there is no support for arguments. */
7046 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7047 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7048 {
7049 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7050 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7051 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7052 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7053 & fNotArgsMask;
7054 if (fRegs)
7055 {
7056 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7057 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7058 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7059 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7060 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7061 }
7062 else
7063 {
7064 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7065 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7066 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7067 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7068 }
7069 }
7070 else
7071 {
7072 idxReg = idxRegPref;
7073 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7074 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7075 }
7076 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7077
7078 pVar->fSimdReg = true;
7079 pVar->idxReg = idxReg;
7080
7081 /*
7082 * Load it off the stack if we've got a stack slot.
7083 */
7084 uint8_t const idxStackSlot = pVar->idxStackSlot;
7085 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7086 {
7087 Assert(fInitialized);
7088 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7089 switch (pVar->cbVar)
7090 {
7091 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7092 default: AssertFailed(); RT_FALL_THRU();
7093 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7094 }
7095 }
7096 else
7097 {
7098 Assert(idxStackSlot == UINT8_MAX);
7099 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7100 }
7101 pVar->fRegAcquired = true;
7102 return idxReg;
7103}
7104#endif
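
/* Note: unlike the general register variant above, the SIMD variant never maps a
   variable to a call argument register (uArgNo is asserted to be UINT8_MAX), so
   all the argument-register bookkeeping is skipped. */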
7105
7106
7107/**
7108 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7109 * guest register.
7110 *
7111 * This function makes sure there is a register for it and sets it to be the
7112 * current shadow copy of @a enmGstReg.
7113 *
7114 * @returns The host register number.
7115 * @param pReNative The recompiler state.
7116 * @param idxVar The variable.
7117 * @param enmGstReg The guest register this variable will be written to
7118 * after this call.
7119 * @param poff Pointer to the instruction buffer offset.
7120 * In case a register needs to be freed up or if the
7121 * variable content needs to be loaded off the stack.
7122 *
7123 * @note We DO NOT expect @a idxVar to be an argument variable,
7124 *          because this function is only used in the commit stage of an
7125 *          instruction.
7126 */
7127DECL_HIDDEN_THROW(uint8_t)
7128iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7129{
7130 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7131 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7132 Assert(!pVar->fRegAcquired);
7133 AssertMsgStmt( pVar->cbVar <= 8
7134 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7135 || pVar->enmKind == kIemNativeVarKind_Stack),
7136 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7137 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7138 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7139
7140 /*
7141 * This shouldn't ever be used for arguments, unless it's in a weird else
7142 * branch that doesn't do any calling and even then it's questionable.
7143 *
7144 * However, in case someone writes crazy wrong MC code and does register
7145 * updates before making calls, just use the regular register allocator to
7146 * ensure we get a register suitable for the intended argument number.
7147 */
7148 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7149
7150 /*
7151 * If there is already a register for the variable, we transfer/set the
7152 * guest shadow copy assignment to it.
7153 */
7154 uint8_t idxReg = pVar->idxReg;
7155 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7156 {
7157#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7158 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7159 {
7160# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7161 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7162 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7163# endif
7164 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7165 }
7166#endif
7167
7168 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7169 {
7170 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7171 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7172 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7173 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7174 }
7175 else
7176 {
7177 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7178 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7179 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7180 }
7181 /** @todo figure this one out. We need some way of making sure the register isn't
7182 * modified after this point, just in case we start writing crappy MC code. */
7183 pVar->enmGstReg = enmGstReg;
7184 pVar->fRegAcquired = true;
7185 return idxReg;
7186 }
7187 Assert(pVar->uArgNo == UINT8_MAX);
7188
7189 /*
7190     * Because this is supposed to be the commit stage, we just tag along with the
7191 * temporary register allocator and upgrade it to a variable register.
7192 */
7193 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7194 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7195 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7196 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7197 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7198 pVar->idxReg = idxReg;
7199
7200 /*
7201 * Now we need to load the register value.
7202 */
7203 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7204 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7205 else
7206 {
7207 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7208 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7209 switch (pVar->cbVar)
7210 {
7211 case sizeof(uint64_t):
7212 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7213 break;
7214 case sizeof(uint32_t):
7215 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7216 break;
7217 case sizeof(uint16_t):
7218 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7219 break;
7220 case sizeof(uint8_t):
7221 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7222 break;
7223 default:
7224 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7225 }
7226 }
7227
7228 pVar->fRegAcquired = true;
7229 return idxReg;
7230}
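
/* Note: as the variable's value is about to overwrite the guest register in full,
   the host register is requested with kIemNativeGstRegUse_ForFullWrite and is made
   the current shadow copy of the guest register; the code above then only loads
   the variable's own value (immediate or off its stack slot) into it. */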
7231
7232
7233/**
7234 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7235 *
7236 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7237 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7238 * requirement of flushing anything in volatile host registers when making a
7239 * call.
7240 *
7241 * @returns New @a off value.
7242 * @param pReNative The recompiler state.
7243 * @param off The code buffer position.
7244 * @param fHstRegsNotToSave Set of registers not to save & restore.
7245 */
7246DECL_HIDDEN_THROW(uint32_t)
7247iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7248{
7249 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7250 if (fHstRegs)
7251 {
7252 do
7253 {
7254 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7255 fHstRegs &= ~RT_BIT_32(idxHstReg);
7256
7257 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7258 {
7259 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7260 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7261 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7262 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7263 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7264 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7265 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7266 {
7267 case kIemNativeVarKind_Stack:
7268 {
7269 /* Temporarily spill the variable register. */
7270 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7271 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7272 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7273 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7274 continue;
7275 }
7276
7277 case kIemNativeVarKind_Immediate:
7278 case kIemNativeVarKind_VarRef:
7279 case kIemNativeVarKind_GstRegRef:
7280 /* It is weird to have any of these loaded at this point. */
7281 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7282 continue;
7283
7284 case kIemNativeVarKind_End:
7285 case kIemNativeVarKind_Invalid:
7286 break;
7287 }
7288 AssertFailed();
7289 }
7290 else
7291 {
7292 /*
7293 * Allocate a temporary stack slot and spill the register to it.
7294 */
7295 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7296 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7297 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7298 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7299 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7300 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7301 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7302 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7303 }
7304 } while (fHstRegs);
7305 }
7306#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7307
7308 /*
7309     * Guest SIMD register shadows are flushed to CPUMCTX at the moment and don't need a stack
7310     * slot allocated, which would be more difficult due to them spanning multiple stack slots
7311     * and having different sizes (besides, we only have a limited amount of slots at the moment).
7312     *
7313     * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
7314     * the callee. This asserts that the registers were written back earlier and are not in a dirty state.
7315 */
7316 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7317
7318 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7319 if (fHstRegs)
7320 {
7321 do
7322 {
7323 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7324 fHstRegs &= ~RT_BIT_32(idxHstReg);
7325
7326 /* Fixed reserved and temporary registers don't need saving. */
7327 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7328 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7329 continue;
7330
7331 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7332
7333 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7334 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7335 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7336 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7337 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7338 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7339 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7340 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7341 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7342 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7343 {
7344 case kIemNativeVarKind_Stack:
7345 {
7346 /* Temporarily spill the variable register. */
7347 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7348 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7349 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7350 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7351 if (cbVar == sizeof(RTUINT128U))
7352 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7353 else
7354 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7355 continue;
7356 }
7357
7358 case kIemNativeVarKind_Immediate:
7359 case kIemNativeVarKind_VarRef:
7360 case kIemNativeVarKind_GstRegRef:
7361 /* It is weird to have any of these loaded at this point. */
7362 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7363 continue;
7364
7365 case kIemNativeVarKind_End:
7366 case kIemNativeVarKind_Invalid:
7367 break;
7368 }
7369 AssertFailed();
7370 } while (fHstRegs);
7371 }
7372#endif
7373 return off;
7374}
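
/* Note on the two spill paths above: a register owned by a stack-kind variable is
   spilled to that variable's own slot (iemNativeVarGetStackSlot), while any other
   allocated volatile register gets an ad-hoc slot recorded in
   aHstRegs[idxHstReg].idxStackSlot, which the restore function below reads back
   and frees again. */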
7375
7376
7377/**
7378 * Emit code to restore volatile registers after a call to a helper.
7379 *
7380 * @returns New @a off value.
7381 * @param pReNative The recompiler state.
7382 * @param off The code buffer position.
7383 * @param fHstRegsNotToSave Set of registers not to save & restore.
7384 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7385 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7386 */
7387DECL_HIDDEN_THROW(uint32_t)
7388iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7389{
7390 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7391 if (fHstRegs)
7392 {
7393 do
7394 {
7395 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7396 fHstRegs &= ~RT_BIT_32(idxHstReg);
7397
7398 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7399 {
7400 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7401 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7402 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7403 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7404 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7405 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7406 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7407 {
7408 case kIemNativeVarKind_Stack:
7409 {
7410 /* Unspill the variable register. */
7411 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7412 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7413 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7414 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7415 continue;
7416 }
7417
7418 case kIemNativeVarKind_Immediate:
7419 case kIemNativeVarKind_VarRef:
7420 case kIemNativeVarKind_GstRegRef:
7421 /* It is weird to have any of these loaded at this point. */
7422 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7423 continue;
7424
7425 case kIemNativeVarKind_End:
7426 case kIemNativeVarKind_Invalid:
7427 break;
7428 }
7429 AssertFailed();
7430 }
7431 else
7432 {
7433 /*
7434 * Restore from temporary stack slot.
7435 */
7436 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7437 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7438 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7439 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7440
7441 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7442 }
7443 } while (fHstRegs);
7444 }
7445#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7446 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7447 if (fHstRegs)
7448 {
7449 do
7450 {
7451 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7452 fHstRegs &= ~RT_BIT_32(idxHstReg);
7453
7454 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7455 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7456 continue;
7457 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7458
7459 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7460 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7461 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7462 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7463 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7464 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7465 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7466 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7467 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7468 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7469 {
7470 case kIemNativeVarKind_Stack:
7471 {
7472 /* Unspill the variable register. */
7473 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7474 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7475 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7476 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7477
7478 if (cbVar == sizeof(RTUINT128U))
7479 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7480 else
7481 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7482 continue;
7483 }
7484
7485 case kIemNativeVarKind_Immediate:
7486 case kIemNativeVarKind_VarRef:
7487 case kIemNativeVarKind_GstRegRef:
7488 /* It is weird to have any of these loaded at this point. */
7489 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7490 continue;
7491
7492 case kIemNativeVarKind_End:
7493 case kIemNativeVarKind_Invalid:
7494 break;
7495 }
7496 AssertFailed();
7497 } while (fHstRegs);
7498 }
7499#endif
7500 return off;
7501}
7502
7503
7504/**
7505 * Worker that frees the stack slots for variable @a idxVar if any allocated.
7506 *
7507 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7508 *
7509 * ASSUMES that @a idxVar is valid and unpacked.
7510 */
7511DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7512{
7513 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7514 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7515 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7516 {
7517 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7518 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7519 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7520 Assert(cSlots > 0);
7521 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7522 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7523 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7524 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7525 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7526 }
7527 else
7528 Assert(idxStackSlot == UINT8_MAX);
7529}
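
/* Worked example of the slot mask math above: a 32-byte variable (RTUINT256U)
   gives cSlots = (32 + 7) / 8 = 4 and fAllocMask = RT_BIT_32(4) - 1 = 0xf, so the
   four consecutive bmStack bits starting at idxStackSlot are asserted as allocated
   and then cleared. */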
7530
7531
7532/**
7533 * Worker that frees a single variable.
7534 *
7535 * ASSUMES that @a idxVar is valid and unpacked.
7536 */
7537DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7538{
7539 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7540 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7541 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7542
7543 /* Free the host register first if any assigned. */
7544 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7545#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7546 if ( idxHstReg != UINT8_MAX
7547 && pReNative->Core.aVars[idxVar].fSimdReg)
7548 {
7549 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7550 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7551 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
7552 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
7553 }
7554 else
7555#endif
7556 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7557 {
7558 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7559 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7560 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7561 }
7562
7563 /* Free argument mapping. */
7564 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7565 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7566 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7567
7568 /* Free the stack slots. */
7569 iemNativeVarFreeStackSlots(pReNative, idxVar);
7570
7571 /* Free the actual variable. */
7572 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7573 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7574}
7575
7576
7577/**
7578 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7579 */
7580DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7581{
7582 while (bmVars != 0)
7583 {
7584 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7585 bmVars &= ~RT_BIT_32(idxVar);
7586
7587#if 1 /** @todo optimize by simplifying this later... */
7588 iemNativeVarFreeOneWorker(pReNative, idxVar);
7589#else
7590 /* Only need to free the host register, the rest is done as bulk updates below. */
7591 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7592 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7593 {
7594 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7595 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7596 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7597 }
7598#endif
7599 }
7600#if 0 /** @todo optimize by simplifying this later... */
7601 pReNative->Core.bmVars = 0;
7602 pReNative->Core.bmStack = 0;
7603 pReNative->Core.u64ArgVars = UINT64_MAX;
7604#endif
7605}
7606
7607
7608
7609/*********************************************************************************************************************************
7610* Emitters for IEM_MC_CALL_CIMPL_XXX *
7611*********************************************************************************************************************************/
7612
7613/**
7614 * Emits code to load a reference to the given guest register into @a idxGprDst.
7615 */
7616DECL_HIDDEN_THROW(uint32_t)
7617iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7618 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7619{
7620#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7621    /** @todo If we are ever gonna allow referencing the RIP register we need to update the guest value here. */
7622#endif
7623
7624 /*
7625 * Get the offset relative to the CPUMCTX structure.
7626 */
7627 uint32_t offCpumCtx;
7628 switch (enmClass)
7629 {
7630 case kIemNativeGstRegRef_Gpr:
7631 Assert(idxRegInClass < 16);
7632 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7633 break;
7634
7635        case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
7636 Assert(idxRegInClass < 4);
7637 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7638 break;
7639
7640 case kIemNativeGstRegRef_EFlags:
7641 Assert(idxRegInClass == 0);
7642 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7643 break;
7644
7645 case kIemNativeGstRegRef_MxCsr:
7646 Assert(idxRegInClass == 0);
7647 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7648 break;
7649
7650 case kIemNativeGstRegRef_FpuReg:
7651 Assert(idxRegInClass < 8);
7652 AssertFailed(); /** @todo what kind of indexing? */
7653 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7654 break;
7655
7656 case kIemNativeGstRegRef_MReg:
7657 Assert(idxRegInClass < 8);
7658 AssertFailed(); /** @todo what kind of indexing? */
7659 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7660 break;
7661
7662 case kIemNativeGstRegRef_XReg:
7663 Assert(idxRegInClass < 16);
7664 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7665 break;
7666
7667 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
7668 Assert(idxRegInClass == 0);
7669 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
7670 break;
7671
7672 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
7673 Assert(idxRegInClass == 0);
7674 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
7675 break;
7676
7677 default:
7678 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7679 }
7680
7681 /*
7682 * Load the value into the destination register.
7683 */
7684#ifdef RT_ARCH_AMD64
7685 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7686
7687#elif defined(RT_ARCH_ARM64)
7688 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7689 Assert(offCpumCtx < 4096);
7690 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7691
7692#else
7693# error "Port me!"
7694#endif
7695
7696 return off;
7697}
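
/* Note: on AMD64 the address is formed via LEA relative to the VMCPU pointer
   (offCpumCtx + the offset of cpum.GstCtx), whereas on ARM64 it is a single ADD of
   the immediate to the fixed CPUMCTX pointer register (IEMNATIVE_REG_FIXED_PCPUMCTX),
   which is why offCpumCtx must fit the unsigned 12-bit immediate (< 4096). */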
7698
7699
7700/**
7701 * Common code for CIMPL and AIMPL calls.
7702 *
7703 * These are calls that use argument variables and such.  They should not be
7704 * confused with internal calls required to implement an MC operation,
7705 * like a TLB load and similar.
7706 *
7707 * Upon return all that is left to do is to load any hidden arguments and
7708 * perform the call. All argument variables are freed.
7709 *
7710 * @returns New code buffer offset; throws VBox status code on error.
7711 * @param pReNative The native recompile state.
7712 * @param off The code buffer offset.
7713 * @param   cArgs           The total number of arguments (includes hidden
7714 * count).
7715 * @param cHiddenArgs The number of hidden arguments. The hidden
7716 * arguments must not have any variable declared for
7717 * them, whereas all the regular arguments must
7718 * (tstIEMCheckMc ensures this).
7719 * @param   fFlushPendingWrites Flag whether to flush pending writes (default true);
7720 *                              even when false, pending writes in call-volatile registers will still be flushed.
7721 */
7722DECL_HIDDEN_THROW(uint32_t)
7723iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
7724 bool fFlushPendingWrites /*= true*/)
7725{
7726#ifdef VBOX_STRICT
7727 /*
7728 * Assert sanity.
7729 */
7730 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7731 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7732 for (unsigned i = 0; i < cHiddenArgs; i++)
7733 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7734 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7735 {
7736 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7737 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7738 }
7739 iemNativeRegAssertSanity(pReNative);
7740#endif
7741
7742 /* We don't know what the called function makes use of, so flush any pending register writes. */
7743 RT_NOREF(fFlushPendingWrites);
7744#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7745 if (fFlushPendingWrites)
7746#endif
7747 off = iemNativeRegFlushPendingWrites(pReNative, off);
7748
7749 /*
7750 * Before we do anything else, go over variables that are referenced and
7751 * make sure they are not in a register.
7752 */
7753 uint32_t bmVars = pReNative->Core.bmVars;
7754 if (bmVars)
7755 {
7756 do
7757 {
7758 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7759 bmVars &= ~RT_BIT_32(idxVar);
7760
7761 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7762 {
7763 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7764#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7765 if ( idxRegOld != UINT8_MAX
7766 && pReNative->Core.aVars[idxVar].fSimdReg)
7767 {
7768 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7769 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
7770
7771 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7772 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7773 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7774 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7775 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
7776 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7777 else
7778 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7779
7780 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
7781 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
7782
7783 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7784 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
7785 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7786 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
7787 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
7788 }
7789 else
7790#endif
7791 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7792 {
7793 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7794 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7795 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7796 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7797 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7798
7799 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7800 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7801 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7802 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7803 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7804 }
7805 }
7806 } while (bmVars != 0);
7807#if 0 //def VBOX_STRICT
7808 iemNativeRegAssertSanity(pReNative);
7809#endif
7810 }
7811
7812 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7813
7814#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7815 /*
7816     * At the very first step, go over the host registers that will be used for arguments
7817     * and make sure they don't shadow anything which needs writing back first.
7818 */
7819 for (uint32_t i = 0; i < cRegArgs; i++)
7820 {
7821 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7822
7823 /* Writeback any dirty guest shadows before using this register. */
7824 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
7825 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
7826 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
7827 }
7828#endif
7829
7830 /*
7831 * First, go over the host registers that will be used for arguments and make
7832 * sure they either hold the desired argument or are free.
7833 */
7834 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7835 {
7836 for (uint32_t i = 0; i < cRegArgs; i++)
7837 {
7838 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7839 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7840 {
7841 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7842 {
7843 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7844 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7845 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7846 Assert(pVar->idxReg == idxArgReg);
7847 uint8_t const uArgNo = pVar->uArgNo;
7848 if (uArgNo == i)
7849                    { /* perfect */ }
7850 /* The variable allocator logic should make sure this is impossible,
7851 except for when the return register is used as a parameter (ARM,
7852 but not x86). */
7853#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7854 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7855 {
7856# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7857# error "Implement this"
7858# endif
7859 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7860 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7861 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7862 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7863 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7864 }
7865#endif
7866 else
7867 {
7868 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7869
7870 if (pVar->enmKind == kIemNativeVarKind_Stack)
7871 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7872 else
7873 {
7874 /* just free it, can be reloaded if used again */
7875 pVar->idxReg = UINT8_MAX;
7876 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7877 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7878 }
7879 }
7880 }
7881 else
7882 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7883 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7884 }
7885 }
7886#if 0 //def VBOX_STRICT
7887 iemNativeRegAssertSanity(pReNative);
7888#endif
7889 }
7890
7891 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7892
7893#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7894 /*
7895 * If there are any stack arguments, make sure they are in their place as well.
7896 *
7897     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7898     * the caller) will be loading it later and it must be free (see first loop).
7899 */
7900 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7901 {
7902 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7903 {
7904 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7905 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7906 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7907 {
7908 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7909 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
7910 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
7911 pVar->idxReg = UINT8_MAX;
7912 }
7913 else
7914 {
7915 /* Use ARG0 as temp for stuff we need registers for. */
7916 switch (pVar->enmKind)
7917 {
7918 case kIemNativeVarKind_Stack:
7919 {
7920 uint8_t const idxStackSlot = pVar->idxStackSlot;
7921 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7922 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7923 iemNativeStackCalcBpDisp(idxStackSlot));
7924 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7925 continue;
7926 }
7927
7928 case kIemNativeVarKind_Immediate:
7929 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
7930 continue;
7931
7932 case kIemNativeVarKind_VarRef:
7933 {
7934 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
7935 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7936 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7937 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7938 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7939# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7940 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
7941 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
7942 if ( fSimdReg
7943 && idxRegOther != UINT8_MAX)
7944 {
7945 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7946 if (cbVar == sizeof(RTUINT128U))
7947 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
7948 else
7949 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
7950 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7951 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7952 }
7953 else
7954# endif
7955 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7956 {
7957 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7958 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7959 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7960 }
7961 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7962 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7963 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
7964 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7965 continue;
7966 }
7967
7968 case kIemNativeVarKind_GstRegRef:
7969 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
7970 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
7971 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7972 continue;
7973
7974 case kIemNativeVarKind_Invalid:
7975 case kIemNativeVarKind_End:
7976 break;
7977 }
7978 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7979 }
7980 }
7981# if 0 //def VBOX_STRICT
7982 iemNativeRegAssertSanity(pReNative);
7983# endif
7984 }
7985#else
7986 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
7987#endif
7988
7989 /*
7990 * Make sure the argument variables are loaded into their respective registers.
7991 *
7992 * We can optimize this by ASSUMING that any register allocations are for
7993     * registers that have already been loaded and are ready.  The previous step
7994 * saw to that.
7995 */
7996 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
7997 {
7998 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
7999 {
8000 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8001 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8002 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8003 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8004 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8005 else
8006 {
8007 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8008 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8009 {
8010 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8011 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8012 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8013 | RT_BIT_32(idxArgReg);
8014 pVar->idxReg = idxArgReg;
8015 }
8016 else
8017 {
8018 /* Use ARG0 as temp for stuff we need registers for. */
8019 switch (pVar->enmKind)
8020 {
8021 case kIemNativeVarKind_Stack:
8022 {
8023 uint8_t const idxStackSlot = pVar->idxStackSlot;
8024 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8025 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8026 continue;
8027 }
8028
8029 case kIemNativeVarKind_Immediate:
8030 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8031 continue;
8032
8033 case kIemNativeVarKind_VarRef:
8034 {
8035 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8036 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8037 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8038 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8039 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8040 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8041#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8042 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8043 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8044 if ( fSimdReg
8045 && idxRegOther != UINT8_MAX)
8046 {
8047 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8048 if (cbVar == sizeof(RTUINT128U))
8049 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8050 else
8051 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8052 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8053 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8054 }
8055 else
8056#endif
8057 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8058 {
8059 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8060 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8061 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8062 }
8063 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8064 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8065 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8066 continue;
8067 }
8068
8069 case kIemNativeVarKind_GstRegRef:
8070 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8071 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8072 continue;
8073
8074 case kIemNativeVarKind_Invalid:
8075 case kIemNativeVarKind_End:
8076 break;
8077 }
8078 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8079 }
8080 }
8081 }
8082#if 0 //def VBOX_STRICT
8083 iemNativeRegAssertSanity(pReNative);
8084#endif
8085 }
8086#ifdef VBOX_STRICT
8087 else
8088 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8089 {
8090 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8091 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8092 }
8093#endif
8094
8095 /*
8096 * Free all argument variables (simplified).
8097 * Their lifetime always expires with the call they are for.
8098 */
8099 /** @todo Make the python script check that arguments aren't used after
8100 * IEM_MC_CALL_XXXX. */
8101    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8102     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8103 * an argument value. There is also some FPU stuff. */
8104 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8105 {
8106 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8107 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8108
8109 /* no need to free registers: */
8110 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8111 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8112 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8113 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8114 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8115 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8116
8117 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8118 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8119 iemNativeVarFreeStackSlots(pReNative, idxVar);
8120 }
8121 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8122
8123 /*
8124 * Flush volatile registers as we make the call.
8125 */
8126 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8127
8128 return off;
8129}
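
/* Summary of the sequence above: (1) flush pending guest register writes (unless
   told not to), (2) spill variables that are referenced by other variables, (3) make
   sure the argument registers either hold their designated arguments or are free,
   (4) place any stack arguments (IEMNATIVE_FP_OFF_STACK_ARG0 configs only), (5) load
   the remaining argument variables into their call registers, (6) free all argument
   variables, and (7) flush the volatile registers for the call itself. */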
8130
8131
8132
8133/*********************************************************************************************************************************
8134* TLB Lookup. *
8135*********************************************************************************************************************************/
8136
8137/**
8138 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8139 */
8140DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8141{
8142 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8143 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8144 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8145 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8146
8147 /* Do the lookup manually. */
8148 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8149 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8150 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8151 if (RT_LIKELY(pTlbe->uTag == uTag))
8152 {
8153 /*
8154 * Check TLB page table level access flags.
8155 */
8156 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8157 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8158 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8159 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8160 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8161 | IEMTLBE_F_PG_UNASSIGNED
8162 | IEMTLBE_F_PT_NO_ACCESSED
8163 | fNoWriteNoDirty | fNoUser);
8164 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8165 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8166 {
8167 /*
8168 * Return the address.
8169 */
8170 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8171 if ((uintptr_t)pbAddr == uResult)
8172 return;
8173 RT_NOREF(cbMem);
8174 AssertFailed();
8175 }
8176 else
8177 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8178 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8179 }
8180 else
8181 AssertFailed();
8182 RT_BREAKPOINT();
8183}
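
/*
 * Illustration only (not compiled): a minimal sketch of how the uSegAndSizeAndAccess
 * word consumed above could be packed - byte 0 the segment register index (UINT8_MAX
 * for an already flat address), byte 1 the access size in bytes, and bits 16 and up
 * the IEM_ACCESS_XXX flags.  The helper name is made up for this sketch; the real
 * emitter code builds the constant inline.
 */
#if 0
DECLINLINE(uint32_t) iemNativeExamplePackSegSizeAccess(uint8_t iSegReg, uint8_t cbMem, uint32_t fAccess)
{
    return (uint32_t)iSegReg | ((uint32_t)cbMem << 8) | (fAccess << 16);
}
#endif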
8184
8185/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8186
8187
8188
8189/*********************************************************************************************************************************
8190* Recompiler Core. *
8191*********************************************************************************************************************************/
8192
8193/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8194static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8195{
8196 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8197 pDis->cbCachedInstr += cbMaxRead;
8198 RT_NOREF(cbMinRead);
8199 return VERR_NO_DATA;
8200}
8201
8202
8203DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8204{
8205 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8206 {
8207#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8208 ENTRY(fLocalForcedActions),
8209 ENTRY(iem.s.rcPassUp),
8210 ENTRY(iem.s.fExec),
8211 ENTRY(iem.s.pbInstrBuf),
8212 ENTRY(iem.s.uInstrBufPc),
8213 ENTRY(iem.s.GCPhysInstrBuf),
8214 ENTRY(iem.s.cbInstrBufTotal),
8215 ENTRY(iem.s.idxTbCurInstr),
8216#ifdef VBOX_WITH_STATISTICS
8217 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8218 ENTRY(iem.s.StatNativeTlbHitsForStore),
8219 ENTRY(iem.s.StatNativeTlbHitsForStack),
8220 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8221 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8222 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8223 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8224 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8225#endif
8226 ENTRY(iem.s.DataTlb.uTlbRevision),
8227 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8228 ENTRY(iem.s.DataTlb.cTlbHits),
8229 ENTRY(iem.s.DataTlb.aEntries),
8230 ENTRY(iem.s.CodeTlb.uTlbRevision),
8231 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8232 ENTRY(iem.s.CodeTlb.cTlbHits),
8233 ENTRY(iem.s.CodeTlb.aEntries),
8234 ENTRY(pVMR3),
8235 ENTRY(cpum.GstCtx.rax),
8236 ENTRY(cpum.GstCtx.ah),
8237 ENTRY(cpum.GstCtx.rcx),
8238 ENTRY(cpum.GstCtx.ch),
8239 ENTRY(cpum.GstCtx.rdx),
8240 ENTRY(cpum.GstCtx.dh),
8241 ENTRY(cpum.GstCtx.rbx),
8242 ENTRY(cpum.GstCtx.bh),
8243 ENTRY(cpum.GstCtx.rsp),
8244 ENTRY(cpum.GstCtx.rbp),
8245 ENTRY(cpum.GstCtx.rsi),
8246 ENTRY(cpum.GstCtx.rdi),
8247 ENTRY(cpum.GstCtx.r8),
8248 ENTRY(cpum.GstCtx.r9),
8249 ENTRY(cpum.GstCtx.r10),
8250 ENTRY(cpum.GstCtx.r11),
8251 ENTRY(cpum.GstCtx.r12),
8252 ENTRY(cpum.GstCtx.r13),
8253 ENTRY(cpum.GstCtx.r14),
8254 ENTRY(cpum.GstCtx.r15),
8255 ENTRY(cpum.GstCtx.es.Sel),
8256 ENTRY(cpum.GstCtx.es.u64Base),
8257 ENTRY(cpum.GstCtx.es.u32Limit),
8258 ENTRY(cpum.GstCtx.es.Attr),
8259 ENTRY(cpum.GstCtx.cs.Sel),
8260 ENTRY(cpum.GstCtx.cs.u64Base),
8261 ENTRY(cpum.GstCtx.cs.u32Limit),
8262 ENTRY(cpum.GstCtx.cs.Attr),
8263 ENTRY(cpum.GstCtx.ss.Sel),
8264 ENTRY(cpum.GstCtx.ss.u64Base),
8265 ENTRY(cpum.GstCtx.ss.u32Limit),
8266 ENTRY(cpum.GstCtx.ss.Attr),
8267 ENTRY(cpum.GstCtx.ds.Sel),
8268 ENTRY(cpum.GstCtx.ds.u64Base),
8269 ENTRY(cpum.GstCtx.ds.u32Limit),
8270 ENTRY(cpum.GstCtx.ds.Attr),
8271 ENTRY(cpum.GstCtx.fs.Sel),
8272 ENTRY(cpum.GstCtx.fs.u64Base),
8273 ENTRY(cpum.GstCtx.fs.u32Limit),
8274 ENTRY(cpum.GstCtx.fs.Attr),
8275 ENTRY(cpum.GstCtx.gs.Sel),
8276 ENTRY(cpum.GstCtx.gs.u64Base),
8277 ENTRY(cpum.GstCtx.gs.u32Limit),
8278 ENTRY(cpum.GstCtx.gs.Attr),
8279 ENTRY(cpum.GstCtx.rip),
8280 ENTRY(cpum.GstCtx.eflags),
8281 ENTRY(cpum.GstCtx.uRipInhibitInt),
8282 ENTRY(cpum.GstCtx.cr0),
8283 ENTRY(cpum.GstCtx.cr4),
8284 ENTRY(cpum.GstCtx.aXcr[0]),
8285 ENTRY(cpum.GstCtx.aXcr[1]),
8286#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8287 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8288 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8289 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8290 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8291 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8292 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8293 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8294 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8295 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8296 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8297 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8298 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8299 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8300 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8301 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8302 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8303 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8304 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8305 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8306 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8307 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8308 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8309 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8310 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8311 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8312 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8313 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8314 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8315 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8316 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8317 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8318 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8319#endif
8320#undef ENTRY
8321 };
8322#ifdef VBOX_STRICT
8323 static bool s_fOrderChecked = false;
8324 if (!s_fOrderChecked)
8325 {
8326 s_fOrderChecked = true;
8327 uint32_t offPrev = s_aMembers[0].off;
8328 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8329 {
8330 Assert(s_aMembers[i].off > offPrev);
8331 offPrev = s_aMembers[i].off;
8332 }
8333 }
8334#endif
8335
8336 /*
8337 * Binary lookup.
8338 */
8339 unsigned iStart = 0;
8340 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8341 for (;;)
8342 {
8343 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8344 uint32_t const offCur = s_aMembers[iCur].off;
8345 if (off < offCur)
8346 {
8347 if (iCur != iStart)
8348 iEnd = iCur;
8349 else
8350 break;
8351 }
8352 else if (off > offCur)
8353 {
8354 if (iCur + 1 < iEnd)
8355 iStart = iCur + 1;
8356 else
8357 break;
8358 }
8359 else
8360 return s_aMembers[iCur].pszName;
8361 }
8362#ifdef VBOX_WITH_STATISTICS
8363 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8364 return "iem.s.acThreadedFuncStats[iFn]";
8365#endif
8366 return NULL;
8367}
8368
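/*
 * Illustration only (not compiled): the 7-byte NOP markers decoded by the
 * disassembler below carry a 32-bit info word shaped like the RT_MAKE_U32()
 * expression used when emitting them from iemNativeRecompile(): low 15 bits the
 * call index, bit 15 the recompiled flag, and the high word the threaded function
 * number (other values select an entry in the local a_apszMarkers table).  The
 * helper name here is made up for the sketch.
 */
#if 0
DECLINLINE(uint32_t) iemNativeExampleMakeMarkerInfo(uint32_t idxCall, bool fRecompiled, uint16_t enmFunction)
{
    return RT_MAKE_U32((idxCall & UINT32_C(0x7fff)) | (fRecompiled ? UINT32_C(0x8000) : 0), enmFunction);
}
#endif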
8369
8370DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8371{
8372 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8373#if defined(RT_ARCH_AMD64)
8374 static const char * const a_apszMarkers[] =
8375 {
8376 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8377 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8378 };
8379#endif
8380
8381 char szDisBuf[512];
8382 DISSTATE Dis;
8383 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8384 uint32_t const cNative = pTb->Native.cInstructions;
8385 uint32_t offNative = 0;
8386#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8387 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8388#endif
8389 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8390 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8391 : DISCPUMODE_64BIT;
8392#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8393 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8394#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8395 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8396#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8397# error "Port me"
8398#else
8399 csh hDisasm = ~(size_t)0;
8400# if defined(RT_ARCH_AMD64)
8401 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8402# elif defined(RT_ARCH_ARM64)
8403 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8404# else
8405# error "Port me"
8406# endif
8407 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8408
8409 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8410 //Assert(rcCs == CS_ERR_OK);
8411#endif
8412
8413 /*
8414 * Print TB info.
8415 */
8416 pHlp->pfnPrintf(pHlp,
8417 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8418 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8419 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8420 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8421#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8422 if (pDbgInfo && pDbgInfo->cEntries > 1)
8423 {
8424 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8425
8426 /*
8427 * This disassembly is driven by the debug info, which follows the native
8428 * code and indicates where the next guest instruction starts, where the
8429 * labels are, and other such things.
8430 */
8431 uint32_t idxThreadedCall = 0;
8432 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8433 uint8_t idxRange = UINT8_MAX;
8434 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8435 uint32_t offRange = 0;
8436 uint32_t offOpcodes = 0;
8437 uint32_t const cbOpcodes = pTb->cbOpcodes;
8438 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8439 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8440 uint32_t iDbgEntry = 1;
8441 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8442
8443 while (offNative < cNative)
8444 {
8445 /* If we're at or have passed the point where the next chunk of debug
8446 info starts, process it. */
8447 if (offDbgNativeNext <= offNative)
8448 {
8449 offDbgNativeNext = UINT32_MAX;
8450 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8451 {
8452 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8453 {
8454 case kIemTbDbgEntryType_GuestInstruction:
8455 {
8456 /* Did the exec flag change? */
8457 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8458 {
8459 pHlp->pfnPrintf(pHlp,
8460 " fExec change %#08x -> %#08x %s\n",
8461 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8462 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8463 szDisBuf, sizeof(szDisBuf)));
8464 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8465 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8466 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8467 : DISCPUMODE_64BIT;
8468 }
8469
8470 /* New opcode range? We need to fend off a spurious debug info entry here for cases
8471 where the compilation was aborted before the opcode was recorded and the actual
8472 instruction was translated to a threaded call. This may happen when we run out
8473 of ranges, or when some complicated interrupts/FFs are found to be pending or
8474 similar. So, we just deal with it here rather than in the compiler code as it
8475 is a lot simpler to do here. */
8476 if ( idxRange == UINT8_MAX
8477 || idxRange >= cRanges
8478 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8479 {
8480 idxRange += 1;
8481 if (idxRange < cRanges)
8482 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8483 else
8484 continue;
8485 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8486 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8487 + (pTb->aRanges[idxRange].idxPhysPage == 0
8488 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8489 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8490 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8491 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8492 pTb->aRanges[idxRange].idxPhysPage);
8493 GCPhysPc += offRange;
8494 }
8495
8496 /* Disassemble the instruction. */
8497 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8498 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8499 uint32_t cbInstr = 1;
8500 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8501 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8502 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8503 if (RT_SUCCESS(rc))
8504 {
8505 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8506 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8507 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8508 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8509
8510 static unsigned const s_offMarker = 55;
8511 static char const s_szMarker[] = " ; <--- guest";
8512 if (cch < s_offMarker)
8513 {
8514 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8515 cch = s_offMarker;
8516 }
8517 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8518 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8519
8520 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8521 }
8522 else
8523 {
8524 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8525 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8526 cbInstr = 1;
8527 }
8528 GCPhysPc += cbInstr;
8529 offOpcodes += cbInstr;
8530 offRange += cbInstr;
8531 continue;
8532 }
8533
8534 case kIemTbDbgEntryType_ThreadedCall:
8535 pHlp->pfnPrintf(pHlp,
8536 " Call #%u to %s (%u args) - %s\n",
8537 idxThreadedCall,
8538 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8539 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8540 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8541 idxThreadedCall++;
8542 continue;
8543
8544 case kIemTbDbgEntryType_GuestRegShadowing:
8545 {
8546 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8547 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8548 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8549 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8550 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8551 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8552 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
8553 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8554 else
8555 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8556 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8557 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8558 continue;
8559 }
8560
8561#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8562 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8563 {
8564 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8565 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8566 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8567 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8568 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8569 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8570 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8571 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8572 else
8573 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8574 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8575 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8576 continue;
8577 }
8578#endif
8579
8580 case kIemTbDbgEntryType_Label:
8581 {
8582 const char *pszName = "what_the_fudge";
8583 const char *pszComment = "";
8584 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8585 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8586 {
8587 case kIemNativeLabelType_Return: pszName = "Return"; break;
8588 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
8589 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
8590 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
8591 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
8592 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
8593 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
8594 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
8595 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
8596 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
8597 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
8598 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
8599 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
8600 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
8601 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
8602 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
8603 case kIemNativeLabelType_If:
8604 pszName = "If";
8605 fNumbered = true;
8606 break;
8607 case kIemNativeLabelType_Else:
8608 pszName = "Else";
8609 fNumbered = true;
8610 pszComment = " ; regs state restored pre-if-block";
8611 break;
8612 case kIemNativeLabelType_Endif:
8613 pszName = "Endif";
8614 fNumbered = true;
8615 break;
8616 case kIemNativeLabelType_CheckIrq:
8617 pszName = "CheckIrq_CheckVM";
8618 fNumbered = true;
8619 break;
8620 case kIemNativeLabelType_TlbLookup:
8621 pszName = "TlbLookup";
8622 fNumbered = true;
8623 break;
8624 case kIemNativeLabelType_TlbMiss:
8625 pszName = "TlbMiss";
8626 fNumbered = true;
8627 break;
8628 case kIemNativeLabelType_TlbDone:
8629 pszName = "TlbDone";
8630 fNumbered = true;
8631 break;
8632 case kIemNativeLabelType_Invalid:
8633 case kIemNativeLabelType_End:
8634 break;
8635 }
8636 if (fNumbered)
8637 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8638 else
8639 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8640 continue;
8641 }
8642
8643 case kIemTbDbgEntryType_NativeOffset:
8644 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8645 Assert(offDbgNativeNext >= offNative);
8646 break;
8647
8648#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8649 case kIemTbDbgEntryType_DelayedPcUpdate:
8650 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8651 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8652 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8653 continue;
8654#endif
8655
8656#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8657 case kIemTbDbgEntryType_GuestRegDirty:
8658 {
8659 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8660 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
8661 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
8662 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
8663 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
8664 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
8665 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
8666 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
8667 pszGstReg, pszHstReg);
8668 continue;
8669 }
8670
8671 case kIemTbDbgEntryType_GuestRegWriteback:
8672 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX64)\n",
8673 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
8674 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
8675 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
8676 continue;
8677#endif
8678
8679 default:
8680 AssertFailed();
8681 }
8682 iDbgEntry++;
8683 break;
8684 }
8685 }
8686
8687 /*
8688 * Disassemble the next native instruction.
8689 */
8690 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8691# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8692 uint32_t cbInstr = sizeof(paNative[0]);
8693 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8694 if (RT_SUCCESS(rc))
8695 {
8696# if defined(RT_ARCH_AMD64)
8697 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8698 {
8699 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8700 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8701 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8702 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8703 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8704 uInfo & 0x8000 ? "recompiled" : "todo");
8705 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8706 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8707 else
8708 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8709 }
8710 else
8711# endif
8712 {
8713 const char *pszAnnotation = NULL;
8714# ifdef RT_ARCH_AMD64
8715 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8716 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8717 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8718 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8719 PCDISOPPARAM pMemOp;
8720 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8721 pMemOp = &Dis.Param1;
8722 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8723 pMemOp = &Dis.Param2;
8724 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8725 pMemOp = &Dis.Param3;
8726 else
8727 pMemOp = NULL;
8728 if ( pMemOp
8729 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8730 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8731 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8732 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8733
8734# elif defined(RT_ARCH_ARM64)
8735 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8736 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8737 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8738# else
8739# error "Port me"
8740# endif
8741 if (pszAnnotation)
8742 {
8743 static unsigned const s_offAnnotation = 55;
8744 size_t const cchAnnotation = strlen(pszAnnotation);
8745 size_t cchDis = strlen(szDisBuf);
8746 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8747 {
8748 if (cchDis < s_offAnnotation)
8749 {
8750 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8751 cchDis = s_offAnnotation;
8752 }
8753 szDisBuf[cchDis++] = ' ';
8754 szDisBuf[cchDis++] = ';';
8755 szDisBuf[cchDis++] = ' ';
8756 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8757 }
8758 }
8759 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8760 }
8761 }
8762 else
8763 {
8764# if defined(RT_ARCH_AMD64)
8765 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8766 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8767# elif defined(RT_ARCH_ARM64)
8768 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8769# else
8770# error "Port me"
8771# endif
8772 cbInstr = sizeof(paNative[0]);
8773 }
8774 offNative += cbInstr / sizeof(paNative[0]);
8775
8776# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8777 cs_insn *pInstr;
8778 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8779 (uintptr_t)pNativeCur, 1, &pInstr);
8780 if (cInstrs > 0)
8781 {
8782 Assert(cInstrs == 1);
8783 const char *pszAnnotation = NULL;
8784# if defined(RT_ARCH_ARM64)
8785 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8786 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8787 {
8788 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8789 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
8790 char *psz = strchr(pInstr->op_str, '[');
8791 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8792 {
8793 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8794 int32_t off = -1;
8795 psz += 4;
8796 if (*psz == ']')
8797 off = 0;
8798 else if (*psz == ',')
8799 {
8800 psz = RTStrStripL(psz + 1);
8801 if (*psz == '#')
8802 off = RTStrToInt32(&psz[1]);
8803 /** @todo deal with index registers and LSL as well... */
8804 }
8805 if (off >= 0)
8806 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8807 }
8808 }
8809# endif
8810
8811 size_t const cchOp = strlen(pInstr->op_str);
8812# if defined(RT_ARCH_AMD64)
8813 if (pszAnnotation)
8814 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8815 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8816 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8817 else
8818 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8819 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8820
8821# else
8822 if (pszAnnotation)
8823 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
8824 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
8825 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8826 else
8827 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8828 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8829# endif
8830 offNative += pInstr->size / sizeof(*pNativeCur);
8831 cs_free(pInstr, cInstrs);
8832 }
8833 else
8834 {
8835# if defined(RT_ARCH_AMD64)
8836 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8837 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8838# else
8839 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8840# endif
8841 offNative++;
8842 }
8843# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8844 }
8845 }
8846 else
8847#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8848 {
8849 /*
8850 * No debug info, just disassemble the x86 code and then the native code.
8851 *
8852 * First the guest code:
8853 */
8854 for (unsigned i = 0; i < pTb->cRanges; i++)
8855 {
8856 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8857 + (pTb->aRanges[i].idxPhysPage == 0
8858 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8859 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8860 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8861 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8862 unsigned off = pTb->aRanges[i].offOpcodes;
8863 /** @todo this ain't working when crossing pages! */
8864 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8865 while (off < cbOpcodes)
8866 {
8867 uint32_t cbInstr = 1;
8868 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8869 &pTb->pabOpcodes[off], cbOpcodes - off,
8870 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8871 if (RT_SUCCESS(rc))
8872 {
8873 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8874 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8875 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8876 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8877 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
8878 GCPhysPc += cbInstr;
8879 off += cbInstr;
8880 }
8881 else
8882 {
8883 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
8884 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
8885 break;
8886 }
8887 }
8888 }
8889
8890 /*
8891 * Then the native code:
8892 */
8893 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
8894 while (offNative < cNative)
8895 {
8896 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8897# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8898 uint32_t cbInstr = sizeof(paNative[0]);
8899 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8900 if (RT_SUCCESS(rc))
8901 {
8902# if defined(RT_ARCH_AMD64)
8903 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8904 {
8905 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8906 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8907 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8908 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8909 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8910 uInfo & 0x8000 ? "recompiled" : "todo");
8911 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8912 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8913 else
8914 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8915 }
8916 else
8917# endif
8918 {
8919# ifdef RT_ARCH_AMD64
8920 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8921 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8922 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8923 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8924# elif defined(RT_ARCH_ARM64)
8925 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8926 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8927 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8928# else
8929# error "Port me"
8930# endif
8931 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8932 }
8933 }
8934 else
8935 {
8936# if defined(RT_ARCH_AMD64)
8937 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8938 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8939# else
8940 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8941# endif
8942 cbInstr = sizeof(paNative[0]);
8943 }
8944 offNative += cbInstr / sizeof(paNative[0]);
8945
8946# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8947 cs_insn *pInstr;
8948 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8949 (uintptr_t)pNativeCur, 1, &pInstr);
8950 if (cInstrs > 0)
8951 {
8952 Assert(cInstrs == 1);
8953# if defined(RT_ARCH_AMD64)
8954 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8955 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8956# else
8957 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8958 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8959# endif
8960 offNative += pInstr->size / sizeof(*pNativeCur);
8961 cs_free(pInstr, cInstrs);
8962 }
8963 else
8964 {
8965# if defined(RT_ARCH_AMD64)
8966 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8967 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8968# else
8969 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8970# endif
8971 offNative++;
8972 }
8973# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8974 }
8975 }
8976
8977#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8978 /* Cleanup. */
8979 cs_close(&hDisasm);
8980#endif
8981}
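
#if 0 /* Illustration only: one way to dump a freshly recompiled TB to the release log,
         as the commented-out call near the end of iemNativeRecompile() also hints at. */
    iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());
#endif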
8982
8983
8984/**
8985 * Recompiles the given threaded TB into a native one.
8986 *
8987 * In case of failure the translation block will be returned as-is.
8988 *
8989 * @returns pTb.
8990 * @param pVCpu The cross context virtual CPU structure of the calling
8991 * thread.
8992 * @param pTb The threaded translation block to recompile to native.
8993 */
8994DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
8995{
8996#if 0 /* For profiling the native recompiler code. */
8997l_profile_again:
8998#endif
8999 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9000
9001 /*
9002 * The first time thru, we allocate the recompiler state; the other times
9003 * we just need to reset it before using it again.
9004 */
9005 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9006 if (RT_LIKELY(pReNative))
9007 iemNativeReInit(pReNative, pTb);
9008 else
9009 {
9010 pReNative = iemNativeInit(pVCpu, pTb);
9011 AssertReturn(pReNative, pTb);
9012 }
9013
9014#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9015 /*
9016 * First do liveness analysis. This is done backwards.
9017 */
9018 {
9019 uint32_t idxCall = pTb->Thrd.cCalls;
9020 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9021 { /* likely */ }
9022 else
9023 {
9024 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9025 while (idxCall > cAlloc)
9026 cAlloc *= 2;
9027 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9028 AssertReturn(pvNew, pTb);
9029 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9030 pReNative->cLivenessEntriesAlloc = cAlloc;
9031 }
9032 AssertReturn(idxCall > 0, pTb);
9033 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9034
9035 /* The initial (final) entry. */
9036 idxCall--;
9037 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9038
9039 /* Loop backwards thru the calls and fill in the other entries. */
9040 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9041 while (idxCall > 0)
9042 {
9043 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9044 if (pfnLiveness)
9045 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9046 else
9047 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9048 pCallEntry--;
9049 idxCall--;
9050 }
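    /* Roughly speaking: after this loop, entry [i] summarizes what call i+1 and
       everything after it still does with the guest state (the last entry, filled
       in first, starts out as all-unused). */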
9051
9052# ifdef VBOX_WITH_STATISTICS
9053 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9054 to 'clobbered' rather than 'input'. */
9055 /** @todo */
9056# endif
9057 }
9058#endif
9059
9060 /*
9061 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9062 * for aborting if an error happens.
9063 */
9064 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9065#ifdef LOG_ENABLED
9066 uint32_t const cCallsOrg = cCallsLeft;
9067#endif
9068 uint32_t off = 0;
9069 int rc = VINF_SUCCESS;
9070 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9071 {
9072 /*
9073 * Emit prolog code (fixed).
9074 */
9075 off = iemNativeEmitProlog(pReNative, off);
9076
9077 /*
9078 * Convert the calls to native code.
9079 */
9080#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9081 int32_t iGstInstr = -1;
9082#endif
9083#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9084 uint32_t cThreadedCalls = 0;
9085 uint32_t cRecompiledCalls = 0;
9086#endif
9087#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9088 uint32_t idxCurCall = 0;
9089#endif
9090 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9091 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9092 while (cCallsLeft-- > 0)
9093 {
9094 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9095#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9096 pReNative->idxCurCall = idxCurCall;
9097#endif
9098
9099 /*
9100 * Debug info, assembly markup and statistics.
9101 */
9102#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9103 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9104 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9105#endif
9106#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9107 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9108 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9109 {
9110 if (iGstInstr < (int32_t)pTb->cInstructions)
9111 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9112 else
9113 Assert(iGstInstr == pTb->cInstructions);
9114 iGstInstr = pCallEntry->idxInstr;
9115 }
9116 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9117#endif
9118#if defined(VBOX_STRICT)
9119 off = iemNativeEmitMarker(pReNative, off,
9120 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9121#endif
9122#if defined(VBOX_STRICT)
9123 iemNativeRegAssertSanity(pReNative);
9124#endif
9125#ifdef VBOX_WITH_STATISTICS
9126 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9127#endif
9128
9129 /*
9130 * Actual work.
9131 */
9132 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9133 pfnRecom ? "(recompiled)" : "(todo)"));
9134 if (pfnRecom) /** @todo stats on this. */
9135 {
9136 off = pfnRecom(pReNative, off, pCallEntry);
9137 STAM_REL_STATS({cRecompiledCalls++;});
9138 }
9139 else
9140 {
9141 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9142 STAM_REL_STATS({cThreadedCalls++;});
9143 }
9144 Assert(off <= pReNative->cInstrBufAlloc);
9145 Assert(pReNative->cCondDepth == 0);
9146
9147#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9148 if (LogIs2Enabled())
9149 {
9150 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9151# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9152 static const char s_achState[] = "CUXI";
9153# else
9154 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9155# endif
9156
9157 char szGpr[17];
9158 for (unsigned i = 0; i < 16; i++)
9159 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9160 szGpr[16] = '\0';
9161
9162 char szSegBase[X86_SREG_COUNT + 1];
9163 char szSegLimit[X86_SREG_COUNT + 1];
9164 char szSegAttrib[X86_SREG_COUNT + 1];
9165 char szSegSel[X86_SREG_COUNT + 1];
9166 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9167 {
9168 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9169 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9170 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9171 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9172 }
9173 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9174 = szSegSel[X86_SREG_COUNT] = '\0';
9175
9176 char szEFlags[8];
9177 for (unsigned i = 0; i < 7; i++)
9178 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9179 szEFlags[7] = '\0';
9180
9181 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9182 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9183 }
9184#endif
9185
9186 /*
9187 * Advance.
9188 */
9189 pCallEntry++;
9190#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9191 idxCurCall++;
9192#endif
9193 }
9194
9195 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9196 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9197 if (!cThreadedCalls)
9198 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9199
9200 /*
9201 * Emit the epilog code.
9202 */
9203 uint32_t idxReturnLabel;
9204 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9205
9206 /*
9207 * Generate special jump labels.
9208 */
9209 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9210 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9211 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9212 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9213
9214 /*
9215 * Generate simple TB tail labels that just call a helper with a pVCpu
9216 * arg and either return or longjmp/throw a non-zero status.
9217 *
9218 * The array entries must be ordered by enmLabel value so we can index
9219 * using fTailLabels bit numbers.
9220 */
9221 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9222 static struct
9223 {
9224 IEMNATIVELABELTYPE enmLabel;
9225 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9226 } const g_aSimpleTailLabels[] =
9227 {
9228 { kIemNativeLabelType_Invalid, NULL },
9229 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9230 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9231 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9232 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9233 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
9234 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9235 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9236 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9237 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9238 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9239 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9240 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9241 };
9242 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9243 AssertCompile(kIemNativeLabelType_Invalid == 0);
9244 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
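    /* Worked example: if only the RaiseGp0 and ObsoleteTb labels were requested, fTailLabels
       has exactly those two bits set (the "- 2U" above masks off bit 0, i.e. Invalid);
       ASMBitFirstSetU64 returns the 1-based bit position, so subtracting one below
       recovers the IEMNATIVELABELTYPE value used to index g_aSimpleTailLabels. */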
9245 if (fTailLabels)
9246 {
9247 do
9248 {
9249 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9250 fTailLabels &= ~RT_BIT_64(enmLabel);
9251 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9252
9253 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9254 Assert(idxLabel != UINT32_MAX);
9255 if (idxLabel != UINT32_MAX)
9256 {
9257 iemNativeLabelDefine(pReNative, idxLabel, off);
9258
9259 /* int pfnCallback(PVMCPUCC pVCpu) */
9260 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9261 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9262
9263 /* jump back to the return sequence. */
9264 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9265 }
9266
9267 } while (fTailLabels);
9268 }
9269 }
9270 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9271 {
9272 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9273 return pTb;
9274 }
9275 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9276 Assert(off <= pReNative->cInstrBufAlloc);
9277
9278 /*
9279 * Make sure all labels have been defined.
9280 */
9281 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9282#ifdef VBOX_STRICT
9283 uint32_t const cLabels = pReNative->cLabels;
9284 for (uint32_t i = 0; i < cLabels; i++)
9285 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9286#endif
9287
9288#if 0 /* For profiling the native recompiler code. */
9289 if (pTb->Thrd.cCalls >= 136)
9290 {
9291 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9292 goto l_profile_again;
9293 }
9294#endif
9295
9296 /*
9297 * Allocate executable memory, copy over the code we've generated.
9298 */
9299 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9300 if (pTbAllocator->pDelayedFreeHead)
9301 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9302
9303 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb);
9304 AssertReturn(paFinalInstrBuf, pTb);
9305 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9306
9307 /*
9308 * Apply fixups.
9309 */
9310 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9311 uint32_t const cFixups = pReNative->cFixups;
9312 for (uint32_t i = 0; i < cFixups; i++)
9313 {
9314 Assert(paFixups[i].off < off);
9315 Assert(paFixups[i].idxLabel < cLabels);
9316 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9317 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9318 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9319 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9320 switch (paFixups[i].enmType)
9321 {
9322#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9323 case kIemNativeFixupType_Rel32:
9324 Assert(paFixups[i].off + 4 <= off);
9325 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9326 continue;
9327
9328#elif defined(RT_ARCH_ARM64)
9329 case kIemNativeFixupType_RelImm26At0:
9330 {
9331 Assert(paFixups[i].off < off);
9332 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9333 Assert(offDisp >= -262144 && offDisp < 262144);
9334 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9335 continue;
9336 }
9337
9338 case kIemNativeFixupType_RelImm19At5:
9339 {
9340 Assert(paFixups[i].off < off);
9341 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9342 Assert(offDisp >= -262144 && offDisp < 262144);
9343 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9344 continue;
9345 }
9346
9347 case kIemNativeFixupType_RelImm14At5:
9348 {
9349 Assert(paFixups[i].off < off);
9350 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9351 Assert(offDisp >= -8192 && offDisp < 8192);
9352 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9353 continue;
9354 }
9355
9356#endif
9357 case kIemNativeFixupType_Invalid:
9358 case kIemNativeFixupType_End:
9359 break;
9360 }
9361 AssertFailed();
9362 }
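    /* A worked example for the Rel32 case above (assuming the customary offAddend of -4,
       i.e. the size of the immediate itself): a 32-bit jump immediate recorded at fixup
       offset 0x12, targeting a label defined at offset 0x40, gets 0x40 - 0x12 - 4 = 0x2a
       stored, i.e. the distance from the end of the immediate to the label. */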
9363
9364 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9365 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9366
9367 /*
9368 * Convert the translation block.
9369 */
9370 RTMemFree(pTb->Thrd.paCalls);
9371 pTb->Native.paInstructions = paFinalInstrBuf;
9372 pTb->Native.cInstructions = off;
9373 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9374#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9375 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so not return check. */
9376 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9377#endif
9378
9379 Assert(pTbAllocator->cThreadedTbs > 0);
9380 pTbAllocator->cThreadedTbs -= 1;
9381 pTbAllocator->cNativeTbs += 1;
9382 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9383
9384#ifdef LOG_ENABLED
9385 /*
9386 * Disassemble to the log if enabled.
9387 */
9388 if (LogIs3Enabled())
9389 {
9390 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9391 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9392# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9393 RTLogFlush(NULL);
9394# endif
9395 }
9396#endif
9397 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9398
9399 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9400 return pTb;
9401}
9402