VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@105490

Last change on this file since 105490 was 105490, checked in by vboxsync, 4 months ago

VMM/IEM: Basic infrastructure to natively recompile SIMD floating point instructions, bugref:10652

SIMD floating point operation behavior depends on the guest MXCSR value, which needs to be written to the
host's floating point control register (MXCSR on x86, FPCR on arm64, which requires a conversion) and
restored to the host's value when the TB finishes execution, to avoid inconsistencies in case the guest
changes MXCSR. The ARM implementation does not fully conform to x86 behavior because default NaN values have
the sign bit clear on arm64 while it is set on x86. There are rounding differences as well, and earlier
ARMv8 revisions don't support the FPCR.FIZ and FPCR.AH features. Should still work out as long as the guest
doesn't try to do funny stuff.
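
The following is a minimal sketch of the save/apply/restore pattern described above, for an x86 host. It is
illustrative only; the actual native-recompiler emitters are not shown here and the helper name below is
hypothetical.

    #include <xmmintrin.h> /* _mm_getcsr / _mm_setcsr */

    /* Run one recompiled TB with the guest MXCSR loaded, restoring the host value afterwards. */
    static void executeTbWithGuestMxcsr(unsigned int fGuestMxcsr, void (*pfnTb)(void))
    {
        unsigned int const fHostMxcsr = _mm_getcsr(); /* save the host control/status register */
        _mm_setcsr(fGuestMxcsr);                      /* apply the guest value for the duration of the TB */
        pfnTb();                                      /* execute the recompiled code */
        _mm_setcsr(fHostMxcsr);                       /* restore so host floating point code is unaffected */
    }

On arm64 the guest MXCSR would additionally have to be translated to and from FPCR bits, which is where the
rounding-mode and default-NaN differences mentioned above come into play.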

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 129.6 KB
1/* $Id: IEMAllThrdRecompiler.cpp 105490 2024-07-24 14:49:29Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
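/*
 * Usage sketch (assumed syntax; see VBox/log.h and iprt/log.h for the
 * authoritative details): the levels above can typically be enabled at
 * runtime through the VBOX_LOG environment variable using the lower-case
 * group name, e.g. something like:
 *      VBOX_LOG="+iem_re_threaded.e.l2.l8"
 */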
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused configs.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
117# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
118#endif
119
120
121/**
122 * Calculates the effective address of a ModR/M memory operand, extended version
123 * for use in the recompilers.
124 *
125 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
126 *
127 * May longjmp on internal error.
128 *
129 * @return The effective address.
130 * @param pVCpu The cross context virtual CPU structure of the calling thread.
131 * @param bRm The ModRM byte.
132 * @param cbImmAndRspOffset - First byte: The size of any immediate
133 * following the effective address opcode bytes
134 * (only for RIP relative addressing).
135 * - Second byte: RSP displacement (for POP [ESP]).
136 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
137 * SIB byte (bits 39:32).
138 *
139 * @note This must be defined in a source file with matching
140 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
141 * or implemented differently...
142 */
143RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
144{
145 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
146# define SET_SS_DEF() \
147 do \
148 { \
149 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
150 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
151 } while (0)
152
153 if (!IEM_IS_64BIT_CODE(pVCpu))
154 {
155/** @todo Check the effective address size crap! */
156 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
157 {
158 uint16_t u16EffAddr;
159
160 /* Handle the disp16 form with no registers first. */
161 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
162 {
163 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
164 *puInfo = u16EffAddr;
165 }
166 else
167 {
168 /* Get the displacement. */
169 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
170 {
171 case 0: u16EffAddr = 0; break;
172 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
173 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
174 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
175 }
176 *puInfo = u16EffAddr;
177
178 /* Add the base and index registers to the disp. */
179 switch (bRm & X86_MODRM_RM_MASK)
180 {
181 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
182 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
183 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
184 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
185 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
186 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
187 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
188 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
189 }
190 }
191
192 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
193 return u16EffAddr;
194 }
195
196 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
197 uint32_t u32EffAddr;
198 uint64_t uInfo;
199
200 /* Handle the disp32 form with no registers first. */
201 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
202 {
203 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
204 uInfo = u32EffAddr;
205 }
206 else
207 {
208 /* Get the register (or SIB) value. */
209 uInfo = 0;
210 switch ((bRm & X86_MODRM_RM_MASK))
211 {
212 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
213 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
214 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
215 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
216 case 4: /* SIB */
217 {
218 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
219 uInfo = (uint64_t)bSib << 32;
220
221 /* Get the index and scale it. */
222 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
223 {
224 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
225 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
226 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
227 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
228 case 4: u32EffAddr = 0; /*none */ break;
229 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
230 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
231 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
232 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
233 }
234 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
235
236 /* add base */
237 switch (bSib & X86_SIB_BASE_MASK)
238 {
239 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
240 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
241 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
242 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
243 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
244 case 5:
245 if ((bRm & X86_MODRM_MOD_MASK) != 0)
246 {
247 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
248 SET_SS_DEF();
249 }
250 else
251 {
252 uint32_t u32Disp;
253 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
254 u32EffAddr += u32Disp;
255 uInfo |= u32Disp;
256 }
257 break;
258 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
259 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
260 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
261 }
262 break;
263 }
264 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
265 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
266 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
267 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
268 }
269
270 /* Get and add the displacement. */
271 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
272 {
273 case 0:
274 break;
275 case 1:
276 {
277 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
278 u32EffAddr += i8Disp;
279 uInfo |= (uint32_t)(int32_t)i8Disp;
280 break;
281 }
282 case 2:
283 {
284 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
285 u32EffAddr += u32Disp;
286 uInfo |= u32Disp;
287 break;
288 }
289 default:
290 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
291 }
292 }
293
294 *puInfo = uInfo;
295 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
296 return u32EffAddr;
297 }
298
299 uint64_t u64EffAddr;
300 uint64_t uInfo;
301
302 /* Handle the rip+disp32 form with no registers first. */
303 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
304 {
305 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
306 uInfo = (uint32_t)u64EffAddr;
307 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
308 }
309 else
310 {
311 /* Get the register (or SIB) value. */
312 uInfo = 0;
313 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
314 {
315 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
316 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
317 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
318 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
319 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
320 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
321 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
322 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
323 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
324 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
325 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
326 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
327 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
328 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
329 /* SIB */
330 case 4:
331 case 12:
332 {
333 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
334 uInfo = (uint64_t)bSib << 32;
335
336 /* Get the index and scale it. */
337 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
338 {
339 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
340 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
341 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
342 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
343 case 4: u64EffAddr = 0; /*none */ break;
344 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
345 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
346 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
347 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
348 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
349 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
350 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
351 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
352 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
353 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
354 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
355 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
356 }
357 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
358
359 /* add base */
360 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
361 {
362 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
363 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
364 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
365 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
366 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
367 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
368 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
369 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
370 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
371 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
372 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
373 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
374 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
375 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
376 /* complicated encodings */
377 case 5:
378 case 13:
379 if ((bRm & X86_MODRM_MOD_MASK) != 0)
380 {
381 if (!pVCpu->iem.s.uRexB)
382 {
383 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
384 SET_SS_DEF();
385 }
386 else
387 u64EffAddr += pVCpu->cpum.GstCtx.r13;
388 }
389 else
390 {
391 uint32_t u32Disp;
392 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
393 u64EffAddr += (int32_t)u32Disp;
394 uInfo |= u32Disp;
395 }
396 break;
397 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
398 }
399 break;
400 }
401 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
402 }
403
404 /* Get and add the displacement. */
405 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
406 {
407 case 0:
408 break;
409 case 1:
410 {
411 int8_t i8Disp;
412 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
413 u64EffAddr += i8Disp;
414 uInfo |= (uint32_t)(int32_t)i8Disp;
415 break;
416 }
417 case 2:
418 {
419 uint32_t u32Disp;
420 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
421 u64EffAddr += (int32_t)u32Disp;
422 uInfo |= u32Disp;
423 break;
424 }
425 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
426 }
427
428 }
429
430 *puInfo = uInfo;
431 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
432 {
433 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
434 return u64EffAddr;
435 }
436 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
437 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
438 return u64EffAddr & UINT32_MAX;
439}
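/*
 * Illustrative sketch (hypothetical caller, not from this file): how the
 * cbImmAndRspOffset parameter is packed and how the extra info returned via
 * puInfo is decoded. Real callers go through the IEM_MC_CALC_RM_EFF_ADDR
 * microcode macro.
 */
#if 0
    uint64_t uInfo;
    uint32_t const cbImmAndRspOffset = 4          /* low byte: 4 immediate bytes follow (RIP relative) */
                                     | (0 << 8);  /* second byte: extra RSP displacement (POP [ESP])   */
    RTGCPTR const GCPtrEff = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, cbImmAndRspOffset, &uInfo);
    uint32_t const u32Disp = (uint32_t)uInfo;         /* bits 31:0:  the displacement */
    uint8_t  const bSib    = (uint8_t)(uInfo >> 32);  /* bits 39:32: the SIB byte     */
#endif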
440
441
442/*********************************************************************************************************************************
443* Translation Block Cache. *
444*********************************************************************************************************************************/
445
446/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
447static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
448{
449 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
450 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
451 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
452 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
453 if (cMsSinceUse1 != cMsSinceUse2)
454 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
455 if (pTb1->cUsed != pTb2->cUsed)
456 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
457 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
458 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
459 return 0;
460}
461
462#ifdef VBOX_STRICT
463/**
464 * Assertion helper that checks a collision list count.
465 */
466static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
467{
468 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
469 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
470 while (pTb)
471 {
472 pTb = pTb->pNext;
473 cLeft--;
474 }
475 AssertMsg(cLeft == 0,
476 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
477 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
478}
479#endif
480
481
482DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
483{
484 STAM_PROFILE_START(&pTbCache->StatPrune, a);
485
486 /*
487 * First convert the collision list to an array.
488 */
489 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
490 uintptr_t cInserted = 0;
491 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
492
493 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
494
495 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
496 {
497 apSortedTbs[cInserted++] = pTbCollision;
498 pTbCollision = pTbCollision->pNext;
499 }
500
501 /* Free any excess (impossible). */
502 if (RT_LIKELY(!pTbCollision))
503 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
504 else
505 do
506 {
507 PIEMTB pTbToFree = pTbCollision;
508 pTbCollision = pTbToFree->pNext;
509 iemTbAllocatorFree(pVCpu, pTbToFree);
510 } while (pTbCollision);
511
512 /*
513 * Sort it by most recently used and usage count.
514 */
515 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
516
517 /* We keep half the list for now. Perhaps a bit aggressive... */
518 uintptr_t const cKeep = cInserted / 2;
519
520 /* First free up the TBs we don't wish to keep (before creating the new
521 list because otherwise the free code will scan the list for each one
522 without ever finding it). */
523 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
524 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
525
526 /* Then chain the new TB together with the existing TBs we want to keep
527 and insert this list into the hash table. */
528 pTbCollision = pTb;
529 for (uintptr_t idx = 0; idx < cKeep; idx++)
530 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
531 pTbCollision->pNext = NULL;
532
533 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
534#ifdef VBOX_STRICT
535 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
536#endif
537
538 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
539}
540
541
542static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
543{
544 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
545 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
546 if (!pTbOldHead)
547 {
548 pTb->pNext = NULL;
549 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
550 }
551 else
552 {
553 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
554 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
555 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
556 {
557 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
558 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
559#ifdef VBOX_STRICT
560 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
561#endif
562 }
563 else
564 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
565 }
566}
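/*
 * Conceptual sketch (assumed encoding; the real IEMTBCACHE_PTR_XXX macros are
 * defined elsewhere in IEM): each hash bucket stores both the head TB pointer
 * and the collision-list length in a single value, e.g. by keeping the count
 * in the pointer's low alignment bits (cf. the AssertCompile on sizeof(IEMTB)
 * and IEMTBCACHE_PTR_COUNT_MASK in iemTbInit below).
 */
#if 0 /* illustrative only */
# define EXAMPLE_TBCACHE_PTR_MAKE(a_pTb, a_cCount) \
    ((PIEMTB)((uintptr_t)(a_pTb) | (uintptr_t)(a_cCount)))
# define EXAMPLE_TBCACHE_PTR_GET_TB(a_pValue) \
    ((PIEMTB)((uintptr_t)(a_pValue) & ~(uintptr_t)IEMTBCACHE_PTR_COUNT_MASK))
# define EXAMPLE_TBCACHE_PTR_GET_COUNT(a_pValue) \
    ((uint32_t)((uintptr_t)(a_pValue) & IEMTBCACHE_PTR_COUNT_MASK))
#endif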
567
568
569/**
570 * Unlinks @a pTb from the hash table if found in it.
571 *
572 * @returns true if unlinked, false if not present.
573 * @param pTbCache The hash table.
574 * @param pTb The TB to remove.
575 */
576static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
577{
578 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
579 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
580 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
581
582 /*
583 * At the head of the collision list?
584 */
585 if (pTbHash == pTb)
586 {
587 if (!pTb->pNext)
588 pTbCache->apHash[idxHash] = NULL;
589 else
590 {
591 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
592 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
593#ifdef VBOX_STRICT
594 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
595#endif
596 }
597 return true;
598 }
599
600 /*
601 * Search the collision list.
602 */
603 PIEMTB const pTbHead = pTbHash;
604 while (pTbHash)
605 {
606 PIEMTB const pNextTb = pTbHash->pNext;
607 if (pNextTb == pTb)
608 {
609 pTbHash->pNext = pTb->pNext;
610 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
611#ifdef VBOX_STRICT
612 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
613#endif
614 return true;
615 }
616 pTbHash = pNextTb;
617 }
618 return false;
619}
620
621
622/**
623 * Looks up a TB for the given PC and flags in the cache.
624 *
625 * @returns Pointer to TB on success, NULL if not found.
626 * @param pVCpu The cross context virtual CPU structure of the
627 * calling thread.
628 * @param pTbCache The translation block cache.
629 * @param GCPhysPc The PC to look up a TB for.
630 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
631 * the lookup.
632 * @thread EMT(pVCpu)
633 */
634static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
635 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
636{
637 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
638
639 /*
640 * First consult the lookup table entry.
641 */
642 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
643 PIEMTB pTb = *ppTbLookup;
644 if (pTb)
645 {
646 if (pTb->GCPhysPc == GCPhysPc)
647 {
648 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
649 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
650 {
651 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
652 {
653 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
654 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
655 pTb->cUsed++;
656#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
657 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
658 {
659 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
660 return pTb;
661 }
662 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
663 return iemNativeRecompile(pVCpu, pTb);
664#else
665 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
666 return pTb;
667#endif
668 }
669 }
670 }
671 }
672
673 /*
674 * Then consult the hash table.
675 */
676 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
677#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
678 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
679#endif
680 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
681 while (pTb)
682 {
683 if (pTb->GCPhysPc == GCPhysPc)
684 {
685 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
686 {
687 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
688 {
689 STAM_COUNTER_INC(&pTbCache->cLookupHits);
690 AssertMsg(cLeft > 0, ("%d\n", cLeft));
691
692 *ppTbLookup = pTb;
693 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
694 pTb->cUsed++;
695#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
696 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
697 {
698 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
699 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
700 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
701 return pTb;
702 }
703 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
704 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
705 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
706 return iemNativeRecompile(pVCpu, pTb);
707#else
708 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
709 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
710 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
711 return pTb;
712#endif
713 }
714 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
715 }
716 else
717 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
718 }
719 else
720 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
721
722 pTb = pTb->pNext;
723#ifdef VBOX_STRICT
724 cLeft--;
725#endif
726 }
727 AssertMsg(cLeft == 0, ("%d\n", cLeft));
728 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
729 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
730 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
731 return pTb;
732}
733
734
735/*********************************************************************************************************************************
736* Translation Block Allocator. *
737*********************************************************************************************************************************/
738/*
739 * Translation block allocation management.
740 */
741
742#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
743# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
744 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
745# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
746 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
747# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
748 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
749#else
750# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
751 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
752# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
753 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
754# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
755 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
756#endif
757/** Makes a TB index from a chunk index and TB index within that chunk. */
758#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
759 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
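/*
 * Worked example (hypothetical numbers): assuming cTbsPerChunk = 8192 in the
 * power-of-two configuration (cChunkShift = 13, fChunkMask = 8191), TB index
 * 20000 maps to chunk 20000 >> 13 = 2 and in-chunk index 20000 & 8191 = 3616,
 * and IEMTBALLOC_IDX_MAKE(pTbAllocator, 2, 3616) gives (2 << 13) + 3616 = 20000
 * back again.
 */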
760
761
762/**
763 * Initializes the TB allocator and cache for an EMT.
764 *
765 * @returns VBox status code.
766 * @param pVM The VM handle.
767 * @param cInitialTbs The initial number of translation blocks to
768 * preallocate.
769 * @param cMaxTbs The max number of translation blocks allowed.
770 * @param cbInitialExec The initial size of the executable memory allocator.
771 * @param cbMaxExec The max size of the executable memory allocator.
772 * @param cbChunkExec The chunk size for executable memory allocator. Zero
773 * or UINT32_MAX for automatically determining this.
774 * @thread EMT
775 */
776DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
777 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
778{
779 PVMCPUCC pVCpu = VMMGetCpu(pVM);
780 Assert(!pVCpu->iem.s.pTbCacheR3);
781 Assert(!pVCpu->iem.s.pTbAllocatorR3);
782
783 /*
784 * Calculate the chunk size of the TB allocator.
785 * The minimum chunk size is 2MiB.
786 */
787 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
788 uint32_t cbPerChunk = _2M;
789 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
790#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
791 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
792 uint8_t cChunkShift = 21 - cTbShift;
793 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
794#endif
795 for (;;)
796 {
797 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
798 break;
799 cbPerChunk *= 2;
800 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
801#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
802 cChunkShift += 1;
803#endif
804 }
805
806 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
807 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
808 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
809
810 cMaxTbs = cMaxChunks * cTbsPerChunk;
811
812 /*
813 * Allocate and initialize it.
814 */
815 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(sizeof(*pTbAllocator));
816 if (!pTbAllocator)
817 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
818 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
819 sizeof(*pTbAllocator), cMaxTbs, pVCpu->idCpu);
820 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
821 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
822 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
823 pTbAllocator->cbPerChunk = cbPerChunk;
824 pTbAllocator->cMaxTbs = cMaxTbs;
825 pTbAllocator->pTbsFreeHead = NULL;
826#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
827 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
828 pTbAllocator->cChunkShift = cChunkShift;
829 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
830#endif
831
832 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
833
834 /*
835 * Allocate the initial chunks.
836 */
837 for (uint32_t idxChunk = 0; ; idxChunk++)
838 {
839 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
840 if (!paTbs)
841 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
842 "Failed to allocate %zu bytes for the #%u chunk of TBs for VCpu #%u",
843 cbPerChunk, idxChunk, pVCpu->idCpu);
844
845 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
846 {
847 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
848 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
849 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
850 }
851 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
852 pTbAllocator->cTotalTbs += cTbsPerChunk;
853
854 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
855 break;
856 }
857
858 /*
859 * Calculate the size of the hash table. We double the max TB count and
860 * round it up to the nearest power of two.
861 */
862 uint32_t cCacheEntries = cMaxTbs * 2;
863 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
864 {
865 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
866 cCacheEntries = RT_BIT_32(iBitTop);
867 Assert(cCacheEntries >= cMaxTbs * 2);
868 }
869
870 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
871 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
872 if (!pTbCache)
873 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
874 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
875 cbTbCache, cCacheEntries, pVCpu->idCpu);
876
877 /*
878 * Initialize it (assumes zeroed by the allocator).
879 */
880 pTbCache->uMagic = IEMTBCACHE_MAGIC;
881 pTbCache->cHash = cCacheEntries;
882 pTbCache->uHashMask = cCacheEntries - 1;
883 Assert(pTbCache->cHash > pTbCache->uHashMask);
884 pVCpu->iem.s.pTbCacheR3 = pTbCache;
885
886 /*
887 * Initialize the native executable memory allocator.
888 */
889#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
890 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
891 AssertLogRelRCReturn(rc, rc);
892#else
893 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
894#endif
895
896 return VINF_SUCCESS;
897}
898
899
900/**
901 * Inner free worker.
902 */
903static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
904 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
905{
906 Assert(idxChunk < pTbAllocator->cAllocatedChunks); RT_NOREF(idxChunk);
907 Assert(idxInChunk < pTbAllocator->cTbsPerChunk); RT_NOREF(idxInChunk);
908 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
909#ifdef VBOX_STRICT
910 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
911 Assert(pTbOther != pTb);
912#endif
913
914 /*
915 * Unlink the TB from the hash table.
916 */
917 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
918
919 /*
920 * Free the TB itself.
921 */
922 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
923 {
924 case IEMTB_F_TYPE_THREADED:
925 pTbAllocator->cThreadedTbs -= 1;
926 RTMemFree(pTb->Thrd.paCalls);
927 break;
928#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
929 case IEMTB_F_TYPE_NATIVE:
930 pTbAllocator->cNativeTbs -= 1;
931 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
932 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
933 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
934 break;
935#endif
936 default:
937 AssertFailed();
938 }
939
940 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
941
942 pTb->pNext = pTbAllocator->pTbsFreeHead;
943 pTbAllocator->pTbsFreeHead = pTb;
944 pTb->fFlags = 0;
945 pTb->GCPhysPc = UINT64_MAX;
946 pTb->Gen.uPtr = 0;
947 pTb->Gen.uData = 0;
948 pTb->cTbLookupEntries = 0;
949 pTb->cbOpcodes = 0;
950 pTb->pabOpcodes = NULL;
951
952 Assert(pTbAllocator->cInUseTbs > 0);
953
954 pTbAllocator->cInUseTbs -= 1;
955 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
956}
957
958
959/**
960 * Frees the given TB.
961 *
962 * @param pVCpu The cross context virtual CPU structure of the calling
963 * thread.
964 * @param pTb The translation block to free.
965 * @thread EMT(pVCpu)
966 */
967DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
968{
969 /*
970 * Validate state.
971 */
972 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
973 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
974 uint8_t const idxChunk = pTb->idxAllocChunk;
975 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
976 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
977 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
978
979 /*
980 * Invalidate the TB lookup pointer and call the inner worker.
981 */
982 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
983 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
984}
985
986
987/**
988 * Schedules a TB for freeing when it's no longer being executed and/or part of
989 * the caller's call stack.
990 *
991 * The TB will be removed from the translation block cache, though, so it isn't
992 * possible to execute it again and the IEMTB::pNext member can be used to link
993 * it together with other TBs awaiting freeing.
994 *
995 * @param pVCpu The cross context virtual CPU structure of the calling
996 * thread.
997 * @param pTb The translation block to schedule for freeing.
998 */
999static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
1000{
1001 /*
1002 * Validate state.
1003 */
1004 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1005 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1006 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1007 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1008 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1009 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1010#ifdef VBOX_STRICT
1011 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1012 Assert(pTbOther != pTb);
1013#endif
1014
1015 /*
1016 * Remove it from the cache and prepend it to the allocator's todo list.
1017 *
1018 * Note! It could still be in various lookup tables, so we trash the GCPhys
1019 * and CS attribs to ensure it won't be reused.
1020 */
1021 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1022 pTb->GCPhysPc = NIL_RTGCPHYS;
1023 pTb->x86.fAttr = UINT16_MAX;
1024
1025 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1026 pTbAllocator->pDelayedFreeHead = pTb;
1027}
1028
1029
1030/**
1031 * Processes the delayed frees.
1032 *
1033 * This is called by the allocator function as well as the native recompile
1034 * function before making any TB or executable memory allocations respectively.
1035 */
1036void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1037{
1038 /** @todo r=bird: these have already been removed from the cache,
1039 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1040 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1041 pTbAllocator->pDelayedFreeHead = NULL;
1042 while (pTb)
1043 {
1044 PIEMTB const pTbNext = pTb->pNext;
1045 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1046 iemTbAllocatorFree(pVCpu, pTb);
1047 pTb = pTbNext;
1048 }
1049}
1050
1051
1052/**
1053 * Grow the translation block allocator with another chunk.
1054 */
1055static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1056{
1057 /*
1058 * Validate state.
1059 */
1060 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1061 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1062 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1063 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1064 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1065
1066 /*
1067 * Allocate a new chunk and add it to the allocator.
1068 */
1069 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1070 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1071 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1072
1073 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1074 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1075 {
1076 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1077 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
1078 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
1079 }
1080 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1081 pTbAllocator->cTotalTbs += cTbsPerChunk;
1082
1083 return VINF_SUCCESS;
1084}
1085
1086
1087/**
1088 * Allocates a TB from an allocator with a free block.
1089 *
1090 * This is common code to both the fast and slow allocator code paths.
1091 */
1092DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1093{
1094 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1095 Assert(pTbAllocator->pTbsFreeHead);
1096
1097 PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
1098 pTbAllocator->pTbsFreeHead = pTb->pNext;
1099 pTbAllocator->cInUseTbs += 1;
1100 if (fThreaded)
1101 pTbAllocator->cThreadedTbs += 1;
1102 else
1103 pTbAllocator->cNativeTbs += 1;
1104 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1105 return pTb;
1106}
1107
1108
1109/**
1110 * Slow path for iemTbAllocatorAlloc.
1111 */
1112static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1113{
1114 /*
1115 * With some luck we can add another chunk.
1116 */
1117 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1118 {
1119 int rc = iemTbAllocatorGrow(pVCpu);
1120 if (RT_SUCCESS(rc))
1121 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1122 }
1123
1124 /*
1125 * We have to prune stuff. Sigh.
1126 *
1127 * This requires scanning for older TBs and kicking them out. Not sure how to
1128 * best do this as we don't want to maintain any list of TBs ordered by last
1129 * usage time. But one reasonably simple approach would be that each time we
1130 * get here we continue a sequential scan of the allocation chunks,
1131 * considering just a smallish number of TBs and freeing a fixed portion of
1132 * them. Say, we consider the next 128 TBs, freeing the least recently used
1133 * one out of each group of 4 TBs, resulting in 32 free TBs.
1134 */
1135 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1136 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1137 uint32_t const cTbsToPrune = 128;
1138 uint32_t const cTbsPerGroup = 4;
1139 uint32_t cFreedTbs = 0;
1140#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1141 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1142#else
1143 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1144#endif
1145 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1146 idxTbPruneFrom = 0;
1147 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1148 {
1149 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1150 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1151 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1152 uint32_t cMsAge = msNow - pTb->msLastUsed;
1153 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1154
1155 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1156 {
1157#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1158 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1159 { /* likely */ }
1160 else
1161 {
1162 idxInChunk2 = 0;
1163 idxChunk2 += 1;
1164 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1165 idxChunk2 = 0;
1166 }
1167#endif
1168 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1169 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1170 if ( cMsAge2 > cMsAge
1171 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1172 {
1173 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1174 pTb = pTb2;
1175 idxChunk = idxChunk2;
1176 idxInChunk = idxInChunk2;
1177 cMsAge = cMsAge2;
1178 }
1179 }
1180
1181 /* Free the TB. */
1182 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1183 cFreedTbs++; /* paranoia */
1184 }
1185 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1186 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1187
1188 /* Flush the TB lookup entry pointer. */
1189 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1190
1191 /*
1192 * Allocate a TB from the ones we've pruned.
1193 */
1194 if (cFreedTbs)
1195 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1196 return NULL;
1197}
1198
1199
1200/**
1201 * Allocate a translation block.
1202 *
1203 * @returns Pointer to block on success, NULL if we're out and unable to
1204 * free up an existing one (very unlikely once implemented).
1205 * @param pVCpu The cross context virtual CPU structure of the calling
1206 * thread.
1207 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1208 * For statistics.
1209 */
1210DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1211{
1212 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1213 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1214
1215 /* Free any pending TBs before we proceed. */
1216 if (!pTbAllocator->pDelayedFreeHead)
1217 { /* probably likely */ }
1218 else
1219 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1220
1221 /* If the allocator is full, take the slow code path. */
1222 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1223 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1224 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1225}
1226
1227
1228/**
1229 * This is called when we're out of space for native TBs.
1230 *
1231 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1232 * The difference is that we only prune native TBs and will only free any if
1233 * there are at least two in a group. The conditions under which we're called are
1234 * different - there will probably be free TBs in the table when we're called.
1235 * Therefore we increase the group size and max scan length, though we'll stop
1236 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1237 * up at least 8 TBs.
1238 */
1239void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1240{
1241 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1242 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1243
1244 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1245
1246 /*
1247 * Flush the delayed free list before we start freeing TBs indiscriminately.
1248 */
1249 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1250
1251 /*
1252 * Scan and free TBs.
1253 */
1254 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1255 uint32_t const cTbsToPrune = 128 * 8;
1256 uint32_t const cTbsPerGroup = 4 * 4;
1257 uint32_t cFreedTbs = 0;
1258 uint32_t cMaxInstrs = 0;
1259 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1260 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1261 {
1262 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1263 idxTbPruneFrom = 0;
1264 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1265 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1266 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1267 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1268 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1269
1270 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1271 {
1272 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1273 { /* likely */ }
1274 else
1275 {
1276 idxInChunk2 = 0;
1277 idxChunk2 += 1;
1278 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1279 idxChunk2 = 0;
1280 }
1281 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1282 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1283 {
1284 cNativeTbs += 1;
1285 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1286 if ( cMsAge2 > cMsAge
1287 || ( cMsAge2 == cMsAge
1288 && ( pTb2->cUsed < pTb->cUsed
1289 || ( pTb2->cUsed == pTb->cUsed
1290 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1291 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1292 {
1293 pTb = pTb2;
1294 idxChunk = idxChunk2;
1295 idxInChunk = idxInChunk2;
1296 cMsAge = cMsAge2;
1297 }
1298 }
1299 }
1300
1301 /* Free the TB if we found at least two native ones in this group. */
1302 if (cNativeTbs >= 2)
1303 {
1304 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1305 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1306 cFreedTbs++;
1307 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1308 break;
1309 }
1310 }
1311 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1312
1313 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1314}
1315
1316
1317/*********************************************************************************************************************************
1318* Threaded Recompiler Core *
1319*********************************************************************************************************************************/
1320/**
1321 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1322 * @returns pszBuf.
1323 * @param fFlags The flags.
1324 * @param pszBuf The output buffer.
1325 * @param cbBuf The output buffer size. At least 32 bytes.
1326 */
1327DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1328{
1329 Assert(cbBuf >= 32);
1330 static RTSTRTUPLE const s_aModes[] =
1331 {
1332 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1333 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1334 /* [02] = */ { RT_STR_TUPLE("!2!") },
1335 /* [03] = */ { RT_STR_TUPLE("!3!") },
1336 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1337 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1338 /* [06] = */ { RT_STR_TUPLE("!6!") },
1339 /* [07] = */ { RT_STR_TUPLE("!7!") },
1340 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1341 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1342 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1343 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1344 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1345 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1346 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1347 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1348 /* [10] = */ { RT_STR_TUPLE("!10!") },
1349 /* [11] = */ { RT_STR_TUPLE("!11!") },
1350 /* [12] = */ { RT_STR_TUPLE("!12!") },
1351 /* [13] = */ { RT_STR_TUPLE("!13!") },
1352 /* [14] = */ { RT_STR_TUPLE("!14!") },
1353 /* [15] = */ { RT_STR_TUPLE("!15!") },
1354 /* [16] = */ { RT_STR_TUPLE("!16!") },
1355 /* [17] = */ { RT_STR_TUPLE("!17!") },
1356 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1357 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1358 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1359 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1360 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1361 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1362 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1363 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1364 };
1365 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1366 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1367 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1368
1369 pszBuf[off++] = ' ';
1370 pszBuf[off++] = 'C';
1371 pszBuf[off++] = 'P';
1372 pszBuf[off++] = 'L';
1373 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1374 Assert(off < 32);
1375
1376 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1377
1378 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1379 {
1380 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1381 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1382 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1383 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1384 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1385 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1386 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1387 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1388 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1389 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1390 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1391 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1392 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1393 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1394 };
1395 if (fFlags)
1396 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1397 if (s_aFlags[i].fFlag & fFlags)
1398 {
1399 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1400 pszBuf[off++] = ' ';
1401 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1402 off += s_aFlags[i].cchName;
1403 fFlags &= ~s_aFlags[i].fFlag;
1404 if (!fFlags)
1405 break;
1406 }
1407 pszBuf[off] = '\0';
1408
1409 return pszBuf;
1410}
1411
1412
1413/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1414static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1415{
1416 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1417 pDis->cbCachedInstr += cbMaxRead;
1418 RT_NOREF(cbMinRead);
1419 return VERR_NO_DATA;
1420}
1421
1422
1423/**
1424 * Worker for iemThreadedDisassembleTb.
1425 */
1426static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1427 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1428{
1429 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1430 {
1431 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1432 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1433 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1434 {
1435 PIEMTB pLookupTb = papTbLookup[iLookup];
1436 if (pLookupTb)
1437 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1438 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1439 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1440 : "invalid");
1441 else
1442 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1443 }
1444 pHlp->pfnPrintf(pHlp, "\n");
1445 }
1446 else
1447 {
1448 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1449 idxFirst, cEntries, pTb->cTbLookupEntries);
1450 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1451 }
1452}
1453
1454
1455DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1456{
1457 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1458
1459 char szDisBuf[512];
1460
1461 /*
1462 * Print TB info.
1463 */
1464 pHlp->pfnPrintf(pHlp,
1465 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1466 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1467 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1468 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1469
1470 /*
1471 * This disassembly is driven by the debug info which follows the native
1472 * code and indicates when it starts with the next guest instructions,
1473 * where labels are and such things.
1474 */
1475 DISSTATE Dis;
1476 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1477 uint32_t const cCalls = pTb->Thrd.cCalls;
1478 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1479 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1480 : DISCPUMODE_64BIT;
1481 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1482 uint8_t idxRange = UINT8_MAX;
1483 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1484 uint32_t offRange = 0;
1485 uint32_t offOpcodes = 0;
1486 uint32_t const cbOpcodes = pTb->cbOpcodes;
1487 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1488 bool fTbLookupSeen0 = false;
1489
1490 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1491 {
1492 /*
1493 * New opcode range?
1494 */
1495 if ( idxRange == UINT8_MAX
1496 || idxRange >= cRanges
1497 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1498 {
1499 idxRange += 1;
1500 if (idxRange < cRanges)
1501 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1502 else
1503 continue;
1504 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1505 + (pTb->aRanges[idxRange].idxPhysPage == 0
1506 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1507 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1508 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1509 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1510 pTb->aRanges[idxRange].idxPhysPage);
1511 GCPhysPc += offRange;
1512 }
1513
1514 /*
1515 * Disassemble another guest instruction?
1516 */
1517 if ( paCalls[iCall].offOpcode != offOpcodes
1518 && paCalls[iCall].cbOpcode > 0
1519 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1520 {
1521 offOpcodes = paCalls[iCall].offOpcode;
1522 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1523 uint32_t cbInstr = 1;
1524 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1525 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1526 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1527 if (RT_SUCCESS(rc))
1528 {
1529 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1530 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1531 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1532 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1533 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1534 }
1535 else
1536 {
1537 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1538 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1539 cbInstr = paCalls[iCall].cbOpcode;
1540 }
1541 GCPhysPc += cbInstr;
1542 offRange += cbInstr;
1543 }
1544
1545 /*
1546 * Dump call details.
1547 */
1548 pHlp->pfnPrintf(pHlp,
1549 " Call #%u to %s (%u args)\n",
1550 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1551 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1552 if (paCalls[iCall].uTbLookup != 0)
1553 {
1554 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1555 fTbLookupSeen0 = idxFirst == 0;
1556 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1557 }
1558
1559 /*
1560 * Snoop fExec.
1561 */
1562 switch (paCalls[iCall].enmFunction)
1563 {
1564 default:
1565 break;
1566 case kIemThreadedFunc_BltIn_CheckMode:
1567 fExec = paCalls[iCall].auParams[0];
1568 break;
1569 }
1570 }
1571
1572 if (!fTbLookupSeen0)
1573 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1574}
1575
1576
1577
1578/**
1579 * Allocate a translation block for threaded recompilation.
1580 *
1581 * This is allocated with a maxed-out call table and storage for opcode bytes,
1582 * because it's only supposed to be called once per EMT to allocate the TB
1583 * pointed to by IEMCPU::pThrdCompileTbR3.
1584 *
1585 * @returns Pointer to the translation block on success, NULL on failure.
1586 * @param pVM The cross context virtual machine structure.
1587 * @param pVCpu The cross context virtual CPU structure of the calling
1588 * thread.
1589 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1590 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1591 */
1592static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1593{
1594 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1595 if (pTb)
1596 {
1597 unsigned const cCalls = 256;
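        /* Note: 256 call entries with 16 opcode bytes each (comfortably above the
           15 byte x86 maximum instruction length) make up the maxed-out scratch TB
           mentioned in the function description. */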
1598 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1599 if (pTb->Thrd.paCalls)
1600 {
1601 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1602 if (pTb->pabOpcodes)
1603 {
1604 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1605 pTb->Thrd.cAllocated = cCalls;
1606 pTb->Thrd.cCalls = 0;
1607 pTb->cbOpcodes = 0;
1608 pTb->pNext = NULL;
1609 pTb->cUsed = 0;
1610 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1611 pTb->idxAllocChunk = UINT8_MAX;
1612 pTb->GCPhysPc = GCPhysPc;
1613 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1614 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1615 pTb->cInstructions = 0;
1616 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1617
1618 /* Init the first opcode range. */
1619 pTb->cRanges = 1;
1620 pTb->aRanges[0].cbOpcodes = 0;
1621 pTb->aRanges[0].offOpcodes = 0;
1622 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1623 pTb->aRanges[0].u2Unused = 0;
1624 pTb->aRanges[0].idxPhysPage = 0;
1625 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1626 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1627
1628 return pTb;
1629 }
1630 RTMemFree(pTb->Thrd.paCalls);
1631 }
1632 RTMemFree(pTb);
1633 }
1634 RT_NOREF(pVM);
1635 return NULL;
1636}
1637
1638
1639/**
1640 * Called on the TB that is dedicated to recompilation, before it's reused.
1641 *
1642 * @param pVCpu The cross context virtual CPU structure of the calling
1643 * thread.
1644 * @param pTb The translation block to reuse.
1645 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1646 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1647 */
1648static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1649{
1650 pTb->GCPhysPc = GCPhysPc;
1651 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1652 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1653 pTb->Thrd.cCalls = 0;
1654 pTb->cbOpcodes = 0;
1655 pTb->cInstructions = 0;
1656 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1657
1658 /* Init the first opcode range. */
1659 pTb->cRanges = 1;
1660 pTb->aRanges[0].cbOpcodes = 0;
1661 pTb->aRanges[0].offOpcodes = 0;
1662 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1663 pTb->aRanges[0].u2Unused = 0;
1664 pTb->aRanges[0].idxPhysPage = 0;
1665 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1666 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1667}
1668
1669
1670/**
1671 * Used to duplicate a threaded translation block after recompilation is done.
1672 *
1673 * @returns Pointer to the translation block on success, NULL on failure.
1674 * @param pVM The cross context virtual machine structure.
1675 * @param pVCpu The cross context virtual CPU structure of the calling
1676 * thread.
1677 * @param pTbSrc The TB to duplicate.
1678 */
1679static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1680{
1681 /*
1682 * Just using the heap for now. Will make this more efficient and
1683 * complicated later, don't worry. :-)
1684 */
1685 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1686 if (pTb)
1687 {
1688 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1689 memcpy(pTb, pTbSrc, sizeof(*pTb));
1690 pTb->idxAllocChunk = idxAllocChunk;
1691
1692 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1693 Assert(cCalls > 0);
1694 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1695 if (pTb->Thrd.paCalls)
1696 {
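            /* The TB lookup table and the opcode bytes share one allocation: the
               (zero-initialized) lookup table comes first, followed by the copied
               opcode bytes. */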
1697 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1698 Assert(cbTbLookup > 0);
1699 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1700 Assert(cbOpcodes > 0);
1701 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1702 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1703 if (pbBoth)
1704 {
1705 RT_BZERO(pbBoth, cbTbLookup);
1706 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1707 pTb->Thrd.cAllocated = cCalls;
1708 pTb->pNext = NULL;
1709 pTb->cUsed = 0;
1710 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1711 pTb->fFlags = pTbSrc->fFlags;
1712
1713 return pTb;
1714 }
1715 RTMemFree(pTb->Thrd.paCalls);
1716 }
1717 iemTbAllocatorFree(pVCpu, pTb);
1718 }
1719 RT_NOREF(pVM);
1720 return NULL;
1721
1722}
1723
1724
1725/**
1726 * Adds the given TB to the hash table.
1727 *
1728 * @param pVCpu The cross context virtual CPU structure of the calling
1729 * thread.
1730 * @param pTbCache The cache to add it to.
1731 * @param pTb The translation block to add.
1732 */
1733static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1734{
1735 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1736
1737 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1738 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1739 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1740 if (LogIs12Enabled())
1741 {
1742 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1743 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1744 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1745 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1746 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1747 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1748 pTb->aRanges[idxRange].idxPhysPage == 0
1749 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1750 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1751 }
1752}
1753
1754
1755/**
1756 * Called by opcode verifier functions when they detect a problem.
1757 */
1758void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1759{
1760 /* We cannot free the current TB (indicated by fSafeToFree) because:
1761 - A threaded TB will have its current call entry accessed
1762 to update pVCpu->iem.s.cInstructions.
1763 - A native TB will have code left to execute. */
1764 if (fSafeToFree)
1765 iemTbAllocatorFree(pVCpu, pTb);
1766 else
1767 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1768}
1769
1770
1771/*
1772 * Real code.
1773 */
1774
1775#ifdef LOG_ENABLED
1776/**
1777 * Logs the current instruction.
1778 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1779 * @param pszFunction The IEM function doing the execution.
1780 * @param idxInstr The instruction number in the block.
1781 */
1782static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1783{
1784# ifdef IN_RING3
1785 if (LogIs2Enabled())
1786 {
1787 char szInstr[256];
1788 uint32_t cbInstr = 0;
1789 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1790 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1791 szInstr, sizeof(szInstr), &cbInstr);
1792
1793 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1794 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1795 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1796 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1797 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1798 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1799 " %s\n"
1800 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1801 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1802 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1803 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1804 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1805 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1806 szInstr));
1807
1808 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1809 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1810 }
1811 else
1812# endif
1813 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1814 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1815}
1816#endif /* LOG_ENABLED */
1817
1818
1819#if 0
1820static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1821{
1822 RT_NOREF(pVM, pVCpu);
1823 return rcStrict;
1824}
1825#endif
1826
1827
1828/**
1829 * Initializes the decoder state when compiling TBs.
1830 *
1831 * This presumes that fExec has already been initialized.
1832 *
1833 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1834 * to apply fixes to them as well.
1835 *
1836 * @param pVCpu The cross context virtual CPU structure of the calling
1837 * thread.
1838 * @param fReInit Clear for the first call for a TB, set for subsequent
1839 * calls from inside the compile loop where we can skip a
1840 * couple of things.
1841 * @param fExtraFlags The extra translation block flags when @a fReInit is
1842 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1843 * checked.
1844 */
1845DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1846{
1847 /* ASSUMES: That iemInitExec was already called and that anyone changing
1848 CPU state affecting the fExec bits since then will have updated fExec! */
1849 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1850 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1851
1852 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1853
1854 /* Decoder state: */
1855 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1856 pVCpu->iem.s.enmEffAddrMode = enmMode;
1857 if (enmMode != IEMMODE_64BIT)
1858 {
1859 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1860 pVCpu->iem.s.enmEffOpSize = enmMode;
1861 }
1862 else
1863 {
1864 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1865 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1866 }
1867 pVCpu->iem.s.fPrefixes = 0;
1868 pVCpu->iem.s.uRexReg = 0;
1869 pVCpu->iem.s.uRexB = 0;
1870 pVCpu->iem.s.uRexIndex = 0;
1871 pVCpu->iem.s.idxPrefix = 0;
1872 pVCpu->iem.s.uVex3rdReg = 0;
1873 pVCpu->iem.s.uVexLength = 0;
1874 pVCpu->iem.s.fEvexStuff = 0;
1875 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1876 pVCpu->iem.s.offModRm = 0;
1877 pVCpu->iem.s.iNextMapping = 0;
1878
1879 if (!fReInit)
1880 {
1881 pVCpu->iem.s.cActiveMappings = 0;
1882 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1883 pVCpu->iem.s.fEndTb = false;
1884 pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes before executing the first instruction) */
1885 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1886 pVCpu->iem.s.fTbCrossedPage = false;
1887 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1888 pVCpu->iem.s.fTbCurInstrIsSti = false;
1889 /* Force RF clearing and TF checking on first instruction in the block
1890 as we don't really know what came before and should assume the worst: */
1891 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1892 }
1893 else
1894 {
1895 Assert(pVCpu->iem.s.cActiveMappings == 0);
1896 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1897 Assert(pVCpu->iem.s.fEndTb == false);
1898 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1899 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1900 }
1901 pVCpu->iem.s.fTbCurInstr = 0;
1902
1903#ifdef DBGFTRACE_ENABLED
1904 switch (IEM_GET_CPU_MODE(pVCpu))
1905 {
1906 case IEMMODE_64BIT:
1907 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1908 break;
1909 case IEMMODE_32BIT:
1910 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1911 break;
1912 case IEMMODE_16BIT:
1913 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1914 break;
1915 }
1916#endif
1917}
1918
1919
1920/**
1921 * Initializes the opcode fetcher when starting the compilation.
1922 *
1923 * @param pVCpu The cross context virtual CPU structure of the calling
1924 * thread.
1925 */
1926DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1927{
1928 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1929#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1930 pVCpu->iem.s.offOpcode = 0;
1931#else
1932 RT_NOREF(pVCpu);
1933#endif
1934}
1935
1936
1937/**
1938 * Re-initializes the opcode fetcher between instructions while compiling.
1939 *
1940 * @param pVCpu The cross context virtual CPU structure of the calling
1941 * thread.
1942 */
1943DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1944{
1945 if (pVCpu->iem.s.pbInstrBuf)
1946 {
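        /* Compute the offset of the flat PC within the current instruction buffer
           and, if it still falls inside it, limit the readable window to at most
           15 bytes ahead (the maximum instruction length). */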
1947 uint64_t off = pVCpu->cpum.GstCtx.rip;
1948 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1949 off += pVCpu->cpum.GstCtx.cs.u64Base;
1950 off -= pVCpu->iem.s.uInstrBufPc;
1951 if (off < pVCpu->iem.s.cbInstrBufTotal)
1952 {
1953 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1954 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1955 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1956 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1957 else
1958 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1959 }
1960 else
1961 {
1962 pVCpu->iem.s.pbInstrBuf = NULL;
1963 pVCpu->iem.s.offInstrNextByte = 0;
1964 pVCpu->iem.s.offCurInstrStart = 0;
1965 pVCpu->iem.s.cbInstrBuf = 0;
1966 pVCpu->iem.s.cbInstrBufTotal = 0;
1967 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1968 }
1969 }
1970 else
1971 {
1972 pVCpu->iem.s.offInstrNextByte = 0;
1973 pVCpu->iem.s.offCurInstrStart = 0;
1974 pVCpu->iem.s.cbInstrBuf = 0;
1975 pVCpu->iem.s.cbInstrBufTotal = 0;
1976#ifdef VBOX_STRICT
1977 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1978#endif
1979 }
1980#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1981 pVCpu->iem.s.offOpcode = 0;
1982#endif
1983}
1984
1985#ifdef LOG_ENABLED
1986
1987/**
1988 * Inserts a NOP call.
1989 *
1990 * This is for debugging.
1991 *
1992 * @returns true on success, false if we're out of call entries.
1993 * @param pTb The translation block being compiled.
1994 */
1995bool iemThreadedCompileEmitNop(PIEMTB pTb)
1996{
1997 /* Emit the call. */
1998 uint32_t const idxCall = pTb->Thrd.cCalls;
1999 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2000 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2001 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2002 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2003 pCall->idxInstr = pTb->cInstructions - 1;
2004 pCall->cbOpcode = 0;
2005 pCall->offOpcode = 0;
2006 pCall->uTbLookup = 0;
2007 pCall->uUnused0 = 0;
2008 pCall->auParams[0] = 0;
2009 pCall->auParams[1] = 0;
2010 pCall->auParams[2] = 0;
2011 return true;
2012}
2013
2014
2015/**
2016 * Called by iemThreadedCompile if cpu state logging is desired.
2017 *
2018 * @returns true on success, false if we're out of call entries.
2019 * @param pTb The translation block being compiled.
2020 */
2021bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2022{
2023 /* Emit the call. */
2024 uint32_t const idxCall = pTb->Thrd.cCalls;
2025 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2026 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2027 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2028 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2029 pCall->idxInstr = pTb->cInstructions - 1;
2030 pCall->cbOpcode = 0;
2031 pCall->offOpcode = 0;
2032 pCall->uTbLookup = 0;
2033 pCall->uUnused0 = 0;
2034 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2035 pCall->auParams[1] = 0;
2036 pCall->auParams[2] = 0;
2037 return true;
2038}
2039
2040#endif /* LOG_ENABLED */
2041
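/**
 * Copies the first @a cbInstr opcode bytes of the current instruction from the
 * decoder buffer (IEMCPU::abOpcode) to @a pbDst.
 *
 * The unrolled fall-through switch covers the 1..15 byte instruction lengths
 * without resorting to a memcpy call.
 */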
2042DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2043{
2044 switch (cbInstr)
2045 {
2046 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2047 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2048 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2049 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2050 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2051 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2052 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2053 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2054 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2055 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2056 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2057 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2058 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2059 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2060 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2061 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2062 }
2063}
2064
2065
2066/**
2067 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2068 *
2069 * - CS LIM check required.
2070 * - Must recheck opcode bytes.
2071 * - Previous instruction branched.
2072 * - TLB load detected, probably due to page crossing.
2073 *
2074 * @returns true if everything went well, false if we're out of space in the TB
2075 * (e.g. opcode ranges) or we need to start doing CS.LIM checks.
2076 * @param pVCpu The cross context virtual CPU structure of the calling
2077 * thread.
2078 * @param pTb The translation block being compiled.
2079 */
2080bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2081{
2082 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2083 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2084#if 0
2085 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2086 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2087#endif
2088
2089 /*
2090 * If we're not in 64-bit mode and not already checking CS.LIM, we need to
2091 * see if we should start checking.
2092 */
2093 bool fConsiderCsLimChecking;
2094 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2095 if ( fMode == IEM_F_MODE_X86_64BIT
2096 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2097 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2098 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2099 fConsiderCsLimChecking = false; /* already enabled or not needed */
2100 else
2101 {
2102 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2103 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2104 fConsiderCsLimChecking = true; /* likely */
2105 else
2106 {
2107 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2108 return false;
2109 }
2110 }
2111
2112 /*
2113 * Prepare the call now, even before we know if we can accept the instruction in this TB.
2114 * This allows us to amend parameters w/o making every case suffer.
2115 */
2116 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2117 uint16_t const offOpcode = pTb->cbOpcodes;
2118 uint8_t idxRange = pTb->cRanges - 1;
2119
2120 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2121 pCall->idxInstr = pTb->cInstructions;
2122 pCall->cbOpcode = cbInstr;
2123 pCall->offOpcode = offOpcode;
2124 pCall->uTbLookup = 0;
2125 pCall->uUnused0 = 0;
2126 pCall->auParams[0] = (uint32_t)cbInstr
2127 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2128 /* The upper dword is sometimes used for cbStartPage. */;
2129 pCall->auParams[1] = idxRange;
2130 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2131
2132/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2133 * gotten onto. If we do, stop */
2134
2135 /*
2136 * Case 1: We've branched (RIP changed).
2137 *
2138 * Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2139 * TB, end the TB here as it is most likely a loop and if it
2140 * made sense to unroll it, the guest code compiler should've
2141 * done it already.
2142 *
2143 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2144 * Req: 1 extra range, no extra phys.
2145 *
2146 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2147 * necessary (fTbCrossedPage is true).
2148 * Req: 1 extra range, probably 1 extra phys page entry.
2149 *
2150 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2151 * but in addition we cross into the following page and require
2152 * another TLB load.
2153 * Req: 2 extra ranges, probably 2 extra phys page entries.
2154 *
2155 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2156 * the following page (thus fTbCrossedPage is true).
2157 * Req: 2 extra ranges, probably 1 extra phys page entry.
2158 *
2159 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
2160 * it may trigger "spuriously" from the CPU's point of view because of
2161 * physical page changes that'll invalidate the physical TLB and trigger a
2162 * call to the function. In theory this shouldn't be a big deal, just a bit
2163 * of performance loss as we'll pick the LoadingTlb variants.
2164 *
2165 * Note! We do not currently optimize branching to the next instruction (sorry
2166 * 32-bit PIC code). We could maybe do that in the branching code that
2167 * sets (or not) fTbBranched.
2168 */
2169 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2170 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2171 * code. This'll require filtering out far jmps and calls, as they
2172 * load CS which should technically be considered indirect since the
2173 * GDT/LDT entry's base address can be modified independently from
2174 * the code. */
2175 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2176 {
2177 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2178 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2179 {
2180 /* 1a + 1b - instruction fully within the branched to page. */
2181 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2182 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2183
2184 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2185 {
2186 /* Check that we've got a free range. */
2187 idxRange += 1;
2188 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2189 { /* likely */ }
2190 else
2191 {
2192 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2193 return false;
2194 }
2195 pCall->auParams[1] = idxRange;
2196 pCall->auParams[2] = 0;
2197
2198 /* Check that we've got a free page slot. */
2199 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2200 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2201 uint8_t idxPhysPage;
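                /* Note: idxPhysPage is set to UINT8_MAX when the page is new to this
                   TB, so the loop check below only runs for pages the TB already
                   contains code from. */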
2202 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2203 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2204 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2205 {
2206 pTb->aGCPhysPages[0] = GCPhysNew;
2207 pTb->aRanges[idxRange].idxPhysPage = 1;
2208 idxPhysPage = UINT8_MAX;
2209 }
2210 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2211 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2212 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2213 {
2214 pTb->aGCPhysPages[1] = GCPhysNew;
2215 pTb->aRanges[idxRange].idxPhysPage = 2;
2216 idxPhysPage = UINT8_MAX;
2217 }
2218 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2219 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2220 else
2221 {
2222 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2223 return false;
2224 }
2225
2226 /* Loop check: We weave the loop check in here to optimize the lookup. */
2227 if (idxPhysPage != UINT8_MAX)
2228 {
2229 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2230 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2231 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2232 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2233 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2234 {
2235 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2236 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2237 return false;
2238 }
2239 }
2240
2241 /* Finish setting up the new range. */
2242 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2243 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2244 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2245 pTb->aRanges[idxRange].u2Unused = 0;
2246 pTb->cRanges++;
2247 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2248 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2249 pTb->aRanges[idxRange].offOpcodes));
2250 }
2251 else
2252 {
2253 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2254 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2255 }
2256
2257 /* Determine which function we need to load & check.
2258 Note! For jumps to a new page, we'll set both fTbBranched and
2259 fTbCrossedPage to avoid unnecessary TLB work for intra-page
2260 branching. */
2261 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2262 || pVCpu->iem.s.fTbCrossedPage)
2263 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2264 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2265 : !fConsiderCsLimChecking
2266 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2267 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2268 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2269 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2270 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2271 : !fConsiderCsLimChecking
2272 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2273 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2274 else
2275 {
2276 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2277 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2278 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2279 : !fConsiderCsLimChecking
2280 ? kIemThreadedFunc_BltIn_CheckOpcodes
2281 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2282 }
2283 }
2284 else
2285 {
2286 /* 1c + 1d - instruction crosses pages. */
2287 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2288 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2289
2290 /* Lazy bird: Check that this isn't case 1c, since we've already
2291 loaded the first physical address. End the TB and
2292 make it a case 2b instead.
2293
2294 Hmm. Too much bother to detect, so just do the same
2295 with case 1d as well. */
2296#if 0 /** @todo get back to this later when we've got the actual branch code in
2297 * place. */
2298 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2299
2300 /* Check that we've got two free ranges. */
2301 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2302 { /* likely */ }
2303 else
2304 return false;
2305 idxRange += 1;
2306 pCall->auParams[1] = idxRange;
2307 pCall->auParams[2] = 0;
2308
2309 /* ... */
2310
2311#else
2312 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2313 return false;
2314#endif
2315 }
2316 }
2317
2318 /*
2319 * Case 2: Page crossing.
2320 *
2321 * Sub-case 2a: The instruction starts on the first byte in the next page.
2322 *
2323 * Sub-case 2b: The instruction has opcode bytes in both the current and
2324 * following page.
2325 *
2326 * Both cases requires a new range table entry and probably a new physical
2327 * page entry. The difference is in which functions to emit and whether to
2328 * add bytes to the current range.
2329 */
2330 else if (pVCpu->iem.s.fTbCrossedPage)
2331 {
2332 /* Check that we've got a free range. */
2333 idxRange += 1;
2334 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2335 { /* likely */ }
2336 else
2337 {
2338 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2339 return false;
2340 }
2341
2342 /* Check that we've got a free page slot. */
2343 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2344 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2345 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2346 pTb->aRanges[idxRange].idxPhysPage = 0;
2347 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2348 || pTb->aGCPhysPages[0] == GCPhysNew)
2349 {
2350 pTb->aGCPhysPages[0] = GCPhysNew;
2351 pTb->aRanges[idxRange].idxPhysPage = 1;
2352 }
2353 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2354 || pTb->aGCPhysPages[1] == GCPhysNew)
2355 {
2356 pTb->aGCPhysPages[1] = GCPhysNew;
2357 pTb->aRanges[idxRange].idxPhysPage = 2;
2358 }
2359 else
2360 {
2361 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2362 return false;
2363 }
2364
2365 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2366 {
2367 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2368 pCall->auParams[1] = idxRange;
2369 pCall->auParams[2] = 0;
2370
2371 /* Finish setting up the new range. */
2372 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2373 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2374 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2375 pTb->aRanges[idxRange].u2Unused = 0;
2376 pTb->cRanges++;
2377 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2378 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2379 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2380
2381 /* Determine which function we need to load & check. */
2382 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2383 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2384 : !fConsiderCsLimChecking
2385 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2386 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2387 }
2388 else
2389 {
2390 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2391 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2392 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2393 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2394
2395 /* We're good. Split the instruction over the old and new range table entries. */
2396 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2397
2398 pTb->aRanges[idxRange].offPhysPage = 0;
2399 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2400 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2401 pTb->aRanges[idxRange].u2Unused = 0;
2402 pTb->cRanges++;
2403 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2404 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2405 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2406
2407 /* Determine which function we need to load & check. */
2408 if (pVCpu->iem.s.fTbCheckOpcodes)
2409 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2410 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2411 : !fConsiderCsLimChecking
2412 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2413 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2414 else
2415 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2416 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2417 : !fConsiderCsLimChecking
2418 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2419 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2420 }
2421 }
2422
2423 /*
2424 * Regular case: No new range required.
2425 */
2426 else
2427 {
2428 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2429 if (pVCpu->iem.s.fTbCheckOpcodes)
2430 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2431 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2432 : kIemThreadedFunc_BltIn_CheckOpcodes;
2433 else
2434 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2435
2436 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2437 pTb->cbOpcodes = offOpcode + cbInstr;
2438 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2439 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2440 }
2441
2442 /*
2443 * Commit the call.
2444 */
2445 pTb->Thrd.cCalls++;
2446
2447 /*
2448 * Clear state.
2449 */
2450 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2451 pVCpu->iem.s.fTbCrossedPage = false;
2452 pVCpu->iem.s.fTbCheckOpcodes = false;
2453
2454 /*
2455 * Copy opcode bytes.
2456 */
2457 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2458 pTb->cbOpcodes = offOpcode + cbInstr;
2459 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2460
2461 return true;
2462}
2463
2464
2465/**
2466 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2467 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2468 *
2469 * @returns true if anything is pending, false if not.
2470 * @param pVCpu The cross context virtual CPU structure of the calling
2471 * thread.
2472 */
2473DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2474{
2475 uint64_t fCpu = pVCpu->fLocalForcedActions;
2476 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2477#if 1
2478 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2479 if (RT_LIKELY( !fCpu
2480 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2481 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2482 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2483 return false;
2484 return true;
2485#else
2486 return false;
2487#endif
2488
2489}
2490
2491
2492/**
2493 * Called by iemThreadedCompile when a block requires a mode check.
2494 *
2495 * @returns true if we should continue, false if we're out of call entries.
2496 * @param pVCpu The cross context virtual CPU structure of the calling
2497 * thread.
2498 * @param pTb The translation block being compiled.
2499 */
2500static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2501{
2502 /* Emit the call. */
2503 uint32_t const idxCall = pTb->Thrd.cCalls;
2504 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2505 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2506 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2507 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2508 pCall->idxInstr = pTb->cInstructions - 1;
2509 pCall->cbOpcode = 0;
2510 pCall->offOpcode = 0;
2511 pCall->uTbLookup = 0;
2512 pCall->uUnused0 = 0;
2513 pCall->auParams[0] = pVCpu->iem.s.fExec;
2514 pCall->auParams[1] = 0;
2515 pCall->auParams[2] = 0;
2516 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2517 return true;
2518}
2519
2520
2521/**
2522 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2523 * set.
2524 *
2525 * @returns true if we should continue, false if an IRQ is deliverable or a
2526 * relevant force flag is pending.
2527 * @param pVCpu The cross context virtual CPU structure of the calling
2528 * thread.
2529 * @param pTb The translation block being compiled.
2530 * @sa iemThreadedCompileCheckIrq
2531 */
2532bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2533{
2534 /*
2535 * Skip this if we've already emitted a call after the previous instruction
2536 * or if it's the first call, as we're always checking FFs between blocks.
2537 */
2538 uint32_t const idxCall = pTb->Thrd.cCalls;
2539 if ( idxCall > 0
2540 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2541 {
2542 /* Emit the call. */
2543 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2544 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2545 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2546 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2547 pCall->idxInstr = pTb->cInstructions;
2548 pCall->offOpcode = 0;
2549 pCall->cbOpcode = 0;
2550 pCall->uTbLookup = 0;
2551 pCall->uUnused0 = 0;
2552 pCall->auParams[0] = 0;
2553 pCall->auParams[1] = 0;
2554 pCall->auParams[2] = 0;
2555 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2556
2557 /* Reset the IRQ check value. */
2558 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2559
2560 /*
2561 * Check for deliverable IRQs and pending force flags.
2562 */
2563 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2564 }
2565 return true; /* continue */
2566}
2567
2568
2569/**
2570 * Emits an IRQ check call and checks for pending IRQs.
2571 *
2572 * @returns true if we should continue, false if an IRQ is deliverable or a
2573 * relevant force flag is pending.
2574 * @param pVCpu The cross context virtual CPU structure of the calling
2575 * thread.
2576 * @param pTb The translation block.
2577 * @sa iemThreadedCompileBeginEmitCallsComplications
2578 */
2579static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2580{
2581 /* Check again in a little bit, unless it is immediately following an STI
2582 in which case we *must* check immediately after the next instruction
2583 as well in case it's executed with interrupt inhibition. We could
2584 otherwise miss the interrupt window. See the irq2 wait2 variant in
2585 bs3-timers-1 which is doing sti + sti + cli. */
2586 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2587 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2588 else
2589 {
2590 pVCpu->iem.s.fTbCurInstrIsSti = false;
2591 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2592 }
2593 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2594
2595 /*
2596 * Emit the call.
2597 */
2598 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2599 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2600 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2601 pCall->idxInstr = pTb->cInstructions;
2602 pCall->offOpcode = 0;
2603 pCall->cbOpcode = 0;
2604 pCall->uTbLookup = 0;
2605 pCall->uUnused0 = 0;
2606 pCall->auParams[0] = 0;
2607 pCall->auParams[1] = 0;
2608 pCall->auParams[2] = 0;
2609
2610 /*
2611 * Check for deliverable IRQs and pending force flags.
2612 */
2613 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2614}
2615
2616
2617/**
2618 * Compiles a new TB and executes it.
2619 *
2620 * We combine compilation and execution here as it makes for simpler code flow
2621 * in the main loop, and it allows interpreting while compiling if we want to
2622 * explore that option.
2623 *
2624 * @returns Strict VBox status code.
2625 * @param pVM The cross context virtual machine structure.
2626 * @param pVCpu The cross context virtual CPU structure of the calling
2627 * thread.
2628 * @param GCPhysPc The physical address corresponding to the current
2629 * RIP+CS.BASE.
2630 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2631 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2632 */
2633static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2634{
2635 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2636 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2637
2638 /*
2639 * Get the TB we use for recompiling. This is a maxed-out TB that
2640 * we'll make a more efficient copy of when we're done compiling.
2641 */
2642 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2643 if (pTb)
2644 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2645 else
2646 {
2647 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2648 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2649 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2650 }
2651
2652 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2653 functions may get at it. */
2654 pVCpu->iem.s.pCurTbR3 = pTb;
2655
2656#if 0
2657 /* Make sure the CheckIrq condition matches the one in EM. */
2658 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2659 const uint32_t cZeroCalls = 1;
2660#else
2661 const uint32_t cZeroCalls = 0;
2662#endif
2663
2664 /*
2665 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2666 */
2667 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2668 iemThreadedCompileInitOpcodeFetching(pVCpu);
2669 VBOXSTRICTRC rcStrict;
2670 for (;;)
2671 {
2672 /* Process the next instruction. */
2673#ifdef LOG_ENABLED
2674 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2675 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2676 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2677 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2678#endif
2679 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2680 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2681
2682 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2683#if 0
2684 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2685 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2686 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2687#endif
2688 if ( rcStrict == VINF_SUCCESS
2689 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2690 && !pVCpu->iem.s.fEndTb)
2691 {
2692 Assert(pTb->Thrd.cCalls > cCallsPrev);
2693 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2694
2695 pVCpu->iem.s.cInstructions++;
2696
2697 /* Check for mode change _after_ certain CIMPL calls, so check that
2698 we continue executing with the same mode value. */
2699 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2700 { /* probable */ }
2701 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2702 { /* extremely likely */ }
2703 else
2704 break;
2705
2706#if defined(LOG_ENABLED) && 0 /* for debugging */
2707 //iemThreadedCompileEmitNop(pTb);
2708 iemThreadedCompileEmitLogCpuState(pTb);
2709#endif
2710 }
2711 else
2712 {
2713 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2714 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2715 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2716 rcStrict = VINF_SUCCESS;
2717
2718 if (pTb->Thrd.cCalls > cZeroCalls)
2719 {
2720 if (cCallsPrev != pTb->Thrd.cCalls)
2721 pVCpu->iem.s.cInstructions++;
2722 break;
2723 }
2724
2725 pVCpu->iem.s.pCurTbR3 = NULL;
2726 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2727 }
2728
2729 /* Check for IRQs? */
2730 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2731 pVCpu->iem.s.cInstrTillIrqCheck--;
2732 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2733 break;
2734
2735 /* Still space in the TB? */
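        /* (The +5 / +16 margins presumably cover the worst case a single guest
           instruction can add in call entries and opcode bytes.) */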
2736 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2737 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2738 && pTb->cTbLookupEntries < 127)
2739 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2740 else
2741 {
2742 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2743 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2744 break;
2745 }
2746 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2747 }
2748
2749 /*
2750 * Reserve lookup space for the final call entry if necessary.
2751 */
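    /* (After execution, iemTbExec uses the final call's uTbLookup value to pick
        the lookup slot used for chaining to the next TB, see ppTbLookupEntryR3.) */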
2752 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2753 if (pTb->Thrd.cCalls > 1)
2754 {
2755 if (pFinalCall->uTbLookup == 0)
2756 {
2757 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2758 pTb->cTbLookupEntries += 1;
2759 }
2760 }
2761 else if (pFinalCall->uTbLookup != 0)
2762 {
2763 Assert(pTb->cTbLookupEntries > 1);
2764 pFinalCall->uTbLookup -= 1;
2765 pTb->cTbLookupEntries -= 1;
2766 }
2767
2768 /*
2769 * Duplicate the TB into a completed one and link it.
2770 */
2771 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2772 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2773
2774 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2775
2776#ifdef IEM_COMPILE_ONLY_MODE
2777 /*
2778 * Execute the translation block.
2779 */
2780#endif
2781
2782 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2783}
2784
2785
2786
2787/*********************************************************************************************************************************
2788* Recompiled Execution Core *
2789*********************************************************************************************************************************/
2790
2791/** Helper for iemTbExec. */
2792DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
2793{
2794 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP(uTbLookup, uRip);
2795 Assert(idx < pTb->cTbLookupEntries);
2796 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
2797}
2798
2799
2800/**
2801 * Executes a translation block.
2802 *
2803 * @returns Strict VBox status code.
2804 * @param pVCpu The cross context virtual CPU structure of the calling
2805 * thread.
2806 * @param pTb The translation block to execute.
2807 */
2808static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2809{
2810 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2811
2812 /*
2813 * Set the current TB so CIMPL functions may get at it.
2814 */
2815 pVCpu->iem.s.pCurTbR3 = pTb;
2816 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
2817
2818 /*
2819 * Execute the block.
2820 */
2821#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2822 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2823 {
2824 pVCpu->iem.s.cTbExecNative++;
2825# ifdef LOG_ENABLED
2826 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2827# endif
2828
2829# ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
2830# ifdef RT_ARCH_AMD64
2831 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2832# else
2833 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2834# endif
2835# else
2836# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2837 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
2838# endif
2839# ifdef RT_ARCH_AMD64
2840 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
2841# else
2842 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
2843# endif
2844# endif
2845
2846# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2847 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2848# endif
2849# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
2850 /* Restore FPCR/MXCSR if the TB modified it. */
2851 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
2852 {
2853 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
2854 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
2855 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
2856 }
2857# endif
2858# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
2859 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
2860# endif
2861 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2862 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2863 { /* likely */ }
2864 else
2865 {
2866 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2867 pVCpu->iem.s.pCurTbR3 = NULL;
2868
2869 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2870 only to break out of TB execution early. */
2871 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2872 {
2873 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
2874 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2875 }
2876
2877 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
2878 only to break out of TB execution early due to pending FFs. */
2879 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
2880 {
2881 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
2882 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2883 }
2884
2885 /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
2886 and be converted to VINF_SUCCESS or whatever is appropriate. */
2887 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2888 {
2889 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
2890 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
2891 }
2892
2893 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
2894 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2895 }
2896 }
2897 else
2898#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2899 {
2900 /*
2901 * The threaded execution loop.
2902 */
2903 pVCpu->iem.s.cTbExecThreaded++;
2904#ifdef LOG_ENABLED
2905 uint64_t uRipPrev = UINT64_MAX;
2906#endif
2907 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2908 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2909 while (cCallsLeft-- > 0)
2910 {
2911#ifdef LOG_ENABLED
2912 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2913 {
2914 uRipPrev = pVCpu->cpum.GstCtx.rip;
2915 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2916 }
2917 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2918 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2919 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2920#endif
2921#ifdef VBOX_WITH_STATISTICS
2922 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
2923 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
2924#endif
2925 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2926 pCallEntry->auParams[0],
2927 pCallEntry->auParams[1],
2928 pCallEntry->auParams[2]);
2929 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2930 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2931 pCallEntry++;
2932 else
2933 {
2934 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2935 pVCpu->iem.s.pCurTbR3 = NULL;
2936 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
2937 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
2938
2939 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2940 only to break out of TB execution early. */
2941 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2942 {
2943#ifdef VBOX_WITH_STATISTICS
2944 if (pCallEntry->uTbLookup)
2945 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
2946 else
2947 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
2948#endif
2949 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2950 }
2951 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2952 }
2953 }
2954
2955 /* Update the lookup entry. */
2956 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
2957 }
2958
2959 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2960 pVCpu->iem.s.pCurTbR3 = NULL;
2961 return VINF_SUCCESS;
2962}
2963
2964
2965/**
2966 * This is called when the PC doesn't match the current pbInstrBuf.
2967 *
2968 * Upon return, we're ready for opcode fetching. But please note that
2969 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2970 * MMIO or unassigned).
2971 */
2972static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2973{
2974 pVCpu->iem.s.pbInstrBuf = NULL;
2975 pVCpu->iem.s.offCurInstrStart = 0;
2976 pVCpu->iem.s.offInstrNextByte = 0;
2977 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2978 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2979}
2980
2981
2982/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
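/**
 * Translates the current RIP (+ CS.BASE) into a physical address and prepares
 * for opcode fetching.
 *
 * Advances within the current instruction buffer (page) when the PC still
 * falls inside it, otherwise defers to iemGetPcWithPhysAndCodeMissed().
 */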
2983DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2984{
2985 /*
2986 * Set uCurTbStartPc to RIP and calc the effective PC.
2987 */
2988 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2989 pVCpu->iem.s.uCurTbStartPc = uPc;
2990 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2991 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2992
2993 /*
2994 * Advance within the current buffer (PAGE) when possible.
2995 */
2996 if (pVCpu->iem.s.pbInstrBuf)
2997 {
2998 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2999 if (off < pVCpu->iem.s.cbInstrBufTotal)
3000 {
3001 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3002 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
3003 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3004 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3005 else
3006 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3007
3008 return pVCpu->iem.s.GCPhysInstrBuf + off;
3009 }
3010 }
3011 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3012}
3013
3014
3015/**
3016 * Determines the extra IEMTB_F_XXX flags.
3017 *
3018 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
3019 * IEMTB_F_CS_LIM_CHECKS (or zero).
3020 * @param pVCpu The cross context virtual CPU structure of the calling
3021 * thread.
3022 */
3023DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3024{
3025 uint32_t fRet = 0;
3026
3027 /*
3028 * Determine the inhibit bits.
3029 */
3030 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3031 { /* typical */ }
3032 else
3033 {
3034 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3035 fRet |= IEMTB_F_INHIBIT_SHADOW;
3036 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3037 fRet |= IEMTB_F_INHIBIT_NMI;
3038 }
3039
3040 /*
3041 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
3042 * likely to go invalid before the end of the translation block.
3043 */
3044 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3045 return fRet;
3046
3047 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3048 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3049 return fRet;
3050 return fRet | IEMTB_F_CS_LIM_CHECKS;
3051}
3052
3053
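/**
 * Executes guest code using the threaded and/or native recompiler.
 *
 * Injects any trap pending in TRPM, then runs the lookup-or-compile-and-execute
 * loop until a strict status code, a force flag or pending timer work requires
 * returning to EM.  Falls back to IEMExecLots when the current PC has no
 * readable backing (MMIO or unassigned memory).
 *
 * @returns Strict VBox status code.
 * @param   pVM     The cross context VM structure.
 * @param   pVCpu   The cross context virtual CPU structure of the calling
 *                  thread.
 */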
3054VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
3055{
3056 /*
3057 * See if there is an interrupt pending in TRPM, inject it if we can.
3058 */
3059 if (!TRPMHasTrap(pVCpu))
3060 { /* likely */ }
3061 else
3062 {
3063 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3064 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3065 { /* likely */ }
3066 else
3067 return rcStrict;
3068 }
3069
3070 /*
3071 * Init the execution environment.
3072 */
3073#if 1 /** @todo this seems like a good idea; however, if we ever share memory
3074 * directly with other threads on the host, it isn't necessarily... */
3075 if (pVM->cCpus == 1)
3076 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3077 else
3078#endif
3079 iemInitExec(pVCpu, 0 /*fExecOpts*/);
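    /* Lazily seed the cached virtual-time millisecond value used for cheap timer polling below. */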
3080 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
3081 { }
3082 else
3083 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
3084 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3085
3086 /*
3087 * Run-loop.
3088 *
3089 * If we're using setjmp/longjmp we combine all the catching here to avoid
3090 * having to call setjmp for each block we're executing.
3091 */
3092 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3093 for (;;)
3094 {
3095 VBOXSTRICTRC rcStrict;
3096 IEM_TRY_SETJMP(pVCpu, rcStrict)
3097 {
3098 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
3099 for (uint32_t iIterations = 0; ; iIterations++)
3100 {
3101 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3102 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3103 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3104 {
3105 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3106 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3107 if (pTb)
3108 rcStrict = iemTbExec(pVCpu, pTb);
3109 else
3110 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3111 }
3112 else
3113 {
3114 /* This can only happen if the current PC cannot be translated into a
3115 host pointer, which means we're in MMIO or unmapped memory... */
3116#if defined(VBOX_STRICT) && defined(IN_RING3)
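                    /* In strict ring-3 builds, give an attached debugger a chance to look at this unusual path. */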
3117 rcStrict = DBGFSTOP(pVM);
3118 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3119 return rcStrict;
3120#endif
3121 rcStrict = IEMExecLots(pVCpu, 2048, cPollRate, NULL);
3122 }
3123 if (rcStrict == VINF_SUCCESS)
3124 {
3125 Assert(pVCpu->iem.s.cActiveMappings == 0);
3126
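                    /* Return to EM if there are force flags we cannot handle here,
                       e.g. a deliverable interrupt or any VM-wide force flag. */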
3127 uint64_t fCpu = pVCpu->fLocalForcedActions;
3128 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3129 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3130 | VMCPU_FF_TLB_FLUSH
3131 | VMCPU_FF_UNHALT );
3132 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3133 if (RT_LIKELY( ( !fCpu
3134 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3135 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3136 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3137 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3138 {
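                        /* Poll the timers every cPollRate + 1 iterations (the constant is
                           used as a mask) and return to EM when they need servicing. */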
3139 if (RT_LIKELY( (iIterations & cPollRate) != 0
3140 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
3141 { /* likely */ }
3142 else
3143 return VINF_SUCCESS;
3144 }
3145 else
3146 return VINF_SUCCESS;
3147 }
3148 else
3149 return rcStrict;
3150 }
3151 }
3152 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3153 {
3154 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3155 pVCpu->iem.s.cLongJumps++;
3156#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3157 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3158#endif
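        /* Roll back any unfinished guest memory mappings. */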
3159 if (pVCpu->iem.s.cActiveMappings > 0)
3160 iemMemRollback(pVCpu);
3161
3162#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
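        /* When the longjmp came out of a native TB: update the exit statistics and,
           depending on the build configuration, credit the instructions executed so
           far and restore the host FPCR/MXCSR if the TB modified it. */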
3163 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3164 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3165 {
3166 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3167# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3168 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3169 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3170# endif
3171
3172# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3173 /* Restore FPCR/MXCSR if the TB modified it. */
3174 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3175 {
3176 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3177 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3178 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3179 }
3180# endif
3181 }
3182#endif
3183
3184#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3185 /* If pTb isn't NULL we're in iemTbExec. */
3186 if (!pTb)
3187 {
3188 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3189 pTb = pVCpu->iem.s.pCurTbR3;
3190 if (pTb)
3191 {
3192 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3193 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3194 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3195 }
3196 }
3197#endif
3198 pVCpu->iem.s.pCurTbR3 = NULL;
3199 return rcStrict;
3200 }
3201 IEM_CATCH_LONGJMP_END(pVCpu);
3202 }
3203}
3204