VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@ 106402

Last change on this file was 106402, checked in by vboxsync, 6 weeks ago

VMM/IEM: Build fix for compiling w/o the native recompiler. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 161.7 KB
1/* $Id: IEMAllThrdRecompiler.cpp 106402 2024-10-16 20:56:39Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused configs.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
117# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
118#endif
119
120
121/*********************************************************************************************************************************
122* Internal Functions *
123*********************************************************************************************************************************/
124#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
125static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb);
126#endif
127
128
129/**
130 * Calculates the effective address of a ModR/M memory operand, extended version
131 * for use in the recompilers.
132 *
133 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
134 *
135 * May longjmp on internal error.
136 *
137 * @return The effective address.
138 * @param pVCpu The cross context virtual CPU structure of the calling thread.
139 * @param bRm The ModRM byte.
140 * @param cbImmAndRspOffset - First byte: The size of any immediate
141 * following the effective address opcode bytes
142 * (only for RIP relative addressing).
143 * - Second byte: RSP displacement (for POP [ESP]).
144 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
145 * SIB byte (bits 39:32).
146 *
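 * For example, in 32-bit code a ModR/M byte of 0x44 followed by a SIB byte of
 * 0xB3 and an 8-bit displacement of 0x20 encodes [ebx+esi*4+0x20]. The
 * function then returns ebx + esi*4 + 0x20 and sets *puInfo to
 * 0x000000B300000020, i.e. the SIB byte in bits 39:32 and the displacement in
 * bits 31:0.
 *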
147 * @note This must be defined in a source file with matching
148 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
149 * or implemented differently...
150 */
151RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
152{
153 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
154# define SET_SS_DEF() \
155 do \
156 { \
157 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
158 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
159 } while (0)
160
161 if (!IEM_IS_64BIT_CODE(pVCpu))
162 {
163/** @todo Check the effective address size crap! */
164 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
165 {
166 uint16_t u16EffAddr;
167
168 /* Handle the disp16 form with no registers first. */
169 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
170 {
171 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
172 *puInfo = u16EffAddr;
173 }
174 else
175 {
176 /* Get the displacement. */
177 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
178 {
179 case 0: u16EffAddr = 0; break;
180 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
181 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
182 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
183 }
184 *puInfo = u16EffAddr;
185
186 /* Add the base and index registers to the disp. */
187 switch (bRm & X86_MODRM_RM_MASK)
188 {
189 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
190 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
191 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
192 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
193 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
194 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
195 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
196 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
197 }
198 }
199
200 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
201 return u16EffAddr;
202 }
203
204 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
205 uint32_t u32EffAddr;
206 uint64_t uInfo;
207
208 /* Handle the disp32 form with no registers first. */
209 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
210 {
211 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
212 uInfo = u32EffAddr;
213 }
214 else
215 {
216 /* Get the register (or SIB) value. */
217 uInfo = 0;
218 switch ((bRm & X86_MODRM_RM_MASK))
219 {
220 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
221 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
222 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
223 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
224 case 4: /* SIB */
225 {
226 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
227 uInfo = (uint64_t)bSib << 32;
228
229 /* Get the index and scale it. */
230 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
231 {
232 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
233 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
234 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
235 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
236 case 4: u32EffAddr = 0; /*none */ break;
237 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
238 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
239 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
240 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
241 }
242 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
243
244 /* add base */
245 switch (bSib & X86_SIB_BASE_MASK)
246 {
247 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
248 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
249 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
250 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
251 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
252 case 5:
253 if ((bRm & X86_MODRM_MOD_MASK) != 0)
254 {
255 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
256 SET_SS_DEF();
257 }
258 else
259 {
260 uint32_t u32Disp;
261 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
262 u32EffAddr += u32Disp;
263 uInfo |= u32Disp;
264 }
265 break;
266 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
267 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
268 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
269 }
270 break;
271 }
272 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
273 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
274 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
275 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
276 }
277
278 /* Get and add the displacement. */
279 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
280 {
281 case 0:
282 break;
283 case 1:
284 {
285 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
286 u32EffAddr += i8Disp;
287 uInfo |= (uint32_t)(int32_t)i8Disp;
288 break;
289 }
290 case 2:
291 {
292 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
293 u32EffAddr += u32Disp;
294 uInfo |= u32Disp;
295 break;
296 }
297 default:
298 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
299 }
300 }
301
302 *puInfo = uInfo;
303 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
304 return u32EffAddr;
305 }
306
307 uint64_t u64EffAddr;
308 uint64_t uInfo;
309
310 /* Handle the rip+disp32 form with no registers first. */
311 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
312 {
313 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
314 uInfo = (uint32_t)u64EffAddr;
315 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
316 }
317 else
318 {
319 /* Get the register (or SIB) value. */
320 uInfo = 0;
321 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
322 {
323 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
324 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
325 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
326 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
327 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
328 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
329 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
330 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
331 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
332 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
333 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
334 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
335 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
336 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
337 /* SIB */
338 case 4:
339 case 12:
340 {
341 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
342 uInfo = (uint64_t)bSib << 32;
343
344 /* Get the index and scale it. */
345 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
346 {
347 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
348 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
349 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
350 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
351 case 4: u64EffAddr = 0; /*none */ break;
352 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
353 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
354 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
355 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
356 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
357 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
358 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
359 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
360 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
361 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
362 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
363 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
364 }
365 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
366
367 /* add base */
368 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
369 {
370 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
371 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
372 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
373 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
374 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
375 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
376 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
377 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
378 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
379 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
380 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
381 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
382 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
383 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
384 /* complicated encodings */
385 case 5:
386 case 13:
387 if ((bRm & X86_MODRM_MOD_MASK) != 0)
388 {
389 if (!pVCpu->iem.s.uRexB)
390 {
391 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
392 SET_SS_DEF();
393 }
394 else
395 u64EffAddr += pVCpu->cpum.GstCtx.r13;
396 }
397 else
398 {
399 uint32_t u32Disp;
400 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
401 u64EffAddr += (int32_t)u32Disp;
402 uInfo |= u32Disp;
403 }
404 break;
405 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
406 }
407 break;
408 }
409 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
410 }
411
412 /* Get and add the displacement. */
413 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
414 {
415 case 0:
416 break;
417 case 1:
418 {
419 int8_t i8Disp;
420 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
421 u64EffAddr += i8Disp;
422 uInfo |= (uint32_t)(int32_t)i8Disp;
423 break;
424 }
425 case 2:
426 {
427 uint32_t u32Disp;
428 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
429 u64EffAddr += (int32_t)u32Disp;
430 uInfo |= u32Disp;
431 break;
432 }
433 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
434 }
435
436 }
437
438 *puInfo = uInfo;
439 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
440 {
441 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
442 return u64EffAddr;
443 }
444 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
445 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
446 return u64EffAddr & UINT32_MAX;
447}
448
449
450
451/*********************************************************************************************************************************
452* Translation Block Cache. *
453*********************************************************************************************************************************/
454
455/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
456static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
457{
458 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
459 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
460 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
461 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
462 if (cMsSinceUse1 != cMsSinceUse2)
463 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
464 if (pTb1->cUsed != pTb2->cUsed)
465 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
466 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
467 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
468 return 0;
469}
470
471#ifdef VBOX_STRICT
472/**
473 * Assertion helper that checks a collision list count.
474 */
475static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
476{
477 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
478 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
479 while (pTb)
480 {
481 pTb = pTb->pNext;
482 cLeft--;
483 }
484 AssertMsg(cLeft == 0,
485 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
486 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
487}
488#endif
489
490
491DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
492{
493 STAM_PROFILE_START(&pTbCache->StatPrune, a);
494
495 /*
496 * First convert the collision list to an array.
497 */
498 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
499 uintptr_t cInserted = 0;
500 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
501
502 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
503
504 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
505 {
506 apSortedTbs[cInserted++] = pTbCollision;
507 pTbCollision = pTbCollision->pNext;
508 }
509
510 /* Free any excess (impossible). */
511 if (RT_LIKELY(!pTbCollision))
512 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
513 else
514 do
515 {
516 PIEMTB pTbToFree = pTbCollision;
517 pTbCollision = pTbToFree->pNext;
518 iemTbAllocatorFree(pVCpu, pTbToFree);
519 } while (pTbCollision);
520
521 /*
522 * Sort it by most recently used and usage count.
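 * (E.g. a TB last used 1 ms ago sorts ahead of one last used 50 ms ago; for
 * equal ages the higher cUsed count wins, and native TBs rank ahead of
 * threaded ones - see iemTbCachePruneCmpTb above.)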
523 */
524 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
525
526 /* We keep half the list for now. Perhaps a bit aggressive... */
527 uintptr_t const cKeep = cInserted / 2;
528
529 /* First free up the TBs we don't wish to keep (before creating the new
530 list because otherwise the free code will scan the list for each one
531 without ever finding it). */
532 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
533 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
534
535 /* Then chain the new TB together with the existing ones we'd like to
536 keep and insert this list into the hash table. */
537 pTbCollision = pTb;
538 for (uintptr_t idx = 0; idx < cKeep; idx++)
539 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
540 pTbCollision->pNext = NULL;
541
542 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
543#ifdef VBOX_STRICT
544 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
545#endif
546
547 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
548}
549
550
551static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
552{
553 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
554 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
555 if (!pTbOldHead)
556 {
557 pTb->pNext = NULL;
558 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
559 }
560 else
561 {
562 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
563 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
564 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
565 {
566 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
567 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
568#ifdef VBOX_STRICT
569 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
570#endif
571 }
572 else
573 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
574 }
575}
576
577
578/**
579 * Unlinks @a pTb from the hash table if found in it.
580 *
581 * @returns true if unlinked, false if not present.
582 * @param pTbCache The hash table.
583 * @param pTb The TB to remove.
584 */
585static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
586{
587 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
588 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
589 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
590
591 /*
592 * At the head of the collision list?
593 */
594 if (pTbHash == pTb)
595 {
596 if (!pTb->pNext)
597 pTbCache->apHash[idxHash] = NULL;
598 else
599 {
600 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
601 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
602#ifdef VBOX_STRICT
603 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
604#endif
605 }
606 return true;
607 }
608
609 /*
610 * Search the collision list.
611 */
612 PIEMTB const pTbHead = pTbHash;
613 while (pTbHash)
614 {
615 PIEMTB const pNextTb = pTbHash->pNext;
616 if (pNextTb == pTb)
617 {
618 pTbHash->pNext = pTb->pNext;
619 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
620#ifdef VBOX_STRICT
621 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
622#endif
623 return true;
624 }
625 pTbHash = pNextTb;
626 }
627 return false;
628}
629
630
631/**
632 * Looks up a TB for the given PC and flags in the cache.
633 *
634 * @returns Pointer to TB on success, NULL if not found.
635 * @param pVCpu The cross context virtual CPU structure of the
636 * calling thread.
637 * @param pTbCache The translation block cache.
638 * @param GCPhysPc The PC to look up a TB for.
639 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
640 * the lookup.
641 * @thread EMT(pVCpu)
642 */
643static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
644 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
645{
646 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
647
648 /*
649 * First consult the lookup table entry.
650 */
651 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
652 PIEMTB pTb = *ppTbLookup;
653 if (pTb)
654 {
655 if (pTb->GCPhysPc == GCPhysPc)
656 {
657 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
658 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
659 {
660 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
661 {
662 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
663 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
664 pTb->cUsed++;
665#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
666 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
667 {
668 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
669 return pTb;
670 }
671 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
672# ifdef VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING
673 iemThreadedSaveTbForProfiling(pVCpu, pTb);
674# endif
675 return iemNativeRecompile(pVCpu, pTb);
676#else
677 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
678 return pTb;
679#endif
680 }
681 }
682 }
683 }
684
685 /*
686 * Then consult the hash table.
687 */
688 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
689#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
690 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
691#endif
692 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
693 while (pTb)
694 {
695 if (pTb->GCPhysPc == GCPhysPc)
696 {
697 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
698 {
699 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
700 {
701 STAM_COUNTER_INC(&pTbCache->cLookupHits);
702 AssertMsg(cLeft > 0, ("%d\n", cLeft));
703
704 *ppTbLookup = pTb;
705 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
706 pTb->cUsed++;
707#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
708 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
709 {
710 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
711 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
712 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
713 return pTb;
714 }
715 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
716 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
717 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
718 return iemNativeRecompile(pVCpu, pTb);
719#else
720 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
721 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
722 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
723 return pTb;
724#endif
725 }
726 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
727 }
728 else
729 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
730 }
731 else
732 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
733
734 pTb = pTb->pNext;
735#ifdef VBOX_STRICT
736 cLeft--;
737#endif
738 }
739 AssertMsg(cLeft == 0, ("%d\n", cLeft));
740 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
741 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
742 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
743 return pTb;
744}
745
746
747/*********************************************************************************************************************************
748* Translation Block Allocator.
749*********************************************************************************************************************************/
750/*
751 * Translation block allocation management.
752 */
753
754#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
755# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
756 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
757# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
758 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
759# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
760 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
761#else
762# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
763 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
764# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
765 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
766# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
767 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
768#endif
769/** Makes a TB index from a chunk index and TB index within that chunk. */
770#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
771 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
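/* Illustrative mapping, assuming (for the sake of the example) cTbsPerChunk is
   8192 (cChunkShift = 13 in the power-of-two configuration): TB index 20000
   maps to chunk 2 at index-in-chunk 3616, and IEMTBALLOC_IDX_MAKE(pTbAllocator,
   2, 3616) gives 20000 back. */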
772
773
774/**
775 * Initializes the TB allocator and cache for an EMT.
776 *
777 * @returns VBox status code.
778 * @param pVM The VM handle.
779 * @param cInitialTbs The initial number of translation blocks to
780 * preallocate.
781 * @param cMaxTbs The max number of translation blocks allowed.
782 * @param cbInitialExec The initial size of the executable memory allocator.
783 * @param cbMaxExec The max size of the executable memory allocator.
784 * @param cbChunkExec The chunk size for executable memory allocator. Zero
785 * or UINT32_MAX for automatically determining this.
786 * @thread EMT
787 */
788DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
789 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
790{
791 PVMCPUCC pVCpu = VMMGetCpu(pVM);
792 Assert(!pVCpu->iem.s.pTbCacheR3);
793 Assert(!pVCpu->iem.s.pTbAllocatorR3);
794
795 /*
796 * Calculate the chunk size of the TB allocator.
797 * The minimum chunk size is 2MiB.
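 * (If sizeof(IEMTB) were, say, 256 bytes, a 2 MiB chunk would hold 8192 TBs;
 * the loop below keeps doubling cbPerChunk until cMaxTbs fits within the
 * fixed number of chunk slots in the allocator's aChunks array.)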
798 */
799 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
800 uint32_t cbPerChunk = _2M;
801 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
802#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
803 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
804 uint8_t cChunkShift = 21 - cTbShift;
805 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
806#endif
807 for (;;)
808 {
809 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
810 break;
811 cbPerChunk *= 2;
812 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
813#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
814 cChunkShift += 1;
815#endif
816 }
817
818 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
819 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
820 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
821
822 cMaxTbs = cMaxChunks * cTbsPerChunk;
823
824 /*
825 * Allocate and initialize it.
826 */
827 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(sizeof(*pTbAllocator));
828 if (!pTbAllocator)
829 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
830 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
831 sizeof(*pTbAllocator), cMaxTbs, pVCpu->idCpu);
832 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
833 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
834 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
835 pTbAllocator->cbPerChunk = cbPerChunk;
836 pTbAllocator->cMaxTbs = cMaxTbs;
837 pTbAllocator->pTbsFreeHead = NULL;
838#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
839 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
840 pTbAllocator->cChunkShift = cChunkShift;
841 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
842#endif
843
844 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
845
846 /*
847 * Allocate the initial chunks.
848 */
849 for (uint32_t idxChunk = 0; ; idxChunk++)
850 {
851 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
852 if (!paTbs)
853 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
854 "Failed to allocate %zu bytes for the #%u chunk of TBs for VCpu #%u",
855 cbPerChunk, idxChunk, pVCpu->idCpu);
856
857 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
858 {
859 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
860 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
861 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
862 }
863 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
864 pTbAllocator->cTotalTbs += cTbsPerChunk;
865
866 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
867 break;
868 }
869
870 /*
871 * Calculate the size of the hash table. We double the max TB count and
872 * round it up to the nearest power of two.
873 */
874 uint32_t cCacheEntries = cMaxTbs * 2;
875 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
876 {
877 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
878 cCacheEntries = RT_BIT_32(iBitTop);
879 Assert(cCacheEntries >= cMaxTbs * 2);
880 }
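 /* Illustrative sizing (numbers made up): with cMaxTbs = 24576, cCacheEntries
 starts out as 49152 and is rounded up to 65536, so the apHash table
 allocated below holds 64K pointer-sized entries (roughly 512 KiB on 64-bit
 hosts). */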
881
882 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
883 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
884 if (!pTbCache)
885 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
886 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
887 cbTbCache, cCacheEntries, pVCpu->idCpu);
888
889 /*
890 * Initialize it (assumes zeroed by the allocator).
891 */
892 pTbCache->uMagic = IEMTBCACHE_MAGIC;
893 pTbCache->cHash = cCacheEntries;
894 pTbCache->uHashMask = cCacheEntries - 1;
895 Assert(pTbCache->cHash > pTbCache->uHashMask);
896 pVCpu->iem.s.pTbCacheR3 = pTbCache;
897
898 /*
899 * Initialize the native executable memory allocator.
900 */
901#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
902 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
903 AssertLogRelRCReturn(rc, rc);
904#else
905 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
906#endif
907
908 return VINF_SUCCESS;
909}
910
911
912/**
913 * Inner free worker.
914 *
915 * The @a a_fType parameter allows us to eliminate the type check when we know
916 * which type of TB is being freed.
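 *
 * For instance, iemTbAllocatorFreeBulk() instantiates this with
 * IEMTB_F_TYPE_NATIVE since it only ever frees native TBs, whereas the generic
 * iemTbAllocatorFree() passes 0 and keeps the runtime type switch.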
917 */
918template<uint32_t a_fType>
919DECL_FORCE_INLINE(void)
920iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
921{
922#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
923 AssertCompile(a_fType == 0 || a_fType == IEMTB_F_TYPE_THREADED || a_fType == IEMTB_F_TYPE_NATIVE);
924#else
925 AssertCompile(a_fType == 0 || a_fType == IEMTB_F_TYPE_THREADED);
926#endif
927 Assert(idxChunk < pTbAllocator->cAllocatedChunks); RT_NOREF(idxChunk);
928 Assert(idxInChunk < pTbAllocator->cTbsPerChunk); RT_NOREF(idxInChunk);
929 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
930#ifdef VBOX_STRICT
931 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
932 Assert(pTbOther != pTb);
933#endif
934
935 /*
936 * Unlink the TB from the hash table.
937 */
938 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
939
940 /*
941 * Free the TB itself.
942 */
943 if RT_CONSTEXPR_IF(a_fType == 0)
944 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
945 {
946 case IEMTB_F_TYPE_THREADED:
947 pTbAllocator->cThreadedTbs -= 1;
948 RTMemFree(pTb->Thrd.paCalls);
949 break;
950#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
951 case IEMTB_F_TYPE_NATIVE:
952 pTbAllocator->cNativeTbs -= 1;
953 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
954 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
955 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
956 break;
957#endif
958 default:
959 AssertFailed();
960 }
961#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
962 else if RT_CONSTEXPR_IF(a_fType == IEMTB_F_TYPE_NATIVE)
963 {
964 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
965 pTbAllocator->cNativeTbs -= 1;
966 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
967 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
968 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
969 }
970#endif
971 else
972 {
973 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
974 pTbAllocator->cThreadedTbs -= 1;
975 RTMemFree(pTb->Thrd.paCalls);
976 }
977
978 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
979
980 pTb->pNext = pTbAllocator->pTbsFreeHead;
981 pTbAllocator->pTbsFreeHead = pTb;
982 pTb->fFlags = 0;
983 pTb->GCPhysPc = UINT64_MAX;
984 pTb->Gen.uPtr = 0;
985 pTb->Gen.uData = 0;
986 pTb->cTbLookupEntries = 0;
987 pTb->cbOpcodes = 0;
988 pTb->pabOpcodes = NULL;
989
990 Assert(pTbAllocator->cInUseTbs > 0);
991
992 pTbAllocator->cInUseTbs -= 1;
993 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
994}
995
996
997/**
998 * Frees the given TB.
999 *
1000 * @param pVCpu The cross context virtual CPU structure of the calling
1001 * thread.
1002 * @param pTb The translation block to free.
1003 * @thread EMT(pVCpu)
1004 */
1005DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
1006{
1007 /*
1008 * Validate state.
1009 */
1010 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1011 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1012 uint8_t const idxChunk = pTb->idxAllocChunk;
1013 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
1014 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
1015 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
1016
1017 /*
1018 * Invalidate the TB lookup pointer and call the inner worker.
1019 */
1020 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1021 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
1022}
1023
1024#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
1025
1026/**
1027 * Interface used by iemExecMemAllocatorPrune.
1028 */
1029DECLHIDDEN(void) iemTbAllocatorFreeBulk(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb)
1030{
1031 Assert(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1032
1033 uint8_t const idxChunk = pTb->idxAllocChunk;
1034 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
1035 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
1036 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
1037
1038 iemTbAllocatorFreeInner<IEMTB_F_TYPE_NATIVE>(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
1039}
1040
1041
1042/**
1043 * Interface used by iemExecMemAllocatorPrune.
1044 */
1045DECLHIDDEN(PIEMTBALLOCATOR) iemTbAllocatorFreeBulkStart(PVMCPUCC pVCpu)
1046{
1047 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1048 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1049
1050 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1051
1052 /* It should be sufficient to do this once. */
1053 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1054
1055 return pTbAllocator;
1056}
1057
1058#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
1059
1060/**
1061 * Schedules a TB for freeing when it's no longer being executed and/or part of
1062 * the caller's call stack.
1063 *
1064 * The TB will be removed from the translation block cache, though, so it isn't
1065 * possible to execute it again and the IEMTB::pNext member can be used to link
1066 * it together with other TBs awaiting freeing.
1067 *
1068 * @param pVCpu The cross context virtual CPU structure of the calling
1069 * thread.
1070 * @param pTb The translation block to schedule for freeing.
1071 */
1072static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
1073{
1074 /*
1075 * Validate state.
1076 */
1077 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1078 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1079 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1080 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1081 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1082 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1083#ifdef VBOX_STRICT
1084 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1085 Assert(pTbOther != pTb);
1086#endif
1087
1088 /*
1089 * Remove it from the cache and prepend it to the allocator's todo list.
1090 *
1091 * Note! It could still be in various lookup tables, so we trash the GCPhys
1092 * and CS attribs to ensure it won't be reused.
1093 */
1094 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1095 pTb->GCPhysPc = NIL_RTGCPHYS;
1096 pTb->x86.fAttr = UINT16_MAX;
1097
1098 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1099 pTbAllocator->pDelayedFreeHead = pTb;
1100}
1101
1102
1103/**
1104 * Processes the delayed frees.
1105 *
1106 * This is called by the allocator function as well as the native recompile
1107 * function before making any TB or executable memory allocations respectively.
1108 */
1109void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1110{
1111 /** @todo r=bird: these have already been removed from the cache,
1112 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1113 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1114 pTbAllocator->pDelayedFreeHead = NULL;
1115 while (pTb)
1116 {
1117 PIEMTB const pTbNext = pTb->pNext;
1118 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1119 iemTbAllocatorFree(pVCpu, pTb);
1120 pTb = pTbNext;
1121 }
1122}
1123
1124
1125#if 0
1126/**
1127 * Frees all TBs.
1128 */
1129static int iemTbAllocatorFreeAll(PVMCPUCC pVCpu)
1130{
1131 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1132 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1133 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1134
1135 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1136
1137 uint32_t idxChunk = pTbAllocator->cAllocatedChunks;
1138 while (idxChunk-- > 0)
1139 {
1140 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1141 uint32_t idxTb = pTbAllocator->cTbsPerChunk;
1142 while (idxTb-- > 0)
1143 {
1144 PIEMTB const pTb = &paTbs[idxTb];
1145 if (pTb->fFlags)
1146 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, idxTb);
1147 }
1148 }
1149
1150 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1151
1152# if 1
1153 /* Reset the free list. */
1154 pTbAllocator->pTbsFreeHead = NULL;
1155 idxChunk = pTbAllocator->cAllocatedChunks;
1156 while (idxChunk-- > 0)
1157 {
1158 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1159 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1160 RT_BZERO(paTbs, sizeof(paTbs[0]) * cTbsPerChunk);
1161 for (uint32_t idxTb = 0; idxTb < cTbsPerChunk; idxTb++)
1162 {
1163 paTbs[idxTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1164 paTbs[idxTb].pNext = pTbAllocator->pTbsFreeHead;
1165 pTbAllocator->pTbsFreeHead = &paTbs[idxTb];
1166 }
1167 }
1168# endif
1169
1170# if 1
1171 /* Completely reset the TB cache. */
1172 RT_BZERO(pVCpu->iem.s.pTbCacheR3->apHash, sizeof(pVCpu->iem.s.pTbCacheR3->apHash[0]) * pVCpu->iem.s.pTbCacheR3->cHash);
1173# endif
1174
1175 return VINF_SUCCESS;
1176}
1177#endif
1178
1179
1180/**
1181 * Grow the translation block allocator with another chunk.
1182 */
1183static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1184{
1185 /*
1186 * Validate state.
1187 */
1188 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1189 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1190 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1191 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1192 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1193
1194 /*
1195 * Allocate a new chunk and add it to the allocator.
1196 */
1197 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1198 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1199 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1200
1201 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1202 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1203 {
1204 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1205 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
1206 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
1207 }
1208 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1209 pTbAllocator->cTotalTbs += cTbsPerChunk;
1210
1211 return VINF_SUCCESS;
1212}
1213
1214
1215/**
1216 * Allocates a TB from an allocator with a free block.
1217 *
1218 * This is common code to both the fast and slow allocator code paths.
1219 */
1220DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1221{
1222 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1223 Assert(pTbAllocator->pTbsFreeHead);
1224
1225 PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
1226 pTbAllocator->pTbsFreeHead = pTb->pNext;
1227 pTbAllocator->cInUseTbs += 1;
1228 if (fThreaded)
1229 pTbAllocator->cThreadedTbs += 1;
1230 else
1231 pTbAllocator->cNativeTbs += 1;
1232 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1233 return pTb;
1234}
1235
1236
1237/**
1238 * Slow path for iemTbAllocatorAlloc.
1239 */
1240static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1241{
1242 /*
1243 * With some luck we can add another chunk.
1244 */
1245 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1246 {
1247 int rc = iemTbAllocatorGrow(pVCpu);
1248 if (RT_SUCCESS(rc))
1249 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1250 }
1251
1252 /*
1253 * We have to prune stuff. Sigh.
1254 *
1255 * This requires scanning for older TBs and kicking them out. Not sure how to
1256 * best do this as we don't want to maintain any list of TBs ordered by last
1257 * usage time. But one reasonably simple approach would be that each time we
1258 * get here we continue a sequential scan of the allocation chunks,
1259 * considering just a smallish number of TBs and freeing a fixed portion of
1260 * them. Say, we consider the next 128 TBs, freeing the least recently used
1261 * out of each group of 4 TBs, resulting in 32 free TBs.
1262 */
1263 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1264 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1265 uint32_t const cTbsToPrune = 128;
1266 uint32_t const cTbsPerGroup = 4;
1267 uint32_t cFreedTbs = 0;
1268#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1269 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1270#else
1271 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1272#endif
1273 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1274 idxTbPruneFrom = 0;
1275 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1276 {
1277 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1278 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1279 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1280 uint32_t cMsAge = msNow - pTb->msLastUsed;
1281 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1282
1283 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1284 {
1285#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1286 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1287 { /* likely */ }
1288 else
1289 {
1290 idxInChunk2 = 0;
1291 idxChunk2 += 1;
1292 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1293 idxChunk2 = 0;
1294 }
1295#endif
1296 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1297 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1298 if ( cMsAge2 > cMsAge
1299 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1300 {
1301 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1302 pTb = pTb2;
1303 idxChunk = idxChunk2;
1304 idxInChunk = idxInChunk2;
1305 cMsAge = cMsAge2;
1306 }
1307 }
1308
1309 /* Free the TB. */
1310 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1311 cFreedTbs++; /* paranoia */
1312 }
1313 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1314 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1315
1316 /* Flush the TB lookup entry pointer. */
1317 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1318
1319 /*
1320 * Allocate a TB from the ones we've pruned.
1321 */
1322 if (cFreedTbs)
1323 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1324 return NULL;
1325}
1326
1327
1328/**
1329 * Allocate a translation block.
1330 *
1331 * @returns Pointer to block on success, NULL if we're out and unable to
1332 * free up an existing one (very unlikely once implemented).
1333 * @param pVCpu The cross context virtual CPU structure of the calling
1334 * thread.
1335 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1336 * For statistics.
1337 */
1338DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1339{
1340 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1341 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1342
1343 /* Free any pending TBs before we proceed. */
1344 if (!pTbAllocator->pDelayedFreeHead)
1345 { /* probably likely */ }
1346 else
1347 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1348
1349 /* If the allocator is full, take the slow code path. */
1350 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1351 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1352 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1353}
1354
1355
1356#if 0 /*def VBOX_WITH_IEM_NATIVE_RECOMPILER*/
1357/**
1358 * This is called when we're out of space for native TBs.
1359 *
1360 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1361 * The difference is that we only prune native TBs and will only free any if
1362 * there are at least two in a group. The conditions under which we're called are
1363 * different - there will probably be free TBs in the table when we're called.
1364 * Therefore we increase the group size and max scan length, though we'll stop
1365 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1366 * up at least 8 TBs.
1367 */
1368void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1369{
1370 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1371 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1372
1373 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1374
1375 /*
1376 * Flush the delayed free list before we start freeing TBs indiscriminately.
1377 */
1378 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1379
1380 /*
1381 * Scan and free TBs.
1382 */
1383 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1384 uint32_t const cTbsToPrune = 128 * 8;
1385 uint32_t const cTbsPerGroup = 4 * 4;
1386 uint32_t cFreedTbs = 0;
1387 uint32_t cMaxInstrs = 0;
1388 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1389 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1390 {
1391 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1392 idxTbPruneFrom = 0;
1393 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1394 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1395 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1396 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1397 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1398
1399 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1400 {
1401 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1402 { /* likely */ }
1403 else
1404 {
1405 idxInChunk2 = 0;
1406 idxChunk2 += 1;
1407 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1408 idxChunk2 = 0;
1409 }
1410 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1411 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1412 {
1413 cNativeTbs += 1;
1414 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1415 if ( cMsAge2 > cMsAge
1416 || ( cMsAge2 == cMsAge
1417 && ( pTb2->cUsed < pTb->cUsed
1418 || ( pTb2->cUsed == pTb->cUsed
1419 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1420 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1421 {
1422 pTb = pTb2;
1423 idxChunk = idxChunk2;
1424 idxInChunk = idxInChunk2;
1425 cMsAge = cMsAge2;
1426 }
1427 }
1428 }
1429
1430 /* Free the TB if we found at least two native ones in this group. */
1431 if (cNativeTbs >= 2)
1432 {
1433 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1434 iemTbAllocatorFreeInner<IEMTB_F_TYPE_NATIVE>(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1435 cFreedTbs++;
1436 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1437 break;
1438 }
1439 }
1440 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1441
1442 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1443}
1444#endif /* unused / VBOX_WITH_IEM_NATIVE_RECOMPILER */
1445
1446
1447/*********************************************************************************************************************************
1448* Threaded Recompiler Core *
1449*********************************************************************************************************************************/
1450/**
1451 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1452 * @returns pszBuf.
1453 * @param fFlags The flags.
1454 * @param pszBuf The output buffer.
1455 * @param cbBuf The output buffer size. At least 32 bytes.
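 *
 * Example output (illustrative): "32BIT_FLAT CPL3 TYPE_THREADED" - the mode
 * string from s_aModes, then the CPL digit, then the names of any remaining
 * set flags.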
1456 */
1457DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1458{
1459 Assert(cbBuf >= 32);
1460 static RTSTRTUPLE const s_aModes[] =
1461 {
1462 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1463 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1464 /* [02] = */ { RT_STR_TUPLE("!2!") },
1465 /* [03] = */ { RT_STR_TUPLE("!3!") },
1466 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1467 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1468 /* [06] = */ { RT_STR_TUPLE("!6!") },
1469 /* [07] = */ { RT_STR_TUPLE("!7!") },
1470 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1471 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1472 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1473 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1474 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1475 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1476 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1477 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1478 /* [10] = */ { RT_STR_TUPLE("!10!") },
1479 /* [11] = */ { RT_STR_TUPLE("!11!") },
1480 /* [12] = */ { RT_STR_TUPLE("!12!") },
1481 /* [13] = */ { RT_STR_TUPLE("!13!") },
1482 /* [14] = */ { RT_STR_TUPLE("!14!") },
1483 /* [15] = */ { RT_STR_TUPLE("!15!") },
1484 /* [16] = */ { RT_STR_TUPLE("!16!") },
1485 /* [17] = */ { RT_STR_TUPLE("!17!") },
1486 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1487 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1488 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1489 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1490 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1491 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1492 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1493 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1494 };
1495 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1496 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1497 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1498
1499 pszBuf[off++] = ' ';
1500 pszBuf[off++] = 'C';
1501 pszBuf[off++] = 'P';
1502 pszBuf[off++] = 'L';
1503 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1504 Assert(off < 32);
1505
1506 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1507
1508 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1509 {
1510 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1511 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1512 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1513 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1514 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1515 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1516 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1517 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1518 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1519 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1520 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1521 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1522 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1523 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1524 };
1525 if (fFlags)
1526 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1527 if (s_aFlags[i].fFlag & fFlags)
1528 {
1529 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1530 pszBuf[off++] = ' ';
1531 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1532 off += s_aFlags[i].cchName;
1533 fFlags &= ~s_aFlags[i].fFlag;
1534 if (!fFlags)
1535 break;
1536 }
1537 pszBuf[off] = '\0';
1538
1539 return pszBuf;
1540}
1541
1542
1543/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1544static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1545{
1546 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1547 pDis->cbCachedInstr += cbMaxRead;
1548 RT_NOREF(cbMinRead);
1549 return VERR_NO_DATA;
1550}
1551
1552
1553/**
1554 * Worker for iemThreadedDisassembleTb.
1555 */
1556static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1557 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1558{
1559 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1560 {
1561 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1562 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1563 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1564 {
1565 PIEMTB pLookupTb = papTbLookup[iLookup];
1566 if (pLookupTb)
1567 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1568 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1569 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1570 : "invalid");
1571 else
1572 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1573 }
1574 pHlp->pfnPrintf(pHlp, "\n");
1575 }
1576 else
1577 {
1578 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1579 idxFirst, cEntries, pTb->cTbLookupEntries);
1580 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1581 }
1582}
1583
1584
1585DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1586{
1587 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1588
1589 char szDisBuf[512];
1590
1591 /*
1592 * Print TB info.
1593 */
1594 pHlp->pfnPrintf(pHlp,
1595 "pTb=%p: GCPhysPc=%RGp (%RGv) cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1596 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1597 pTb, pTb->GCPhysPc, pTb->FlatPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1598 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1599
1600 /*
 1601 * This disassembly is driven by the threaded call table and the opcode
 1602 * ranges: another guest instruction is disassembled whenever a call entry
 1603 * references new opcode bytes.
1604 */
1605 DISSTATE Dis;
1606 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1607 uint32_t const cCalls = pTb->Thrd.cCalls;
1608 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1609 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1610 : DISCPUMODE_64BIT;
1611 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1612 uint8_t idxRange = UINT8_MAX;
1613 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1614 uint32_t offRange = 0;
1615 uint32_t offOpcodes = 0;
1616 uint32_t const cbOpcodes = pTb->cbOpcodes;
1617 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1618 bool fTbLookupSeen0 = false;
1619
1620 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1621 {
1622 /*
1623 * New opcode range?
1624 */
1625 if ( idxRange == UINT8_MAX
1626 || idxRange >= cRanges
1627 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1628 {
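 /* Advance to the next opcode range (idxRange starts at UINT8_MAX and wraps to
    zero here), carrying any overshoot of offRange into the new range. */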
1629 idxRange += 1;
1630 if (idxRange < cRanges)
1631 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1632 else
1633 continue;
1634 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1635 + (pTb->aRanges[idxRange].idxPhysPage == 0
1636 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1637 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1638 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1639 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1640 pTb->aRanges[idxRange].idxPhysPage);
1641 GCPhysPc += offRange;
1642 }
1643
1644 /*
1645 * Disassemble another guest instruction?
1646 */
1647 if ( paCalls[iCall].offOpcode != offOpcodes
1648 && paCalls[iCall].cbOpcode > 0
1649 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1650 {
1651 offOpcodes = paCalls[iCall].offOpcode;
1652 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1653 uint32_t cbInstr = 1;
1654 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1655 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1656 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1657 if (RT_SUCCESS(rc))
1658 {
1659 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1660 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1661 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1662 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1663 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1664 }
1665 else
1666 {
1667 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1668 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1669 cbInstr = paCalls[iCall].cbOpcode;
1670 }
1671 GCPhysPc += cbInstr;
1672 offRange += cbInstr;
1673 }
1674
1675 /*
1676 * Dump call details.
1677 */
1678 pHlp->pfnPrintf(pHlp,
1679 " Call #%u to %s (%u args)\n",
1680 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1681 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1682 if (paCalls[iCall].uTbLookup != 0)
1683 {
1684 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
 1685 fTbLookupSeen0 |= idxFirst == 0;
1686 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1687 }
1688
1689 /*
1690 * Snoop fExec.
1691 */
1692 switch (paCalls[iCall].enmFunction)
1693 {
1694 default:
1695 break;
1696 case kIemThreadedFunc_BltIn_CheckMode:
1697 fExec = paCalls[iCall].auParams[0];
1698 break;
1699 }
1700 }
1701
1702 if (!fTbLookupSeen0)
1703 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1704}
1705
1706
1707
1708/**
 1709 * Allocate a translation block for threaded recompilation.
1710 *
1711 * This is allocated with maxed out call table and storage for opcode bytes,
1712 * because it's only supposed to be called once per EMT to allocate the TB
1713 * pointed to by IEMCPU::pThrdCompileTbR3.
1714 *
1715 * @returns Pointer to the translation block on success, NULL on failure.
1716 * @param pVM The cross context virtual machine structure.
1717 * @param pVCpu The cross context virtual CPU structure of the calling
1718 * thread.
1719 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1720 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1721 */
1722static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1723{
1724 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1725 if (pTb)
1726 {
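 /* Maxed-out sizing: 256 call entries and 16 opcode bytes per call (an x86
    instruction is at most 15 bytes), as this TB serves as the EMT's scratch
    compilation buffer. */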
1727 unsigned const cCalls = 256;
1728 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1729 if (pTb->Thrd.paCalls)
1730 {
1731 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1732 if (pTb->pabOpcodes)
1733 {
1734 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1735 pTb->Thrd.cAllocated = cCalls;
1736 pTb->Thrd.cCalls = 0;
1737 pTb->cbOpcodes = 0;
1738 pTb->pNext = NULL;
1739 pTb->cUsed = 0;
1740 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1741 pTb->idxAllocChunk = UINT8_MAX;
1742 pTb->GCPhysPc = GCPhysPc;
1743 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1744 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1745 pTb->cInstructions = 0;
1746 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1747
1748 /* Init the first opcode range. */
1749 pTb->cRanges = 1;
1750 pTb->aRanges[0].cbOpcodes = 0;
1751 pTb->aRanges[0].offOpcodes = 0;
1752 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1753 pTb->aRanges[0].u2Unused = 0;
1754 pTb->aRanges[0].idxPhysPage = 0;
1755 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1756 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1757
1758 return pTb;
1759 }
1760 RTMemFree(pTb->Thrd.paCalls);
1761 }
1762 RTMemFree(pTb);
1763 }
1764 RT_NOREF(pVM);
1765 return NULL;
1766}
1767
1768
1769/**
 1770 * Called on the TB that is dedicated to recompilation before it's reused.
1771 *
1772 * @param pVCpu The cross context virtual CPU structure of the calling
1773 * thread.
1774 * @param pTb The translation block to reuse.
1775 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1776 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1777 */
1778static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1779{
1780 pTb->GCPhysPc = GCPhysPc;
1781 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1782 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1783 pTb->Thrd.cCalls = 0;
1784 pTb->cbOpcodes = 0;
1785 pTb->cInstructions = 0;
1786 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1787
1788 /* Init the first opcode range. */
1789 pTb->cRanges = 1;
1790 pTb->aRanges[0].cbOpcodes = 0;
1791 pTb->aRanges[0].offOpcodes = 0;
1792 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1793 pTb->aRanges[0].u2Unused = 0;
1794 pTb->aRanges[0].idxPhysPage = 0;
1795 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1796 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1797}
1798
1799
1800/**
 1801 * Used to duplicate a threaded translation block after recompilation is done.
1802 *
1803 * @returns Pointer to the translation block on success, NULL on failure.
1804 * @param pVM The cross context virtual machine structure.
1805 * @param pVCpu The cross context virtual CPU structure of the calling
1806 * thread.
1807 * @param pTbSrc The TB to duplicate.
1808 */
1809static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1810{
1811 /*
1812 * Just using the heap for now. Will make this more efficient and
1813 * complicated later, don't worry. :-)
1814 */
1815 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1816 if (pTb)
1817 {
1818 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1819 memcpy(pTb, pTbSrc, sizeof(*pTb));
1820 pTb->idxAllocChunk = idxAllocChunk;
1821
1822 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1823 Assert(cCalls > 0);
1824 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1825 if (pTb->Thrd.paCalls)
1826 {
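 /* The TB lookup table and the opcode bytes share a single allocation; the
    (zeroed) lookup table comes first and pabOpcodes points right after it. */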
1827 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1828 Assert(cbTbLookup > 0);
1829 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1830 Assert(cbOpcodes > 0);
1831 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1832 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1833 if (pbBoth)
1834 {
1835 RT_BZERO(pbBoth, cbTbLookup);
1836 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1837 pTb->Thrd.cAllocated = cCalls;
1838 pTb->pNext = NULL;
1839 pTb->cUsed = 0;
1840 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1841 pTb->fFlags = pTbSrc->fFlags;
1842
1843 return pTb;
1844 }
1845 RTMemFree(pTb->Thrd.paCalls);
1846 }
1847 iemTbAllocatorFree(pVCpu, pTb);
1848 }
1849 RT_NOREF(pVM);
1850 return NULL;
1851
1852}
1853
1854
1855/**
1856 * Adds the given TB to the hash table.
1857 *
1858 * @param pVCpu The cross context virtual CPU structure of the calling
1859 * thread.
1860 * @param pTbCache The cache to add it to.
1861 * @param pTb The translation block to add.
1862 */
1863static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1864{
1865 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1866
1867 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1868 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1869 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1870 if (LogIs12Enabled())
1871 {
1872 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1873 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1874 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1875 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1876 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1877 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1878 pTb->aRanges[idxRange].idxPhysPage == 0
1879 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1880 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1881 }
1882}
1883
1884
1885/**
1886 * Called by opcode verifier functions when they detect a problem.
1887 */
1888void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1889{
 1890 /* We cannot free the current TB unless fSafeToFree says so, because:
 1891 - A threaded TB will still have its current call entry accessed
 1892 to update pVCpu->iem.s.cInstructions.
 1893 - A native TB will still have code left to execute. */
1894 if (fSafeToFree)
1895 iemTbAllocatorFree(pVCpu, pTb);
1896 else
1897 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1898}
1899
1900
1901/*
1902 * Real code.
1903 */
1904
1905#ifdef LOG_ENABLED
1906/**
1907 * Logs the current instruction.
1908 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1909 * @param pszFunction The IEM function doing the execution.
1910 * @param idxInstr The instruction number in the block.
1911 */
1912static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1913{
1914# ifdef IN_RING3
1915 if (LogIs2Enabled())
1916 {
1917 char szInstr[256];
1918 uint32_t cbInstr = 0;
1919 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1920 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1921 szInstr, sizeof(szInstr), &cbInstr);
1922
1923 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1924 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1925 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1926 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1927 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1928 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1929 " %s\n"
1930 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1931 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1932 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1933 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1934 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1935 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1936 szInstr));
1937
1938 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1939 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1940 }
1941 else
1942# endif
1943 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1944 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1945}
1946#endif /* LOG_ENABLED */
1947
1948
1949#if 0
1950static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1951{
1952 RT_NOREF(pVM, pVCpu);
1953 return rcStrict;
1954}
1955#endif
1956
1957
1958/**
1959 * Initializes the decoder state when compiling TBs.
1960 *
 1961 * This presumes that fExec has already been initialized.
 1962 *
 1963 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so fixes
 1964 * may need to be applied to them as well.
1965 *
1966 * @param pVCpu The cross context virtual CPU structure of the calling
1967 * thread.
1968 * @param fReInit Clear for the first call for a TB, set for subsequent
1969 * calls from inside the compile loop where we can skip a
1970 * couple of things.
1971 * @param fExtraFlags The extra translation block flags when @a fReInit is
1972 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1973 * checked.
1974 */
1975DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1976{
1977 /* ASSUMES: That iemInitExec was already called and that anyone changing
1978 CPU state affecting the fExec bits since then will have updated fExec! */
1979 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1980 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1981
1982 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1983
1984 /* Decoder state: */
1985 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1986 pVCpu->iem.s.enmEffAddrMode = enmMode;
1987 if (enmMode != IEMMODE_64BIT)
1988 {
1989 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1990 pVCpu->iem.s.enmEffOpSize = enmMode;
1991 }
1992 else
1993 {
1994 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1995 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1996 }
1997 pVCpu->iem.s.fPrefixes = 0;
1998 pVCpu->iem.s.uRexReg = 0;
1999 pVCpu->iem.s.uRexB = 0;
2000 pVCpu->iem.s.uRexIndex = 0;
2001 pVCpu->iem.s.idxPrefix = 0;
2002 pVCpu->iem.s.uVex3rdReg = 0;
2003 pVCpu->iem.s.uVexLength = 0;
2004 pVCpu->iem.s.fEvexStuff = 0;
2005 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
2006 pVCpu->iem.s.offModRm = 0;
2007 pVCpu->iem.s.iNextMapping = 0;
2008
2009 if (!fReInit)
2010 {
2011 pVCpu->iem.s.cActiveMappings = 0;
2012 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
2013 pVCpu->iem.s.fEndTb = false;
 2014 pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes before executing the first instruction) */
2015 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2016 pVCpu->iem.s.fTbCrossedPage = false;
2017 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
2018 pVCpu->iem.s.idxLastCheckIrqCallNo = UINT16_MAX;
2019 pVCpu->iem.s.fTbCurInstrIsSti = false;
2020 /* Force RF clearing and TF checking on first instruction in the block
2021 as we don't really know what came before and should assume the worst: */
2022 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
2023 }
2024 else
2025 {
2026 Assert(pVCpu->iem.s.cActiveMappings == 0);
2027 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
2028 Assert(pVCpu->iem.s.fEndTb == false);
2029 Assert(pVCpu->iem.s.fTbCrossedPage == false);
2030 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
2031 }
2032 pVCpu->iem.s.fTbCurInstr = 0;
2033
2034#ifdef DBGFTRACE_ENABLED
2035 switch (IEM_GET_CPU_MODE(pVCpu))
2036 {
2037 case IEMMODE_64BIT:
2038 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
2039 break;
2040 case IEMMODE_32BIT:
2041 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
2042 break;
2043 case IEMMODE_16BIT:
2044 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
2045 break;
2046 }
2047#endif
2048}
2049
2050
2051/**
2052 * Initializes the opcode fetcher when starting the compilation.
2053 *
2054 * @param pVCpu The cross context virtual CPU structure of the calling
2055 * thread.
2056 */
2057DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
2058{
2059 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
2060#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2061 pVCpu->iem.s.offOpcode = 0;
2062#else
2063 RT_NOREF(pVCpu);
2064#endif
2065}
2066
2067
2068/**
2069 * Re-initializes the opcode fetcher between instructions while compiling.
2070 *
2071 * @param pVCpu The cross context virtual CPU structure of the calling
2072 * thread.
2073 */
2074DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
2075{
2076 if (pVCpu->iem.s.pbInstrBuf)
2077 {
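 /* Recompute the flat PC's offset into the cached instruction buffer; if it
    still falls within the buffer we keep using it and avoid a new TLB lookup. */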
2078 uint64_t off = pVCpu->cpum.GstCtx.rip;
2079 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2080 off += pVCpu->cpum.GstCtx.cs.u64Base;
2081 off -= pVCpu->iem.s.uInstrBufPc;
2082 if (off < pVCpu->iem.s.cbInstrBufTotal)
2083 {
2084 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2085 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2086 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2087 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2088 else
2089 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2090 }
2091 else
2092 {
2093 pVCpu->iem.s.pbInstrBuf = NULL;
2094 pVCpu->iem.s.offInstrNextByte = 0;
2095 pVCpu->iem.s.offCurInstrStart = 0;
2096 pVCpu->iem.s.cbInstrBuf = 0;
2097 pVCpu->iem.s.cbInstrBufTotal = 0;
2098 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2099 }
2100 }
2101 else
2102 {
2103 pVCpu->iem.s.offInstrNextByte = 0;
2104 pVCpu->iem.s.offCurInstrStart = 0;
2105 pVCpu->iem.s.cbInstrBuf = 0;
2106 pVCpu->iem.s.cbInstrBufTotal = 0;
2107#ifdef VBOX_STRICT
2108 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2109#endif
2110 }
2111#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2112 pVCpu->iem.s.offOpcode = 0;
2113#endif
2114}
2115
2116#ifdef LOG_ENABLED
2117
2118/**
2119 * Inserts a NOP call.
2120 *
2121 * This is for debugging.
2122 *
2123 * @returns true on success, false if we're out of call entries.
2124 * @param pTb The translation block being compiled.
2125 */
2126bool iemThreadedCompileEmitNop(PIEMTB pTb)
2127{
2128 /* Emit the call. */
2129 uint32_t const idxCall = pTb->Thrd.cCalls;
2130 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2131 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2132 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2133 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2134 pCall->idxInstr = pTb->cInstructions - 1;
2135 pCall->cbOpcode = 0;
2136 pCall->offOpcode = 0;
2137 pCall->uTbLookup = 0;
2138 pCall->fFlags = 0;
2139 pCall->auParams[0] = 0;
2140 pCall->auParams[1] = 0;
2141 pCall->auParams[2] = 0;
2142 return true;
2143}
2144
2145
2146/**
2147 * Called by iemThreadedCompile if cpu state logging is desired.
2148 *
2149 * @returns true on success, false if we're out of call entries.
2150 * @param pTb The translation block being compiled.
2151 */
2152bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2153{
2154 /* Emit the call. */
2155 uint32_t const idxCall = pTb->Thrd.cCalls;
2156 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2157 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2158 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2159 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2160 pCall->idxInstr = pTb->cInstructions - 1;
2161 pCall->cbOpcode = 0;
2162 pCall->offOpcode = 0;
2163 pCall->uTbLookup = 0;
2164 pCall->fFlags = 0;
2165 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2166 pCall->auParams[1] = 0;
2167 pCall->auParams[2] = 0;
2168 return true;
2169}
2170
2171#endif /* LOG_ENABLED */
2172
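/**
 * Copies @a cbInstr opcode bytes (at most 15) from the decoder's opcode buffer
 * to @a pbDst using an unrolled switch rather than memcpy.
 */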
2173DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2174{
2175 switch (cbInstr)
2176 {
2177 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2178 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2179 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2180 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2181 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2182 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2183 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2184 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2185 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2186 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2187 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2188 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2189 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2190 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2191 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2192 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2193 }
2194}
2195
2196#ifdef IEM_WITH_INTRA_TB_JUMPS
2197
2198/**
2199 * Emits the necessary tail calls for a full TB loop-jump.
2200 */
2201static bool iemThreadedCompileFullTbJump(PVMCPUCC pVCpu, PIEMTB pTb)
2202{
2203 /*
2204 * We need a timer and maybe IRQ check before jumping, so make sure
2205 * we've got sufficient call entries left before emitting anything.
2206 */
2207 uint32_t idxCall = pTb->Thrd.cCalls;
2208 if (idxCall + 1U <= pTb->Thrd.cAllocated)
2209 {
2210 /*
2211 * We're good, emit the calls.
2212 */
2213 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2214 pTb->Thrd.cCalls = (uint16_t)(idxCall + 2);
2215
2216 /* Always check timers as we risk getting stuck in a loop otherwise. We
2217 combine it with an IRQ check if that's not performed in the TB already. */
2218 pCall->enmFunction = pVCpu->iem.s.idxLastCheckIrqCallNo < idxCall
2219 ? kIemThreadedFunc_BltIn_CheckTimers
2220 : kIemThreadedFunc_BltIn_CheckTimersAndIrq;
2221 pCall->idxInstr = 0;
2222 pCall->offOpcode = 0;
2223 pCall->cbOpcode = 0;
2224 pCall->uTbLookup = 0;
2225 pCall->fFlags = 0;
2226 pCall->auParams[0] = 0;
2227 pCall->auParams[1] = 0;
2228 pCall->auParams[2] = 0;
2229 pCall++;
2230
 2231 /* The jump to call entry #0. */
2232 pCall->enmFunction = kIemThreadedFunc_BltIn_Jump;
2233 pCall->idxInstr = 0;
2234 pCall->offOpcode = 0;
2235 pCall->cbOpcode = 0;
2236 pCall->uTbLookup = 0;
2237 pCall->fFlags = 0;
2238 pCall->auParams[0] = 0; /* jump target is call zero */
2239 pCall->auParams[1] = 0;
2240 pCall->auParams[2] = 0;
2241
2242 /* Mark callentry #0 as a jump target. */
2243 pTb->Thrd.paCalls[0].fFlags |= IEMTHREADEDCALLENTRY_F_JUMP_TARGET;
2244 }
2245
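 /* Always return false so compilation of this TB ends here; at execution time
    the emitted Jump call transfers control back to call entry #0. */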
2246 return false;
2247}
2248
2249/**
2250 * Called by IEM_MC2_BEGIN_EMIT_CALLS when it detects that we're back at the
2251 * first instruction and we didn't just branch to it (that's handled below).
2252 *
 2253 * This will emit a loop jump iff everything is compatible with that.
2254 */
2255DECLHIDDEN(int) iemThreadedCompileBackAtFirstInstruction(PVMCPU pVCpu, PIEMTB pTb) RT_NOEXCEPT
2256{
2257 /* Check if the mode matches. */
2258 if ( (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2259 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS))
2260 {
2261 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected2);
2262 iemThreadedCompileFullTbJump(pVCpu, pTb);
2263 }
2264 return VINF_IEM_RECOMPILE_END_TB;
2265}
2266
2267#endif /* IEM_WITH_INTRA_TB_JUMPS */
2268
2269
2270/**
2271 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2272 *
2273 * - CS LIM check required.
2274 * - Must recheck opcode bytes.
2275 * - Previous instruction branched.
2276 * - TLB load detected, probably due to page crossing.
2277 *
2278 * @returns true if everything went well, false if we're out of space in the TB
 2279 * (e.g. opcode ranges) or we need to start doing CS.LIM checks.
2280 * @param pVCpu The cross context virtual CPU structure of the calling
2281 * thread.
2282 * @param pTb The translation block being compiled.
2283 */
2284bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2285{
2286 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2287 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2288#if 0
2289 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2290 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2291#endif
2292
2293 /*
 2294 * If we're not in 64-bit mode and not already checking CS.LIM, we need to
 2295 * see whether we have to start checking.
2296 */
2297 bool fConsiderCsLimChecking;
2298 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2299 if ( fMode == IEM_F_MODE_X86_64BIT
2300 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2301 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2302 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2303 fConsiderCsLimChecking = false; /* already enabled or not needed */
2304 else
2305 {
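 /* Heuristic: if at least a whole page plus a maximum-length instruction fits
    between EIP and the CS limit, we can keep deferring the CS.LIM checks. */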
2306 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2307 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2308 fConsiderCsLimChecking = true; /* likely */
2309 else
2310 {
2311 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2312 return false;
2313 }
2314 }
2315
2316 /*
 2317 * Prepare the call now, even before we know if we can accept the instruction in this TB.
 2318 * This allows us to amend parameters w/o making every case suffer.
2319 */
2320 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2321 uint16_t const offOpcode = pTb->cbOpcodes;
2322 uint8_t idxRange = pTb->cRanges - 1;
2323
2324 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2325 pCall->idxInstr = pTb->cInstructions;
2326 pCall->cbOpcode = cbInstr;
2327 pCall->offOpcode = offOpcode;
2328 pCall->uTbLookup = 0;
2329 pCall->fFlags = 0;
2330 pCall->auParams[0] = (uint32_t)cbInstr
2331 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2332 /* The upper dword is sometimes used for cbStartPage. */;
2333 pCall->auParams[1] = idxRange;
2334 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2335
2336/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2337 * gotten onto. If we do, stop */
2338
2339 /*
2340 * Case 1: We've branched (RIP changed).
2341 *
 2342 * Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2343 * TB, end the TB here as it is most likely a loop and if it
2344 * made sense to unroll it, the guest code compiler should've
2345 * done it already.
2346 *
2347 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2348 * Req: 1 extra range, no extra phys.
2349 *
 2350 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2351 * necessary (fTbCrossedPage is true).
2352 * Req: 1 extra range, probably 1 extra phys page entry.
2353 *
2354 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2355 * but in addition we cross into the following page and require
2356 * another TLB load.
2357 * Req: 2 extra ranges, probably 2 extra phys page entries.
2358 *
2359 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2360 * the following page (thus fTbCrossedPage is true).
2361 * Req: 2 extra ranges, probably 1 extra phys page entry.
2362 *
 2363 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
 2364 * it may trigger "spuriously" from the CPU point of view because of
 2365 * physical page changes that invalidate the physical TLB and trigger a
 2366 * call to the function. In theory this shouldn't be a big deal, just a bit of
 2367 * performance loss as we'll pick the LoadingTlb variants.
2368 *
2369 * Note! We do not currently optimize branching to the next instruction (sorry
2370 * 32-bit PIC code). We could maybe do that in the branching code that
2371 * sets (or not) fTbBranched.
2372 */
2373 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2374 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2375 * code. This'll require filtering out far jmps and calls, as they
2376 * load CS which should technically be considered indirect since the
2377 * GDT/LDT entry's base address can be modified independently from
2378 * the code. */
2379 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2380 {
2381 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2382 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2383 {
2384 /* 1a + 1b - instruction fully within the branched to page. */
2385 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2386 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2387
2388 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2389 {
2390 /* Check that we've got a free range. */
2391 idxRange += 1;
2392 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2393 { /* likely */ }
2394 else
2395 {
2396 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2397 return false;
2398 }
2399 pCall->auParams[1] = idxRange;
2400 pCall->auParams[2] = 0;
2401
2402 /* Check that we've got a free page slot. */
2403 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2404 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2405 uint8_t idxPhysPage;
2406 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2407 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2408 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2409 {
2410 pTb->aGCPhysPages[0] = GCPhysNew;
2411 pTb->aRanges[idxRange].idxPhysPage = 1;
2412 idxPhysPage = UINT8_MAX;
2413 }
2414 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2415 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2416 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2417 {
2418 pTb->aGCPhysPages[1] = GCPhysNew;
2419 pTb->aRanges[idxRange].idxPhysPage = 2;
2420 idxPhysPage = UINT8_MAX;
2421 }
2422 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2423 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2424 else
2425 {
 2426 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2427 return false;
2428 }
2429
2430 /* Loop check: We weave the loop check in here to optimize the lookup. */
2431 if (idxPhysPage != UINT8_MAX)
2432 {
2433 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2434 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2435 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2436 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2437 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2438 {
2439 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2440#ifdef IEM_WITH_INTRA_TB_JUMPS
2441 /* If we're looping back to the start of the TB and the mode is still the same,
2442 we could emit a jump optimization. For now we don't do page transitions
2443 as that implies TLB loading and such. */
2444 if ( idxLoopRange == 0
2445 && offPhysPc == pTb->aRanges[0].offPhysPage
2446 && (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2447 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_CS_LIM_CHECKS)
2448 && (pVCpu->iem.s.fTbBranched & ( IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR
2449 | IEMBRANCHED_F_STACK | IEMBRANCHED_F_RELATIVE))
2450 == IEMBRANCHED_F_RELATIVE)
2451 {
2452 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected);
2453 return iemThreadedCompileFullTbJump(pVCpu, pTb);
2454 }
2455#endif
2456 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2457 return false;
2458 }
2459 }
2460
2461 /* Finish setting up the new range. */
2462 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2463 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2464 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2465 pTb->aRanges[idxRange].u2Unused = 0;
2466 pTb->cRanges++;
2467 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2468 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2469 pTb->aRanges[idxRange].offOpcodes));
2470 }
2471 else
2472 {
2473 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2474 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2475 }
2476
 2477 /* Determine which function we need to load & check.
 2478 Note! For jumps to a new page, we'll set both fTbBranched and
 2479 fTbCrossedPage to avoid unnecessary TLB work for intra-page
 2480 branching. */
2481 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2482 || pVCpu->iem.s.fTbCrossedPage)
2483 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2484 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2485 : !fConsiderCsLimChecking
2486 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2487 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2488 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2489 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2490 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2491 : !fConsiderCsLimChecking
2492 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2493 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2494 else
2495 {
2496 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2497 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2498 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2499 : !fConsiderCsLimChecking
2500 ? kIemThreadedFunc_BltIn_CheckOpcodes
2501 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2502 }
2503 }
2504 else
2505 {
2506 /* 1c + 1d - instruction crosses pages. */
2507 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2508 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2509
 2510 /* Lazy bird: Check that this isn't case 1c, since we've already
 2511 loaded the first physical address. End the TB and
2512 make it a case 2b instead.
2513
2514 Hmm. Too much bother to detect, so just do the same
2515 with case 1d as well. */
2516#if 0 /** @todo get back to this later when we've got the actual branch code in
2517 * place. */
2518 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2519
2520 /* Check that we've got two free ranges. */
2521 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2522 { /* likely */ }
2523 else
2524 return false;
2525 idxRange += 1;
2526 pCall->auParams[1] = idxRange;
2527 pCall->auParams[2] = 0;
2528
2529 /* ... */
2530
2531#else
2532 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2533 return false;
2534#endif
2535 }
2536 }
2537
2538 /*
2539 * Case 2: Page crossing.
2540 *
2541 * Sub-case 2a: The instruction starts on the first byte in the next page.
2542 *
2543 * Sub-case 2b: The instruction has opcode bytes in both the current and
2544 * following page.
2545 *
2546 * Both cases requires a new range table entry and probably a new physical
2547 * page entry. The difference is in which functions to emit and whether to
2548 * add bytes to the current range.
2549 */
2550 else if (pVCpu->iem.s.fTbCrossedPage)
2551 {
2552 /* Check that we've got a free range. */
2553 idxRange += 1;
2554 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2555 { /* likely */ }
2556 else
2557 {
2558 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2559 return false;
2560 }
2561
2562 /* Check that we've got a free page slot. */
2563 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2564 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2565 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2566 pTb->aRanges[idxRange].idxPhysPage = 0;
2567 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2568 || pTb->aGCPhysPages[0] == GCPhysNew)
2569 {
2570 pTb->aGCPhysPages[0] = GCPhysNew;
2571 pTb->aRanges[idxRange].idxPhysPage = 1;
2572 }
2573 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2574 || pTb->aGCPhysPages[1] == GCPhysNew)
2575 {
2576 pTb->aGCPhysPages[1] = GCPhysNew;
2577 pTb->aRanges[idxRange].idxPhysPage = 2;
2578 }
2579 else
2580 {
 2581 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2582 return false;
2583 }
2584
2585 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2586 {
2587 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2588 pCall->auParams[1] = idxRange;
2589 pCall->auParams[2] = 0;
2590
2591 /* Finish setting up the new range. */
2592 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2593 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2594 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2595 pTb->aRanges[idxRange].u2Unused = 0;
2596 pTb->cRanges++;
2597 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2598 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2599 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2600
 2601 /* Determine which function we need to load & check. */
2602 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2603 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2604 : !fConsiderCsLimChecking
2605 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2606 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2607 }
2608 else
2609 {
2610 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2611 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2612 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2613 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2614
 2615 /* We're good. Split the instruction over the old and new range table entries. */
2616 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2617
2618 pTb->aRanges[idxRange].offPhysPage = 0;
2619 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2620 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2621 pTb->aRanges[idxRange].u2Unused = 0;
2622 pTb->cRanges++;
2623 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2624 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2625 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2626
 2627 /* Determine which function we need to load & check. */
2628 if (pVCpu->iem.s.fTbCheckOpcodes)
2629 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2630 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2631 : !fConsiderCsLimChecking
2632 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2633 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2634 else
2635 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2636 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2637 : !fConsiderCsLimChecking
2638 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2639 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2640 }
2641 }
2642
2643 /*
2644 * Regular case: No new range required.
2645 */
2646 else
2647 {
2648 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2649 if (pVCpu->iem.s.fTbCheckOpcodes)
2650 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2651 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2652 : kIemThreadedFunc_BltIn_CheckOpcodes;
2653 else
2654 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2655
2656 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2657 pTb->cbOpcodes = offOpcode + cbInstr;
2658 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2659 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2660 }
2661
2662 /*
2663 * Commit the call.
2664 */
2665 pTb->Thrd.cCalls++;
2666
2667 /*
2668 * Clear state.
2669 */
2670 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2671 pVCpu->iem.s.fTbCrossedPage = false;
2672 pVCpu->iem.s.fTbCheckOpcodes = false;
2673
2674 /*
2675 * Copy opcode bytes.
2676 */
2677 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2678 pTb->cbOpcodes = offOpcode + cbInstr;
2679 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2680
2681 return true;
2682}
2683
2684
2685/**
2686 * Worker for iemThreadedCompileBeginEmitCallsComplications and
 2687 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2688 *
2689 * @returns true if anything is pending, false if not.
2690 * @param pVCpu The cross context virtual CPU structure of the calling
2691 * thread.
2692 */
2693DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2694{
2695 uint64_t fCpu = pVCpu->fLocalForcedActions;
2696 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2697#if 1
2698 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
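 /* Nothing is pending if there are no relevant FFs, or if only APIC/PIC
    interrupts are pending but cannot be delivered right now (IF clear or
    we're in an interrupt shadow). */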
2699 if (RT_LIKELY( !fCpu
2700 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2701 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2702 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2703 return false;
2704 return true;
2705#else
2706 return false;
2707#endif
2708
2709}
2710
2711
2712/**
2713 * Called by iemThreadedCompile when a block requires a mode check.
2714 *
2715 * @returns true if we should continue, false if we're out of call entries.
2716 * @param pVCpu The cross context virtual CPU structure of the calling
2717 * thread.
2718 * @param pTb The translation block being compiled.
2719 */
2720static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2721{
2722 /* Emit the call. */
2723 uint32_t const idxCall = pTb->Thrd.cCalls;
2724 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2725 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2726 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2727 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2728 pCall->idxInstr = pTb->cInstructions - 1;
2729 pCall->cbOpcode = 0;
2730 pCall->offOpcode = 0;
2731 pCall->uTbLookup = 0;
2732 pCall->fFlags = 0;
2733 pCall->auParams[0] = pVCpu->iem.s.fExec;
2734 pCall->auParams[1] = 0;
2735 pCall->auParams[2] = 0;
2736 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2737 return true;
2738}
2739
2740
2741/**
2742 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2743 * set.
2744 *
2745 * @returns true if we should continue, false if an IRQ is deliverable or a
2746 * relevant force flag is pending.
2747 * @param pVCpu The cross context virtual CPU structure of the calling
2748 * thread.
2749 * @param pTb The translation block being compiled.
2750 * @sa iemThreadedCompileCheckIrq
2751 */
2752bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2753{
2754 /*
 2755 * Skip this if we've already emitted a call after the previous instruction,
 2756 * or if it's the first call, as we're always checking FFs between blocks.
2757 */
2758 uint32_t const idxCall = pTb->Thrd.cCalls;
2759 if ( idxCall > 0
2760 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2761 {
2762 /* Emit the call. */
2763 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2764 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2765 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2766 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2767 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2768 pCall->idxInstr = pTb->cInstructions;
2769 pCall->offOpcode = 0;
2770 pCall->cbOpcode = 0;
2771 pCall->uTbLookup = 0;
2772 pCall->fFlags = 0;
2773 pCall->auParams[0] = 0;
2774 pCall->auParams[1] = 0;
2775 pCall->auParams[2] = 0;
2776 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2777
2778 /* Reset the IRQ check value. */
2779 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2780
2781 /*
2782 * Check for deliverable IRQs and pending force flags.
2783 */
2784 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2785 }
2786 return true; /* continue */
2787}
2788
2789
2790/**
2791 * Emits an IRQ check call and checks for pending IRQs.
2792 *
2793 * @returns true if we should continue, false if an IRQ is deliverable or a
2794 * relevant force flag is pending.
2795 * @param pVCpu The cross context virtual CPU structure of the calling
2796 * thread.
 2797 * @param pTb The translation block.
2798 * @sa iemThreadedCompileBeginEmitCallsComplications
2799 */
2800static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2801{
2802 /* Check again in a little bit, unless it is immediately following an STI
2803 in which case we *must* check immediately after the next instruction
2804 as well in case it's executed with interrupt inhibition. We could
 2805 otherwise miss the interrupt window. See the irq2 wait2 variant in
2806 bs3-timers-1 which is doing sti + sti + cli. */
2807 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2808 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2809 else
2810 {
2811 pVCpu->iem.s.fTbCurInstrIsSti = false;
2812 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2813 }
2814 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2815
2816 /*
2817 * Emit the call.
2818 */
2819 uint32_t const idxCall = pTb->Thrd.cCalls;
2820 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2821 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2822 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2823 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2824 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2825 pCall->idxInstr = pTb->cInstructions;
2826 pCall->offOpcode = 0;
2827 pCall->cbOpcode = 0;
2828 pCall->uTbLookup = 0;
2829 pCall->fFlags = 0;
2830 pCall->auParams[0] = 0;
2831 pCall->auParams[1] = 0;
2832 pCall->auParams[2] = 0;
2833
2834 /*
2835 * Check for deliverable IRQs and pending force flags.
2836 */
2837 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2838}
2839
2840
2841/**
2842 * Compiles a new TB and executes it.
2843 *
 2844 * We combine compilation and execution here as it makes for simpler code flow
2845 * in the main loop and it allows interpreting while compiling if we want to
2846 * explore that option.
2847 *
2848 * @returns Strict VBox status code.
2849 * @param pVM The cross context virtual machine structure.
2850 * @param pVCpu The cross context virtual CPU structure of the calling
2851 * thread.
2852 * @param GCPhysPc The physical address corresponding to the current
2853 * RIP+CS.BASE.
2854 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2855 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2856 */
2857static IEM_DECL_MSC_GUARD_IGNORE VBOXSTRICTRC
2858iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2859{
2860 IEMTLBTRACE_TB_COMPILE(pVCpu, GCPhysPc);
2861 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2862 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2863
2864 /*
 2865 * Get the TB we use for the recompiling. This is a maxed-out TB that
 2866 * we'll make a more efficient copy of when we're done compiling.
2867 */
2868 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2869 if (pTb)
2870 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2871 else
2872 {
2873 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2874 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2875 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2876 }
2877 pTb->FlatPc = pVCpu->iem.s.uInstrBufPc | (GCPhysPc & GUEST_PAGE_OFFSET_MASK);
2878
2879 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2880 functions may get at it. */
2881 pVCpu->iem.s.pCurTbR3 = pTb;
2882
2883#if 0
2884 /* Make sure the CheckIrq condition matches the one in EM. */
2885 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2886 const uint32_t cZeroCalls = 1;
2887#else
2888 const uint32_t cZeroCalls = 0;
2889#endif
2890
2891 /*
 2892 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2893 */
2894 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2895 iemThreadedCompileInitOpcodeFetching(pVCpu);
2896 VBOXSTRICTRC rcStrict;
2897 for (;;)
2898 {
2899 /* Process the next instruction. */
2900#ifdef LOG_ENABLED
2901 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2902 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2903 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2904 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2905#endif
2906 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2907 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2908
2909 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2910#if 0
2911 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2912 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2913 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2914#endif
2915 if ( rcStrict == VINF_SUCCESS
2916 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2917 && !pVCpu->iem.s.fEndTb)
2918 {
2919 Assert(pTb->Thrd.cCalls > cCallsPrev);
 2920 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2921
2922 pVCpu->iem.s.cInstructions++;
2923
2924 /* Check for mode change _after_ certain CIMPL calls, so check that
2925 we continue executing with the same mode value. */
2926 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2927 { /* probable */ }
2928 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2929 { /* extremely likely */ }
2930 else
2931 break;
2932
2933#if defined(LOG_ENABLED) && 0 /* for debugging */
2934 //iemThreadedCompileEmitNop(pTb);
2935 iemThreadedCompileEmitLogCpuState(pTb);
2936#endif
2937 }
2938 else
2939 {
2940 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2941 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2942 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2943 rcStrict = VINF_SUCCESS;
2944
2945 if (pTb->Thrd.cCalls > cZeroCalls)
2946 {
2947 if (cCallsPrev != pTb->Thrd.cCalls)
2948 pVCpu->iem.s.cInstructions++;
2949 break;
2950 }
2951
2952 pVCpu->iem.s.pCurTbR3 = NULL;
2953 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2954 }
2955
2956 /* Check for IRQs? */
2957 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2958 pVCpu->iem.s.cInstrTillIrqCheck--;
2959 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2960 break;
2961
2962 /* Still space in the TB? */
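 /* (Headroom: a single instruction may add up to 5 calls, up to 16 opcode
    bytes and additional TB lookup entries.) */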
2963 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2964 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2965 && pTb->cTbLookupEntries < 127)
2966 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2967 else
2968 {
2969 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2970 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2971 break;
2972 }
2973 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2974 }
2975
2976 /*
2977 * Reserve lookup space for the final call entry if necessary.
2978 */
2979 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2980 if (pTb->Thrd.cCalls > 1)
2981 {
2982 if (pFinalCall->uTbLookup == 0)
2983 {
2984 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2985 pTb->cTbLookupEntries += 1;
2986 }
2987 }
2988 else if (pFinalCall->uTbLookup != 0)
2989 {
2990 Assert(pTb->cTbLookupEntries > 1);
2991 pFinalCall->uTbLookup -= 1;
2992 pTb->cTbLookupEntries -= 1;
2993 }
2994
2995 /*
2996 * Duplicate the TB into a completed one and link it.
2997 */
2998 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2999 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
3000
3001 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
3002
3003#ifdef IEM_COMPILE_ONLY_MODE
3004 /*
3005 * Execute the translation block.
3006 */
3007#endif
3008
3009 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3010}
3011
3012
3013
3014/*********************************************************************************************************************************
3015* Threaded Translation Block Saving and Restoring for Profiling the Native Recompiler *
3016*********************************************************************************************************************************/
3017#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3018# include <iprt/message.h>
3019
3020static const SSMFIELD g_aIemThreadedTbFields[] =
3021{
3022 SSMFIELD_ENTRY( IEMTB, cUsed),
3023 SSMFIELD_ENTRY( IEMTB, msLastUsed),
3024 SSMFIELD_ENTRY_GCPHYS(IEMTB, GCPhysPc),
3025 SSMFIELD_ENTRY( IEMTB, fFlags),
3026 SSMFIELD_ENTRY( IEMTB, x86.fAttr),
3027 SSMFIELD_ENTRY( IEMTB, cRanges),
3028 SSMFIELD_ENTRY( IEMTB, cInstructions),
3029 SSMFIELD_ENTRY( IEMTB, Thrd.cCalls),
3030 SSMFIELD_ENTRY( IEMTB, cTbLookupEntries),
3031 SSMFIELD_ENTRY( IEMTB, cbOpcodes),
3032 SSMFIELD_ENTRY( IEMTB, FlatPc),
3033 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[0]),
3034 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[1]),
3035 SSMFIELD_ENTRY_TERM()
3036};
3037
3038/**
3039 * Saves a threaded TB to a dedicated saved state file.
3040 */
3041static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb)
3042{
3043 /* Only VCPU #0 for now. */
3044 if (pVCpu->idCpu != 0)
3045 return;
3046
3047 /*
3048 * Get the SSM handle, lazily opening the output file.
3049 */
3050 PSSMHANDLE const pNil = (PSSMHANDLE)~(uintptr_t)0; Assert(!RT_VALID_PTR(pNil));
3051 PSSMHANDLE pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3052 if (pSSM && pSSM != pNil)
3053 { /* likely */ }
3054 else if (pSSM)
3055 return;
3056 else
3057 {
3058 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil;
3059 int rc = SSMR3Open("ThreadedTBsForRecompilerProfiling.sav", NULL, NULL, SSM_OPEN_F_FOR_WRITING, &pSSM);
3060 AssertLogRelRCReturnVoid(rc);
3061
3062 rc = SSMR3WriteFileHeader(pSSM, 1);
3063 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3064
3065 rc = SSMR3WriteUnitBegin(pSSM, "threaded-tbs", 1, 0);
3066 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3067 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pSSM;
3068 }
3069
3070 /*
3071 * Do the actual saving.
3072 */
3073 SSMR3PutU32(pSSM, 0); /* Indicates that another TB follows. */
3074
3075 /* The basic structure. */
3076 SSMR3PutStructEx(pSSM, pTb, sizeof(*pTb), 0 /*fFlags*/, g_aIemThreadedTbFields, NULL);
3077
3078 /* The ranges. */
3079 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3080 {
3081 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offOpcodes);
3082 SSMR3PutU16(pSSM, pTb->aRanges[iRange].cbOpcodes);
3083 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offPhysPage | (pTb->aRanges[iRange].idxPhysPage << 14));
3084 }
3085
3086 /* The opcodes. */
3087 SSMR3PutMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3088
3089 /* The threaded call table. */
3090 int rc = SSMR3PutMem(pSSM, pTb->Thrd.paCalls, sizeof(*pTb->Thrd.paCalls) * pTb->Thrd.cCalls);
3091 AssertLogRelMsgStmt(RT_SUCCESS(rc), ("rc=%Rrc\n", rc), pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil);
3092}
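/*
 * Informal sketch of the per-TB record layout written above and read back by
 * IEMR3ThreadedProfileRecompilingSavedTbs().  This is only a reader's aid; the
 * g_aIemThreadedTbFields table and the code are authoritative.
 *
 * @code
 *  uint32_t            uTag;        // 0 = another TB follows, UINT32_MAX = end of unit.
 *  IEMTB               Core;        // Only the members listed in g_aIemThreadedTbFields.
 *  struct                           // Core.cRanges entries.
 *  {
 *      uint16_t        offOpcodes;
 *      uint16_t        cbOpcodes;
 *      uint16_t        uPacked;     // offPhysPage | (idxPhysPage << 14).
 *  }                   aRanges[];
 *  uint8_t             abOpcodes[]; // Core.cbOpcodes bytes.
 *  IEMTHRDEDCALLENTRY  aCalls[];    // Core.Thrd.cCalls entries.
 * @endcode
 */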
3093
3094
3095/**
3096 * Called by IEMR3Term to finish any open profile files.
3097 *
3098 * @note This is not called on the EMT for @a pVCpu, but rather on the thread
3099 * driving the VM termination.
3100 */
3101DECLHIDDEN(void) iemThreadedSaveTbForProfilingCleanup(PVMCPU pVCpu)
3102{
3103 PSSMHANDLE const pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3104 pVCpu->iem.s.pSsmThreadedTbsForProfiling = NULL;
3105 if (RT_VALID_PTR(pSSM))
3106 {
3107 /* Indicate that this is the end. */
3108 SSMR3PutU32(pSSM, UINT32_MAX);
3109
3110 int rc = SSMR3WriteUnitComplete(pSSM);
3111 AssertLogRelRC(rc);
3112 rc = SSMR3WriteFileFooter(pSSM);
3113 AssertLogRelRC(rc);
3114 rc = SSMR3Close(pSSM);
3115 AssertLogRelRC(rc);
3116 }
3117}
3118
3119#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER && VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING */
3120
3121#ifdef IN_RING3
3122/**
3123 * API used to process what iemThreadedSaveTbForProfiling() saved.
3124 *
3125 * @note Do not mix build types or revisions. Local changes between saving the
3126 * TBs and calling this API may cause unexpected trouble.
3127 */
3128VMMR3DECL(int) IEMR3ThreadedProfileRecompilingSavedTbs(PVM pVM, const char *pszFilename, uint32_t cMinTbs)
3129{
3130# if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3131 PVMCPU const pVCpu = pVM->apCpusR3[0];
3132
3133 /* We need to keep an eye on the TB allocator. */
3134 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
3135
3136 /*
3137 * Load the TBs from the file.
3138 */
3139 PSSMHANDLE pSSM = NULL;
3140 int rc = SSMR3Open(pszFilename, NULL, NULL, 0, &pSSM);
3141 if (RT_SUCCESS(rc))
3142 {
3143 uint32_t cTbs = 0;
3144 PIEMTB pTbHead = NULL;
3145 PIEMTB *ppTbTail = &pTbHead;
3146 uint32_t uVersion;
3147 rc = SSMR3Seek(pSSM, "threaded-tbs", 0, &uVersion);
3148 if (RT_SUCCESS(rc))
3149 {
3150 for (;; cTbs++)
3151 {
3152 /* Check for the end tag. */
3153 uint32_t uTag = 0;
3154 rc = SSMR3GetU32(pSSM, &uTag);
3155 AssertRCBreak(rc);
3156 if (uTag == UINT32_MAX)
3157 break;
3158 AssertBreakStmt(uTag == 0, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3159
3160 /* Do we have room for another TB? */
3161 if (pTbAllocator->cInUseTbs + 2 >= pTbAllocator->cMaxTbs)
3162 {
3163 RTMsgInfo("Too many TBs to load, stopping loading early.\n");
3164 break;
3165 }
3166
3167 /* Allocate a new TB. */
3168 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
3169                AssertBreakStmt(pTb, rc = VERR_OUT_OF_RESOURCES);
3170
3171 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
3172 RT_ZERO(*pTb);
3173 pTb->idxAllocChunk = idxAllocChunk;
3174
3175 rc = SSMR3GetStructEx(pSSM, pTb, sizeof(*pTb), 0, g_aIemThreadedTbFields, NULL);
3176 if (RT_SUCCESS(rc))
3177 {
3178 AssertStmt(pTb->Thrd.cCalls > 0 && pTb->Thrd.cCalls <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3179 AssertStmt(pTb->cbOpcodes > 0 && pTb->cbOpcodes <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3180 AssertStmt(pTb->cRanges > 0 && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges), rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3181 AssertStmt(pTb->cTbLookupEntries > 0 && pTb->cTbLookupEntries <= 136, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3182
3183 if (RT_SUCCESS(rc))
3184 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3185 {
3186 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].offOpcodes);
3187 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].cbOpcodes);
3188 uint16_t uTmp = 0;
3189 rc = SSMR3GetU16(pSSM, &uTmp);
3190 AssertRCBreak(rc);
3191 pTb->aRanges[iRange].offPhysPage = uTmp & GUEST_PAGE_OFFSET_MASK;
3192 pTb->aRanges[iRange].idxPhysPage = uTmp >> 14;
3193
3194 AssertBreakStmt(pTb->aRanges[iRange].idxPhysPage <= RT_ELEMENTS(pTb->aGCPhysPages),
3195 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3196 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes < pTb->cbOpcodes,
3197 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3198 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes + pTb->aRanges[iRange].cbOpcodes <= pTb->cbOpcodes,
3199 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3200 }
3201
3202 if (RT_SUCCESS(rc))
3203 {
3204 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAllocZ(sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3205 if (pTb->Thrd.paCalls)
3206 {
3207 size_t const cbTbLookup = pTb->cTbLookupEntries * sizeof(PIEMTB);
3208 Assert(cbTbLookup > 0);
3209 size_t const cbOpcodes = pTb->cbOpcodes;
3210 Assert(cbOpcodes > 0);
3211 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
3212 uint8_t * const pbBoth = (uint8_t *)RTMemAllocZ(cbBoth);
3213 if (pbBoth)
3214 {
3215 pTb->pabOpcodes = &pbBoth[cbTbLookup];
3216 SSMR3GetMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3217 rc = SSMR3GetMem(pSSM, pTb->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3218 if (RT_SUCCESS(rc))
3219 {
3220 *ppTbTail = pTb;
3221 ppTbTail = &pTb->pNext;
3222 continue;
3223 }
3224 }
3225 else
3226 rc = VERR_NO_MEMORY;
3227 RTMemFree(pTb->Thrd.paCalls);
3228 }
3229 else
3230 rc = VERR_NO_MEMORY;
3231 }
3232 }
3233 iemTbAllocatorFree(pVCpu, pTb);
3234 break;
3235 }
3236 if (RT_FAILURE(rc))
3237 RTMsgError("Load error: %Rrc (cTbs=%u)", rc, cTbs);
3238 }
3239 else
3240 RTMsgError("SSMR3Seek failed on '%s': %Rrc", pszFilename, rc);
3241 SSMR3Close(pSSM);
3242 if (RT_SUCCESS(rc))
3243 {
3244 /*
3245 * Recompile the TBs.
3246 */
3247 if (pTbHead)
3248 {
3249 RTMsgInfo("Loaded %u TBs\n", cTbs);
3250 if (cTbs < cMinTbs)
3251 {
3252 RTMsgInfo("Duplicating TBs to reach %u TB target\n", cMinTbs);
3253 for (PIEMTB pTb = pTbHead;
3254 cTbs < cMinTbs && pTbAllocator->cInUseTbs + 2 <= pTbAllocator->cMaxTbs;
3255 pTb = pTb->pNext)
3256 {
3257 PIEMTB pTbCopy = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
3258 if (!pTbCopy)
3259 break;
3260 *ppTbTail = pTbCopy;
3261 ppTbTail = &pTbCopy->pNext;
3262 cTbs++;
3263 }
3264 }
3265
3266 PIEMTB pTbWarmup = iemThreadedTbDuplicate(pVM, pVCpu, pTbHead);
3267 if (pTbWarmup)
3268 {
3269 iemNativeRecompile(pVCpu, pTbWarmup);
3270 RTThreadSleep(512); /* to make the start visible in the profiler. */
3271 RTMsgInfo("Ready, set, go!\n");
3272
3273 if ((pTbWarmup->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3274 {
3275 uint32_t cFailed = 0;
3276 uint64_t const nsStart = RTTimeNanoTS();
3277 for (PIEMTB pTb = pTbHead; pTb; pTb = pTb->pNext)
3278 {
3279 iemNativeRecompile(pVCpu, pTb);
3280 if ((pTb->fFlags & IEMTB_F_TYPE_MASK) != IEMTB_F_TYPE_NATIVE)
3281 cFailed++;
3282 }
3283 uint64_t const cNsElapsed = RTTimeNanoTS() - nsStart;
3284 RTMsgInfo("Recompiled %u TBs in %'RU64 ns - averaging %'RU64 ns/TB\n",
3285 cTbs, cNsElapsed, (cNsElapsed + cTbs - 1) / cTbs);
3286 if (cFailed)
3287 {
3288                            RTMsgError("Unfortunately %u TBs failed!", cFailed);
3289 rc = VERR_GENERAL_FAILURE;
3290 }
3291 RTThreadSleep(128); /* Another gap in the profiler timeline. */
3292 }
3293 else
3294 {
3295 RTMsgError("Failed to recompile the first TB!");
3296 rc = VERR_GENERAL_FAILURE;
3297 }
3298 }
3299 else
3300 rc = VERR_NO_MEMORY;
3301 }
3302 else
3303 {
3304 RTMsgError("'%s' contains no TBs!", pszFilename);
3305 rc = VERR_NO_DATA;
3306 }
3307 }
3308 }
3309 else
3310 RTMsgError("SSMR3Open failed on '%s': %Rrc", pszFilename, rc);
3311 return rc;
3312
3313# else
3314 RT_NOREF(pVM, pszFilename, cMinTbs);
3315 return VERR_NOT_IMPLEMENTED;
3316# endif
3317}
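/*
 * Hypothetical usage sketch for IEMR3ThreadedProfileRecompilingSavedTbs(), for
 * instance from a ring-3 test or debug hook.  The file name matches the
 * default written by iemThreadedSaveTbForProfiling(); the 4096 cMinTbs value
 * is an arbitrary example, not a recommendation.
 *
 * @code
 *  int rc = IEMR3ThreadedProfileRecompilingSavedTbs(pVM,
 *                                                   "ThreadedTBsForRecompilerProfiling.sav",
 *                                                   4096);
 *  if (RT_FAILURE(rc))
 *      RTMsgError("Profiling recompilation run failed: %Rrc", rc);
 * @endcode
 */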
3318#endif /* IN_RING3 */
3319
3320
3321/*********************************************************************************************************************************
3322* Recompiled Execution Core *
3323*********************************************************************************************************************************/
3324
3325/** Default TB factor.
3326 * This is basically the number of nanoseconds we guess executing a TB takes
3327 * on average. We estimate it high if we can.
3328 * @note Best if this is a power of two so it can be translated to a shift. */
3329#define IEM_TIMER_POLL_DEFAULT_FACTOR UINT32_C(64)
3330/** The minimum number of nanoseconds we can allow between timer pollings.
3331 * This must take the cost of TMTimerPollBoolWithNanoTS into account. We put that
3332 * cost at 104 ns now, thus this constant is at 256 ns. */
3333#define IEM_TIMER_POLL_MIN_NS UINT32_C(256)
3334/** The IEM_TIMER_POLL_MIN_NS value roughly translated to TBs, with some grains
3335 * of salt thrown in.
3336 * The idea is that we will be able to make progress with guest code execution
3337 * before polling timers and between running timers. */
3338#define IEM_TIMER_POLL_MIN_ITER UINT32_C(12)
3339/** The maximum number of nanoseconds we can allow between timer pollings.
3340 * This probably shouldn't be too high, as we don't have any timer
3341 * reprogramming feedback in the polling code. So, when a device reschedules a
3342 * timer for an earlier delivery, we won't know about it. */
3343#define IEM_TIMER_POLL_MAX_NS UINT32_C(8388608) /* 0x800000 ns = 8.4 ms */
3344/** The IEM_TIMER_POLL_MAX_NS value roughly translated to TBs, with some grains
3345 * of salt thrown in.
3346 * This helps control fluctuations in the NU benchmark. */
3347#define IEM_TIMER_POLL_MAX_ITER _512K
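/*
 * Reader's note on how the constants above relate (informal, the defines are
 * authoritative): with the default factor of 64 ns per TB, the 8388608 ns
 * maximum deadline maps to 8388608 / 64 = 131072 iterations, well below
 * IEM_TIMER_POLL_MAX_ITER, while the 256 ns minimum maps to only 4 iterations,
 * so the IEM_TIMER_POLL_MIN_ITER floor of 12 is what actually limits short
 * deadlines.  Expressed as compile time checks this would be something like:
 *
 * @code
 *  AssertCompile(IEM_TIMER_POLL_MAX_NS / IEM_TIMER_POLL_DEFAULT_FACTOR <= IEM_TIMER_POLL_MAX_ITER);
 *  AssertCompile(IEM_TIMER_POLL_MIN_NS / IEM_TIMER_POLL_DEFAULT_FACTOR <  IEM_TIMER_POLL_MIN_ITER);
 * @endcode
 */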
3348
3349#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3350/**
3351 * Calculates the number of TBs till the next timer polling using defaults.
3352 *
3353 * This is used when the previous run wasn't long enough to provide sufficient
3354 * data, and when coming back from the HALT state after not having actually
3355 * executed anything for a while.
3356 */
3357DECL_FORCE_INLINE(uint32_t) iemPollTimersCalcDefaultCountdown(uint64_t cNsDelta) RT_NOEXCEPT
3358{
3359 if (cNsDelta >= IEM_TIMER_POLL_MAX_NS)
3360 return RT_MIN(IEM_TIMER_POLL_MAX_NS / IEM_TIMER_POLL_DEFAULT_FACTOR, IEM_TIMER_POLL_MAX_ITER);
3361
3362 cNsDelta = RT_BIT_64(ASMBitFirstSetU32(cNsDelta) - 1); /* round down to power of 2 */
3363 uint32_t const cRet = cNsDelta / IEM_TIMER_POLL_DEFAULT_FACTOR;
3364 if (cRet >= IEM_TIMER_POLL_MIN_ITER)
3365 {
3366 if (cRet <= IEM_TIMER_POLL_MAX_ITER)
3367 return cRet;
3368 return IEM_TIMER_POLL_MAX_ITER;
3369 }
3370 return IEM_TIMER_POLL_MIN_ITER;
3371}
3372#endif
3373
3374
3375/**
3376 * Helper for polling timers.
3377 */
3378DECLHIDDEN(int) iemPollTimers(PVMCC pVM, PVMCPUCC pVCpu) RT_NOEXCEPT
3379{
3380 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPoll, a);
3381
3382 /*
3383 * Check for VM_FF_TM_VIRTUAL_SYNC and call TMR3VirtualSyncFF if set.
3384 * This is something all EMTs can do.
3385 */
3386 /* If the virtual sync FF is set, respond to it. */
3387 bool fRanTimers = VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC);
3388 if (!fRanTimers)
3389 { /* likely */ }
3390 else
3391 {
3392 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3393 TMR3VirtualSyncFF(pVM, pVCpu);
3394 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3395 }
3396
3397 /*
3398 * Poll timers.
3399 *
3400 * On the 10980xe the polling averages 314 ticks, with a min of 201, while
3401 * running a Norton Utilities DOS benchmark program. TSC runs at 3GHz,
3402 * translating that to 104 ns and 67 ns respectively. (An M2 booting win11
3403 * has an average of 2 ticks / 84 ns.)
3404 *
3405 * With the same setup the TMR3VirtualSyncFF and else branch here profiles
3406 * to 79751 ticks / 26583 ns on average, with a min of 1194 ticks / 398 ns.
3407 * (An M2 booting win11 has an average of 24 ticks / 1008 ns, with a min of
3408 * 8 ticks / 336 ns.)
3409 *
3410 * If we get a zero return value we run timers. Non-timer EMTs shouldn't
3411 * ever see a zero value here, so we just call TMR3TimerQueuesDo. However,
3412 * we do not re-run timers if we already called TMR3VirtualSyncFF above; we
3413 * try to make sure some code is executed first.
3414 */
3415 uint64_t nsNow = 0;
3416 uint64_t cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3417 if (cNsDelta >= 1) /* It is okay to run virtual sync timers a little early. */
3418 { /* likely */ }
3419 else if (!fRanTimers || VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC))
3420 {
3421 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3422 TMR3TimerQueuesDo(pVM);
3423 fRanTimers = true;
3424 nsNow = 0;
3425 cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3426 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3427 }
3428 else
3429 cNsDelta = 33;
3430
3431 /*
3432 * Calc interval and update the timestamps.
3433 */
3434 uint64_t const cNsSinceLast = nsNow - pVCpu->iem.s.nsRecompilerPollNow;
3435 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3436 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3437
3438 /*
3439 * Set the next polling count down value.
3440 *
3441 * We take the previous value and adjust it according to the cNsSinceLast
3442 * value, if it's not within reason. This can't be too accurate since the
3443 * CheckIrq and intra-TB checks aren't evenly spaced; they depend highly
3444 * on the guest code.
3445 */
3446#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3447 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3448 if (cNsDelta >= RT_NS_1SEC / 4)
3449 {
3450 /*
3451 * Non-timer EMTs should end up here with a fixed 500ms delta; just return
3452 * the max and leave the polling overhead to the dedicated timer EMT.
3453 */
3454 AssertCompile(IEM_TIMER_POLL_MAX_ITER * IEM_TIMER_POLL_DEFAULT_FACTOR <= RT_NS_100MS);
3455 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3456 }
3457 else
3458 {
3459 /*
3460 * This is the timer EMT.
3461 */
3462 if (cNsDelta <= IEM_TIMER_POLL_MIN_NS)
3463 {
3464 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollTiny);
3465 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3466 }
3467 else
3468 {
3469 uint32_t const cNsDeltaAdj = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS : (uint32_t)cNsDelta;
3470 uint32_t const cNsDeltaSlack = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS / 2 : cNsDeltaAdj / 4;
3471 if ( cNsSinceLast < RT_MAX(IEM_TIMER_POLL_MIN_NS, 64)
3472 || cItersTillNextPoll < IEM_TIMER_POLL_MIN_ITER /* paranoia */)
3473 {
3474 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollDefaultCalc);
3475 cItersTillNextPoll = iemPollTimersCalcDefaultCountdown(cNsDeltaAdj);
3476 }
3477 else if ( cNsSinceLast >= cNsDeltaAdj + cNsDeltaSlack
3478 || cNsSinceLast <= cNsDeltaAdj - cNsDeltaSlack)
3479 {
3480 if (cNsSinceLast >= cItersTillNextPoll)
3481 {
3482 uint32_t uFactor = (uint32_t)(cNsSinceLast + cItersTillNextPoll - 1) / cItersTillNextPoll;
3483 cItersTillNextPoll = cNsDeltaAdj / uFactor;
3484 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorDivision, uFactor);
3485 }
3486 else
3487 {
3488 uint32_t uFactor = cItersTillNextPoll / (uint32_t)cNsSinceLast;
3489 cItersTillNextPoll = cNsDeltaAdj * uFactor;
3490 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorMultiplication, uFactor);
3491 }
3492
3493 if (cItersTillNextPoll >= IEM_TIMER_POLL_MIN_ITER)
3494 {
3495 if (cItersTillNextPoll <= IEM_TIMER_POLL_MAX_ITER)
3496 { /* likely */ }
3497 else
3498 {
3499 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollMax);
3500 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3501 }
3502 }
3503 else
3504 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3505 }
3506 else
3507 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollUnchanged);
3508 }
3509 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3510 }
3511#else
3512/** Poll timers every 400 us / 2500 Hz. (source: thin air) */
3513# define IEM_TIMER_POLL_IDEAL_NS (400U * RT_NS_1US)
3514 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3515 uint32_t const cNsIdealPollInterval = IEM_TIMER_POLL_IDEAL_NS;
3516 int64_t const nsFromIdeal = cNsSinceLast - cNsIdealPollInterval;
3517 if (nsFromIdeal < 0)
3518 {
3519 if ((uint64_t)-nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll < _64K)
3520 {
3521 cItersTillNextPoll += cItersTillNextPoll / 8;
3522 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3523 }
3524 }
3525 else
3526 {
3527 if ((uint64_t)nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll > 256)
3528 {
3529 cItersTillNextPoll -= cItersTillNextPoll / 8;
3530 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3531 }
3532 }
3533#endif
3534 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillNextPoll;
3535
3536 /*
3537 * Repeat the IRQ and FF checks.
3538 */
3539 if (cNsDelta > 0)
3540 {
3541 uint32_t fCpu = pVCpu->fLocalForcedActions;
3542 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3543 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3544 | VMCPU_FF_TLB_FLUSH
3545 | VMCPU_FF_UNHALT );
3546 if (RT_LIKELY( ( !fCpu
3547 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3548 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3549 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx)) ) )
3550 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
3551 {
3552 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3553 return VINF_SUCCESS;
3554 }
3555 }
3556 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3557 return VINF_IEM_REEXEC_BREAK_FF;
3558}
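/*
 * Reader's note on the adaptive countdown logic above: the division and
 * multiplication branches both approximate
 *
 * @code
 *  cItersNew ~= cNsDeltaAdj * cItersPrev / cNsSinceLast
 * @endcode
 *
 * where cItersPrev and cItersNew are descriptive stand-ins for the value of
 * cItersTillNextPoll before and after the update.  In other words, the next
 * timer deadline is divided by the ns-per-iteration observed during the
 * previous countdown, using an integer factor chosen to keep the intermediate
 * values in range, and the result is clamped to the
 * IEM_TIMER_POLL_MIN_ITER..IEM_TIMER_POLL_MAX_ITER interval.
 */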
3559
3560
3561/** Helper for iemTbExec. */
3562DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
3563{
3564 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP(uTbLookup, uRip);
3565 Assert(idx < pTb->cTbLookupEntries);
3566 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
3567}
3568
3569
3570/**
3571 * Executes a translation block.
3572 *
3573 * @returns Strict VBox status code.
3574 * @param pVCpu The cross context virtual CPU structure of the calling
3575 * thread.
3576 * @param pTb The translation block to execute.
3577 */
3578static IEM_DECL_MSC_GUARD_IGNORE VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
3579{
3580 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
3581
3582 /*
3583 * Set the current TB so CIMPL functions may get at it.
3584 */
3585 pVCpu->iem.s.pCurTbR3 = pTb;
3586 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
3587
3588 /*
3589 * Execute the block.
3590 */
3591#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3592 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
3593 {
3594 pVCpu->iem.s.cTbExecNative++;
3595 IEMTLBTRACE_TB_EXEC_N8VE(pVCpu, pTb);
3596# ifdef LOG_ENABLED
3597 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
3598# endif
3599
3600# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3601 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
3602# endif
3603# ifdef RT_ARCH_AMD64
3604 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
3605# else
3606 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
3607# endif
3608
3609# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3610 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3611# endif
3612# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3613 /* Restore FPCR/MXCSR if the TB modified it. */
3614 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3615 {
3616 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3617 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3618 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3619 }
3620# endif
3621# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3622 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
3623# endif
3624 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3625 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3626 { /* likely */ }
3627 else
3628 {
3629 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
3630 pVCpu->iem.s.pCurTbR3 = NULL;
3631
3632 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3633 only to break out of TB execution early. */
3634 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3635 {
3636 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
3637 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3638 }
3639
3640 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
3641 only to break out of TB execution early due to pending FFs. */
3642 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
3643 {
3644 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
3645 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3646 }
3647
3648 /* VINF_IEM_REEXEC_WITH_FLAGS needs to receive special treatment
3649 and converted to VINF_SUCCESS or whatever is appropriate. */
3650 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
3651 {
3652 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
3653 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
3654 }
3655
3656 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
3657 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3658 }
3659 }
3660 else
3661#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
3662 {
3663 /*
3664 * The threaded execution loop.
3665 */
3666 pVCpu->iem.s.cTbExecThreaded++;
3667 IEMTLBTRACE_TB_EXEC_THRD(pVCpu, pTb);
3668#ifdef LOG_ENABLED
3669 uint64_t uRipPrev = UINT64_MAX;
3670#endif
3671 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
3672 uint32_t cCallsLeft = pTb->Thrd.cCalls;
3673 while (cCallsLeft-- > 0)
3674 {
3675#ifdef LOG_ENABLED
3676 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
3677 {
3678 uRipPrev = pVCpu->cpum.GstCtx.rip;
3679 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
3680 }
3681 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
3682 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
3683 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
3684#endif
3685#ifdef VBOX_WITH_STATISTICS
3686 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
3687 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
3688#endif
3689 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
3690 pCallEntry->auParams[0],
3691 pCallEntry->auParams[1],
3692 pCallEntry->auParams[2]);
3693 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3694 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3695 pCallEntry++;
3696 else if (rcStrict == VINF_IEM_REEXEC_JUMP)
3697 {
3698 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
3699 Assert(cCallsLeft == 0);
3700 uint32_t const idxTarget = (uint32_t)pCallEntry->auParams[0];
3701 cCallsLeft = pTb->Thrd.cCalls;
3702 AssertBreak(idxTarget < cCallsLeft - 1);
3703 cCallsLeft -= idxTarget;
3704 pCallEntry = &pTb->Thrd.paCalls[idxTarget];
3705 AssertBreak(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET);
3706 }
3707 else
3708 {
3709 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
3710 pVCpu->iem.s.pCurTbR3 = NULL;
3711 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
3712 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
3713
3714 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3715 only to break out of TB execution early. */
3716 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3717 {
3718#ifdef VBOX_WITH_STATISTICS
3719 if (pCallEntry->uTbLookup)
3720 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
3721 else
3722 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
3723#endif
3724 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3725 }
3726 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3727 }
3728 }
3729
3730 /* Update the lookup entry. */
3731 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
3732 }
3733
3734 pVCpu->iem.s.cInstructions += pTb->cInstructions;
3735 pVCpu->iem.s.pCurTbR3 = NULL;
3736 return VINF_SUCCESS;
3737}
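/*
 * Reader's note on the VINF_IEM_REEXEC_JUMP handling in the threaded loop
 * above: a call returning that status requests an intra-TB jump, with
 * auParams[0] holding the index of the target call entry (which must be
 * flagged IEMTHREADEDCALLENTRY_F_JUMP_TARGET).  Stripped of the assertions,
 * the re-steering boils down to:
 *
 * @code
 *  uint32_t const idxTarget = (uint32_t)pCallEntry->auParams[0];
 *  pCallEntry = &pTb->Thrd.paCalls[idxTarget];
 *  cCallsLeft = pTb->Thrd.cCalls - idxTarget;
 * @endcode
 *
 * so a loop wholly contained in the block can iterate without going back out
 * through the TB cache lookup.
 */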
3738
3739
3740/**
3741 * This is called when the PC doesn't match the current pbInstrBuf.
3742 *
3743 * Upon return, we're ready for opcode fetching. But please note that
3744 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
3745 * MMIO or unassigned).
3746 */
3747static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
3748{
3749 pVCpu->iem.s.pbInstrBuf = NULL;
3750 pVCpu->iem.s.offCurInstrStart = 0;
3751 pVCpu->iem.s.offInstrNextByte = 0;
3752 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
3753 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
3754}
3755
3756
3757/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
3758DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
3759{
3760 /*
3761 * Set uCurTbStartPc to RIP and calc the effective PC.
3762 */
3763 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
3764#if 0 /* unused */
3765 pVCpu->iem.s.uCurTbStartPc = uPc;
3766#endif
3767 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
3768 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
3769
3770 /*
3771 * Advance within the current buffer (PAGE) when possible.
3772 */
3773 if (pVCpu->iem.s.pbInstrBuf)
3774 {
3775 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
3776 if (off < pVCpu->iem.s.cbInstrBufTotal)
3777 {
3778 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3779 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
3780 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3781 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3782 else
3783 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3784
3785 return pVCpu->iem.s.GCPhysInstrBuf + off;
3786 }
3787 }
3788 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3789}
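/*
 * Illustrative example of the fast path above (all numbers made up): assume
 * the code TLB buffer starts at flat address uInstrBufPc=0x00401000 with
 * cbInstrBufTotal=0x1000 and GCPhysInstrBuf=0x0000000001234000.  A CS:RIP that
 * flattens to 0x00401234 gives off=0x234, so offInstrNextByte/offCurInstrStart
 * become 0x234, cbInstrBuf is capped at off + 15 (or at cbInstrBufTotal when
 * that would overshoot the buffer), and the function returns the physical
 * address 0x0000000001234234.  Anything outside the buffer falls back to
 * iemGetPcWithPhysAndCodeMissed().
 */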
3790
3791
3792/**
3793 * Determines the extra IEMTB_F_XXX flags.
3794 *
3795 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
3796 * IEMTB_F_CS_LIM_CHECKS (or zero).
3797 * @param pVCpu The cross context virtual CPU structure of the calling
3798 * thread.
3799 */
3800DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3801{
3802 uint32_t fRet = 0;
3803
3804 /*
3805 * Determine the inhibit bits.
3806 */
3807 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3808 { /* typical */ }
3809 else
3810 {
3811 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3812 fRet |= IEMTB_F_INHIBIT_SHADOW;
3813 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3814 fRet |= IEMTB_F_INHIBIT_NMI;
3815 }
3816
3817 /*
3818 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
3819 * likely to go invalid before the end of the translation block.
3820 */
3821 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3822 return fRet;
3823
3824 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3825 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3826 return fRet;
3827 return fRet | IEMTB_F_CS_LIM_CHECKS;
3828}
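/*
 * Illustrative reading of the CS limit heuristic above: with a non-flat CS
 * whose base is page aligned, the test amounts to
 *
 * @code
 *  (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip >= X86_PAGE_SIZE + 16
 * @endcode
 *
 * i.e. there is at least a guest page plus a maximum length instruction's
 * worth of room below the limit, so the block being compiled is unlikely to
 * run into it and the per-instruction IEMTB_F_CS_LIM_CHECKS overhead can be
 * skipped.  A non-zero page offset in the CS base shrinks that margin
 * accordingly.
 */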
3829
3830
3831VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu, bool fWasHalted)
3832{
3833 /*
3834 * See if there is an interrupt pending in TRPM, inject it if we can.
3835 */
3836 if (!TRPMHasTrap(pVCpu))
3837 { /* likely */ }
3838 else
3839 {
3840 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3841 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3842 { /*likely */ }
3843 else
3844 return rcStrict;
3845 }
3846
3847 /*
3848 * Init the execution environment.
3849 */
3850#if 1 /** @todo this seems like a good idea, however if we ever share memory
3851 * directly with other threads on the host, it isn't necessarily... */
3852 if (pVM->cCpus == 1)
3853 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3854 else
3855#endif
3856 iemInitExec(pVCpu, 0 /*fExecOpts*/);
3857
3858 if (RT_LIKELY(!fWasHalted && pVCpu->iem.s.msRecompilerPollNow != 0))
3859 { }
3860 else
3861 {
3862 /* Do polling after halt and the first time we get here. */
3863#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3864 uint64_t nsNow = 0;
3865 uint32_t const cItersTillPoll = iemPollTimersCalcDefaultCountdown(TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow));
3866 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillPoll;
3867 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillPoll;
3868#else
3869 uint64_t const nsNow = TMVirtualGetNoCheck(pVM);
3870#endif
3871 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3872 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3873 }
3874 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3875
3876 /*
3877 * Run-loop.
3878 *
3879 * If we're using setjmp/longjmp we combine all the catching here to avoid
3880 * having to call setjmp for each block we're executing.
3881 */
3882 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3883 for (;;)
3884 {
3885 VBOXSTRICTRC rcStrict;
3886 IEM_TRY_SETJMP(pVCpu, rcStrict)
3887 {
3888 for (;;)
3889 {
3890 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3891 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3892 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3893 {
3894 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3895 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3896 if (pTb)
3897 rcStrict = iemTbExec(pVCpu, pTb);
3898 else
3899 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3900 }
3901 else
3902 {
3903 /* This can only happen if the current PC cannot be translated into a
3904 host pointer, which means we're in MMIO or unmapped memory... */
3905#if defined(VBOX_STRICT) && defined(IN_RING3)
3906 rcStrict = DBGFSTOP(pVM);
3907 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3908 return rcStrict;
3909#endif
3910 rcStrict = IEMExecLots(pVCpu, 2048, 511, NULL);
3911 }
3912 if (rcStrict == VINF_SUCCESS)
3913 {
3914 Assert(pVCpu->iem.s.cActiveMappings == 0);
3915
3916 /* Note! This IRQ/FF check is repeated in iemPollTimers, iemThreadedFunc_BltIn_CheckIrq
3917 and emitted by iemNativeRecompFunc_BltIn_CheckIrq. */
3918 uint64_t fCpu = pVCpu->fLocalForcedActions;
3919 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3920 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3921 | VMCPU_FF_TLB_FLUSH
3922 | VMCPU_FF_UNHALT );
3923 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3924 if (RT_LIKELY( ( !fCpu
3925 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3926 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3927 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3928 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3929 {
3930 /* Once in a while we need to poll timers here. */
3931 if ((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
3932 { /* likely */ }
3933 else
3934 {
3935 int rc = iemPollTimers(pVM, pVCpu);
3936 if (rc != VINF_SUCCESS)
3937 return VINF_SUCCESS;
3938 }
3939 }
3940 else
3941 return VINF_SUCCESS;
3942 }
3943 else
3944 return rcStrict;
3945 }
3946 }
3947 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3948 {
3949 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3950 pVCpu->iem.s.cLongJumps++;
3951#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3952 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3953#endif
3954 if (pVCpu->iem.s.cActiveMappings > 0)
3955 iemMemRollback(pVCpu);
3956
3957#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3958 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3959 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3960 {
3961 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3962# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3963 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3964 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3965# endif
3966
3967#ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3968 /* Restore FPCR/MXCSR if the TB modified it. */
3969 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3970 {
3971 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3972 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3973 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3974 }
3975#endif
3976 }
3977#endif
3978
3979#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3980 /* If pTb isn't NULL we're in iemTbExec. */
3981 if (!pTb)
3982 {
3983 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3984 pTb = pVCpu->iem.s.pCurTbR3;
3985 if (pTb)
3986 {
3987 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3988 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3989 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3990 }
3991 }
3992#endif
3993 pVCpu->iem.s.pCurTbR3 = NULL;
3994 return rcStrict;
3995 }
3996 IEM_CATCH_LONGJMP_END(pVCpu);
3997 }
3998}
3999