VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp

Last change on this file was 108409, checked in by vboxsync on 2025-02-27:

VMM/IEM: Made IEMAll.cpp build targeting arm. jiraref:VBP-1531

1/* $Id: IEMAllThrdRecompiler.cpp 108409 2025-02-27 10:35:39Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#ifdef IN_RING0
53# define VBOX_VMM_TARGET_X86
54#endif
55#include <VBox/vmm/iem.h>
56#include <VBox/vmm/cpum.h>
57#include <VBox/vmm/tm.h>
58#include <VBox/vmm/dbgf.h>
59#include <VBox/vmm/dbgftrace.h>
60#ifndef TST_IEM_CHECK_MC
61# include "IEMInternal.h"
62#endif
63#include <VBox/vmm/vmcc.h>
64#include <VBox/log.h>
65#include <VBox/err.h>
66#include <VBox/param.h>
67#include <VBox/dis.h>
68#include <VBox/disopcode-x86-amd64.h>
69#include <iprt/asm-math.h>
70#include <iprt/assert.h>
71#include <iprt/mem.h>
72#include <iprt/string.h>
73#include <iprt/sort.h>
74#include <iprt/x86.h>
75
76#include "IEMInline.h"
77#include "IEMInlineExec.h"
78#ifdef VBOX_VMM_TARGET_X86
79# include "target-x86/IEMInline-x86.h"
80# include "target-x86/IEMInlineDecode-x86.h"
81# include "target-x86/IEMInlineExec-x86.h"
82#elif defined(VBOX_VMM_TARGET_ARMV8)
83# include "target-armv8/IEMInlineExec-armv8.h"
84#endif
85#include "IEMOpHlp.h"
86#include "IEMMc.h"
87
88#include "IEMThreadedFunctions.h"
89#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
90# include "IEMN8veRecompiler.h"
91#endif
92
93
94/*
95 * Narrow down configs here to avoid wasting time on unused configs.
96 */
97
98#ifndef IEM_WITH_CODE_TLB
99# error The code TLB must be enabled for the recompiler.
100#endif
101
102#ifndef IEM_WITH_DATA_TLB
103# error The data TLB must be enabled for the recompiler.
104#endif
105
106
107/*********************************************************************************************************************************
108* Internal Functions *
109*********************************************************************************************************************************/
110#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
111static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb);
112#endif
113
114
115/**
116 * Calculates the effective address of a ModR/M memory operand, extended version
117 * for use in the recompilers.
118 *
119 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
120 *
121 * May longjmp on internal error.
122 *
123 * @return The effective address.
124 * @param pVCpu The cross context virtual CPU structure of the calling thread.
125 * @param bRm The ModRM byte.
126 * @param cbImmAndRspOffset - First byte: The size of any immediate
127 * following the effective address opcode bytes
128 * (only for RIP relative addressing).
129 * - Second byte: RSP displacement (for POP [ESP]).
130 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
131 * SIB byte (bits 39:32).
132 *
133 * @note This must be defined in a source file with matching
134 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
135 * or implemented differently...
136 */
137RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
138{
139 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
140# define SET_SS_DEF() \
141 do \
142 { \
143 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
144 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
145 } while (0)
146
147 if (!IEM_IS_64BIT_CODE(pVCpu))
148 {
149/** @todo Check the effective address size crap! */
150 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
151 {
152 uint16_t u16EffAddr;
153
154 /* Handle the disp16 form with no registers first. */
155 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
156 {
157 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
158 *puInfo = u16EffAddr;
159 }
160 else
161 {
162 /* Get the displacement. */
163 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
164 {
165 case 0: u16EffAddr = 0; break;
166 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
167 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
168 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
169 }
170 *puInfo = u16EffAddr;
171
172 /* Add the base and index registers to the disp. */
173 switch (bRm & X86_MODRM_RM_MASK)
174 {
175 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
176 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
177 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
178 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
179 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
180 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
181 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
182 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
183 }
184 }
185
186 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
187 return u16EffAddr;
188 }
189
190 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
191 uint32_t u32EffAddr;
192 uint64_t uInfo;
193
194 /* Handle the disp32 form with no registers first. */
195 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
196 {
197 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
198 uInfo = u32EffAddr;
199 }
200 else
201 {
202 /* Get the register (or SIB) value. */
203 uInfo = 0;
204 switch ((bRm & X86_MODRM_RM_MASK))
205 {
206 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
207 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
208 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
209 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
210 case 4: /* SIB */
211 {
212 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
213 uInfo = (uint64_t)bSib << 32;
214
215 /* Get the index and scale it. */
216 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
217 {
218 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
219 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
220 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
221 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
222 case 4: u32EffAddr = 0; /*none */ break;
223 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
224 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
225 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
226 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
227 }
228 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
229
230 /* add base */
231 switch (bSib & X86_SIB_BASE_MASK)
232 {
233 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
234 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
235 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
236 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
237 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
238 case 5:
239 if ((bRm & X86_MODRM_MOD_MASK) != 0)
240 {
241 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
242 SET_SS_DEF();
243 }
244 else
245 {
246 uint32_t u32Disp;
247 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
248 u32EffAddr += u32Disp;
249 uInfo |= u32Disp;
250 }
251 break;
252 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
253 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
254 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
255 }
256 break;
257 }
258 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
259 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
260 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
261 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
262 }
263
264 /* Get and add the displacement. */
265 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
266 {
267 case 0:
268 break;
269 case 1:
270 {
271 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
272 u32EffAddr += i8Disp;
273 uInfo |= (uint32_t)(int32_t)i8Disp;
274 break;
275 }
276 case 2:
277 {
278 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
279 u32EffAddr += u32Disp;
280 uInfo |= u32Disp;
281 break;
282 }
283 default:
284 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
285 }
286 }
287
288 *puInfo = uInfo;
289 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
290 return u32EffAddr;
291 }
292
293 uint64_t u64EffAddr;
294 uint64_t uInfo;
295
296 /* Handle the rip+disp32 form with no registers first. */
297 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
298 {
299 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
300 uInfo = (uint32_t)u64EffAddr;
301 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
302 }
303 else
304 {
305 /* Get the register (or SIB) value. */
306 uInfo = 0;
307 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
308 {
309 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
310 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
311 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
312 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
313 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
314 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
315 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
316 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
317 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
318 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
319 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
320 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
321 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
322 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
323 /* SIB */
324 case 4:
325 case 12:
326 {
327 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
328 uInfo = (uint64_t)bSib << 32;
329
330 /* Get the index and scale it. */
331 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
332 {
333 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
334 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
335 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
336 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
337 case 4: u64EffAddr = 0; /*none */ break;
338 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
339 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
340 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
341 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
342 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
343 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
344 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
345 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
346 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
347 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
348 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
349 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
350 }
351 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
352
353 /* add base */
354 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
355 {
356 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
357 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
358 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
359 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
360 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
361 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
362 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
363 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
364 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
365 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
366 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
367 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
368 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
369 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
370 /* complicated encodings */
371 case 5:
372 case 13:
373 if ((bRm & X86_MODRM_MOD_MASK) != 0)
374 {
375 if (!pVCpu->iem.s.uRexB)
376 {
377 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
378 SET_SS_DEF();
379 }
380 else
381 u64EffAddr += pVCpu->cpum.GstCtx.r13;
382 }
383 else
384 {
385 uint32_t u32Disp;
386 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
387 u64EffAddr += (int32_t)u32Disp;
388 uInfo |= u32Disp;
389 }
390 break;
391 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
392 }
393 break;
394 }
395 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
396 }
397
398 /* Get and add the displacement. */
399 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
400 {
401 case 0:
402 break;
403 case 1:
404 {
405 int8_t i8Disp;
406 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
407 u64EffAddr += i8Disp;
408 uInfo |= (uint32_t)(int32_t)i8Disp;
409 break;
410 }
411 case 2:
412 {
413 uint32_t u32Disp;
414 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
415 u64EffAddr += (int32_t)u32Disp;
416 uInfo |= u32Disp;
417 break;
418 }
419 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
420 }
421
422 }
423
424 *puInfo = uInfo;
425 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
426 {
427 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
428 return u64EffAddr;
429 }
430 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
431 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
432 return u64EffAddr & UINT32_MAX;
433}
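/* Illustrative worked example (not part of the original source; the values are
 * made up): in 32-bit code with a 32-bit effective address size, bRm=0x44 gives
 * mod=1 and rm=4, so a SIB byte follows. With bSib=0x98 (scale=2, index=3/EBX,
 * base=0/EAX) and an 8-bit displacement of 0x10, the function returns
 *      EAX + (EBX << 2) + 0x10
 * and stores 0x0000009800000010 in *puInfo, i.e. the SIB byte in bits 39:32 and
 * the sign-extended displacement in bits 31:0. */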
434
435
436
437/*********************************************************************************************************************************
438* Translation Block Cache. *
439*********************************************************************************************************************************/
440
441/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
442static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
443{
444 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
445 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
446 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
447 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
448 if (cMsSinceUse1 != cMsSinceUse2)
449 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
450 if (pTb1->cUsed != pTb2->cUsed)
451 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
452 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
453 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
454 return 0;
455}
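/* Illustrative note (not part of the original source): with the current
 * millisecond tick passed in pvUser, a TB last used 5 ms ago sorts before one
 * last used 200 ms ago; on equal age the more frequently used TB wins, and on
 * equal usage a native TB sorts before a threaded one. The pruning code below
 * keeps the first (most valuable) half of an array sorted this way. */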
456
457#ifdef VBOX_STRICT
458/**
459 * Assertion helper that checks a collisions list count.
460 */
461static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
462{
463 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
464 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
465 while (pTb)
466 {
467 pTb = pTb->pNext;
468 cLeft--;
469 }
470 AssertMsg(cLeft == 0,
471 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
472 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
473}
474#endif
475
476
477DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
478{
479 STAM_PROFILE_START(&pTbCache->StatPrune, a);
480
481 /*
482 * First convert the collision list to an array.
483 */
484 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
485 uintptr_t cInserted = 0;
486 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
487
488 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
489
490 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
491 {
492 apSortedTbs[cInserted++] = pTbCollision;
493 pTbCollision = pTbCollision->pNext;
494 }
495
496 /* Free any excess (impossible). */
497 if (RT_LIKELY(!pTbCollision))
498 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
499 else
500 do
501 {
502 PIEMTB pTbToFree = pTbCollision;
503 pTbCollision = pTbToFree->pNext;
504 iemTbAllocatorFree(pVCpu, pTbToFree);
505 } while (pTbCollision);
506
507 /*
508 * Sort it by most recently used and usage count.
509 */
510 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
511
512 /* We keep half the list for now. Perhaps a bit aggressive... */
513 uintptr_t const cKeep = cInserted / 2;
514
515 /* First free up the TBs we don't wish to keep (before creating the new
516 list because otherwise the free code will scan the list for each one
517 without ever finding it). */
518 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
519 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
520
521 /* Then chain the new TB together with the existing TBs we want to keep,
522 and insert this list into the hash table. */
523 pTbCollision = pTb;
524 for (uintptr_t idx = 0; idx < cKeep; idx++)
525 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
526 pTbCollision->pNext = NULL;
527
528 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
529#ifdef VBOX_STRICT
530 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
531#endif
532
533 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
534}
535
536
537static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
538{
539 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
540 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
541 if (!pTbOldHead)
542 {
543 pTb->pNext = NULL;
544 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
545 }
546 else
547 {
548 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
549 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
550 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
551 {
552 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
553 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
554#ifdef VBOX_STRICT
555 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
556#endif
557 }
558 else
559 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
560 }
561}
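/* Illustrative note (not part of the original source, and a guess at macro
 * internals that live in IEMInternal.h): IEMTBCACHE_PTR_MAKE(pTb, cCount)
 * presumably packs the collision-list length into the low alignment bits of the
 * TB pointer (the AssertCompile on sizeof(IEMTB) vs IEMTBCACHE_PTR_COUNT_MASK in
 * iemTbInit relies on those bits being zero), so a bucket holding three TBs is
 * stored as the head pointer with 3 OR'ed into its low bits and is unpacked
 * again via IEMTBCACHE_PTR_GET_TB / IEMTBCACHE_PTR_GET_COUNT. */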
562
563
564/**
565 * Unlinks @a pTb from the hash table if found in it.
566 *
567 * @returns true if unlinked, false if not present.
568 * @param pTbCache The hash table.
569 * @param pTb The TB to remove.
570 */
571static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
572{
573 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
574 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
575 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
576
577 /*
578 * At the head of the collision list?
579 */
580 if (pTbHash == pTb)
581 {
582 if (!pTb->pNext)
583 pTbCache->apHash[idxHash] = NULL;
584 else
585 {
586 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
587 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
588#ifdef VBOX_STRICT
589 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
590#endif
591 }
592 return true;
593 }
594
595 /*
596 * Search the collision list.
597 */
598 PIEMTB const pTbHead = pTbHash;
599 while (pTbHash)
600 {
601 PIEMTB const pNextTb = pTbHash->pNext;
602 if (pNextTb == pTb)
603 {
604 pTbHash->pNext = pTb->pNext;
605 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
606#ifdef VBOX_STRICT
607 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
608#endif
609 return true;
610 }
611 pTbHash = pNextTb;
612 }
613 return false;
614}
615
616
617/**
618 * Looks up a TB for the given PC and flags in the cache.
619 *
620 * @returns Pointer to TB on success, NULL if not found.
621 * @param pVCpu The cross context virtual CPU structure of the
622 * calling thread.
623 * @param pTbCache The translation block cache.
624 * @param GCPhysPc The PC to look up a TB for.
625 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
626 * the lookup.
627 * @thread EMT(pVCpu)
628 */
629static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
630 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
631{
632 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
633
634 /*
635 * First consult the lookup table entry.
636 */
637 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
638 PIEMTB pTb = *ppTbLookup;
639 if (pTb)
640 {
641 if (pTb->GCPhysPc == GCPhysPc)
642 {
643 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
644 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
645 {
646 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
647 {
648 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
649 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
650 pTb->cUsed++;
651#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
652 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
653 {
654 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
655 return pTb;
656 }
657 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
658# ifdef VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING
659 iemThreadedSaveTbForProfiling(pVCpu, pTb);
660# endif
661 return iemNativeRecompile(pVCpu, pTb);
662#else
663 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
664 return pTb;
665#endif
666 }
667 }
668 }
669 }
670
671 /*
672 * Then consult the hash table.
673 */
674 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
675#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
676 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
677#endif
678 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
679 while (pTb)
680 {
681 if (pTb->GCPhysPc == GCPhysPc)
682 {
683 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
684 {
685 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
686 {
687 STAM_COUNTER_INC(&pTbCache->cLookupHits);
688 AssertMsg(cLeft > 0, ("%d\n", cLeft));
689
690 *ppTbLookup = pTb;
691 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
692 pTb->cUsed++;
693#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
694 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
695 {
696 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
697 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
698 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
699 return pTb;
700 }
701 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
702 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
703 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
704 return iemNativeRecompile(pVCpu, pTb);
705#else
706 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
707 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
708 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
709 return pTb;
710#endif
711 }
712 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
713 }
714 else
715 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
716 }
717 else
718 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
719
720 pTb = pTb->pNext;
721#ifdef VBOX_STRICT
722 cLeft--;
723#endif
724 }
725 AssertMsg(cLeft == 0, ("%d\n", cLeft));
726 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
727 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
728 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
729 return pTb;
730}
731
732
733/*********************************************************************************************************************************
734* Translation Block Allocator.
735*********************************************************************************************************************************/
736/*
737 * Translation block allocation management.
738 */
739
740#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
741# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
742 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
743# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
744 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
745# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
746 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
747#else
748# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
749 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
750# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
751 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
752# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
753 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
754#endif
755/** Makes a TB index from a chunk index and TB index within that chunk. */
756#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
757 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
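/* Illustrative example (not part of the original source; sizes are hypothetical):
 * if a chunk held 0x4000 TBs (cChunkShift=14, fChunkMask=0x3fff), TB index 0x6123
 * would map to chunk 0x6123 >> 14 = 1 and in-chunk index 0x6123 & 0x3fff = 0x2123;
 * the division-based variant gives the same result (0x6123 / 0x4000 = 1,
 * 0x6123 - 1 * 0x4000 = 0x2123), and IEMTBALLOC_IDX_MAKE(pTbAllocator, 1, 0x2123)
 * reconstructs 0x6123. */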
758
759
760/**
761 * Initializes the TB allocator and cache for an EMT.
762 *
763 * @returns VBox status code.
764 * @param pVM The VM handle.
765 * @param cInitialTbs The initial number of translation blocks to
766 * preallocate.
767 * @param cMaxTbs The max number of translation blocks allowed.
768 * @param cbInitialExec The initial size of the executable memory allocator.
769 * @param cbMaxExec The max size of the executable memory allocator.
770 * @param cbChunkExec The chunk size for executable memory allocator. Zero
771 * or UINT32_MAX for automatically determining this.
772 * @thread EMT
773 */
774DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
775 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
776{
777 PVMCPUCC pVCpu = VMMGetCpu(pVM);
778 Assert(!pVCpu->iem.s.pTbCacheR3);
779 Assert(!pVCpu->iem.s.pTbAllocatorR3);
780
781 /*
782 * Calculate the chunk size of the TB allocator.
783 * The minimum chunk size is 2MiB.
784 */
785 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
786 uint32_t cbPerChunk = _2M;
787 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
788#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
789 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
790 uint8_t cChunkShift = 21 - cTbShift;
791 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
792#endif
793 for (;;)
794 {
795 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
796 break;
797 cbPerChunk *= 2;
798 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
799#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
800 cChunkShift += 1;
801#endif
802 }
803
804 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
805 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
806 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
807
808 cMaxTbs = cMaxChunks * cTbsPerChunk;
809
810 /*
811 * Allocate and initialize it.
812 */
813 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(sizeof(*pTbAllocator));
814 if (!pTbAllocator)
815 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
816 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
817 sizeof(*pTbAllocator), cMaxTbs, pVCpu->idCpu);
818 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
819 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
820 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
821 pTbAllocator->cbPerChunk = cbPerChunk;
822 pTbAllocator->cMaxTbs = cMaxTbs;
823 pTbAllocator->pTbsFreeHead = NULL;
824#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
825 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
826 pTbAllocator->cChunkShift = cChunkShift;
827 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
828#endif
829
830 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
831
832 /*
833 * Allocate the initial chunks.
834 */
835 for (uint32_t idxChunk = 0; ; idxChunk++)
836 {
837 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
838 if (!paTbs)
839 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
840 "Failed to allocate %zu bytes for the #%u chunk of TBs for VCpu #%u",
841 cbPerChunk, idxChunk, pVCpu->idCpu);
842
843 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
844 {
845 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
846 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
847 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
848 }
849 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
850 pTbAllocator->cTotalTbs += cTbsPerChunk;
851
852 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
853 break;
854 }
855
856 /*
857 * Calculate the size of the hash table. We double the max TB count and
858 * round it up to the nearest power of two.
859 */
860 uint32_t cCacheEntries = cMaxTbs * 2;
861 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
862 {
863 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
864 cCacheEntries = RT_BIT_32(iBitTop);
865 Assert(cCacheEntries >= cMaxTbs * 2);
866 }
867
868 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
869 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
870 if (!pTbCache)
871 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
872 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
873 cbTbCache, cCacheEntries, pVCpu->idCpu);
874
875 /*
876 * Initialize it (assumes zeroed by the allocator).
877 */
878 pTbCache->uMagic = IEMTBCACHE_MAGIC;
879 pTbCache->cHash = cCacheEntries;
880 pTbCache->uHashMask = cCacheEntries - 1;
881 Assert(pTbCache->cHash > pTbCache->uHashMask);
882 pVCpu->iem.s.pTbCacheR3 = pTbCache;
883
884 /*
885 * Initialize the native executable memory allocator.
886 */
887#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
888 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
889 AssertLogRelRCReturn(rc, rc);
890#else
891 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
892#endif
893
894 return VINF_SUCCESS;
895}
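/* Illustrative sizing example (not part of the original source; sizeof(IEMTB) and
 * the aChunks array size are made up): if sizeof(IEMTB) were 1024 bytes and
 * aChunks had 256 entries, a 2 MiB chunk would hold 2048 TBs, covering at most
 * 524288 TBs; requesting cMaxTbs=1000000 would therefore double the chunk size
 * once to 4 MiB (4096 TBs per chunk, capacity 1048576), giving cMaxChunks=245 and
 * a rounded-up cMaxTbs of 1003520, and the hash table would then be sized to at
 * least twice that, rounded up to a power of two (2097152 in this example). */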
896
897
898/**
899 * Inner free worker.
900 *
901 * The @a a_fType parameter allows us to eliminate the type check when we know
902 * which type of TB is being freed.
903 */
904template<uint32_t a_fType>
905DECL_FORCE_INLINE(void)
906iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
907{
908#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
909 AssertCompile(a_fType == 0 || a_fType == IEMTB_F_TYPE_THREADED || a_fType == IEMTB_F_TYPE_NATIVE);
910#else
911 AssertCompile(a_fType == 0 || a_fType == IEMTB_F_TYPE_THREADED);
912#endif
913 Assert(idxChunk < pTbAllocator->cAllocatedChunks); RT_NOREF(idxChunk);
914 Assert(idxInChunk < pTbAllocator->cTbsPerChunk); RT_NOREF(idxInChunk);
915 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
916#ifdef VBOX_STRICT
917 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
918 Assert(pTbOther != pTb);
919#endif
920
921 /*
922 * Unlink the TB from the hash table.
923 */
924 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
925
926 /*
927 * Free the TB itself.
928 */
929 if RT_CONSTEXPR_IF(a_fType == 0)
930 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
931 {
932 case IEMTB_F_TYPE_THREADED:
933 pTbAllocator->cThreadedTbs -= 1;
934 RTMemFree(pTb->Thrd.paCalls);
935 break;
936#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
937 case IEMTB_F_TYPE_NATIVE:
938 pTbAllocator->cNativeTbs -= 1;
939 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
940 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
941 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
942 break;
943#endif
944 default:
945 AssertFailed();
946 }
947#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
948 else if RT_CONSTEXPR_IF(a_fType == IEMTB_F_TYPE_NATIVE)
949 {
950 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
951 pTbAllocator->cNativeTbs -= 1;
952 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
953 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
954 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
955 }
956#endif
957 else
958 {
959 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
960 pTbAllocator->cThreadedTbs -= 1;
961 RTMemFree(pTb->Thrd.paCalls);
962 }
963
964 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
965
966 pTb->pNext = pTbAllocator->pTbsFreeHead;
967 pTbAllocator->pTbsFreeHead = pTb;
968 pTb->fFlags = 0;
969 pTb->GCPhysPc = UINT64_MAX;
970 pTb->Gen.uPtr = 0;
971 pTb->Gen.uData = 0;
972 pTb->cTbLookupEntries = 0;
973 pTb->cbOpcodes = 0;
974 pTb->pabOpcodes = NULL;
975
976 Assert(pTbAllocator->cInUseTbs > 0);
977
978 pTbAllocator->cInUseTbs -= 1;
979 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
980}
981
982
983/**
984 * Frees the given TB.
985 *
986 * @param pVCpu The cross context virtual CPU structure of the calling
987 * thread.
988 * @param pTb The translation block to free.
989 * @thread EMT(pVCpu)
990 */
991DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
992{
993 /*
994 * Validate state.
995 */
996 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
997 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
998 uint8_t const idxChunk = pTb->idxAllocChunk;
999 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
1000 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
1001 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
1002
1003 /*
1004 * Invalidate the TB lookup pointer and call the inner worker.
1005 */
1006 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1007 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
1008}
1009
1010#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
1011
1012/**
1013 * Interface used by iemExecMemAllocatorPrune.
1014 */
1015DECLHIDDEN(void) iemTbAllocatorFreeBulk(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb)
1016{
1017 Assert(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1018
1019 uint8_t const idxChunk = pTb->idxAllocChunk;
1020 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
1021 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
1022 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
1023
1024 iemTbAllocatorFreeInner<IEMTB_F_TYPE_NATIVE>(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
1025}
1026
1027
1028/**
1029 * Interface used by iemExecMemAllocatorPrune.
1030 */
1031DECLHIDDEN(PIEMTBALLOCATOR) iemTbAllocatorFreeBulkStart(PVMCPUCC pVCpu)
1032{
1033 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1034 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1035
1036 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1037
1038 /* It should be sufficient to do this once. */
1039 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1040
1041 return pTbAllocator;
1042}
1043
1044#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
1045
1046/**
1047 * Schedules a TB for freeing when it's no longer being executed and/or part of
1048 * the caller's call stack.
1049 *
1050 * The TB will be removed from the translation block cache, though, so it isn't
1051 * possible to execute it again, and the IEMTB::pNext member can be used to link
1052 * it together with other TBs awaiting freeing.
1053 *
1054 * @param pVCpu The cross context virtual CPU structure of the calling
1055 * thread.
1056 * @param pTb The translation block to schedule for freeing.
1057 */
1058static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
1059{
1060 /*
1061 * Validate state.
1062 */
1063 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1064 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1065 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1066 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1067 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1068 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1069#ifdef VBOX_STRICT
1070 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1071 Assert(pTbOther != pTb);
1072#endif
1073
1074 /*
1075 * Remove it from the cache and prepend it to the allocator's todo list.
1076 *
1077 * Note! It could still be in various lookup tables, so we trash the GCPhys
1078 * and CS attribs to ensure it won't be reused.
1079 */
1080 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1081 pTb->GCPhysPc = NIL_RTGCPHYS;
1082 pTb->x86.fAttr = UINT16_MAX;
1083
1084 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1085 pTbAllocator->pDelayedFreeHead = pTb;
1086}
1087
1088
1089/**
1090 * Processes the delayed frees.
1091 *
1092 * This is called by the allocator function as well as the native recompile
1093 * function before making any TB or executable memory allocations respectively.
1094 */
1095void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1096{
1097 /** @todo r-bird: these have already been removed from the cache,
1098 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1099 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1100 pTbAllocator->pDelayedFreeHead = NULL;
1101 while (pTb)
1102 {
1103 PIEMTB const pTbNext = pTb->pNext;
1104 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1105 iemTbAllocatorFree(pVCpu, pTb);
1106 pTb = pTbNext;
1107 }
1108}
1109
1110
1111#if 0
1112/**
1113 * Frees all TBs.
1114 */
1115static int iemTbAllocatorFreeAll(PVMCPUCC pVCpu)
1116{
1117 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1118 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1119 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1120
1121 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1122
1123 uint32_t idxChunk = pTbAllocator->cAllocatedChunks;
1124 while (idxChunk-- > 0)
1125 {
1126 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1127 uint32_t idxTb = pTbAllocator->cTbsPerChunk;
1128 while (idxTb-- > 0)
1129 {
1130 PIEMTB const pTb = &paTbs[idxTb];
1131 if (pTb->fFlags)
1132 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, idxTb);
1133 }
1134 }
1135
1136 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1137
1138# if 1
1139 /* Reset the free list. */
1140 pTbAllocator->pTbsFreeHead = NULL;
1141 idxChunk = pTbAllocator->cAllocatedChunks;
1142 while (idxChunk-- > 0)
1143 {
1144 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1145 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1146 RT_BZERO(paTbs, sizeof(paTbs[0]) * cTbsPerChunk);
1147 for (uint32_t idxTb = 0; idxTb < cTbsPerChunk; idxTb++)
1148 {
1149 paTbs[idxTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1150 paTbs[idxTb].pNext = pTbAllocator->pTbsFreeHead;
1151 pTbAllocator->pTbsFreeHead = &paTbs[idxTb];
1152 }
1153 }
1154# endif
1155
1156# if 1
1157 /* Completely reset the TB cache. */
1158 RT_BZERO(pVCpu->iem.s.pTbCacheR3->apHash, sizeof(pVCpu->iem.s.pTbCacheR3->apHash[0]) * pVCpu->iem.s.pTbCacheR3->cHash);
1159# endif
1160
1161 return VINF_SUCCESS;
1162}
1163#endif
1164
1165
1166/**
1167 * Grow the translation block allocator with another chunk.
1168 */
1169static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1170{
1171 /*
1172 * Validate state.
1173 */
1174 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1175 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1176 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1177 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1178 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1179
1180 /*
1181 * Allocate a new chunk and add it to the allocator.
1182 */
1183 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1184 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1185 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1186
1187 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1188 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1189 {
1190 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1191 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
1192 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
1193 }
1194 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1195 pTbAllocator->cTotalTbs += cTbsPerChunk;
1196
1197 return VINF_SUCCESS;
1198}
1199
1200
1201/**
1202 * Allocates a TB from allocator with free block.
1203 *
1204 * This is common code to both the fast and slow allocator code paths.
1205 */
1206DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1207{
1208 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1209 Assert(pTbAllocator->pTbsFreeHead);
1210
1211 PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
1212 pTbAllocator->pTbsFreeHead = pTb->pNext;
1213 pTbAllocator->cInUseTbs += 1;
1214 if (fThreaded)
1215 pTbAllocator->cThreadedTbs += 1;
1216 else
1217 pTbAllocator->cNativeTbs += 1;
1218 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1219 return pTb;
1220}
1221
1222
1223/**
1224 * Slow path for iemTbAllocatorAlloc.
1225 */
1226static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1227{
1228 /*
1229 * With some luck we can add another chunk.
1230 */
1231 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1232 {
1233 int rc = iemTbAllocatorGrow(pVCpu);
1234 if (RT_SUCCESS(rc))
1235 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1236 }
1237
1238 /*
1239 * We have to prune stuff. Sigh.
1240 *
1241 * This requires scanning for older TBs and kicking them out. Not sure how to
1242 * best do this as we don't want to maintain any list of TBs ordered by last
1243 * usage time. But one reasonably simple approach would be that each time we
1244 * get here we continue a sequential scan of the allocation chunks,
1245 * considering just a smallish number of TBs and freeing a fixed portion of
1246 * them. Say, we consider the next 128 TBs, freeing the least recently used
1247 * out of each group of 4 TBs, resulting in 32 freed TBs.
1248 */
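 /* Illustration (not part of the original source): if iPruneFrom is currently
    0x200, the scan below covers TB indices 0x200..0x27f, frees the oldest (or,
    on equal age, least used) TB out of each group of four - 32 TBs in total -
    and then stores 0x280 back into iPruneFrom for the next time we run dry. */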
1249 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1250 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1251 uint32_t const cTbsToPrune = 128;
1252 uint32_t const cTbsPerGroup = 4;
1253 uint32_t cFreedTbs = 0;
1254#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1255 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1256#else
1257 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1258#endif
1259 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1260 idxTbPruneFrom = 0;
1261 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1262 {
1263 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1264 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1265 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1266 uint32_t cMsAge = msNow - pTb->msLastUsed;
1267 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1268
1269 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1270 {
1271#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1272 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1273 { /* likely */ }
1274 else
1275 {
1276 idxInChunk2 = 0;
1277 idxChunk2 += 1;
1278 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1279 idxChunk2 = 0;
1280 }
1281#endif
1282 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1283 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1284 if ( cMsAge2 > cMsAge
1285 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1286 {
1287 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1288 pTb = pTb2;
1289 idxChunk = idxChunk2;
1290 idxInChunk = idxInChunk2;
1291 cMsAge = cMsAge2;
1292 }
1293 }
1294
1295 /* Free the TB. */
1296 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1297 cFreedTbs++; /* paranoia */
1298 }
1299 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1300 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1301
1302 /* Flush the TB lookup entry pointer. */
1303 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1304
1305 /*
1306 * Allocate a TB from the ones we've pruned.
1307 */
1308 if (cFreedTbs)
1309 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1310 return NULL;
1311}
1312
1313
1314/**
1315 * Allocate a translation block.
1316 *
1317 * @returns Pointer to block on success, NULL if we're out and unable to
1318 * free up an existing one (very unlikely once implemented).
1319 * @param pVCpu The cross context virtual CPU structure of the calling
1320 * thread.
1321 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1322 * For statistics.
1323 */
1324DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1325{
1326 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1327 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1328
1329 /* Free any pending TBs before we proceed. */
1330 if (!pTbAllocator->pDelayedFreeHead)
1331 { /* probably likely */ }
1332 else
1333 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1334
1335 /* If the allocator is full, take the slow code path. */
1336 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1337 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1338 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1339}
1340
1341
1342#if 0 /*def VBOX_WITH_IEM_NATIVE_RECOMPILER*/
1343/**
1344 * This is called when we're out of space for native TBs.
1345 *
1346 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1347 * The difference is that we only prune native TBs and will only free any if
1348 * there are at least two in a group. The conditions under which we're called are
1349 * different - there will probably be free TBs in the table when we're called.
1350 * Therefore we increase the group size and max scan length, though we'll stop
1351 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1352 * up at least 8 TBs.
1353 */
1354void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1355{
1356 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1357 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1358
1359 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1360
1361 /*
1362 * Flush the delayed free list before we start freeing TBs indiscriminately.
1363 */
1364 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1365
1366 /*
1367 * Scan and free TBs.
1368 */
1369 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1370 uint32_t const cTbsToPrune = 128 * 8;
1371 uint32_t const cTbsPerGroup = 4 * 4;
1372 uint32_t cFreedTbs = 0;
1373 uint32_t cMaxInstrs = 0;
1374 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1375 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1376 {
1377 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1378 idxTbPruneFrom = 0;
1379 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1380 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1381 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1382 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1383 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1384
1385 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1386 {
1387 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1388 { /* likely */ }
1389 else
1390 {
1391 idxInChunk2 = 0;
1392 idxChunk2 += 1;
1393 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1394 idxChunk2 = 0;
1395 }
1396 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1397 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1398 {
1399 cNativeTbs += 1;
1400 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1401 if ( cMsAge2 > cMsAge
1402 || ( cMsAge2 == cMsAge
1403 && ( pTb2->cUsed < pTb->cUsed
1404 || ( pTb2->cUsed == pTb->cUsed
1405 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1406 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1407 {
1408 pTb = pTb2;
1409 idxChunk = idxChunk2;
1410 idxInChunk = idxInChunk2;
1411 cMsAge = cMsAge2;
1412 }
1413 }
1414 }
1415
1416 /* Free the TB if we found at least two native ones in this group. */
1417 if (cNativeTbs >= 2)
1418 {
1419 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1420 iemTbAllocatorFreeInner<IEMTB_F_TYPE_NATIVE>(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1421 cFreedTbs++;
1422 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1423 break;
1424 }
1425 }
1426 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1427
1428 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1429}
1430#endif /* unused / VBOX_WITH_IEM_NATIVE_RECOMPILER */
1431
1432
1433/*********************************************************************************************************************************
1434* Threaded Recompiler Core *
1435*********************************************************************************************************************************/
1436/**
1437 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1438 * @returns pszBuf.
1439 * @param fFlags The flags.
1440 * @param pszBuf The output buffer.
1441 * @param cbBuf The output buffer size. At least 32 bytes.
1442 */
1443DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1444{
1445 Assert(cbBuf >= 32);
1446 static RTSTRTUPLE const s_aModes[] =
1447 {
1448 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1449 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1450 /* [02] = */ { RT_STR_TUPLE("!2!") },
1451 /* [03] = */ { RT_STR_TUPLE("!3!") },
1452 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1453 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1454 /* [06] = */ { RT_STR_TUPLE("!6!") },
1455 /* [07] = */ { RT_STR_TUPLE("!7!") },
1456 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1457 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1458 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1459 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1460 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1461 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1462 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1463 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1464 /* [10] = */ { RT_STR_TUPLE("!10!") },
1465 /* [11] = */ { RT_STR_TUPLE("!11!") },
1466 /* [12] = */ { RT_STR_TUPLE("!12!") },
1467 /* [13] = */ { RT_STR_TUPLE("!13!") },
1468 /* [14] = */ { RT_STR_TUPLE("!14!") },
1469 /* [15] = */ { RT_STR_TUPLE("!15!") },
1470 /* [16] = */ { RT_STR_TUPLE("!16!") },
1471 /* [17] = */ { RT_STR_TUPLE("!17!") },
1472 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1473 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1474 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1475 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1476 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1477 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1478 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1479 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1480 };
1481 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1482 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1483 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1484
1485 pszBuf[off++] = ' ';
1486 pszBuf[off++] = 'C';
1487 pszBuf[off++] = 'P';
1488 pszBuf[off++] = 'L';
1489 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1490 Assert(off < 32);
1491
1492 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1493
1494 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1495 {
1496 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1497 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1498 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1499 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1500 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1501 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1502 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1503 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1504 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1505 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_X86_INHIBIT_SHADOW },
1506 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_X86_INHIBIT_NMI },
1507 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_X86_CS_LIM_CHECKS },
1508 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1509 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1510 };
1511 if (fFlags)
1512 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1513 if (s_aFlags[i].fFlag & fFlags)
1514 {
1515 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1516 pszBuf[off++] = ' ';
1517 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1518 off += s_aFlags[i].cchName;
1519 fFlags &= ~s_aFlags[i].fFlag;
1520 if (!fFlags)
1521 break;
1522 }
1523 pszBuf[off] = '\0';
1524
1525 return pszBuf;
1526}
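/* Illustrative output (not part of the original source): for a TB whose mode
 * bits decode to 0x0a (64BIT), whose CPL field is 0 and which has
 * IEMTB_F_TYPE_NATIVE set, the buffer would read roughly
 * "64BIT CPL0 TYPE_NATIVE". */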
1527
1528
1529/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1530static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1531{
1532 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1533 pDis->cbCachedInstr += cbMaxRead;
1534 RT_NOREF(cbMinRead);
1535 return VERR_NO_DATA;
1536}
1537
1538
1539/**
1540 * Worker for iemThreadedDisassembleTb.
1541 */
1542static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1543 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1544{
1545 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1546 {
1547 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1548 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1549 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1550 {
1551 PIEMTB pLookupTb = papTbLookup[iLookup];
1552 if (pLookupTb)
1553 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1554 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1555 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1556 : "invalid");
1557 else
1558 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1559 }
1560 pHlp->pfnPrintf(pHlp, "\n");
1561 }
1562 else
1563 {
1564 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1565 idxFirst, cEntries, pTb->cTbLookupEntries);
1566 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1567 }
1568}
1569
1570
1571DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1572{
1573 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1574
1575 char szDisBuf[512];
1576
1577 /*
1578 * Print TB info.
1579 */
1580 pHlp->pfnPrintf(pHlp,
1581 "pTb=%p: GCPhysPc=%RGp (%RGv) cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1582 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1583 pTb, pTb->GCPhysPc, pTb->FlatPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1584 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1585
1586 /*
1587     * The guest disassembly below is driven by the threaded call entries and
1588     * the opcode ranges recorded in the TB: each guest instruction is decoded
1589     * from pabOpcodes and followed by the threaded calls emitted for it.
1590 */
1591 DISSTATE Dis;
1592 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1593 uint32_t const cCalls = pTb->Thrd.cCalls;
1594 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1595 : (pTb->fFlags & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1596 : DISCPUMODE_64BIT;
1597 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1598 uint8_t idxRange = UINT8_MAX;
1599 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1600 uint32_t offRange = 0;
1601 uint32_t offOpcodes = 0;
1602 uint32_t const cbOpcodes = pTb->cbOpcodes;
1603 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1604 bool fTbLookupSeen0 = false;
1605
1606 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1607 {
1608 /*
1609 * New opcode range?
1610 */
1611 if ( idxRange == UINT8_MAX
1612 || idxRange >= cRanges
1613 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1614 {
1615 idxRange += 1;
1616 if (idxRange < cRanges)
1617 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1618 else
1619 continue;
1620 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1621 + (pTb->aRanges[idxRange].idxPhysPage == 0
1622 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1623 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1624 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1625 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1626 pTb->aRanges[idxRange].idxPhysPage);
1627 GCPhysPc += offRange;
1628 }
1629
1630 /*
1631 * Disassemble another guest instruction?
1632 */
1633 if ( paCalls[iCall].offOpcode != offOpcodes
1634 && paCalls[iCall].cbOpcode > 0
1635 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1636 {
1637 offOpcodes = paCalls[iCall].offOpcode;
1638 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1639 uint32_t cbInstr = 1;
1640 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1641 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1642 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1643 if (RT_SUCCESS(rc))
1644 {
1645 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1646 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1647 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1648 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1649 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1650 }
1651 else
1652 {
1653 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1654 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1655 cbInstr = paCalls[iCall].cbOpcode;
1656 }
1657 GCPhysPc += cbInstr;
1658 offRange += cbInstr;
1659 }
1660
1661 /*
1662 * Dump call details.
1663 */
1664 pHlp->pfnPrintf(pHlp,
1665 " Call #%u to %s (%u args)\n",
1666 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1667 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1668 if (paCalls[iCall].uTbLookup != 0)
1669 {
1670 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1671 fTbLookupSeen0 = idxFirst == 0;
1672 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1673 }
1674
1675 /*
1676 * Snoop fExec.
1677 */
1678 switch (paCalls[iCall].enmFunction)
1679 {
1680 default:
1681 break;
1682 case kIemThreadedFunc_BltIn_CheckMode:
1683 fExec = paCalls[iCall].auParams[0];
1684 break;
1685 }
1686 }
1687
1688 if (!fTbLookupSeen0)
1689 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1690}
1691
1692
1693
1694/**
1695 * Allocate a translation block for threaded recompilation.
1696 *
1697 * This is allocated with maxed out call table and storage for opcode bytes,
1698 * because it's only supposed to be called once per EMT to allocate the TB
1699 * pointed to by IEMCPU::pThrdCompileTbR3.
1700 *
1701 * @returns Pointer to the translation block on success, NULL on failure.
1702 * @param pVM The cross context virtual machine structure.
1703 * @param pVCpu The cross context virtual CPU structure of the calling
1704 * thread.
1705 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1706 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1707 */
1708static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1709{
1710 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1711 if (pTb)
1712 {
1713 unsigned const cCalls = 256;
1714 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1715 if (pTb->Thrd.paCalls)
1716 {
1717 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1718 if (pTb->pabOpcodes)
1719 {
1720 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1721 pTb->Thrd.cAllocated = cCalls;
1722 pTb->Thrd.cCalls = 0;
1723 pTb->cbOpcodes = 0;
1724 pTb->pNext = NULL;
1725 pTb->cUsed = 0;
1726 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1727 pTb->idxAllocChunk = UINT8_MAX;
1728 pTb->GCPhysPc = GCPhysPc;
1729 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1730 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1731 pTb->cInstructions = 0;
1732 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1733
1734 /* Init the first opcode range. */
1735 pTb->cRanges = 1;
1736 pTb->aRanges[0].cbOpcodes = 0;
1737 pTb->aRanges[0].offOpcodes = 0;
1738 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1739 pTb->aRanges[0].u2Unused = 0;
1740 pTb->aRanges[0].idxPhysPage = 0;
1741 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1742 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1743
1744 return pTb;
1745 }
1746 RTMemFree(pTb->Thrd.paCalls);
1747 }
1748 RTMemFree(pTb);
1749 }
1750 RT_NOREF(pVM);
1751 return NULL;
1752}
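
/* Sizing note (derived from the constants above): the dedicated compile-time TB is
 * maxed out at 256 threaded call entries and 256 * 16 = 4096 opcode bytes, i.e. the
 * 16 bytes reserved per call entry comfortably cover the 15-byte maximum length of
 * an x86 instruction. */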
1753
1754
1755/**
1756 * Called on the TB that is dedicated for recompilation before it's reused.
1757 *
1758 * @param pVCpu The cross context virtual CPU structure of the calling
1759 * thread.
1760 * @param pTb The translation block to reuse.
1761 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1762 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1763 */
1764static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1765{
1766 pTb->GCPhysPc = GCPhysPc;
1767 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1768 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1769 pTb->Thrd.cCalls = 0;
1770 pTb->cbOpcodes = 0;
1771 pTb->cInstructions = 0;
1772 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1773
1774 /* Init the first opcode range. */
1775 pTb->cRanges = 1;
1776 pTb->aRanges[0].cbOpcodes = 0;
1777 pTb->aRanges[0].offOpcodes = 0;
1778 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1779 pTb->aRanges[0].u2Unused = 0;
1780 pTb->aRanges[0].idxPhysPage = 0;
1781 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1782 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1783}
1784
1785
1786/**
1787 * Used to duplicate a threaded translation block after recompilation is done.
1788 *
1789 * @returns Pointer to the translation block on success, NULL on failure.
1790 * @param pVM The cross context virtual machine structure.
1791 * @param pVCpu The cross context virtual CPU structure of the calling
1792 * thread.
1793 * @param pTbSrc The TB to duplicate.
1794 */
1795static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1796{
1797 /*
1798 * Just using the heap for now. Will make this more efficient and
1799 * complicated later, don't worry. :-)
1800 */
1801 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1802 if (pTb)
1803 {
1804 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1805 memcpy(pTb, pTbSrc, sizeof(*pTb));
1806 pTb->idxAllocChunk = idxAllocChunk;
1807
1808 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1809 Assert(cCalls > 0);
1810 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1811 if (pTb->Thrd.paCalls)
1812 {
1813 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1814 Assert(cbTbLookup > 0);
1815 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1816 Assert(cbOpcodes > 0);
1817 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1818 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1819 if (pbBoth)
1820 {
1821 RT_BZERO(pbBoth, cbTbLookup);
1822 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1823 pTb->Thrd.cAllocated = cCalls;
1824 pTb->pNext = NULL;
1825 pTb->cUsed = 0;
1826 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1827 pTb->fFlags = pTbSrc->fFlags;
1828
1829 return pTb;
1830 }
1831 RTMemFree(pTb->Thrd.paCalls);
1832 }
1833 iemTbAllocatorFree(pVCpu, pTb);
1834 }
1835 RT_NOREF(pVM);
1836 return NULL;
1837
1838}
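
/* Memory layout sketch for the combined allocation above (illustrative, derived from
 * the code in iemThreadedTbDuplicate): the TB lookup table and the opcode bytes share
 * a single RTMemAlloc'ed block:
 *
 *      pbBoth + 0           : cTbLookupEntries * sizeof(PIEMTB)  - lookup table, zeroed
 *      pbBoth + cbTbLookup  : cbOpcodes bytes                    - opcode copy (pTb->pabOpcodes)
 *      total size           : cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB))
 *
 * The lookup entries are later addressed via IEMTB_GET_TB_LOOKUP_TAB_ENTRY(), as seen
 * in iemThreadedDumpLookupTable() above. */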
1839
1840
1841/**
1842 * Adds the given TB to the hash table.
1843 *
1844 * @param pVCpu The cross context virtual CPU structure of the calling
1845 * thread.
1846 * @param pTbCache The cache to add it to.
1847 * @param pTb The translation block to add.
1848 */
1849static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1850{
1851 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1852
1853 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1854 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1855 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1856 if (LogIs12Enabled())
1857 {
1858 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1859 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1860 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1861 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1862 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1863 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1864 pTb->aRanges[idxRange].idxPhysPage == 0
1865 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1866 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1867 }
1868}
1869
1870
1871/**
1872 * Called by opcode verifier functions when they detect a problem.
1873 */
1874void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1875{
1876 /* We cannot free the current TB (indicated by fSafeToFree) because:
1877 - A threaded TB will have its current call entry accessed
1878 to update pVCpu->iem.s.cInstructions.
1879 - A native TB will have code left to execute. */
1880 if (fSafeToFree)
1881 iemTbAllocatorFree(pVCpu, pTb);
1882 else
1883 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1884}
1885
1886
1887/*
1888 * Real code.
1889 */
1890
1891#ifdef LOG_ENABLED
1892/**
1893 * Logs the current instruction.
1894 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1895 * @param pszFunction The IEM function doing the execution.
1896 * @param idxInstr The instruction number in the block.
1897 */
1898static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1899{
1900# ifdef IN_RING3
1901 if (LogIs2Enabled())
1902 {
1903 char szInstr[256];
1904 uint32_t cbInstr = 0;
1905 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1906 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1907 szInstr, sizeof(szInstr), &cbInstr);
1908
1909 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1910 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1911 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1912 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1913 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1914 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1915 " %s\n"
1916 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1917 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1918 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1919 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1920 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1921 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1922 szInstr));
1923
1924 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1925 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1926 }
1927 else
1928# endif
1929 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1930 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1931}
1932#endif /* LOG_ENABLED */
1933
1934
1935#if 0
1936static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1937{
1938 RT_NOREF(pVM, pVCpu);
1939 return rcStrict;
1940}
1941#endif
1942
1943
1944/**
1945 * Initializes the decoder state when compiling TBs.
1946 *
1947 * This presumes that fExec has already been initialized.
1948 *
1949 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so fixes
1950 * made here may need to be applied to them as well.
1951 *
1952 * @param pVCpu The cross context virtual CPU structure of the calling
1953 * thread.
1954 * @param fReInit Clear for the first call for a TB, set for subsequent
1955 * calls from inside the compile loop where we can skip a
1956 * couple of things.
1957 * @param fExtraFlags The extra translation block flags when @a fReInit is
1958 * true, otherwise ignored. Only IEMTB_F_X86_INHIBIT_SHADOW is
1959 * checked.
1960 */
1961DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1962{
1963 /* ASSUMES: That iemInitExec was already called and that anyone changing
1964 CPU state affecting the fExec bits since then will have updated fExec! */
1965 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1966 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1967
1968 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1969
1970 /* Decoder state: */
1971 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1972 pVCpu->iem.s.enmEffAddrMode = enmMode;
1973 if (enmMode != IEMMODE_64BIT)
1974 {
1975 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1976 pVCpu->iem.s.enmEffOpSize = enmMode;
1977 }
1978 else
1979 {
1980 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1981 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1982 }
1983 pVCpu->iem.s.fPrefixes = 0;
1984 pVCpu->iem.s.uRexReg = 0;
1985 pVCpu->iem.s.uRexB = 0;
1986 pVCpu->iem.s.uRexIndex = 0;
1987 pVCpu->iem.s.idxPrefix = 0;
1988 pVCpu->iem.s.uVex3rdReg = 0;
1989 pVCpu->iem.s.uVexLength = 0;
1990 pVCpu->iem.s.fEvexStuff = 0;
1991 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1992 pVCpu->iem.s.offModRm = 0;
1993 pVCpu->iem.s.iNextMapping = 0;
1994
1995 if (!fReInit)
1996 {
1997 pVCpu->iem.s.cActiveMappings = 0;
1998 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1999 pVCpu->iem.s.fEndTb = false;
2000        pVCpu->iem.s.fTbCheckOpcodes    = true; /* (check opcodes before executing the first instruction) */
2001 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2002 pVCpu->iem.s.fTbCrossedPage = false;
2003 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_X86_INHIBIT_SHADOW) ? 32 : 0;
2004 pVCpu->iem.s.idxLastCheckIrqCallNo = UINT16_MAX;
2005 pVCpu->iem.s.fTbCurInstrIsSti = false;
2006 /* Force RF clearing and TF checking on first instruction in the block
2007 as we don't really know what came before and should assume the worst: */
2008 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
2009 }
2010 else
2011 {
2012 Assert(pVCpu->iem.s.cActiveMappings == 0);
2013 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
2014 Assert(pVCpu->iem.s.fEndTb == false);
2015 Assert(pVCpu->iem.s.fTbCrossedPage == false);
2016 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
2017 }
2018 pVCpu->iem.s.fTbCurInstr = 0;
2019
2020#ifdef DBGFTRACE_ENABLED
2021 switch (IEM_GET_CPU_MODE(pVCpu))
2022 {
2023 case IEMMODE_64BIT:
2024 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
2025 break;
2026 case IEMMODE_32BIT:
2027 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
2028 break;
2029 case IEMMODE_16BIT:
2030 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
2031 break;
2032 }
2033#endif
2034}
2035
2036
2037/**
2038 * Initializes the opcode fetcher when starting the compilation.
2039 *
2040 * @param pVCpu The cross context virtual CPU structure of the calling
2041 * thread.
2042 */
2043DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
2044{
2045 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
2046#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2047 pVCpu->iem.s.offOpcode = 0;
2048#else
2049 RT_NOREF(pVCpu);
2050#endif
2051}
2052
2053
2054/**
2055 * Re-initializes the opcode fetcher between instructions while compiling.
2056 *
2057 * @param pVCpu The cross context virtual CPU structure of the calling
2058 * thread.
2059 */
2060DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
2061{
2062 if (pVCpu->iem.s.pbInstrBuf)
2063 {
2064 uint64_t off = pVCpu->cpum.GstCtx.rip;
2065 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2066 off += pVCpu->cpum.GstCtx.cs.u64Base;
2067 off -= pVCpu->iem.s.uInstrBufPc;
2068 if (off < pVCpu->iem.s.cbInstrBufTotal)
2069 {
2070 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2071 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2072 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2073 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2074 else
2075 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2076 }
2077 else
2078 {
2079 pVCpu->iem.s.pbInstrBuf = NULL;
2080 pVCpu->iem.s.offInstrNextByte = 0;
2081 pVCpu->iem.s.offCurInstrStart = 0;
2082 pVCpu->iem.s.cbInstrBuf = 0;
2083 pVCpu->iem.s.cbInstrBufTotal = 0;
2084 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2085 }
2086 }
2087 else
2088 {
2089 pVCpu->iem.s.offInstrNextByte = 0;
2090 pVCpu->iem.s.offCurInstrStart = 0;
2091 pVCpu->iem.s.cbInstrBuf = 0;
2092 pVCpu->iem.s.cbInstrBufTotal = 0;
2093#ifdef VBOX_STRICT
2094 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2095#endif
2096 }
2097#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2098 pVCpu->iem.s.offOpcode = 0;
2099#endif
2100}
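
/* Worked example for the buffer-window arithmetic above (hypothetical numbers, purely
 * for illustration): with cs.u64Base=0, rip=0x1234, uInstrBufPc=0x1000 and
 * cbInstrBufTotal=0x1000, off becomes 0x234, so the existing buffer is reused with
 * offInstrNextByte = offCurInstrStart = 0x234 and cbInstrBuf capped at
 * min(0x234 + 15, cbInstrBufTotal).  Had rip moved outside the buffered range, the
 * buffer state would be cleared so the next opcode fetch goes back through the code TLB. */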
2101
2102#ifdef LOG_ENABLED
2103
2104/**
2105 * Inserts a NOP call.
2106 *
2107 * This is for debugging.
2108 *
2109 * @returns true on success, false if we're out of call entries.
2110 * @param pTb The translation block being compiled.
2111 */
2112bool iemThreadedCompileEmitNop(PIEMTB pTb)
2113{
2114 /* Emit the call. */
2115 uint32_t const idxCall = pTb->Thrd.cCalls;
2116 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2117 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2118 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2119 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2120 pCall->idxInstr = pTb->cInstructions - 1;
2121 pCall->cbOpcode = 0;
2122 pCall->offOpcode = 0;
2123 pCall->uTbLookup = 0;
2124 pCall->fFlags = 0;
2125 pCall->auParams[0] = 0;
2126 pCall->auParams[1] = 0;
2127 pCall->auParams[2] = 0;
2128 return true;
2129}
2130
2131
2132/**
2133 * Called by iemThreadedCompile if cpu state logging is desired.
2134 *
2135 * @returns true on success, false if we're out of call entries.
2136 * @param pTb The translation block being compiled.
2137 */
2138bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2139{
2140 /* Emit the call. */
2141 uint32_t const idxCall = pTb->Thrd.cCalls;
2142 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2143 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2144 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2145 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2146 pCall->idxInstr = pTb->cInstructions - 1;
2147 pCall->cbOpcode = 0;
2148 pCall->offOpcode = 0;
2149 pCall->uTbLookup = 0;
2150 pCall->fFlags = 0;
2151 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2152 pCall->auParams[1] = 0;
2153 pCall->auParams[2] = 0;
2154 return true;
2155}
2156
2157#endif /* LOG_ENABLED */
2158
2159DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2160{
2161 switch (cbInstr)
2162 {
2163 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2164 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2165 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2166 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2167 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2168 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2169 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2170 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2171 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2172 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2173 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2174 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2175 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2176 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2177 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2178 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2179 }
2180}
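
/* The switch above is just a manually unrolled, bounded byte copy; functionally it is
 * equivalent to the sketch below (presumably kept unrolled to avoid a memcpy call for
 * these tiny, known-small sizes):
 *
 *      Assert(cbInstr >= 1 && cbInstr <= 15);
 *      memcpy(pbDst, &pVCpu->iem.s.abOpcode[0], cbInstr);
 */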
2181
2182#ifdef IEM_WITH_INTRA_TB_JUMPS
2183
2184/**
2185 * Emits the necessary tail calls for a full TB loop-jump.
2186 */
2187static bool iemThreadedCompileFullTbJump(PVMCPUCC pVCpu, PIEMTB pTb)
2188{
2189 /*
2190 * We need a timer and maybe IRQ check before jumping, so make sure
2191 * we've got sufficient call entries left before emitting anything.
2192 */
2193 uint32_t idxCall = pTb->Thrd.cCalls;
2194 if (idxCall + 1U <= pTb->Thrd.cAllocated)
2195 {
2196 /*
2197 * We're good, emit the calls.
2198 */
2199 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2200 pTb->Thrd.cCalls = (uint16_t)(idxCall + 2);
2201
2202 /* Always check timers as we risk getting stuck in a loop otherwise. We
2203 combine it with an IRQ check if that's not performed in the TB already. */
2204 pCall->enmFunction = pVCpu->iem.s.idxLastCheckIrqCallNo < idxCall
2205 ? kIemThreadedFunc_BltIn_CheckTimers
2206 : kIemThreadedFunc_BltIn_CheckTimersAndIrq;
2207 pCall->idxInstr = 0;
2208 pCall->offOpcode = 0;
2209 pCall->cbOpcode = 0;
2210 pCall->uTbLookup = 0;
2211 pCall->fFlags = 0;
2212 pCall->auParams[0] = 0;
2213 pCall->auParams[1] = 0;
2214 pCall->auParams[2] = 0;
2215 pCall++;
2216
2217        /* The jump call; the target is callentry[0]. */
2218 pCall->enmFunction = kIemThreadedFunc_BltIn_Jump;
2219 pCall->idxInstr = 0;
2220 pCall->offOpcode = 0;
2221 pCall->cbOpcode = 0;
2222 pCall->uTbLookup = 0;
2223 pCall->fFlags = 0;
2224 pCall->auParams[0] = 0; /* jump target is call zero */
2225 pCall->auParams[1] = 0;
2226 pCall->auParams[2] = 0;
2227
2228 /* Mark callentry #0 as a jump target. */
2229 pTb->Thrd.paCalls[0].fFlags |= IEMTHREADEDCALLENTRY_F_JUMP_TARGET;
2230 }
2231
2232 return false;
2233}
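
/* Resulting tail of the threaded call table when the loop-jump is emitted above
 * (illustrative; which timer-check variant is used depends on idxLastCheckIrqCallNo):
 *
 *      paCalls[cCalls - 2]  BltIn_CheckTimers or BltIn_CheckTimersAndIrq
 *      paCalls[cCalls - 1]  BltIn_Jump with auParams[0] = 0   (target = call entry #0)
 *
 * and call entry #0 gets IEMTHREADEDCALLENTRY_F_JUMP_TARGET set so later consumers can
 * tell it is a jump target. */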
2234
2235/**
2236 * Called by IEM_MC2_BEGIN_EMIT_CALLS when it detects that we're back at the
2237 * first instruction and we didn't just branch to it (that's handled below).
2238 *
2239 * This will emit a loop iff everything is compatible with that.
2240 */
2241DECLHIDDEN(int) iemThreadedCompileBackAtFirstInstruction(PVMCPU pVCpu, PIEMTB pTb) RT_NOEXCEPT
2242{
2243 /* Check if the mode matches. */
2244 if ( (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2245 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_X86_CS_LIM_CHECKS))
2246 {
2247 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected2);
2248 iemThreadedCompileFullTbJump(pVCpu, pTb);
2249 }
2250 return VINF_IEM_RECOMPILE_END_TB;
2251}
2252
2253#endif /* IEM_WITH_INTRA_TB_JUMPS */
2254
2255
2256/**
2257 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2258 *
2259 * - CS LIM check required.
2260 * - Must recheck opcode bytes.
2261 * - Previous instruction branched.
2262 * - TLB load detected, probably due to page crossing.
2263 *
2264 * @returns true if everything went well, false if we're out of space in the TB
2265 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
2266 * @param pVCpu The cross context virtual CPU structure of the calling
2267 * thread.
2268 * @param pTb The translation block being compiled.
2269 */
2270bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2271{
2272 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2273 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2274#if 0
2275 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2276 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2277#endif
2278
2279 /*
2280 * If we're not in 64-bit mode and not already checking CS.LIM we need to
2281 * see if it's needed to start checking.
2282 */
2283 bool fConsiderCsLimChecking;
2284 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2285 if ( fMode == IEM_F_MODE_X86_64BIT
2286 || (pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS)
2287 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2288 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2289 fConsiderCsLimChecking = false; /* already enabled or not needed */
2290 else
2291 {
2292 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2293 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2294 fConsiderCsLimChecking = true; /* likely */
2295 else
2296 {
2297 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2298 return false;
2299 }
2300 }
2301
2302 /*
2303     * Prepare the call now, even before we know if we can accept the instruction in this TB.
2304     * This allows us to amend parameters w/o making every case suffer.
2305 */
2306 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2307 uint16_t const offOpcode = pTb->cbOpcodes;
2308 uint8_t idxRange = pTb->cRanges - 1;
2309
2310 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2311 pCall->idxInstr = pTb->cInstructions;
2312 pCall->cbOpcode = cbInstr;
2313 pCall->offOpcode = offOpcode;
2314 pCall->uTbLookup = 0;
2315 pCall->fFlags = 0;
2316 pCall->auParams[0] = (uint32_t)cbInstr
2317 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2318 /* The upper dword is sometimes used for cbStartPage. */;
2319 pCall->auParams[1] = idxRange;
2320 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2321
2322/** @todo check if we require IEMTB_F_X86_CS_LIM_CHECKS for any new page we've
2323 * gotten onto. If we do, stop */
2324
2325 /*
2326 * Case 1: We've branched (RIP changed).
2327 *
2328     * Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2329 * TB, end the TB here as it is most likely a loop and if it
2330 * made sense to unroll it, the guest code compiler should've
2331 * done it already.
2332 *
2333 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2334 * Req: 1 extra range, no extra phys.
2335 *
2336     * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2337 * necessary (fTbCrossedPage is true).
2338 * Req: 1 extra range, probably 1 extra phys page entry.
2339 *
2340 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2341 * but in addition we cross into the following page and require
2342 * another TLB load.
2343 * Req: 2 extra ranges, probably 2 extra phys page entries.
2344 *
2345 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2346 * the following page (thus fTbCrossedPage is true).
2347 * Req: 2 extra ranges, probably 1 extra phys page entry.
2348 *
2349 * Note! The setting fTbCrossedPage is done by the iemOpcodeFetchBytesJmp, but
2350 * it may trigger "spuriously" from the CPU point of view because of
2351     *       physical page changes that'll invalidate the physical TLB and trigger a
2352     *       call to the function. In theory this could be a big deal, in practice it's
2353     *       just a bit of a performance loss as we'll pick the LoadingTlb variants.
2354 *
2355 * Note! We do not currently optimize branching to the next instruction (sorry
2356 * 32-bit PIC code). We could maybe do that in the branching code that
2357 * sets (or not) fTbBranched.
2358 */
2359 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2360 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2361 * code. This'll require filtering out far jmps and calls, as they
2362 * load CS which should technically be considered indirect since the
2363 * GDT/LDT entry's base address can be modified independently from
2364 * the code. */
2365 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2366 {
2367 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2368 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2369 {
2370 /* 1a + 1b - instruction fully within the branched to page. */
2371 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2372 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2373
2374 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2375 {
2376 /* Check that we've got a free range. */
2377 idxRange += 1;
2378 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2379 { /* likely */ }
2380 else
2381 {
2382 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2383 return false;
2384 }
2385 pCall->auParams[1] = idxRange;
2386 pCall->auParams[2] = 0;
2387
2388 /* Check that we've got a free page slot. */
2389 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2390 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2391 uint8_t idxPhysPage;
2392 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2393 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2394 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2395 {
2396 pTb->aGCPhysPages[0] = GCPhysNew;
2397 pTb->aRanges[idxRange].idxPhysPage = 1;
2398 idxPhysPage = UINT8_MAX;
2399 }
2400 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2401 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2402 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2403 {
2404 pTb->aGCPhysPages[1] = GCPhysNew;
2405 pTb->aRanges[idxRange].idxPhysPage = 2;
2406 idxPhysPage = UINT8_MAX;
2407 }
2408 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2409 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2410 else
2411 {
2412                    Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2413 return false;
2414 }
2415
2416 /* Loop check: We weave the loop check in here to optimize the lookup. */
2417 if (idxPhysPage != UINT8_MAX)
2418 {
2419 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2420 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2421 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2422 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2423 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2424 {
2425 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2426#ifdef IEM_WITH_INTRA_TB_JUMPS
2427 /* If we're looping back to the start of the TB and the mode is still the same,
2428 we could emit a jump optimization. For now we don't do page transitions
2429 as that implies TLB loading and such. */
2430 if ( idxLoopRange == 0
2431 && offPhysPc == pTb->aRanges[0].offPhysPage
2432 && (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2433 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_X86_CS_LIM_CHECKS)
2434 && (pVCpu->iem.s.fTbBranched & ( IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR
2435 | IEMBRANCHED_F_STACK | IEMBRANCHED_F_RELATIVE))
2436 == IEMBRANCHED_F_RELATIVE)
2437 {
2438 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected);
2439 return iemThreadedCompileFullTbJump(pVCpu, pTb);
2440 }
2441#endif
2442 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2443 return false;
2444 }
2445 }
2446
2447 /* Finish setting up the new range. */
2448 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2449 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2450 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2451 pTb->aRanges[idxRange].u2Unused = 0;
2452 pTb->cRanges++;
2453 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2454 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2455 pTb->aRanges[idxRange].offOpcodes));
2456 }
2457 else
2458 {
2459 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2460 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2461 }
2462
2463            /* Determine which function we need to load & check.
2464 Note! For jumps to a new page, we'll set both fTbBranched and
2465 fTbCrossedPage to avoid unnecessary TLB work for intra
2466 page branching */
2467 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2468 || pVCpu->iem.s.fTbCrossedPage)
2469 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2470 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2471 : !fConsiderCsLimChecking
2472 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2473 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2474 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2475 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2476 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2477 : !fConsiderCsLimChecking
2478 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2479 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2480 else
2481 {
2482 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2483 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2484 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2485 : !fConsiderCsLimChecking
2486 ? kIemThreadedFunc_BltIn_CheckOpcodes
2487 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2488 }
2489 }
2490 else
2491 {
2492 /* 1c + 1d - instruction crosses pages. */
2493 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2494 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2495
2496 /* Lazy bird: Check that this isn't case 1c, since we've already
2497                          loaded the first physical address. End the TB and
2498 make it a case 2b instead.
2499
2500 Hmm. Too much bother to detect, so just do the same
2501 with case 1d as well. */
2502#if 0 /** @todo get back to this later when we've got the actual branch code in
2503 * place. */
2504 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2505
2506 /* Check that we've got two free ranges. */
2507 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2508 { /* likely */ }
2509 else
2510 return false;
2511 idxRange += 1;
2512 pCall->auParams[1] = idxRange;
2513 pCall->auParams[2] = 0;
2514
2515 /* ... */
2516
2517#else
2518 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2519 return false;
2520#endif
2521 }
2522 }
2523
2524 /*
2525 * Case 2: Page crossing.
2526 *
2527 * Sub-case 2a: The instruction starts on the first byte in the next page.
2528 *
2529 * Sub-case 2b: The instruction has opcode bytes in both the current and
2530 * following page.
2531 *
2532 * Both cases requires a new range table entry and probably a new physical
2533 * page entry. The difference is in which functions to emit and whether to
2534 * add bytes to the current range.
2535 */
2536 else if (pVCpu->iem.s.fTbCrossedPage)
2537 {
2538 /* Check that we've got a free range. */
2539 idxRange += 1;
2540 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2541 { /* likely */ }
2542 else
2543 {
2544 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2545 return false;
2546 }
2547
2548 /* Check that we've got a free page slot. */
2549 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2550 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2551 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2552 pTb->aRanges[idxRange].idxPhysPage = 0;
2553 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2554 || pTb->aGCPhysPages[0] == GCPhysNew)
2555 {
2556 pTb->aGCPhysPages[0] = GCPhysNew;
2557 pTb->aRanges[idxRange].idxPhysPage = 1;
2558 }
2559 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2560 || pTb->aGCPhysPages[1] == GCPhysNew)
2561 {
2562 pTb->aGCPhysPages[1] = GCPhysNew;
2563 pTb->aRanges[idxRange].idxPhysPage = 2;
2564 }
2565 else
2566 {
2567            Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2568 return false;
2569 }
2570
2571 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2572 {
2573 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2574 pCall->auParams[1] = idxRange;
2575 pCall->auParams[2] = 0;
2576
2577 /* Finish setting up the new range. */
2578 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2579 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2580 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2581 pTb->aRanges[idxRange].u2Unused = 0;
2582 pTb->cRanges++;
2583 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2584 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2585 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2586
2587            /* Determine which function we need to load & check. */
2588 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2589 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2590 : !fConsiderCsLimChecking
2591 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2592 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2593 }
2594 else
2595 {
2596 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2597 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2598 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2599 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2600
2601            /* We're good. Split the instruction over the old and new range table entries. */
2602 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2603
2604 pTb->aRanges[idxRange].offPhysPage = 0;
2605 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2606 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2607 pTb->aRanges[idxRange].u2Unused = 0;
2608 pTb->cRanges++;
2609 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2610 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2611 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2612
2613            /* Determine which function we need to load & check. */
2614 if (pVCpu->iem.s.fTbCheckOpcodes)
2615 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2616 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2617 : !fConsiderCsLimChecking
2618 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2619 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2620 else
2621 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2622 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2623 : !fConsiderCsLimChecking
2624 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2625 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2626 }
2627 }
2628
2629 /*
2630 * Regular case: No new range required.
2631 */
2632 else
2633 {
2634 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS));
2635 if (pVCpu->iem.s.fTbCheckOpcodes)
2636 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2637 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2638 : kIemThreadedFunc_BltIn_CheckOpcodes;
2639 else
2640 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2641
2642 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2643 pTb->cbOpcodes = offOpcode + cbInstr;
2644 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2645 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2646 }
2647
2648 /*
2649 * Commit the call.
2650 */
2651 pTb->Thrd.cCalls++;
2652
2653 /*
2654 * Clear state.
2655 */
2656 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2657 pVCpu->iem.s.fTbCrossedPage = false;
2658 pVCpu->iem.s.fTbCheckOpcodes = false;
2659
2660 /*
2661 * Copy opcode bytes.
2662 */
2663 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2664 pTb->cbOpcodes = offOpcode + cbInstr;
2665 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2666
2667 return true;
2668}
2669
2670
2671/**
2672 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2673 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2674 *
2675 * @returns true if anything is pending, false if not.
2676 * @param pVCpu The cross context virtual CPU structure of the calling
2677 * thread.
2678 */
2679DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2680{
2681 uint64_t fCpu = pVCpu->fLocalForcedActions;
2682 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2683#if 1
2684 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2685 if (RT_LIKELY( !fCpu
2686 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2687 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2688 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2689 return false;
2690 return true;
2691#else
2692 return false;
2693#endif
2694
2695}
2696
2697
2698/**
2699 * Called by iemThreadedCompile when a block requires a mode check.
2700 *
2701 * @returns true if we should continue, false if we're out of call entries.
2702 * @param pVCpu The cross context virtual CPU structure of the calling
2703 * thread.
2704 * @param pTb The translation block being compiled.
2705 */
2706static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2707{
2708 /* Emit the call. */
2709 uint32_t const idxCall = pTb->Thrd.cCalls;
2710 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2711 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2712 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2713 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2714 pCall->idxInstr = pTb->cInstructions - 1;
2715 pCall->cbOpcode = 0;
2716 pCall->offOpcode = 0;
2717 pCall->uTbLookup = 0;
2718 pCall->fFlags = 0;
2719 pCall->auParams[0] = pVCpu->iem.s.fExec;
2720 pCall->auParams[1] = 0;
2721 pCall->auParams[2] = 0;
2722 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2723 return true;
2724}
2725
2726
2727/**
2728 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2729 * set.
2730 *
2731 * @returns true if we should continue, false if an IRQ is deliverable or a
2732 * relevant force flag is pending.
2733 * @param pVCpu The cross context virtual CPU structure of the calling
2734 * thread.
2735 * @param pTb The translation block being compiled.
2736 * @sa iemThreadedCompileCheckIrq
2737 */
2738bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2739{
2740 /*
2741     * Skip this if we've already emitted a call after the previous instruction
2742 * or if it's the first call, as we're always checking FFs between blocks.
2743 */
2744 uint32_t const idxCall = pTb->Thrd.cCalls;
2745 if ( idxCall > 0
2746 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2747 {
2748 /* Emit the call. */
2749 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2750 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2751 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2752 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2753 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2754 pCall->idxInstr = pTb->cInstructions;
2755 pCall->offOpcode = 0;
2756 pCall->cbOpcode = 0;
2757 pCall->uTbLookup = 0;
2758 pCall->fFlags = 0;
2759 pCall->auParams[0] = 0;
2760 pCall->auParams[1] = 0;
2761 pCall->auParams[2] = 0;
2762 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2763
2764 /* Reset the IRQ check value. */
2765 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2766
2767 /*
2768 * Check for deliverable IRQs and pending force flags.
2769 */
2770 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2771 }
2772 return true; /* continue */
2773}
2774
2775
2776/**
2777 * Emits an IRQ check call and checks for pending IRQs.
2778 *
2779 * @returns true if we should continue, false if an IRQ is deliverable or a
2780 * relevant force flag is pending.
2781 * @param pVCpu The cross context virtual CPU structure of the calling
2782 * thread.
2783 * @param pTb The translation block.
2784 * @sa iemThreadedCompileBeginEmitCallsComplications
2785 */
2786static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2787{
2788 /* Check again in a little bit, unless it is immediately following an STI
2789 in which case we *must* check immediately after the next instruction
2790 as well in case it's executed with interrupt inhibition. We could
2791       otherwise miss the interrupt window. See the irq2 wait2 variant in
2792 bs3-timers-1 which is doing sti + sti + cli. */
2793 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2794 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2795 else
2796 {
2797 pVCpu->iem.s.fTbCurInstrIsSti = false;
2798 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2799 }
2800 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2801
2802 /*
2803 * Emit the call.
2804 */
2805 uint32_t const idxCall = pTb->Thrd.cCalls;
2806 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2807 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2808 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2809 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2810 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2811 pCall->idxInstr = pTb->cInstructions;
2812 pCall->offOpcode = 0;
2813 pCall->cbOpcode = 0;
2814 pCall->uTbLookup = 0;
2815 pCall->fFlags = 0;
2816 pCall->auParams[0] = 0;
2817 pCall->auParams[1] = 0;
2818 pCall->auParams[2] = 0;
2819
2820 /*
2821 * Check for deliverable IRQs and pending force flags.
2822 */
2823 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2824}
2825
2826
2827/**
2828 * Compiles a new TB and executes it.
2829 *
2830 * We combine compilation and execution here as it makes it simpler code flow
2831 * in the main loop and it allows interpreting while compiling if we want to
2832 * explore that option.
2833 *
2834 * @returns Strict VBox status code.
2835 * @param pVM The cross context virtual machine structure.
2836 * @param pVCpu The cross context virtual CPU structure of the calling
2837 * thread.
2838 * @param GCPhysPc The physical address corresponding to the current
2839 * RIP+CS.BASE.
2840 * @param fExtraFlags Extra translation block flags: IEMTB_F_X86_INHIBIT_SHADOW,
2841 * IEMTB_F_X86_INHIBIT_NMI, IEMTB_F_X86_CS_LIM_CHECKS.
2842 */
2843static IEM_DECL_MSC_GUARD_IGNORE VBOXSTRICTRC
2844iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2845{
2846 IEMTLBTRACE_TB_COMPILE(pVCpu, GCPhysPc);
2847 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2848 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2849
2850 /*
2851     * Get the TB we use for the recompiling. This is a maxed-out TB that
2852     * we'll make a more efficient copy of when we're done compiling.
2853 */
2854 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2855 if (pTb)
2856 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2857 else
2858 {
2859 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2860 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2861 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2862 }
2863 pTb->FlatPc = pVCpu->iem.s.uInstrBufPc | (GCPhysPc & GUEST_PAGE_OFFSET_MASK);
2864
2865 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2866 functions may get at it. */
2867 pVCpu->iem.s.pCurTbR3 = pTb;
2868
2869#if 0
2870 /* Make sure the CheckIrq condition matches the one in EM. */
2871 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2872 const uint32_t cZeroCalls = 1;
2873#else
2874 const uint32_t cZeroCalls = 0;
2875#endif
2876
2877 /*
2878     * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2879 */
2880 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2881 iemThreadedCompileInitOpcodeFetching(pVCpu);
2882 VBOXSTRICTRC rcStrict;
2883 for (;;)
2884 {
2885 /* Process the next instruction. */
2886#ifdef LOG_ENABLED
2887 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2888 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2889 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2890 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2891#endif
2892 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2893 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2894
2895 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2896#if 0
2897 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2898 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2899 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2900#endif
2901 if ( rcStrict == VINF_SUCCESS
2902 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2903 && !pVCpu->iem.s.fEndTb)
2904 {
2905 Assert(pTb->Thrd.cCalls > cCallsPrev);
2906 Assert(cCallsPrev - pTb->Thrd.cCalls < 5);
2907
2908 pVCpu->iem.s.cInstructions++;
2909
2910 /* Check for mode change _after_ certain CIMPL calls, so check that
2911 we continue executing with the same mode value. */
2912 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2913 { /* probable */ }
2914 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2915 { /* extremely likely */ }
2916 else
2917 break;
2918
2919#if defined(LOG_ENABLED) && 0 /* for debugging */
2920 //iemThreadedCompileEmitNop(pTb);
2921 iemThreadedCompileEmitLogCpuState(pTb);
2922#endif
2923 }
2924 else
2925 {
2926 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2927 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2928 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2929 rcStrict = VINF_SUCCESS;
2930
2931 if (pTb->Thrd.cCalls > cZeroCalls)
2932 {
2933 if (cCallsPrev != pTb->Thrd.cCalls)
2934 pVCpu->iem.s.cInstructions++;
2935 break;
2936 }
2937
2938 pVCpu->iem.s.pCurTbR3 = NULL;
2939 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2940 }
2941
2942 /* Check for IRQs? */
2943 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2944 pVCpu->iem.s.cInstrTillIrqCheck--;
2945 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2946 break;
2947
2948 /* Still space in the TB? */
2949 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2950 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2951 && pTb->cTbLookupEntries < 127)
2952 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2953 else
2954 {
2955 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2956 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2957 break;
2958 }
2959 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2960 }
2961
2962 /*
2963 * Reserve lookup space for the final call entry if necessary.
2964 */
2965 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2966 if (pTb->Thrd.cCalls > 1)
2967 {
2968 if (pFinalCall->uTbLookup == 0)
2969 {
2970 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2971 pTb->cTbLookupEntries += 1;
2972 }
2973 }
2974 else if (pFinalCall->uTbLookup != 0)
2975 {
2976 Assert(pTb->cTbLookupEntries > 1);
2977 pFinalCall->uTbLookup -= 1;
2978 pTb->cTbLookupEntries -= 1;
2979 }
2980
2981 /*
2982 * Duplicate the TB into a completed one and link it.
2983 */
2984 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2985 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2986
2987 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2988
2989#ifdef IEM_COMPILE_ONLY_MODE
2990 /*
2991 * Execute the translation block.
2992 */
2993#endif
2994
2995 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2996}
2997
2998
2999
3000/*********************************************************************************************************************************
3001* Threaded Translation Block Saving and Restoring for Profiling the Native Recompiler *
3002*********************************************************************************************************************************/
3003#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3004# include <iprt/message.h>
3005
3006static const SSMFIELD g_aIemThreadedTbFields[] =
3007{
3008 SSMFIELD_ENTRY( IEMTB, cUsed),
3009 SSMFIELD_ENTRY( IEMTB, msLastUsed),
3010 SSMFIELD_ENTRY_GCPHYS(IEMTB, GCPhysPc),
3011 SSMFIELD_ENTRY( IEMTB, fFlags),
3012 SSMFIELD_ENTRY( IEMTB, x86.fAttr),
3013 SSMFIELD_ENTRY( IEMTB, cRanges),
3014 SSMFIELD_ENTRY( IEMTB, cInstructions),
3015 SSMFIELD_ENTRY( IEMTB, Thrd.cCalls),
3016 SSMFIELD_ENTRY( IEMTB, cTbLookupEntries),
3017 SSMFIELD_ENTRY( IEMTB, cbOpcodes),
3018 SSMFIELD_ENTRY( IEMTB, FlatPc),
3019 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[0]),
3020 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[1]),
3021 SSMFIELD_ENTRY_TERM()
3022};
3023
3024/**
3025 * Saves a threaded TB to a dedicated saved state file.
3026 */
3027static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb)
3028{
3029 /* Only VCPU #0 for now. */
3030 if (pVCpu->idCpu != 0)
3031 return;
3032
3033 /*
3034 * Get the SSM handle, lazily opening the output file.
3035 */
3036 PSSMHANDLE const pNil = (PSSMHANDLE)~(uintptr_t)0; Assert(!RT_VALID_PTR(pNil));
3037 PSSMHANDLE pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3038 if (pSSM && pSSM != pNil)
3039 { /* likely */ }
3040 else if (pSSM)
3041 return;
3042 else
3043 {
3044 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil;
3045 int rc = SSMR3Open("ThreadedTBsForRecompilerProfiling.sav", NULL, NULL, SSM_OPEN_F_FOR_WRITING, &pSSM);
3046 AssertLogRelRCReturnVoid(rc);
3047
3048 rc = SSMR3WriteFileHeader(pSSM, 1);
3049 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3050
3051 rc = SSMR3WriteUnitBegin(pSSM, "threaded-tbs", 1, 0);
3052 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3053 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pSSM;
3054 }
3055
3056 /*
3057 * Do the actual saving.
3058 */
3059 SSMR3PutU32(pSSM, 0); /* Indicates that another TB follows. */
3060
3061 /* The basic structure. */
3062 SSMR3PutStructEx(pSSM, pTb, sizeof(*pTb), 0 /*fFlags*/, g_aIemThreadedTbFields, NULL);
3063
3064 /* The ranges. */
3065 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3066 {
3067 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offOpcodes);
3068 SSMR3PutU16(pSSM, pTb->aRanges[iRange].cbOpcodes);
3069 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offPhysPage | (pTb->aRanges[iRange].idxPhysPage << 14));
3070 }
3071
3072 /* The opcodes. */
3073 SSMR3PutMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3074
3075 /* The threaded call table. */
3076 int rc = SSMR3PutMem(pSSM, pTb->Thrd.paCalls, sizeof(*pTb->Thrd.paCalls) * pTb->Thrd.cCalls);
3077 AssertLogRelMsgStmt(RT_SUCCESS(rc), ("rc=%Rrc\n", rc), pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil);
3078}
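/*
 * Note on the stream layout written above (and read back by
 * IEMR3ThreadedProfileRecompilingSavedTbs below): each TB record consists of
 *      - a 32-bit marker (0 = another TB follows, UINT32_MAX = end of stream),
 *      - the IEMTB core fields listed in g_aIemThreadedTbFields,
 *      - cRanges entries of three 16-bit values (offOpcodes, cbOpcodes, and
 *        offPhysPage with idxPhysPage packed into bits 14 and up),
 *      - cbOpcodes raw opcode bytes,
 *      - cCalls IEMTHRDEDCALLENTRY structures.
 */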
3079
3080
3081/**
3082 * Called by IEMR3Term to finish any open profile files.
3083 *
3084 * @note This is not called on the EMT for @a pVCpu, but rather on the thread
3085 * driving the VM termination.
3086 */
3087DECLHIDDEN(void) iemThreadedSaveTbForProfilingCleanup(PVMCPU pVCpu)
3088{
3089 PSSMHANDLE const pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3090 pVCpu->iem.s.pSsmThreadedTbsForProfiling = NULL;
3091 if (RT_VALID_PTR(pSSM))
3092 {
3093 /* Indicate that this is the end. */
3094 SSMR3PutU32(pSSM, UINT32_MAX);
3095
3096 int rc = SSMR3WriteUnitComplete(pSSM);
3097 AssertLogRelRC(rc);
3098 rc = SSMR3WriteFileFooter(pSSM);
3099 AssertLogRelRC(rc);
3100 rc = SSMR3Close(pSSM);
3101 AssertLogRelRC(rc);
3102 }
3103}
3104
3105#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER && VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING */
3106
3107#ifdef IN_RING3
3108/**
3109 * API used to process what iemThreadedSaveTbForProfiling() saved.
3110 *
3111 * @note Do not mix build types or revisions. Local changes between saving the
3112 * TBs and calling this API may cause unexpected trouble.
3113 */
3114VMMR3DECL(int) IEMR3ThreadedProfileRecompilingSavedTbs(PVM pVM, const char *pszFilename, uint32_t cMinTbs)
3115{
3116# if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3117 PVMCPU const pVCpu = pVM->apCpusR3[0];
3118
3119 /* We need to keep an eye on the TB allocator. */
3120 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
3121
3122 /*
3123 * Load the TBs from the file.
3124 */
3125 PSSMHANDLE pSSM = NULL;
3126 int rc = SSMR3Open(pszFilename, NULL, NULL, 0, &pSSM);
3127 if (RT_SUCCESS(rc))
3128 {
3129 uint32_t cTbs = 0;
3130 PIEMTB pTbHead = NULL;
3131 PIEMTB *ppTbTail = &pTbHead;
3132 uint32_t uVersion;
3133 rc = SSMR3Seek(pSSM, "threaded-tbs", 0, &uVersion);
3134 if (RT_SUCCESS(rc))
3135 {
3136 for (;; cTbs++)
3137 {
3138 /* Check for the end tag. */
3139 uint32_t uTag = 0;
3140 rc = SSMR3GetU32(pSSM, &uTag);
3141 AssertRCBreak(rc);
3142 if (uTag == UINT32_MAX)
3143 break;
3144 AssertBreakStmt(uTag == 0, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3145
3146 /* Do we have room for another TB? */
3147 if (pTbAllocator->cInUseTbs + 2 >= pTbAllocator->cMaxTbs)
3148 {
3149                    RTMsgInfo("Too many TBs to load, stopping the load early.\n");
3150 break;
3151 }
3152
3153 /* Allocate a new TB. */
3154 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
3155                AssertBreakStmt(pTb, rc = VERR_OUT_OF_RESOURCES);
3156
3157 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
3158 RT_ZERO(*pTb);
3159 pTb->idxAllocChunk = idxAllocChunk;
3160
3161 rc = SSMR3GetStructEx(pSSM, pTb, sizeof(*pTb), 0, g_aIemThreadedTbFields, NULL);
3162 if (RT_SUCCESS(rc))
3163 {
3164 AssertStmt(pTb->Thrd.cCalls > 0 && pTb->Thrd.cCalls <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3165 AssertStmt(pTb->cbOpcodes > 0 && pTb->cbOpcodes <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3166 AssertStmt(pTb->cRanges > 0 && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges), rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3167 AssertStmt(pTb->cTbLookupEntries > 0 && pTb->cTbLookupEntries <= 136, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3168
3169 if (RT_SUCCESS(rc))
3170 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3171 {
3172 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].offOpcodes);
3173 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].cbOpcodes);
3174 uint16_t uTmp = 0;
3175 rc = SSMR3GetU16(pSSM, &uTmp);
3176 AssertRCBreak(rc);
3177 pTb->aRanges[iRange].offPhysPage = uTmp & GUEST_PAGE_OFFSET_MASK;
3178 pTb->aRanges[iRange].idxPhysPage = uTmp >> 14;
3179
3180 AssertBreakStmt(pTb->aRanges[iRange].idxPhysPage <= RT_ELEMENTS(pTb->aGCPhysPages),
3181 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3182 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes < pTb->cbOpcodes,
3183 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3184 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes + pTb->aRanges[iRange].cbOpcodes <= pTb->cbOpcodes,
3185 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3186 }
3187
3188 if (RT_SUCCESS(rc))
3189 {
3190 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAllocZ(sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3191 if (pTb->Thrd.paCalls)
3192 {
3193 size_t const cbTbLookup = pTb->cTbLookupEntries * sizeof(PIEMTB);
3194 Assert(cbTbLookup > 0);
3195 size_t const cbOpcodes = pTb->cbOpcodes;
3196 Assert(cbOpcodes > 0);
3197 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
3198 uint8_t * const pbBoth = (uint8_t *)RTMemAllocZ(cbBoth);
3199 if (pbBoth)
3200 {
3201 pTb->pabOpcodes = &pbBoth[cbTbLookup];
3202 SSMR3GetMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3203 rc = SSMR3GetMem(pSSM, pTb->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3204 if (RT_SUCCESS(rc))
3205 {
3206 *ppTbTail = pTb;
3207 ppTbTail = &pTb->pNext;
3208 continue;
3209 }
3210 }
3211 else
3212 rc = VERR_NO_MEMORY;
3213 RTMemFree(pTb->Thrd.paCalls);
3214 }
3215 else
3216 rc = VERR_NO_MEMORY;
3217 }
3218 }
3219 iemTbAllocatorFree(pVCpu, pTb);
3220 break;
3221 }
3222 if (RT_FAILURE(rc))
3223 RTMsgError("Load error: %Rrc (cTbs=%u)", rc, cTbs);
3224 }
3225 else
3226 RTMsgError("SSMR3Seek failed on '%s': %Rrc", pszFilename, rc);
3227 SSMR3Close(pSSM);
3228 if (RT_SUCCESS(rc))
3229 {
3230 /*
3231 * Recompile the TBs.
3232 */
3233 if (pTbHead)
3234 {
3235 RTMsgInfo("Loaded %u TBs\n", cTbs);
3236 if (cTbs < cMinTbs)
3237 {
3238 RTMsgInfo("Duplicating TBs to reach %u TB target\n", cMinTbs);
3239 for (PIEMTB pTb = pTbHead;
3240 cTbs < cMinTbs && pTbAllocator->cInUseTbs + 2 <= pTbAllocator->cMaxTbs;
3241 pTb = pTb->pNext)
3242 {
3243 PIEMTB pTbCopy = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
3244 if (!pTbCopy)
3245 break;
3246 *ppTbTail = pTbCopy;
3247 ppTbTail = &pTbCopy->pNext;
3248 cTbs++;
3249 }
3250 }
3251
3252 PIEMTB pTbWarmup = iemThreadedTbDuplicate(pVM, pVCpu, pTbHead);
3253 if (pTbWarmup)
3254 {
3255 iemNativeRecompile(pVCpu, pTbWarmup);
3256 RTThreadSleep(512); /* to make the start visible in the profiler. */
3257 RTMsgInfo("Ready, set, go!\n");
3258
3259 if ((pTbWarmup->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3260 {
3261 uint32_t cFailed = 0;
3262 uint64_t const nsStart = RTTimeNanoTS();
3263 for (PIEMTB pTb = pTbHead; pTb; pTb = pTb->pNext)
3264 {
3265 iemNativeRecompile(pVCpu, pTb);
3266 if ((pTb->fFlags & IEMTB_F_TYPE_MASK) != IEMTB_F_TYPE_NATIVE)
3267 cFailed++;
3268 }
3269 uint64_t const cNsElapsed = RTTimeNanoTS() - nsStart;
3270 RTMsgInfo("Recompiled %u TBs in %'RU64 ns - averaging %'RU64 ns/TB\n",
3271 cTbs, cNsElapsed, (cNsElapsed + cTbs - 1) / cTbs);
3272 if (cFailed)
3273 {
3274                            RTMsgError("Unfortunately %u TBs failed!", cFailed);
3275 rc = VERR_GENERAL_FAILURE;
3276 }
3277 RTThreadSleep(128); /* Another gap in the profiler timeline. */
3278 }
3279 else
3280 {
3281 RTMsgError("Failed to recompile the first TB!");
3282 rc = VERR_GENERAL_FAILURE;
3283 }
3284 }
3285 else
3286 rc = VERR_NO_MEMORY;
3287 }
3288 else
3289 {
3290 RTMsgError("'%s' contains no TBs!", pszFilename);
3291 rc = VERR_NO_DATA;
3292 }
3293 }
3294 }
3295 else
3296 RTMsgError("SSMR3Open failed on '%s': %Rrc", pszFilename, rc);
3297 return rc;
3298
3299# else
3300 RT_NOREF(pVM, pszFilename, cMinTbs);
3301 return VERR_NOT_IMPLEMENTED;
3302# endif
3303}
3304#endif /* IN_RING3 */
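/*
 * Illustrative only: a minimal sketch of how a ring-3 test harness that
 * already holds a VM handle might drive IEMR3ThreadedProfileRecompilingSavedTbs().
 * The harness function name, the source of the pVM parameter and the 4096 TB
 * target are assumptions made for the example; only the API and the default
 * file name come from this file.
 */
#if 0 /* sketch, not built */
static int tstIemProfileNativeRecompiler(PVM pVM)
{
    /* Replay the TBs captured by iemThreadedSaveTbForProfiling(), duplicating
       them as needed so the profiling run covers at least 4096 TBs. */
    int rc = IEMR3ThreadedProfileRecompilingSavedTbs(pVM, "ThreadedTBsForRecompilerProfiling.sav", 4096 /*cMinTbs*/);
    if (RT_FAILURE(rc))
        RTMsgError("Profiling the native recompiler failed: %Rrc", rc);
    return rc;
}
#endif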
3305
3306
3307/*********************************************************************************************************************************
3308* Recompiled Execution Core *
3309*********************************************************************************************************************************/
3310
3311/** Default TB factor.
3312 * This is basically the number of nanoseconds we guess executing a TB takes
3313 * on average. We estimate it high if we can.
3314 * @note Best if this is a power of two so it can be translated to a shift. */
3315#define IEM_TIMER_POLL_DEFAULT_FACTOR UINT32_C(64)
3316/** The minimum number of nanoseconds we can allow between timer pollings.
3317 * This must take the cost of TMTimerPollBoolWithNanoTS into account. We put that
3318 * cost at 104 ns now, thus this constant is at 256 ns. */
3319#define IEM_TIMER_POLL_MIN_NS UINT32_C(256)
3320/** The IEM_TIMER_POLL_MIN_NS value roughly translated to TBs, with some grains
3321 * of salt thrown in.
3322 * The idea is that we will be able to make progress with guest code execution
3323 * before polling timers and between running timers. */
3324#define IEM_TIMER_POLL_MIN_ITER UINT32_C(12)
3325/** The maximum number of nanoseconds we can allow between timer pollings.
3326 * This probably shouldn't be too high, as we don't have any timer
3327 * reprogramming feedback in the polling code. So, when a device reschedules a
3328 * timer for an earlier delivery, we won't know about it. */
3329#define IEM_TIMER_POLL_MAX_NS UINT32_C(8388608) /* 0x800000 ns = 8.4 ms */
3330/** The IEM_TIMER_POLL_MAX_NS value roughly translated to TBs, with some grains
3331 * of salt thrown in.
3332 * This helps control fluctuations in the NU benchmark. */
3333#define IEM_TIMER_POLL_MAX_ITER _512K
3334
3335#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3336/**
3337 * Calculates the number of TBs till the next timer polling using defaults.
3338 *
3339 * This is used when the previous run wasn't long enough to provide sufficient
3340 * data, and when coming back from the HALT state without having actually
3341 * executed anything for a while.
3342 */
3343DECL_FORCE_INLINE(uint32_t) iemPollTimersCalcDefaultCountdown(uint64_t cNsDelta) RT_NOEXCEPT
3344{
3345 if (cNsDelta >= IEM_TIMER_POLL_MAX_NS)
3346 return RT_MIN(IEM_TIMER_POLL_MAX_NS / IEM_TIMER_POLL_DEFAULT_FACTOR, IEM_TIMER_POLL_MAX_ITER);
3347
3348    cNsDelta = RT_BIT_64(ASMBitLastSetU32((uint32_t)cNsDelta) - 1); /* round down to power of 2 */
3349 uint32_t const cRet = cNsDelta / IEM_TIMER_POLL_DEFAULT_FACTOR;
3350 if (cRet >= IEM_TIMER_POLL_MIN_ITER)
3351 {
3352 if (cRet <= IEM_TIMER_POLL_MAX_ITER)
3353 return cRet;
3354 return IEM_TIMER_POLL_MAX_ITER;
3355 }
3356 return IEM_TIMER_POLL_MIN_ITER;
3357}
3358#endif
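/*
 * Worked example for the default countdown above (numbers are illustrative):
 * a 1 ms deadline (1,000,000 ns) rounds down to 2^19 = 524,288 ns, which
 * divided by IEM_TIMER_POLL_DEFAULT_FACTOR (64) gives 8,192 iterations; that
 * lies within [IEM_TIMER_POLL_MIN_ITER, IEM_TIMER_POLL_MAX_ITER], so 8,192
 * TBs will be executed before the next timer poll.
 */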
3359
3360
3361/**
3362 * Helper for polling timers.
3363 */
3364DECLHIDDEN(int) iemPollTimers(PVMCC pVM, PVMCPUCC pVCpu) RT_NOEXCEPT
3365{
3366 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPoll, a);
3367
3368 /*
3369 * Check for VM_FF_TM_VIRTUAL_SYNC and call TMR3VirtualSyncFF if set.
3370 * This is something all EMTs can do.
3371 */
3372 /* If the virtual sync FF is set, respond to it. */
3373 bool fRanTimers = VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC);
3374 if (!fRanTimers)
3375 { /* likely */ }
3376 else
3377 {
3378 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3379 TMR3VirtualSyncFF(pVM, pVCpu);
3380 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3381 }
3382
3383 /*
3384 * Poll timers.
3385 *
3386     * On the 10980xe the polling averages 314 ticks, with a min of 201, while
3387     * running a Norton Utilities DOS benchmark program. TSC runs at 3GHz,
3388 * translating that to 104 ns and 67 ns respectively. (An M2 booting win11
3389 * has an average of 2 ticks / 84 ns.)
3390 *
3391 * With the same setup the TMR3VirtualSyncFF and else branch here profiles
3392 * to 79751 ticks / 26583 ns on average, with a min of 1194 ticks / 398 ns.
3393 * (An M2 booting win11 has an average of 24 ticks / 1008 ns, with a min of
3394 * 8 ticks / 336 ns.)
3395 *
3396 * If we get a zero return value we run timers. Non-timer EMTs shouldn't
3397 * ever see a zero value here, so we just call TMR3TimerQueuesDo. However,
3398 * we do not re-run timers if we already called TMR3VirtualSyncFF above, we
3399 * try to make sure some code is executed first.
3400 */
3401 uint64_t nsNow = 0;
3402 uint64_t cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3403 if (cNsDelta >= 1) /* It is okay to run virtual sync timers a little early. */
3404 { /* likely */ }
3405 else if (!fRanTimers || VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC))
3406 {
3407 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3408 TMR3TimerQueuesDo(pVM);
3409 fRanTimers = true;
3410 nsNow = 0;
3411 cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3412 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3413 }
3414 else
3415 cNsDelta = 33;
3416
3417 /*
3418 * Calc interval and update the timestamps.
3419 */
3420 uint64_t const cNsSinceLast = nsNow - pVCpu->iem.s.nsRecompilerPollNow;
3421 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3422 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3423
3424 /*
3425 * Set the next polling count down value.
3426 *
3427 * We take the previous value and adjust it according to the cNsSinceLast
3428 * value, if it's not within reason. This can't be too accurate since the
3429     * CheckIrq and intra-TB checks aren't evenly spaced, as they depend highly
3430 * on the guest code.
3431 */
3432#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3433 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3434 if (cNsDelta >= RT_NS_1SEC / 4)
3435 {
3436 /*
3437         * the max and keep the polling overhead on the dedicated timer EMT.
3438 * the max and keep the polling over head to the deadicated timer EMT.
3439 */
3440 AssertCompile(IEM_TIMER_POLL_MAX_ITER * IEM_TIMER_POLL_DEFAULT_FACTOR <= RT_NS_100MS);
3441 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3442 }
3443 else
3444 {
3445 /*
3446 * This is the timer EMT.
3447 */
3448 if (cNsDelta <= IEM_TIMER_POLL_MIN_NS)
3449 {
3450 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollTiny);
3451 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3452 }
3453 else
3454 {
3455 uint32_t const cNsDeltaAdj = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS : (uint32_t)cNsDelta;
3456 uint32_t const cNsDeltaSlack = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS / 2 : cNsDeltaAdj / 4;
3457 if ( cNsSinceLast < RT_MAX(IEM_TIMER_POLL_MIN_NS, 64)
3458 || cItersTillNextPoll < IEM_TIMER_POLL_MIN_ITER /* paranoia */)
3459 {
3460 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollDefaultCalc);
3461 cItersTillNextPoll = iemPollTimersCalcDefaultCountdown(cNsDeltaAdj);
3462 }
3463 else if ( cNsSinceLast >= cNsDeltaAdj + cNsDeltaSlack
3464 || cNsSinceLast <= cNsDeltaAdj - cNsDeltaSlack)
3465 {
3466 if (cNsSinceLast >= cItersTillNextPoll)
3467 {
3468 uint32_t uFactor = (uint32_t)(cNsSinceLast + cItersTillNextPoll - 1) / cItersTillNextPoll;
3469 cItersTillNextPoll = cNsDeltaAdj / uFactor;
3470 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorDivision, uFactor);
3471 }
3472 else
3473 {
3474 uint32_t uFactor = cItersTillNextPoll / (uint32_t)cNsSinceLast;
3475 cItersTillNextPoll = cNsDeltaAdj * uFactor;
3476 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorMultiplication, uFactor);
3477 }
3478
3479 if (cItersTillNextPoll >= IEM_TIMER_POLL_MIN_ITER)
3480 {
3481 if (cItersTillNextPoll <= IEM_TIMER_POLL_MAX_ITER)
3482 { /* likely */ }
3483 else
3484 {
3485 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollMax);
3486 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3487 }
3488 }
3489 else
3490 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3491 }
3492 else
3493 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollUnchanged);
3494 }
3495 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3496 }
3497#else
3498/** Poll timers every 400 us / 2500 Hz. (source: thin air) */
3499# define IEM_TIMER_POLL_IDEAL_NS (400U * RT_NS_1US)
3500 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3501 uint32_t const cNsIdealPollInterval = IEM_TIMER_POLL_IDEAL_NS;
3502 int64_t const nsFromIdeal = cNsSinceLast - cNsIdealPollInterval;
3503 if (nsFromIdeal < 0)
3504 {
3505 if ((uint64_t)-nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll < _64K)
3506 {
3507 cItersTillNextPoll += cItersTillNextPoll / 8;
3508 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3509 }
3510 }
3511 else
3512 {
3513 if ((uint64_t)nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll > 256)
3514 {
3515 cItersTillNextPoll -= cItersTillNextPoll / 8;
3516 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3517 }
3518 }
3519#endif
3520 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillNextPoll;
3521
3522 /*
3523 * Repeat the IRQ and FF checks.
3524 */
3525 if (cNsDelta > 0)
3526 {
3527 uint32_t fCpu = pVCpu->fLocalForcedActions;
3528 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3529 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3530 | VMCPU_FF_TLB_FLUSH
3531 | VMCPU_FF_UNHALT );
3532 if (RT_LIKELY( ( !fCpu
3533 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3534 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3535 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx)) ) )
3536 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
3537 {
3538 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3539 return VINF_SUCCESS;
3540 }
3541 }
3542 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3543 return VINF_IEM_REEXEC_BREAK_FF;
3544}
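/*
 * Worked example for the adaptive rescaling above (illustrative numbers):
 * if the previous countdown of 1,000 iterations took cNsSinceLast = 2,000,000 ns
 * and the new deadline is cNsDeltaAdj = 500,000 ns, the division path computes
 * uFactor = ceil(2,000,000 / 1,000) = 2,000 ns per iteration and sets the next
 * countdown to 500,000 / 2,000 = 250 iterations, which is then clamped to the
 * [IEM_TIMER_POLL_MIN_ITER, IEM_TIMER_POLL_MAX_ITER] range.
 */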
3545
3546
3547/** Helper for iemTbExec. */
3548DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
3549{
3550 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_PC(uTbLookup, uRip);
3551 Assert(idx < pTb->cTbLookupEntries);
3552 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
3553}
3554
3555
3556/**
3557 * Executes a translation block.
3558 *
3559 * @returns Strict VBox status code.
3560 * @param pVCpu The cross context virtual CPU structure of the calling
3561 * thread.
3562 * @param pTb The translation block to execute.
3563 */
3564static IEM_DECL_MSC_GUARD_IGNORE VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
3565{
3566 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
3567
3568 /*
3569 * Set the current TB so CIMPL functions may get at it.
3570 */
3571 pVCpu->iem.s.pCurTbR3 = pTb;
3572 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
3573
3574 /*
3575 * Execute the block.
3576 */
3577#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3578 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
3579 {
3580 pVCpu->iem.s.cTbExecNative++;
3581 IEMTLBTRACE_TB_EXEC_N8VE(pVCpu, pTb);
3582# ifdef LOG_ENABLED
3583 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
3584# endif
3585
3586# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3587 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
3588# endif
3589# ifdef RT_ARCH_AMD64
3590 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
3591# else
3592 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
3593# endif
3594
3595# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3596 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3597# endif
3598# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3599 /* Restore FPCR/MXCSR if the TB modified it. */
3600 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3601 {
3602 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3603 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3604 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3605 }
3606# endif
3607# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3608 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
3609# endif
3610 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3611 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3612 { /* likely */ }
3613 else
3614 {
3615 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
3616 pVCpu->iem.s.pCurTbR3 = NULL;
3617
3618 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3619 only to break out of TB execution early. */
3620 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3621 {
3622 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
3623 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3624 }
3625
3626 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
3627 only to break out of TB execution early due to pending FFs. */
3628 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
3629 {
3630 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
3631 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3632 }
3633
3634 /* VINF_IEM_REEXEC_WITH_FLAGS needs to receive special treatment
3635 and converted to VINF_SUCCESS or whatever is appropriate. */
3636 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
3637 {
3638 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
3639 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
3640 }
3641
3642 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
3643 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3644 }
3645 }
3646 else
3647#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
3648 {
3649 /*
3650 * The threaded execution loop.
3651 */
3652 pVCpu->iem.s.cTbExecThreaded++;
3653 IEMTLBTRACE_TB_EXEC_THRD(pVCpu, pTb);
3654#ifdef LOG_ENABLED
3655 uint64_t uRipPrev = UINT64_MAX;
3656#endif
3657 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
3658 uint32_t cCallsLeft = pTb->Thrd.cCalls;
3659 while (cCallsLeft-- > 0)
3660 {
3661#ifdef LOG_ENABLED
3662 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
3663 {
3664 uRipPrev = pVCpu->cpum.GstCtx.rip;
3665 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
3666 }
3667 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
3668 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
3669 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
3670#endif
3671#ifdef VBOX_WITH_STATISTICS
3672 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
3673 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
3674#endif
3675 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
3676 pCallEntry->auParams[0],
3677 pCallEntry->auParams[1],
3678 pCallEntry->auParams[2]);
3679 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3680 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3681 pCallEntry++;
3682 else if (rcStrict == VINF_IEM_REEXEC_JUMP)
3683 {
3684 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
3685 Assert(cCallsLeft == 0);
3686 uint32_t const idxTarget = (uint32_t)pCallEntry->auParams[0];
3687 cCallsLeft = pTb->Thrd.cCalls;
3688 AssertBreak(idxTarget < cCallsLeft - 1);
3689 cCallsLeft -= idxTarget;
3690 pCallEntry = &pTb->Thrd.paCalls[idxTarget];
3691 AssertBreak(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET);
3692 }
3693 else
3694 {
3695 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
3696 pVCpu->iem.s.pCurTbR3 = NULL;
3697 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
3698 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
3699
3700 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3701 only to break out of TB execution early. */
3702 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3703 {
3704#ifdef VBOX_WITH_STATISTICS
3705 if (pCallEntry->uTbLookup)
3706 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
3707 else
3708 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
3709#endif
3710 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3711 }
3712 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3713 }
3714 }
3715
3716 /* Update the lookup entry. */
3717 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
3718 }
3719
3720 pVCpu->iem.s.cInstructions += pTb->cInstructions;
3721 pVCpu->iem.s.pCurTbR3 = NULL;
3722 return VINF_SUCCESS;
3723}
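/*
 * Note on the threaded dispatch above: every call entry is executed by
 * invoking g_apfnIemThreadedFunctions[enmFunction] with the vCPU and the
 * three uint64_t parameters recorded at compile time.  VINF_SUCCESS keeps
 * walking the call table, VINF_IEM_REEXEC_JUMP redirects execution to another
 * entry within the same TB, and any other status exits the TB early after
 * recording the lookup entry for the next TB.
 */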
3724
3725
3726/**
3727 * This is called when the PC doesn't match the current pbInstrBuf.
3728 *
3729 * Upon return, we're ready for opcode fetching. But please note that
3730 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
3731 * MMIO or unassigned).
3732 */
3733static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
3734{
3735 pVCpu->iem.s.pbInstrBuf = NULL;
3736 pVCpu->iem.s.offCurInstrStart = 0;
3737 pVCpu->iem.s.offInstrNextByte = 0;
3738 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
3739 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
3740}
3741
3742
3743/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
3744DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
3745{
3746 /*
3747 * Set uCurTbStartPc to RIP and calc the effective PC.
3748 */
3749 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
3750#if 0 /* unused */
3751 pVCpu->iem.s.uCurTbStartPc = uPc;
3752#endif
3753 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
3754 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
3755
3756 /*
3757 * Advance within the current buffer (PAGE) when possible.
3758 */
3759 if (pVCpu->iem.s.pbInstrBuf)
3760 {
3761 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
3762 if (off < pVCpu->iem.s.cbInstrBufTotal)
3763 {
3764 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3765 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
3766 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3767 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3768 else
3769 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3770
3771 return pVCpu->iem.s.GCPhysInstrBuf + off;
3772 }
3773 }
3774 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3775}
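/*
 * Note on the fast path above: the effective PC is RIP plus the CS base, and
 * as long as it still falls inside the currently mapped instruction buffer
 * (off < cbInstrBufTotal) only the buffer offsets are recalculated; otherwise
 * iemGetPcWithPhysAndCodeMissed() re-fetches and re-translates the page.
 */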
3776
3777
3778/**
3779 * Determines the extra IEMTB_F_XXX flags.
3780 *
3781 * @returns A mix of IEMTB_F_X86_INHIBIT_SHADOW, IEMTB_F_X86_INHIBIT_NMI and
3782 * IEMTB_F_X86_CS_LIM_CHECKS (or zero).
3783 * @param pVCpu The cross context virtual CPU structure of the calling
3784 * thread.
3785 */
3786DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3787{
3788 uint32_t fRet = 0;
3789
3790 /*
3791 * Determine the inhibit bits.
3792 */
3793 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3794 { /* typical */ }
3795 else
3796 {
3797 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3798 fRet |= IEMTB_F_X86_INHIBIT_SHADOW;
3799 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3800 fRet |= IEMTB_F_X86_INHIBIT_NMI;
3801 }
3802
3803 /*
3804 * Return IEMTB_F_X86_CS_LIM_CHECKS if the current PC is invalid or if it is
3805 * likely to go invalid before the end of the translation block.
3806 */
3807 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3808 return fRet;
3809
3810 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3811 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3812 return fRet;
3813 return fRet | IEMTB_F_X86_CS_LIM_CHECKS;
3814}
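/*
 * Example of the CS limit heuristic above: flat modes skip the check
 * entirely; otherwise, with a zero CS base, IEMTB_F_X86_CS_LIM_CHECKS is only
 * added when CS.LIM - EIP is less than one guest page plus 16 bytes (0x1010),
 * i.e. when the limit could plausibly be hit before the TB ends.  A non-zero
 * base's page offset shrinks that threshold accordingly.
 */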
3815
3816
3817VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu, bool fWasHalted)
3818{
3819 /*
3820 * See if there is an interrupt pending in TRPM, inject it if we can.
3821 */
3822 if (!TRPMHasTrap(pVCpu))
3823 { /* likely */ }
3824 else
3825 {
3826 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3827 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3828 { /*likely */ }
3829 else
3830 return rcStrict;
3831 }
3832
3833 /*
3834 * Init the execution environment.
3835 */
3836#if 1 /** @todo this seems like a good idea, however if we ever share memory
3837 * directly with other threads on the host, it isn't necessarily... */
3838 if (pVM->cCpus == 1)
3839 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3840 else
3841#endif
3842 iemInitExec(pVCpu, 0 /*fExecOpts*/);
3843
3844 if (RT_LIKELY(!fWasHalted && pVCpu->iem.s.msRecompilerPollNow != 0))
3845 { }
3846 else
3847 {
3848 /* Do polling after halt and the first time we get here. */
3849#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3850 uint64_t nsNow = 0;
3851 uint32_t const cItersTillPoll = iemPollTimersCalcDefaultCountdown(TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow));
3852 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillPoll;
3853 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillPoll;
3854#else
3855 uint64_t const nsNow = TMVirtualGetNoCheck(pVM);
3856#endif
3857 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3858 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3859 }
3860 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3861
3862 /*
3863 * Run-loop.
3864 *
3865 * If we're using setjmp/longjmp we combine all the catching here to avoid
3866 * having to call setjmp for each block we're executing.
3867 */
3868 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3869 for (;;)
3870 {
3871 VBOXSTRICTRC rcStrict;
3872 IEM_TRY_SETJMP(pVCpu, rcStrict)
3873 {
3874 for (;;)
3875 {
3876 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3877 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3878 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3879 {
3880 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3881 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3882 if (pTb)
3883 rcStrict = iemTbExec(pVCpu, pTb);
3884 else
3885 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3886 }
3887 else
3888 {
3889 /* This can only happen if the current PC cannot be translated into a
3890 host pointer, which means we're in MMIO or unmapped memory... */
3891#if defined(VBOX_STRICT) && defined(IN_RING3)
3892 rcStrict = DBGFSTOP(pVM);
3893 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3894 return rcStrict;
3895#endif
3896 rcStrict = IEMExecLots(pVCpu, 2048, 511, NULL);
3897 }
3898 if (rcStrict == VINF_SUCCESS)
3899 {
3900 Assert(pVCpu->iem.s.cActiveMappings == 0);
3901
3902 /* Note! This IRQ/FF check is repeated in iemPollTimers, iemThreadedFunc_BltIn_CheckIrq
3903 and emitted by iemNativeRecompFunc_BltIn_CheckIrq. */
3904 uint64_t fCpu = pVCpu->fLocalForcedActions;
3905 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3906 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3907 | VMCPU_FF_TLB_FLUSH
3908 | VMCPU_FF_UNHALT );
3909 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3910 if (RT_LIKELY( ( !fCpu
3911 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3912 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3913 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3914 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3915 {
3916 /* Once in a while we need to poll timers here. */
3917 if ((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
3918 { /* likely */ }
3919 else
3920 {
3921 int rc = iemPollTimers(pVM, pVCpu);
3922 if (rc != VINF_SUCCESS)
3923 return VINF_SUCCESS;
3924 }
3925 }
3926 else
3927 return VINF_SUCCESS;
3928 }
3929 else
3930 return rcStrict;
3931 }
3932 }
3933 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3934 {
3935 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3936 pVCpu->iem.s.cLongJumps++;
3937#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3938 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3939#endif
3940 if (pVCpu->iem.s.cActiveMappings > 0)
3941 iemMemRollback(pVCpu);
3942
3943#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3944 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3945 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3946 {
3947 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3948# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3949 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3950 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3951# endif
3952
3953#ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3954 /* Restore FPCR/MXCSR if the TB modified it. */
3955 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3956 {
3957 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3958 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3959 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3960 }
3961#endif
3962 }
3963#endif
3964
3965#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3966 /* If pTb isn't NULL we're in iemTbExec. */
3967 if (!pTb)
3968 {
3969 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3970 pTb = pVCpu->iem.s.pCurTbR3;
3971 if (pTb)
3972 {
3973 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3974 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3975 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3976 }
3977 }
3978#endif
3979 pVCpu->iem.s.pCurTbR3 = NULL;
3980 return rcStrict;
3981 }
3982 IEM_CATCH_LONGJMP_END(pVCpu);
3983 }
3984}
3985