VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp@ 97572

Last change on this file since 97572 was 97562, checked in by vboxsync, 2 years ago

VMM/HMVMX,CPUM: Added a HM/AlwaysInterceptVmxMovDRx config for controlling how we deal with DR6.RTM & DR7.RTM and similar. Current default (-1) is the old behaviour of ignoring the issue, will change that to hide these new bits in a few hours.

1/* $Id: HMVMXR0.cpp 97562 2022-11-16 02:34:26Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2022 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_HM
33#define VMCPU_INCL_CPUM_GST_CTX
34#include <iprt/x86.h>
35#include <iprt/asm-amd64-x86.h>
36#include <iprt/thread.h>
37#include <iprt/mem.h>
38#include <iprt/mp.h>
39
40#include <VBox/vmm/pdmapi.h>
41#include <VBox/vmm/dbgf.h>
42#include <VBox/vmm/iem.h>
43#include <VBox/vmm/iom.h>
44#include <VBox/vmm/tm.h>
45#include <VBox/vmm/em.h>
46#include <VBox/vmm/gcm.h>
47#include <VBox/vmm/gim.h>
48#include <VBox/vmm/apic.h>
49#include "HMInternal.h"
50#include <VBox/vmm/vmcc.h>
51#include <VBox/vmm/hmvmxinline.h>
52#include "HMVMXR0.h"
53#include "VMXInternal.h"
54#include "dtrace/VBoxVMM.h"
55
56
57/*********************************************************************************************************************************
58* Defined Constants And Macros *
59*********************************************************************************************************************************/
60#ifdef DEBUG_ramshankar
61# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
62# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
63# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
64# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
65# define HMVMX_ALWAYS_CLEAN_TRANSIENT
66# define HMVMX_ALWAYS_CHECK_GUEST_STATE
67# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
68# define HMVMX_ALWAYS_TRAP_PF
69# define HMVMX_ALWAYS_FLUSH_TLB
70# define HMVMX_ALWAYS_SWAP_EFER
71#endif
72
73/** Enables the fAlwaysInterceptMovDRx related code. */
74#define VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX 1
75
76
77/*********************************************************************************************************************************
78* Structures and Typedefs *
79*********************************************************************************************************************************/
80/**
81 * VMX page allocation information.
82 */
83typedef struct
84{
85 uint32_t fValid; /**< Whether to allocate this page (e.g., based on a CPU feature). */
86 uint32_t uPadding0; /**< Padding to ensure an array of these structs is aligned to a multiple of 8. */
87 PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
88 PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
89} VMXPAGEALLOCINFO;
90/** Pointer to VMX page-allocation info. */
91typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
92/** Pointer to a const VMX page-allocation info. */
93typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
94AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
95
96
97/*********************************************************************************************************************************
98* Internal Functions *
99*********************************************************************************************************************************/
100static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
101static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
102
103
104/*********************************************************************************************************************************
105* Global Variables *
106*********************************************************************************************************************************/
107/** The DR6 value after writing zero to the register.
108 * Set by VMXR0GlobalInit(). */
109static uint64_t g_fDr6Zeroed = 0;
110
111
112/**
113 * Checks if the given MSR is part of the lastbranch-from-IP MSR stack.
114 * @returns @c true if it's part of LBR stack, @c false otherwise.
115 *
116 * @param pVM The cross context VM structure.
117 * @param idMsr The MSR.
118 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
119 * Optional, can be NULL.
120 *
121 * @remarks Must only be called when LBR is enabled.
122 */
123DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
124{
125 Assert(pVM->hmr0.s.vmx.fLbr);
126 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
127 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
128 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
129 if (idxMsr < cLbrStack)
130 {
131 if (pidxMsr)
132 *pidxMsr = idxMsr;
133 return true;
134 }
135 return false;
136}
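/* Illustrative example (the actual MSR numbers depend on the CPU): if the last-branch-from-IP
   stack spans MSRs 0x680..0x68f (idLbrFromIpMsrFirst = 0x680, idLbrFromIpMsrLast = 0x68f),
   then idMsr = 0x683 lies within the stack and *pidxMsr, if given, is set to 3. */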
137
138
139/**
140 * Checks if the given MSR is part of the lastbranch-to-IP MSR stack.
141 * @returns @c true if it's part of LBR stack, @c false otherwise.
142 *
143 * @param pVM The cross context VM structure.
144 * @param idMsr The MSR.
145 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
146 * Optional, can be NULL.
147 *
148 * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs
149 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
150 */
151DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
152{
153 Assert(pVM->hmr0.s.vmx.fLbr);
154 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
155 {
156 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
157 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
158 if (idxMsr < cLbrStack)
159 {
160 if (pidxMsr)
161 *pidxMsr = idxMsr;
162 return true;
163 }
164 }
165 return false;
166}
167
168
169/**
170 * Gets the active (in use) VMCS info. object for the specified VCPU.
171 *
172 * This is either the guest or nested-guest VMCS info. and need not necessarily
173 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
174 * if the VM-entry failed due to an invalid-guest state, we may have "cleared" the
175 * current VMCS while returning to ring-3. However, the VMCS info. object for that
176 * VMCS would still be active and returned here so that we could dump the VMCS
177 * fields to ring-3 for diagnostics. This function is thus only used to
178 * distinguish between the nested-guest and guest VMCS.
179 *
180 * @returns The active VMCS information.
181 * @param pVCpu The cross context virtual CPU structure.
182 *
183 * @thread EMT.
184 * @remarks This function may be called with preemption or interrupts disabled!
185 */
186DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
187{
188 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
189 return &pVCpu->hmr0.s.vmx.VmcsInfo;
190 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
191}
192
193
194/**
195 * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
196 * area.
197 *
198 * @returns @c true if it's different, @c false otherwise.
199 * @param pVmcsInfo The VMCS info. object.
200 */
201DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
202{
203 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
204 && pVmcsInfo->pvGuestMsrStore);
205}
206
207
208/**
209 * Sets the given Processor-based VM-execution controls.
210 *
211 * @param pVmxTransient The VMX-transient structure.
212 * @param uProcCtls The Processor-based VM-execution controls to set.
213 */
214static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
215{
216 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
217 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
218 {
219 pVmcsInfo->u32ProcCtls |= uProcCtls;
220 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
221 AssertRC(rc);
222 }
223}
224
225
226/**
227 * Removes the given Processor-based VM-execution controls.
228 *
229 * @param pVCpu The cross context virtual CPU structure.
230 * @param pVmxTransient The VMX-transient structure.
231 * @param uProcCtls The Processor-based VM-execution controls to remove.
232 *
233 * @remarks When executing a nested-guest, this will not remove any of the specified
234 * controls if the nested hypervisor has set any one of them.
235 */
236static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
237{
238 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
239 if (pVmcsInfo->u32ProcCtls & uProcCtls)
240 {
241#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
242 if ( !pVmxTransient->fIsNestedGuest
243 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
244#else
245 NOREF(pVCpu);
246 if (!pVmxTransient->fIsNestedGuest)
247#endif
248 {
249 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
250 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
251 AssertRC(rc);
252 }
253 }
254}
255
256
257/**
258 * Sets the TSC offset for the current VMCS.
259 *
260 * @param uTscOffset The TSC offset to set.
261 * @param pVmcsInfo The VMCS info. object.
262 */
263static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
264{
265 if (pVmcsInfo->u64TscOffset != uTscOffset)
266 {
267 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
268 AssertRC(rc);
269 pVmcsInfo->u64TscOffset = uTscOffset;
270 }
271}
272
273
274/**
275 * Loads the VMCS specified by the VMCS info. object.
276 *
277 * @returns VBox status code.
278 * @param pVmcsInfo The VMCS info. object.
279 *
280 * @remarks Can be called with interrupts disabled.
281 */
282static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
283{
284 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
285 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
286
287 int rc = VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
288 if (RT_SUCCESS(rc))
289 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
290 return rc;
291}
292
293
294/**
295 * Clears the VMCS specified by the VMCS info. object.
296 *
297 * @returns VBox status code.
298 * @param pVmcsInfo The VMCS info. object.
299 *
300 * @remarks Can be called with interrupts disabled.
301 */
302static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
303{
304 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
305 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
306
307 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
308 if (RT_SUCCESS(rc))
309 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
310 return rc;
311}
312
313
314/**
315 * Checks whether the MSR belongs to the set of guest MSRs that we restore
316 * lazily while leaving VT-x.
317 *
318 * @returns true if it does, false otherwise.
319 * @param pVCpu The cross context virtual CPU structure.
320 * @param idMsr The MSR to check.
321 */
322static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
323{
324 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
325 {
326 switch (idMsr)
327 {
328 case MSR_K8_LSTAR:
329 case MSR_K6_STAR:
330 case MSR_K8_SF_MASK:
331 case MSR_K8_KERNEL_GS_BASE:
332 return true;
333 }
334 }
335 return false;
336}
337
338
339/**
340 * Loads a set of guest MSRs to allow read/write passthru access to the guest.
341 *
342 * The name of this function is slightly confusing. This function does NOT
343 * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
344 * common prefix for functions dealing with "lazy restoration" of the shared
345 * MSRs.
346 *
347 * @param pVCpu The cross context virtual CPU structure.
348 *
349 * @remarks No-long-jump zone!!!
350 */
351static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
352{
353 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
354 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
355
356 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
357 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
358 {
359 /*
360 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
361 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
362 * we can skip a few MSR writes.
363 *
364 * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
365 * guest MSR values in the guest-CPU context might be different to what's currently
366 * loaded in the CPU. In either case, we need to write the new guest MSR values to the
367 * CPU, see @bugref{8728}.
368 */
369 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
370 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
371 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
372 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
373 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
374 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
375 {
376#ifdef VBOX_STRICT
377 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
378 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
379 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
380 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
381#endif
382 }
383 else
384 {
385 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
386 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
387 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
388 /* The system call flag mask register isn't as benign and accepting of all
389 values as the above, so mask it to avoid #GP'ing on corrupted input. */
390 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
391 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
392 }
393 }
394 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
395}
396
397
398/**
399 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
400 *
401 * @returns @c true if found, @c false otherwise.
402 * @param pVmcsInfo The VMCS info. object.
403 * @param idMsr The MSR to find.
404 */
405static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
406{
407 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
408 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
409 Assert(pMsrs);
410 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
411 for (uint32_t i = 0; i < cMsrs; i++)
412 {
413 if (pMsrs[i].u32Msr == idMsr)
414 return true;
415 }
416 return false;
417}
418
419
420/**
421 * Performs lazy restoration of the set of host MSRs if they were previously
422 * loaded with guest MSR values.
423 *
424 * @param pVCpu The cross context virtual CPU structure.
425 *
426 * @remarks No-long-jump zone!!!
427 * @remarks The guest MSRs should have been saved back into the guest-CPU
428 * context by hmR0VmxImportGuestState()!!!
429 */
430static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
431{
432 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
433 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
434
435 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
436 {
437 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
438 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
439 {
440 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
441 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
442 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
443 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
444 }
445 }
446 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
447}
448
449
450/**
451 * Sets pfnStartVm to the best suited variant.
452 *
453 * This must be called whenever anything changes relative to the hmR0VmxStartVm
454 * variant selection:
455 * - pVCpu->hm.s.fLoadSaveGuestXcr0
456 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
457 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
458 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
459 * - Perhaps: CPUMCTX.fXStateMask (windows only)
460 *
461 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
462 * can be changed at runtime.
463 */
464static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
465{
466 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
467 {
468 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
469 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
470 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
471 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
472 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
473 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
474 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
475 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
476 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
477 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
478 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
479 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
480 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
481 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
482 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
483 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
484 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
485 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
486 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
487 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
488 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
489 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
490 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
491 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
492 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
493 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
494 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
495 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
496 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
497 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
498 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
499 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
500 };
501 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
502 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
503 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
504 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
505 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
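 /* For example (assuming only the flags tested above): a VCPU needing XCR0 swapping and an
    IBPB on VM-entry, with no other mitigations, yields idx = 1 | 2 = 3, which selects
    hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit. */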
506 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
507 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
508 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
509}
510
511
512/**
513 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
514 * stack.
515 *
516 * @returns Strict VBox status code (i.e. informational status codes too).
517 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
518 * @param pVCpu The cross context virtual CPU structure.
519 * @param uValue The value to push to the guest stack.
520 */
521static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
522{
523 /*
524 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
525 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
526 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
527 */
528 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
529 if (pCtx->sp == 1)
530 return VINF_EM_RESET;
531 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
532 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
533 AssertRC(rc);
534 return rc;
535}
536
537
538/**
539 * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
540 * unreferenced local parameters in the template code...
541 */
542DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
543{
544 RT_NOREF(pVCpu);
545 return VMXWriteVmcs16(uFieldEnc, u16Val);
546}
547
548
549/**
550 * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
551 * unreferenced local parameters in the template code...
552 */
553DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
554{
555 RT_NOREF(pVCpu);
556 return VMXWriteVmcs32(uFieldEnc, u32Val);
557}
558
559
560/**
561 * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
562 * unreferenced local parameters in the template code...
563 */
564DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
565{
566 RT_NOREF(pVCpu);
567 return VMXWriteVmcs64(uFieldEnc, u64Val);
568}
569
570
571/**
572 * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
573 * unreferenced local parameters in the template code...
574 */
575DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
576{
577 RT_NOREF(pVCpu);
578 return VMXReadVmcs16(uFieldEnc, pu16Val);
579}
580
581
582/**
583 * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
584 * unreferenced local parameters in the template code...
585 */
586DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
587{
588 RT_NOREF(pVCpu);
589 return VMXReadVmcs32(uFieldEnc, pu32Val);
590}
591
592
593/**
594 * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
595 * unreferenced local parameters in the template code...
596 */
597DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
598{
599 RT_NOREF(pVCpu);
600 return VMXReadVmcs64(uFieldEnc, pu64Val);
601}
602
603
604/*
605 * Instantiate the code we share with the NEM darwin backend.
606 */
607#define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s
608#define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s
609
610#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
611#define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging
612#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
613#define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr
614
615#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
616#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
617#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
618#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
619
620#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
621#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
622#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
623#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
624
625#include "../VMMAll/VMXAllTemplate.cpp.h"
626
627#undef VMX_VMCS_WRITE_16
628#undef VMX_VMCS_WRITE_32
629#undef VMX_VMCS_WRITE_64
630#undef VMX_VMCS_WRITE_NW
631
632#undef VMX_VMCS_READ_16
633#undef VMX_VMCS_READ_32
634#undef VMX_VMCS_READ_64
635#undef VMX_VMCS_READ_NW
636
637#undef VM_IS_VMX_PREEMPT_TIMER_USED
638#undef VM_IS_VMX_NESTED_PAGING
639#undef VM_IS_VMX_UNRESTRICTED_GUEST
640#undef VCPU_2_VMXSTATS
641#undef VCPU_2_VMXSTATE
642
643
644/**
645 * Updates the VM's last error record.
646 *
647 * If there was a VMX instruction error, reads the error data from the VMCS and
648 * updates VCPU's last error record as well.
649 *
650 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
651 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
652 * VERR_VMX_INVALID_VMCS_FIELD.
653 * @param rc The error code.
654 */
655static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
656{
657 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
658 || rc == VERR_VMX_UNABLE_TO_START_VM)
659 {
660 AssertPtrReturnVoid(pVCpu);
661 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
662 }
663 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
664}
665
666
667/**
668 * Enters VMX root mode operation on the current CPU.
669 *
670 * @returns VBox status code.
671 * @param pHostCpu The HM physical-CPU structure.
672 * @param pVM The cross context VM structure. Can be
673 * NULL, after a resume.
674 * @param HCPhysCpuPage Physical address of the VMXON region.
675 * @param pvCpuPage Pointer to the VMXON region.
676 */
677static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
678{
679 Assert(pHostCpu);
680 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
681 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
682 Assert(pvCpuPage);
683 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
684
685 if (pVM)
686 {
687 /* Write the VMCS revision identifier to the VMXON region. */
688 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
689 }
690
691 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
692 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
693
694 /* Enable the VMX bit in CR4 if necessary. */
695 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
696
697 /* Record whether VMXE was already set prior to us enabling it above. */
698 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
699
700 /* Enter VMX root mode. */
701 int rc = VMXEnable(HCPhysCpuPage);
702 if (RT_FAILURE(rc))
703 {
704 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
705 if (!pHostCpu->fVmxeAlreadyEnabled)
706 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
707
708 if (pVM)
709 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
710 }
711
712 /* Restore interrupts. */
713 ASMSetFlags(fEFlags);
714 return rc;
715}
716
717
718/**
719 * Exits VMX root mode operation on the current CPU.
720 *
721 * @returns VBox status code.
722 * @param pHostCpu The HM physical-CPU structure.
723 */
724static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
725{
726 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
727
728 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
729 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
730
731 /* If we're for some reason not in VMX root mode, then don't leave it. */
732 RTCCUINTREG const uHostCr4 = ASMGetCR4();
733
734 int rc;
735 if (uHostCr4 & X86_CR4_VMXE)
736 {
737 /* Exit VMX root mode and clear the VMX bit in CR4. */
738 VMXDisable();
739
740 /* Clear CR4.VMXE only if it was clear prior to us setting it. */
741 if (!pHostCpu->fVmxeAlreadyEnabled)
742 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
743
744 rc = VINF_SUCCESS;
745 }
746 else
747 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
748
749 /* Restore interrupts. */
750 ASMSetFlags(fEFlags);
751 return rc;
752}
753
754
755/**
756 * Allocates pages as specified by an array of VMX page allocation info
757 * objects.
758 *
759 * The pages' contents are zeroed after allocation.
760 *
761 * @returns VBox status code.
762 * @param phMemObj Where to return the handle to the allocation.
763 * @param paAllocInfo The pointer to the first element of the VMX
764 * page-allocation info object array.
765 * @param cEntries The number of elements in the @a paAllocInfo array.
766 */
767static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
768{
769 *phMemObj = NIL_RTR0MEMOBJ;
770
771 /* Figure out how many pages to allocate. */
772 uint32_t cPages = 0;
773 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
774 cPages += !!paAllocInfo[iPage].fValid;
775
776 /* Allocate the pages. */
777 if (cPages)
778 {
779 size_t const cbPages = cPages << HOST_PAGE_SHIFT;
780 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
781 if (RT_FAILURE(rc))
782 return rc;
783
784 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
785 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
786 RT_BZERO(pvFirstPage, cbPages);
787
788 uint32_t iPage = 0;
789 for (uint32_t i = 0; i < cEntries; i++)
790 if (paAllocInfo[i].fValid)
791 {
792 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
793 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
794 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
795 AssertPtr(pvPage);
796
797 Assert(paAllocInfo[iPage].pHCPhys);
798 Assert(paAllocInfo[iPage].ppVirt);
799 *paAllocInfo[iPage].pHCPhys = HCPhysPage;
800 *paAllocInfo[iPage].ppVirt = pvPage;
801
802 /* Move to next page. */
803 ++iPage;
804 }
805
806 /* Make sure all valid (requested) pages have been assigned. */
807 Assert(iPage == cPages);
808 }
809 return VINF_SUCCESS;
810}
811
812
813/**
814 * Frees pages allocated using hmR0VmxPagesAllocZ.
815 *
816 * @param phMemObj Pointer to the memory object handle. Will be set to
817 * NIL.
818 */
819DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
820{
821 /* We can cleanup wholesale since it's all one allocation. */
822 if (*phMemObj != NIL_RTR0MEMOBJ)
823 {
824 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
825 *phMemObj = NIL_RTR0MEMOBJ;
826 }
827}
828
829
830/**
831 * Initializes a VMCS info. object.
832 *
833 * @param pVmcsInfo The VMCS info. object.
834 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
835 */
836static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
837{
838 RT_ZERO(*pVmcsInfo);
839 RT_ZERO(*pVmcsInfoShared);
840
841 pVmcsInfo->pShared = pVmcsInfoShared;
842 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
843 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
844 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
845 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
846 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
847 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
848 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
849 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
850 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
851 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
852 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
853 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
854}
855
856
857/**
858 * Frees the VT-x structures for a VMCS info. object.
859 *
860 * @param pVmcsInfo The VMCS info. object.
861 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
862 */
863static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
864{
865 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
866 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
867}
868
869
870/**
871 * Allocates the VT-x structures for a VMCS info. object.
872 *
873 * @returns VBox status code.
874 * @param pVCpu The cross context virtual CPU structure.
875 * @param pVmcsInfo The VMCS info. object.
876 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
877 *
878 * @remarks The caller is expected to take care of any and all allocation failures.
879 * This function will not perform any cleanup for failures half-way
880 * through.
881 */
882static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
883{
884 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
885
886 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
887 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
888 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
889 VMXPAGEALLOCINFO aAllocInfo[] =
890 {
891 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
892 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
893 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
894 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
895 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
896 };
897
898 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
899 if (RT_FAILURE(rc))
900 return rc;
901
902 /*
903 * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas, because
904 * they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
905 */
906 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
907 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
908 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
909 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
910
911 /*
912 * Get the virtual-APIC page rather than allocating it again.
913 */
914 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
915 {
916 if (!fIsNstGstVmcs)
917 {
918 if (PDMHasApic(pVM))
919 {
920 rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
921 if (RT_FAILURE(rc))
922 return rc;
923 Assert(pVmcsInfo->pbVirtApic);
924 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
925 }
926 }
927 else
928 {
929 /* These are set up later while merging the nested-guest VMCS. */
930 Assert(pVmcsInfo->pbVirtApic == NULL);
931 Assert(pVmcsInfo->HCPhysVirtApic == NIL_RTHCPHYS);
932 }
933 }
934
935 return VINF_SUCCESS;
936}
937
938
939/**
940 * Frees all VT-x structures for the VM.
941 *
943 * @param pVM The cross context VM structure.
944 */
945static void hmR0VmxStructsFree(PVMCC pVM)
946{
947 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
948#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
949 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
950 {
951 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
952 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
953 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
954 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
955 }
956#endif
957
958 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
959 {
960 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
961 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
962#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
963 if (pVM->cpum.ro.GuestFeatures.fVmx)
964 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
965#endif
966 }
967}
968
969
970/**
971 * Allocates all VT-x structures for the VM.
972 *
973 * @returns IPRT status code.
974 * @param pVM The cross context VM structure.
975 *
976 * @remarks This function will clean up on memory allocation failures.
977 */
978static int hmR0VmxStructsAlloc(PVMCC pVM)
979{
980 /*
981 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
982 * The VMCS size cannot be more than 4096 bytes.
983 *
984 * See Intel spec. Appendix A.1 "Basic VMX Information".
985 */
986 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
987 if (cbVmcs <= X86_PAGE_4K_SIZE)
988 { /* likely */ }
989 else
990 {
991 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
992 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
993 }
994
995 /*
996 * Allocate per-VM VT-x structures.
997 */
998 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
999 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
1000 VMXPAGEALLOCINFO aAllocInfo[] =
1001 {
1002 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
1003 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
1004 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
1005#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1006 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
1007#endif
1008 };
1009
1010 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
1011 if (RT_SUCCESS(rc))
1012 {
1013#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1014 /* Allocate the shadow VMCS-fields array. */
1015 if (fUseVmcsShadowing)
1016 {
1017 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
1018 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
1019 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1020 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1021 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
1022 rc = VERR_NO_MEMORY;
1023 }
1024#endif
1025
1026 /*
1027 * Allocate per-VCPU VT-x structures.
1028 */
1029 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
1030 {
1031 /* Allocate the guest VMCS structures. */
1032 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1033 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
1034
1035#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1036 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
1037 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
1038 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
1039#endif
1040 }
1041 if (RT_SUCCESS(rc))
1042 return VINF_SUCCESS;
1043 }
1044 hmR0VmxStructsFree(pVM);
1045 return rc;
1046}
1047
1048
1049/**
1050 * Pre-initializes non-zero fields in VMX structures that will be allocated.
1051 *
1052 * @param pVM The cross context VM structure.
1053 */
1054static void hmR0VmxStructsInit(PVMCC pVM)
1055{
1056 /* Paranoia. */
1057 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
1058#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1059 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
1060#endif
1061
1062 /*
1063 * Initialize members up-front so we can cleanup en masse on allocation failures.
1064 */
1065#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1066 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
1067#endif
1068 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
1069 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
1070 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
1071 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1072 {
1073 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1074 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
1075 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
1076 }
1077}
1078
1079#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1080/**
1081 * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
1082 *
1083 * @returns @c true if the MSR is intercepted, @c false otherwise.
1084 * @param pbMsrBitmap The MSR bitmap.
1085 * @param offMsr The MSR byte offset.
1086 * @param iBit The bit offset from the byte offset.
1087 */
1088DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
1089{
1090 Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
1091 return ASMBitTest(pbMsrBitmap, (offMsr << 3) + iBit);
1092}
1093#endif
1094
1095/**
1096 * Sets the permission bits for the specified MSR in the given MSR bitmap.
1097 *
1098 * If the passed VMCS is a nested-guest VMCS, this function ensures that the
1099 * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
1100 * VMX execution of the nested-guest, only if the nested-guest is also not intercepting
1101 * the read/write access of this MSR.
1102 *
1103 * @param pVCpu The cross context virtual CPU structure.
1104 * @param pVmcsInfo The VMCS info. object.
1105 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1106 * @param idMsr The MSR value.
1107 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
1108 * include both a read -and- a write permission!
1109 *
1110 * @sa CPUMGetVmxMsrPermission.
1111 * @remarks Can be called with interrupts disabled.
1112 */
1113static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
1114{
1115 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
1116 Assert(pbMsrBitmap);
1117 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
1118
1119 /*
1120 * MSR-bitmap Layout:
1121 * Byte index MSR range Interpreted as
1122 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
1123 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
1124 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
1125 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
1126 *
1127 * A bit corresponding to an MSR within the above range causes a VM-exit
1128 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls outside
1129 * these ranges, it always causes a VM-exit.
1130 *
1131 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
1132 */
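 /* Worked example (illustrative): MSR_K8_LSTAR (0xc0000082) yields offMsr = 0x400 and
    iBit = 0x82 below, so its read-intercept bit lives at byte 0x400 + (0x82 >> 3) = 0x410,
    bit 2, and its write-intercept bit at byte 0xc10, bit 2. */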
1133 uint16_t const offBitmapRead = 0;
1134 uint16_t const offBitmapWrite = 0x800;
1135 uint16_t offMsr;
1136 int32_t iBit;
1137 if (idMsr <= UINT32_C(0x00001fff))
1138 {
1139 offMsr = 0;
1140 iBit = idMsr;
1141 }
1142 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
1143 {
1144 offMsr = 0x400;
1145 iBit = idMsr - UINT32_C(0xc0000000);
1146 }
1147 else
1148 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
1149
1150 /*
1151 * Set the MSR read permission.
1152 */
1153 uint16_t const offMsrRead = offBitmapRead + offMsr;
1154 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
1155 if (fMsrpm & VMXMSRPM_ALLOW_RD)
1156 {
1157#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1158 bool const fClear = !fIsNstGstVmcs ? true
1159 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
1160#else
1161 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1162 bool const fClear = true;
1163#endif
1164 if (fClear)
1165 ASMBitClear(pbMsrBitmap, (offMsrRead << 3) + iBit);
1166 }
1167 else
1168 ASMBitSet(pbMsrBitmap, (offMsrRead << 3) + iBit);
1169
1170 /*
1171 * Set the MSR write permission.
1172 */
1173 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
1174 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
1175 if (fMsrpm & VMXMSRPM_ALLOW_WR)
1176 {
1177#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1178 bool const fClear = !fIsNstGstVmcs ? true
1179 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
1180#else
1181 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1182 bool const fClear = true;
1183#endif
1184 if (fClear)
1185 ASMBitClear(pbMsrBitmap, (offMsrWrite << 3) + iBit);
1186 }
1187 else
1188 ASMBitSet(pbMsrBitmap, (offMsrWrite << 3) + iBit);
1189}
1190
1191
1192/**
1193 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1194 * area.
1195 *
1196 * @returns VBox status code.
1197 * @param pVCpu The cross context virtual CPU structure.
1198 * @param pVmcsInfo The VMCS info. object.
1199 * @param cMsrs The number of MSRs.
1200 */
1201static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
1202{
1203 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1204 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
1205 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
1206 {
1207 /* Commit the MSR counts to the VMCS and update the cache. */
1208 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
1209 {
1210 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1211 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
1212 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1213 pVmcsInfo->cEntryMsrLoad = cMsrs;
1214 pVmcsInfo->cExitMsrStore = cMsrs;
1215 pVmcsInfo->cExitMsrLoad = cMsrs;
1216 }
1217 return VINF_SUCCESS;
1218 }
1219
1220 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
1221 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1222 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1223}
1224
1225
1226/**
1227 * Adds a new (or updates the value of an existing) guest/host MSR
1228 * pair to be swapped during the world-switch as part of the
1229 * auto-load/store MSR area in the VMCS.
1230 *
1231 * @returns VBox status code.
1232 * @param pVCpu The cross context virtual CPU structure.
1233 * @param pVmxTransient The VMX-transient structure.
1234 * @param idMsr The MSR.
1235 * @param uGuestMsrValue Value of the guest MSR.
1236 * @param fSetReadWrite Whether to set the guest read/write access of this
1237 * MSR (thus not causing a VM-exit).
1238 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1239 * necessary.
1240 */
1241static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
1242 bool fSetReadWrite, bool fUpdateHostMsr)
1243{
1244 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1245 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1246 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1247 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1248 uint32_t i;
1249
1250 /* Paranoia. */
1251 Assert(pGuestMsrLoad);
1252
1253#ifndef DEBUG_bird
1254 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
1255#endif
1256
1257 /* Check if the MSR already exists in the VM-entry MSR-load area. */
1258 for (i = 0; i < cMsrs; i++)
1259 {
1260 if (pGuestMsrLoad[i].u32Msr == idMsr)
1261 break;
1262 }
1263
1264 bool fAdded = false;
1265 if (i == cMsrs)
1266 {
1267 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
1268 ++cMsrs;
1269 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1270 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
1271
1272 /* Set the guest to read/write this MSR without causing VM-exits. */
1273 if ( fSetReadWrite
1274 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
1275 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
1276
1277 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1278 fAdded = true;
1279 }
1280
1281 /* Update the MSR value for the newly added or already existing MSR. */
1282 pGuestMsrLoad[i].u32Msr = idMsr;
1283 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
1284
1285 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
1286 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1287 {
1288 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1289 pGuestMsrStore[i].u32Msr = idMsr;
1290 pGuestMsrStore[i].u64Value = uGuestMsrValue;
1291 }
1292
1293 /* Update the corresponding slot in the host MSR area. */
1294 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1295 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
1296 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
1297 pHostMsr[i].u32Msr = idMsr;
1298
1299 /*
1300 * Only if the caller requests to update the host MSR value AND we've newly added the
1301 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
1302 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
1303 *
1304 * We do this for performance reasons since reading MSRs may be quite expensive.
1305 */
1306 if (fAdded)
1307 {
1308 if (fUpdateHostMsr)
1309 {
1310 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1311 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1312 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
1313 }
1314 else
1315 {
1316 /* Someone else can do the work. */
1317 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1318 }
1319 }
1320 return VINF_SUCCESS;
1321}
1322
1323
1324/**
1325 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1326 * auto-load/store MSR area in the VMCS.
1327 *
1328 * @returns VBox status code.
1329 * @param pVCpu The cross context virtual CPU structure.
1330 * @param pVmxTransient The VMX-transient structure.
1331 * @param idMsr The MSR.
1332 */
1333static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
1334{
1335 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1336 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1337 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1338 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1339
1340#ifndef DEBUG_bird
1341 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
1342#endif
1343
1344 for (uint32_t i = 0; i < cMsrs; i++)
1345 {
1346 /* Find the MSR. */
1347 if (pGuestMsrLoad[i].u32Msr == idMsr)
1348 {
1349 /*
1350 * If it's the last MSR, we only need to reduce the MSR count.
1351 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
1352 */
1353 if (i < cMsrs - 1)
1354 {
1355 /* Remove it from the VM-entry MSR-load area. */
1356 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
1357 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
1358
1359 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
1360 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1361 {
1362 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1363 Assert(pGuestMsrStore[i].u32Msr == idMsr);
1364 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
1365 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
1366 }
1367
1368 /* Remove it from the VM-exit MSR-load area. */
1369 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1370 Assert(pHostMsr[i].u32Msr == idMsr);
1371 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
1372 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
1373 }
1374
1375 /* Reduce the count to reflect the removed MSR and bail. */
1376 --cMsrs;
1377 break;
1378 }
1379 }
1380
1381 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
1382 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
1383 {
1384 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1385 AssertRCReturn(rc, rc);
1386
1387 /* We're no longer swapping MSRs during the world-switch, so intercept guest reads/writes to them. */
1388 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1389 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
1390
1391 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1392 return VINF_SUCCESS;
1393 }
1394
1395 return VERR_NOT_FOUND;
1396}
1397
1398
1399/**
1400 * Updates the value of all host MSRs in the VM-exit MSR-load area.
1401 *
1402 * @param pVCpu The cross context virtual CPU structure.
1403 * @param pVmcsInfo The VMCS info. object.
1404 *
1405 * @remarks No-long-jump zone!!!
1406 */
1407static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1408{
1409 RT_NOREF(pVCpu);
1410 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1411
1412 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1413 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
1414 Assert(pHostMsrLoad);
1415 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
1416 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
1417 for (uint32_t i = 0; i < cMsrs; i++)
1418 {
1419 /*
1420 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
1421 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1422 */
1423 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
1424 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
1425 else
1426 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
1427 }
1428}
1429
1430
1431/**
1432 * Saves a set of host MSRs to allow read/write passthru access to the guest and
1433 * perform lazy restoration of the host MSRs while leaving VT-x.
1434 *
1435 * @param pVCpu The cross context virtual CPU structure.
1436 *
1437 * @remarks No-long-jump zone!!!
1438 */
1439static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
1440{
1441 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1442
1443 /*
1444 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
1445 */
1446 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
1447 {
1448 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
1449 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
1450 {
1451 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
1452 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
1453 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
1454 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1455 }
1456 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1457 }
1458}
1459
1460
1461#ifdef VBOX_STRICT
1462
1463/**
1464 * Verifies that our cached host EFER MSR value has not changed since we cached it.
1465 *
1466 * @param pVmcsInfo The VMCS info. object.
1467 */
1468static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
1469{
1470 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1471
1472 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
1473 {
1474 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
1475 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
1476 uint64_t uVmcsEferMsrVmcs;
1477 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
1478 AssertRC(rc);
1479
1480 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
1481 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
1482 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
1483 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
1484 }
1485}
1486
1487
1488/**
1489 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1490 * VMCS are correct.
1491 *
1492 * @param pVCpu The cross context virtual CPU structure.
1493 * @param pVmcsInfo The VMCS info. object.
1494 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1495 */
1496static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1497{
1498 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1499
1500 /* Read the various MSR-area counts from the VMCS. */
1501 uint32_t cEntryLoadMsrs;
1502 uint32_t cExitStoreMsrs;
1503 uint32_t cExitLoadMsrs;
1504 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
1505 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
1506 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
1507
1508 /* Verify all the MSR counts are the same. */
1509 Assert(cEntryLoadMsrs == cExitStoreMsrs);
1510 Assert(cExitStoreMsrs == cExitLoadMsrs);
1511 uint32_t const cMsrs = cExitLoadMsrs;
1512
1513 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
1514 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
1515
1516 /* Verify the MSR counts are within the allocated page size. */
1517 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
1518
1519 /* Verify the relevant contents of the MSR areas match. */
1520 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1521 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1522 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1523 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
1524 for (uint32_t i = 0; i < cMsrs; i++)
1525 {
1526 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1527 if (fSeparateExitMsrStorePage)
1528 {
1529 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
1530 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
1531 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
1532 }
1533
1534 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
1535 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
1536 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
1537
1538 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
1539 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
1540 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1541 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
1542
1543 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
1544 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
1545 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
1546 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
1547
1548 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
1549 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1550 {
1551 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
1552 if (fIsEferMsr)
1553 {
1554 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
1555 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
1556 }
1557 else
1558 {
1559 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
1560 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1561 if ( pVM->hmr0.s.vmx.fLbr
1562 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1563 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1564 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
1565 {
1566 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
1567 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
1568 pGuestMsrLoad->u32Msr, cMsrs));
1569 }
1570 else if (!fIsNstGstVmcs)
1571 {
1572 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
1573 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
1574 }
1575 else
1576 {
1577 /*
1578 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
1579 * execute a nested-guest with MSR passthrough.
1580 *
1581 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
1582 * allow passthrough too.
1583 */
1584 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
1585 Assert(pvMsrBitmapNstGst);
1586 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
1587 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
1588 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
1589 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
1590 }
1591 }
1592 }
1593
1594 /* Move to the next MSR. */
1595 pHostMsrLoad++;
1596 pGuestMsrLoad++;
1597 pGuestMsrStore++;
1598 }
1599}
1600
1601#endif /* VBOX_STRICT */
1602
1603/**
1604 * Flushes the TLB using EPT.
1605 *
1607 * @param pVCpu The cross context virtual CPU structure of the calling
1608 * EMT. Can be NULL depending on @a enmTlbFlush.
1609 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
1610 * enmTlbFlush.
1611 * @param enmTlbFlush Type of flush.
1612 *
1613 * @remarks Caller is responsible for making sure this function is called only
1614 * when NestedPaging is supported and providing @a enmTlbFlush that is
1615 * supported by the CPU.
1616 * @remarks Can be called with interrupts disabled.
1617 */
1618static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
1619{
1620 uint64_t au64Descriptor[2];
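     /* INVEPT descriptor (see Intel spec. "INVEPT - Invalidate Translations Derived from EPT"):
        qword 0 holds the EPT pointer (ignored for all-contexts flushes), qword 1 is reserved. */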
1621 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
1622 au64Descriptor[0] = 0;
1623 else
1624 {
1625 Assert(pVCpu);
1626 Assert(pVmcsInfo);
1627 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
1628 }
1629 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1630
1631 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
1632 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
1633
1634 if ( RT_SUCCESS(rc)
1635 && pVCpu)
1636 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1637}
1638
1639
1640/**
1641 * Flushes the TLB using VPID.
1642 *
1644 * @param pVCpu The cross context virtual CPU structure of the calling
1645 * EMT. Can be NULL depending on @a enmTlbFlush.
1646 * @param enmTlbFlush Type of flush.
1647 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1648 * on @a enmTlbFlush).
1649 *
1650 * @remarks Can be called with interrupts disabled.
1651 */
1652static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
1653{
1654 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
1655
1656 uint64_t au64Descriptor[2];
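     /* INVVPID descriptor: the low 16 bits of qword 0 hold the VPID, qword 1 holds the linear
        address (used only for individual-address flushes). */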
1657 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1658 {
1659 au64Descriptor[0] = 0;
1660 au64Descriptor[1] = 0;
1661 }
1662 else
1663 {
1664 AssertPtr(pVCpu);
1665 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1666 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1667 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
1668 au64Descriptor[1] = GCPtr;
1669 }
1670
1671 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
1672 AssertMsg(rc == VINF_SUCCESS,
1673 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
1674
1675 if ( RT_SUCCESS(rc)
1676 && pVCpu)
1677 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1678 NOREF(rc);
1679}
1680
1681
1682/**
1683 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
1684 * otherwise there is nothing really to invalidate.
1685 *
1686 * @returns VBox status code.
1687 * @param pVCpu The cross context virtual CPU structure.
1688 * @param GCVirt Guest virtual address of the page to invalidate.
1689 */
1690VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
1691{
1692 AssertPtr(pVCpu);
1693 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
1694
1695 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
1696 {
1697 /*
1698          * We must invalidate the guest TLB entry in either case; we cannot ignore it even for
1699 * the EPT case. See @bugref{6043} and @bugref{6177}.
1700 *
1701 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
1702          * as this function may be called in a loop with individual addresses.
1703 */
1704 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1705 if (pVM->hmr0.s.vmx.fVpid)
1706 {
1707 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1708 {
1709 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
1710 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1711 }
1712 else
1713 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1714 }
1715 else if (pVM->hmr0.s.fNestedPaging)
1716 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1717 }
1718
1719 return VINF_SUCCESS;
1720}
1721
1722
1723/**
1724 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1725 * case where neither EPT nor VPID is supported by the CPU.
1726 *
1727 * @param pHostCpu The HM physical-CPU structure.
1728 * @param pVCpu The cross context virtual CPU structure.
1729 *
1730 * @remarks Called with interrupts disabled.
1731 */
1732static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1733{
1734 AssertPtr(pVCpu);
1735 AssertPtr(pHostCpu);
1736
1737 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1738
1739 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1740 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1741 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1742 pVCpu->hmr0.s.fForceTLBFlush = false;
1743 return;
1744}
1745
1746
1747/**
1748 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1749 *
1750 * @param pHostCpu The HM physical-CPU structure.
1751 * @param pVCpu The cross context virtual CPU structure.
1752 * @param pVmcsInfo The VMCS info. object.
1753 *
1754  * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
1755  *          nomenclature. This is to avoid confusion in comparisons, since the
1756  *          host-CPU copies are named "ASID".
1757 *
1758 * @remarks Called with interrupts disabled.
1759 */
1760static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1761{
1762#ifdef VBOX_WITH_STATISTICS
1763 bool fTlbFlushed = false;
1764# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1765# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1766 if (!fTlbFlushed) \
1767 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1768 } while (0)
1769#else
1770# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1771# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1772#endif
1773
1774 AssertPtr(pVCpu);
1775 AssertPtr(pHostCpu);
1776 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1777
1778 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1779 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
1780 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1781 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
1782
1783 /*
1784 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
1785      * ran on last. If the TLB flush count changed, another VM (or rather VCPU) has hit the ASID
1786      * limit while flushing the TLB, or the host CPU has come back online after a suspend/resume, so
1787      * we cannot reuse the current ASID anymore.
1788 */
1789 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1790 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1791 {
1792 ++pHostCpu->uCurrentAsid;
1793 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1794 {
1795 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1796 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1797 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1798 }
1799
1800 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1801 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1802 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1803
1804 /*
1805 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1806 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1807 */
1808 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1809 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1810 HMVMX_SET_TAGGED_TLB_FLUSHED();
1811 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1812 }
1813 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
1814 {
1815 /*
1816          * Changes to the EPT paging structure by the VMM require flushing-by-EPT as the CPU
1817          * creates guest-physical (i.e. only EPT-tagged) mappings while traversing the EPT
1818 * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
1819 * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
1820 * mappings, see @bugref{6568}.
1821 *
1822 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
1823 */
1824 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1825 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1826 HMVMX_SET_TAGGED_TLB_FLUSHED();
1827 }
1828 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1829 {
1830 /*
1831 * The nested-guest specifies its own guest-physical address to use as the APIC-access
1832          * address, which requires flushing EPT-cached structures from the TLB.
1833 *
1834 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
1835 */
1836 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1837 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1838 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1839 HMVMX_SET_TAGGED_TLB_FLUSHED();
1840 }
1841
1842
1843 pVCpu->hmr0.s.fForceTLBFlush = false;
1844 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1845
1846 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
1847 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
1848 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1849 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1850 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1851 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1852 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1853 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1854 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1855
1856 /* Update VMCS with the VPID. */
1857 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1858 AssertRC(rc);
1859
1860#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1861}
1862
1863
1864/**
1865 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1866 *
1867 * @param pHostCpu The HM physical-CPU structure.
1868 * @param pVCpu The cross context virtual CPU structure.
1869 * @param pVmcsInfo The VMCS info. object.
1870 *
1871 * @remarks Called with interrupts disabled.
1872 */
1873static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1874{
1875 AssertPtr(pVCpu);
1876 AssertPtr(pHostCpu);
1877 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1878 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
1879 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
1880
1881 /*
1882 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1883      * A change in the TLB flush count implies the host CPU has come back online after a suspend/resume.
1884 */
1885 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1886 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1887 {
1888 pVCpu->hmr0.s.fForceTLBFlush = true;
1889 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1890 }
1891
1892 /* Check for explicit TLB flushes. */
1893 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1894 {
1895 pVCpu->hmr0.s.fForceTLBFlush = true;
1896 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1897 }
1898
1899 /* Check for TLB flushes while switching to/from a nested-guest. */
1900 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1901 {
1902 pVCpu->hmr0.s.fForceTLBFlush = true;
1903 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1904 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1905 }
1906
1907 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1908 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1909
1910 if (pVCpu->hmr0.s.fForceTLBFlush)
1911 {
1912 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
1913 pVCpu->hmr0.s.fForceTLBFlush = false;
1914 }
1915}
1916
1917
1918/**
1919 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1920 *
1921 * @param pHostCpu The HM physical-CPU structure.
1922 * @param pVCpu The cross context virtual CPU structure.
1923 *
1924 * @remarks Called with interrupts disabled.
1925 */
1926static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1927{
1928 AssertPtr(pVCpu);
1929 AssertPtr(pHostCpu);
1930 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1931 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
1932 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
1933
1934 /*
1935 * Force a TLB flush for the first world switch if the current CPU differs from the one we
1936      * ran on last. If the TLB flush count changed, another VM (or rather VCPU) has hit the ASID
1937      * limit while flushing the TLB, or the host CPU has come back online after a suspend/resume, so
1938      * we cannot reuse the current ASID anymore.
1939 */
1940 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1941 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1942 {
1943 pVCpu->hmr0.s.fForceTLBFlush = true;
1944 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1945 }
1946
1947 /* Check for explicit TLB flushes. */
1948 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1949 {
1950 /*
1951 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
1952 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
1953 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
1954 * include fExplicitFlush's too) - an obscure corner case.
1955 */
1956 pVCpu->hmr0.s.fForceTLBFlush = true;
1957 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1958 }
1959
1960 /* Check for TLB flushes while switching to/from a nested-guest. */
1961 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1962 {
1963 pVCpu->hmr0.s.fForceTLBFlush = true;
1964 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1965 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1966 }
1967
1968 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1969 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1970 if (pVCpu->hmr0.s.fForceTLBFlush)
1971 {
1972 ++pHostCpu->uCurrentAsid;
1973 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1974 {
1975 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1976 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1977 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1978 }
1979
1980 pVCpu->hmr0.s.fForceTLBFlush = false;
1981 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1982 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1983 if (pHostCpu->fFlushAsidBeforeUse)
1984 {
1985 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
1986 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
1987 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1988 {
1989 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
1990 pHostCpu->fFlushAsidBeforeUse = false;
1991 }
1992 else
1993 {
1994 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
1995 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
1996 }
1997 }
1998 }
1999
2000 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
2001 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
2002 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
2003 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
2004 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
2005 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
2006 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
2007
2008 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
2009 AssertRC(rc);
2010}
2011
2012
2013/**
2014 * Flushes the guest TLB entry based on CPU capabilities.
2015 *
2016 * @param pHostCpu The HM physical-CPU structure.
2017 * @param pVCpu The cross context virtual CPU structure.
2018 * @param pVmcsInfo The VMCS info. object.
2019 *
2020 * @remarks Called with interrupts disabled.
2021 */
2022static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2023{
2024#ifdef HMVMX_ALWAYS_FLUSH_TLB
2025 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2026#endif
2027 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2028 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
2029 {
2030 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
2031 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
2032 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
2033 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
2034 default:
2035 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2036 break;
2037 }
2038 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2039}
2040
2041
2042/**
2043 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2044 * TLB entries from the host TLB before VM-entry.
2045 *
2046 * @returns VBox status code.
2047 * @param pVM The cross context VM structure.
2048 */
2049static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
2050{
2051 /*
2052 * Determine optimal flush type for nested paging.
2053      * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up
2054 * unrestricted guest execution (see hmR3InitFinalizeR0()).
2055 */
2056 if (pVM->hmr0.s.fNestedPaging)
2057 {
2058 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2059 {
2060 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2061 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
2062 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2063 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
2064 else
2065 {
2066 /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
2067 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2068 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2069 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2070 }
2071
2072 /* Make sure the write-back cacheable memory type for EPT is supported. */
2073 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
2074 {
2075 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2076 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2077 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2078 }
2079
2080 /* EPT requires a page-walk length of 4. */
2081 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2082 {
2083 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2084 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2085 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2086 }
2087 }
2088 else
2089 {
2090 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2091 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2092 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2093 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2094 }
2095 }
2096
2097 /*
2098 * Determine optimal flush type for VPID.
2099 */
2100 if (pVM->hmr0.s.vmx.fVpid)
2101 {
2102 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2103 {
2104 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2105 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
2106 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2107 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
2108 else
2109 {
2110                 /* Neither SINGLE- nor ALL-context flush type for VPID is supported by the CPU. Ignore the VPID capability. */
2111 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2112 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
2113 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2114 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2115 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2116 pVM->hmr0.s.vmx.fVpid = false;
2117 }
2118 }
2119 else
2120 {
2121 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2122             Log4Func(("VPID supported without INVVPID support. Ignoring VPID.\n"));
2123 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2124 pVM->hmr0.s.vmx.fVpid = false;
2125 }
2126 }
2127
2128 /*
2129 * Setup the handler for flushing tagged-TLBs.
2130 */
2131 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
2132 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
2133 else if (pVM->hmr0.s.fNestedPaging)
2134 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
2135 else if (pVM->hmr0.s.vmx.fVpid)
2136 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
2137 else
2138 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
2139
2140
2141 /*
2142 * Copy out the result to ring-3.
2143 */
2144 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
2145 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
2146 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
2147 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
2148 return VINF_SUCCESS;
2149}
2150
2151
2152/**
2153 * Sets up the LBR MSR ranges based on the host CPU.
2154 *
2155 * @returns VBox status code.
2156 * @param pVM The cross context VM structure.
2157 *
2158 * @sa nemR3DarwinSetupLbrMsrRange
2159 */
2160static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
2161{
2162 Assert(pVM->hmr0.s.vmx.fLbr);
2163 uint32_t idLbrFromIpMsrFirst;
2164 uint32_t idLbrFromIpMsrLast;
2165 uint32_t idLbrToIpMsrFirst;
2166 uint32_t idLbrToIpMsrLast;
2167 uint32_t idLbrTosMsr;
2168
2169 /*
2170 * Determine the LBR MSRs supported for this host CPU family and model.
2171 *
2172 * See Intel spec. 17.4.8 "LBR Stack".
2173 * See Intel "Model-Specific Registers" spec.
2174 */
2175 uint32_t const uFamilyModel = (g_CpumHostFeatures.s.uFamily << 8)
2176 | g_CpumHostFeatures.s.uModel;
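     /* The cases below match (family << 8) | model, e.g. 0x068e is family 6, model 0x8e. */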
2177 switch (uFamilyModel)
2178 {
2179 case 0x0f01: case 0x0f02:
2180 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
2181 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
2182 idLbrToIpMsrFirst = 0x0;
2183 idLbrToIpMsrLast = 0x0;
2184 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
2185 break;
2186
2187 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
2188 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
2189 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
2190 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2191 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
2192 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2193 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
2194 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2195 break;
2196
2197 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
2198 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
2199 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
2200 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
2201 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2202 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
2203 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2204 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
2205 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2206 break;
2207
2208 case 0x0617: case 0x061d: case 0x060f:
2209 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
2210 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
2211 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
2212 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
2213 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
2214 break;
2215
2216 /* Atom and related microarchitectures we don't care about:
2217 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
2218 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
2219 case 0x0636: */
2220 /* All other CPUs: */
2221 default:
2222 {
2223 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
2224 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
2225 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2226 }
2227 }
2228
2229 /*
2230 * Validate.
2231 */
2232 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
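     /* The LBR stack depth follows from the FROM-IP MSR range picked above (4, 16 or 32 entries
        for the models listed). */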
2233 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
2234 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
2235 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
2236 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
2237 {
2238 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
2239 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
2240 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2241 }
2242 NOREF(pVCpu0);
2243
2244 /*
2245 * Update the LBR info. to the VM struct. for use later.
2246 */
2247 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
2248
2249 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
2250 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
2251
2252 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
2253 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
2254 return VINF_SUCCESS;
2255}
2256
2257#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2258
2259/**
2260 * Sets up the shadow VMCS fields arrays.
2261 *
2262 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
2263 * executing the guest.
2264 *
2265 * @returns VBox status code.
2266 * @param pVM The cross context VM structure.
2267 */
2268static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
2269{
2270 /*
2271 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
2272 * when the host does not support it.
2273 */
2274 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
2275 if ( !fGstVmwriteAll
2276 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
2277 { /* likely. */ }
2278 else
2279 {
2280 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
2281 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
2282 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2283 }
2284
2285 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
2286 uint32_t cRwFields = 0;
2287 uint32_t cRoFields = 0;
2288 for (uint32_t i = 0; i < cVmcsFields; i++)
2289 {
2290 VMXVMCSFIELD VmcsField;
2291 VmcsField.u = g_aVmcsFields[i];
2292
2293 /*
2294 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
2295 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
2296 * in the shadow VMCS fields array as they would be redundant.
2297 *
2298 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
2299 * we must not include it in the shadow VMCS fields array. Guests attempting to
2300 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
2301 * the required behavior.
2302 */
2303 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
2304 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
2305 {
2306 /*
2307 * Read-only fields are placed in a separate array so that while syncing shadow
2308 * VMCS fields later (which is more performance critical) we can avoid branches.
2309 *
2310 * However, if the guest can write to all fields (including read-only fields),
2311              * we treat it as a read/write field. Otherwise, writing to these fields would
2312 * cause a VMWRITE instruction error while syncing the shadow VMCS.
2313 */
2314 if ( fGstVmwriteAll
2315 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
2316 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
2317 else
2318 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
2319 }
2320 }
2321
2322 /* Update the counts. */
2323 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
2324 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
2325 return VINF_SUCCESS;
2326}
2327
2328
2329/**
2330 * Sets up the VMREAD and VMWRITE bitmaps.
2331 *
2332 * @param pVM The cross context VM structure.
2333 */
2334static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
2335{
2336 /*
2337 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
2338 */
2339 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
2340 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
2341 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
2342 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
2343 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
2344
2345 /*
2346 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
2347 * VMREAD and VMWRITE bitmaps.
2348 */
2349 {
2350 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
2351 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
2352 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
2353 {
2354 uint32_t const uVmcsField = paShadowVmcsFields[i];
2355 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2356 Assert(uVmcsField >> 3 < cbBitmap);
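             /* Bit positions in the VMREAD/VMWRITE bitmaps are selected by the low 15 bits of the
                VMCS field encoding; clearing the bit makes the access not cause a VM-exit. */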
2357 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2358 ASMBitClear(pbVmwriteBitmap, uVmcsField & 0x7fff);
2359 }
2360 }
2361
2362 /*
2363 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
2364 * if the host supports VMWRITE to all supported VMCS fields.
2365 */
2366 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
2367 {
2368 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
2369 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
2370 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
2371 {
2372 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
2373 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2374 Assert(uVmcsField >> 3 < cbBitmap);
2375 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2376 }
2377 }
2378}
2379
2380#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
2381
2382/**
2383 * Sets up the virtual-APIC page address for the VMCS.
2384 *
2385 * @param pVmcsInfo The VMCS info. object.
2386 */
2387DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
2388{
2389 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
2390 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
2391 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2392 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
2393 AssertRC(rc);
2394}
2395
2396
2397/**
2398 * Sets up the MSR-bitmap address for the VMCS.
2399 *
2400 * @param pVmcsInfo The VMCS info. object.
2401 */
2402DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
2403{
2404 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
2405 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
2406 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2407 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
2408 AssertRC(rc);
2409}
2410
2411
2412/**
2413 * Sets up the APIC-access page address for the VMCS.
2414 *
2415 * @param pVCpu The cross context virtual CPU structure.
2416 */
2417DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
2418{
2419 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
2420 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
2421 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2422 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
2423 AssertRC(rc);
2424}
2425
2426#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2427
2428/**
2429 * Sets up the VMREAD bitmap address for the VMCS.
2430 *
2431 * @param pVCpu The cross context virtual CPU structure.
2432 */
2433DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
2434{
2435 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
2436 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
2437 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2438 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
2439 AssertRC(rc);
2440}
2441
2442
2443/**
2444 * Sets up the VMWRITE bitmap address for the VMCS.
2445 *
2446 * @param pVCpu The cross context virtual CPU structure.
2447 */
2448DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
2449{
2450 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
2451 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
2452 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2453 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
2454 AssertRC(rc);
2455}
2456
2457#endif
2458
2459/**
2460 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
2461 * in the VMCS.
2462 *
2463 * @returns VBox status code.
2464 * @param pVmcsInfo The VMCS info. object.
2465 */
2466DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
2467{
2468 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
2469 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
2470 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2471
2472 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
2473 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
2474 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
2475
2476 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
2477 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
2478 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2479
2480 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
2481 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
2482 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
2483 return VINF_SUCCESS;
2484}
2485
2486
2487/**
2488 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
2489 *
2490 * @param pVCpu The cross context virtual CPU structure.
2491 * @param pVmcsInfo The VMCS info. object.
2492 */
2493static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2494{
2495 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
2496
2497 /*
2498 * By default, ensure guest attempts to access any MSR cause VM-exits.
2499 * This shall later be relaxed for specific MSRs as necessary.
2500 *
2501 * Note: For nested-guests, the entire bitmap will be merged prior to
2502 * executing the nested-guest using hardware-assisted VMX and hence there
2503 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
2504 */
2505 Assert(pVmcsInfo->pvMsrBitmap);
2506 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
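     /* The 4 KB MSR bitmap has separate read and write quadrants for the low (00000000h..00001FFFh)
        and high (C0000000h..C0001FFFh) MSR ranges; with every bit set, all accesses are intercepted. */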
2507
2508 /*
2509 * The guest can access the following MSRs (read, write) without causing
2510 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
2511 */
2512 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2513 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
2514 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
2515 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
2516 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2517 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
2518
2519 /*
2520      * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
2521      * associated with them. We never need to intercept access (writes need to be
2522 * executed without causing a VM-exit, reads will #GP fault anyway).
2523 *
2524 * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
2525 * read/write them. We swap the guest/host MSR value using the
2526 * auto-load/store MSR area.
2527 */
2528 if (pVM->cpum.ro.GuestFeatures.fIbpb)
2529 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
2530 if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
2531 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
2532 if (pVM->cpum.ro.GuestFeatures.fIbrs)
2533 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
2534
2535 /*
2536 * Allow full read/write access for the following MSRs (mandatory for VT-x)
2537 * required for 64-bit guests.
2538 */
2539 if (pVM->hmr0.s.fAllow64BitGuests)
2540 {
2541 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
2542 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
2543 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
2544 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2545 }
2546
2547 /*
2548 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
2549 */
2550#ifdef VBOX_STRICT
2551 Assert(pVmcsInfo->pvMsrBitmap);
2552 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
2553 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
2554#endif
2555}
2556
2557
2558/**
2559 * Sets up pin-based VM-execution controls in the VMCS.
2560 *
2561 * @returns VBox status code.
2562 * @param pVCpu The cross context virtual CPU structure.
2563 * @param pVmcsInfo The VMCS info. object.
2564 */
2565static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2566{
2567 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2568 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
2569 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
2570
2571 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2572 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2573
2574 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
2575 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2576
2577 /* Enable the VMX-preemption timer. */
2578 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
2579 {
2580 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
2581 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
2582 }
2583
2584#if 0
2585 /* Enable posted-interrupt processing. */
2586 if (pVM->hm.s.fPostedIntrs)
2587 {
2588 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
2589 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
2590 fVal |= VMX_PIN_CTLS_POSTED_INT;
2591 }
2592#endif
2593
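     /* Every bit we want set must also be allowed to be set (allowed-1); otherwise the CPU lacks
        a feature we rely on and we must fail rather than silently drop it. */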
2594 if ((fVal & fZap) != fVal)
2595 {
2596 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2597 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
2598 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2599 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2600 }
2601
2602 /* Commit it to the VMCS and update our cache. */
2603 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
2604 AssertRC(rc);
2605 pVmcsInfo->u32PinCtls = fVal;
2606
2607 return VINF_SUCCESS;
2608}
2609
2610
2611/**
2612 * Sets up secondary processor-based VM-execution controls in the VMCS.
2613 *
2614 * @returns VBox status code.
2615 * @param pVCpu The cross context virtual CPU structure.
2616 * @param pVmcsInfo The VMCS info. object.
2617 */
2618static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2619{
2620 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2621 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
2622 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2623
2624 /* WBINVD causes a VM-exit. */
2625 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
2626 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
2627
2628 /* Enable EPT (aka nested-paging). */
2629 if (pVM->hmr0.s.fNestedPaging)
2630 fVal |= VMX_PROC_CTLS2_EPT;
2631
2632     /* Enable the INVPCID instruction if we expose it to the guest and it is supported
2633        by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
2634 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
2635 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
2636 fVal |= VMX_PROC_CTLS2_INVPCID;
2637
2638 /* Enable VPID. */
2639 if (pVM->hmr0.s.vmx.fVpid)
2640 fVal |= VMX_PROC_CTLS2_VPID;
2641
2642 /* Enable unrestricted guest execution. */
2643 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
2644 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
2645
2646#if 0
2647 if (pVM->hm.s.fVirtApicRegs)
2648 {
2649 /* Enable APIC-register virtualization. */
2650 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
2651 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
2652
2653 /* Enable virtual-interrupt delivery. */
2654 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
2655 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
2656 }
2657#endif
2658
2659     /* Virtualize APIC accesses if supported by the CPU. The virtual-APIC page is
2660 where the TPR shadow resides. */
2661 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
2662 * done dynamically. */
2663 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
2664 {
2665 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
2666 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
2667 }
2668
2669     /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
2670        by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
2671 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
2672 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
2673 fVal |= VMX_PROC_CTLS2_RDTSCP;
2674
2675 /* Enable Pause-Loop exiting. */
2676 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
2677 && pVM->hm.s.vmx.cPleGapTicks
2678 && pVM->hm.s.vmx.cPleWindowTicks)
2679 {
2680 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
2681
2682 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
2683 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
2684 }
2685
2686 if ((fVal & fZap) != fVal)
2687 {
2688 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2689 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
2690 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2691 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2692 }
2693
2694 /* Commit it to the VMCS and update our cache. */
2695 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
2696 AssertRC(rc);
2697 pVmcsInfo->u32ProcCtls2 = fVal;
2698
2699 return VINF_SUCCESS;
2700}
2701
2702
2703/**
2704 * Sets up processor-based VM-execution controls in the VMCS.
2705 *
2706 * @returns VBox status code.
2707 * @param pVCpu The cross context virtual CPU structure.
2708 * @param pVmcsInfo The VMCS info. object.
2709 */
2710static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2711{
2712 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2713 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
2714 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2715
2716 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
2717 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2718 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2719 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2720 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2721 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2722 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2723
2724     /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; check that it is not fixed to always be set or always be clear. */
2725 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
2726 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
2727 {
2728 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2729 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2730 }
2731
2732 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2733 if (!pVM->hmr0.s.fNestedPaging)
2734 {
2735 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
2736 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
2737 | VMX_PROC_CTLS_CR3_LOAD_EXIT
2738 | VMX_PROC_CTLS_CR3_STORE_EXIT;
2739 }
2740
2741 /* Use TPR shadowing if supported by the CPU. */
2742 if ( PDMHasApic(pVM)
2743 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
2744 {
2745 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2746 /* CR8 writes cause a VM-exit based on TPR threshold. */
2747 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
2748 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
2749 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
2750 }
2751 else
2752 {
2753 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
2754 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
2755 if (pVM->hmr0.s.fAllow64BitGuests)
2756 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2757 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2758 }
2759
2760 /* Use MSR-bitmaps if supported by the CPU. */
2761 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2762 {
2763 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
2764 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2765 }
2766
2767 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2768 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2769 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
2770
2771 if ((fVal & fZap) != fVal)
2772 {
2773 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2774 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
2775 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2776 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2777 }
2778
2779 /* Commit it to the VMCS and update our cache. */
2780 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
2781 AssertRC(rc);
2782 pVmcsInfo->u32ProcCtls = fVal;
2783
2784 /* Set up MSR permissions that don't change through the lifetime of the VM. */
2785 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2786 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
2787
2788 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
2789 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2790 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
2791
2792 /* Sanity check, should not really happen. */
2793 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
2794 { /* likely */ }
2795 else
2796 {
2797 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2798 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2799 }
2800
2801 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
2802 return VINF_SUCCESS;
2803}
2804
2805
2806/**
2807 * Sets up miscellaneous (everything other than Pin, Processor and secondary
2808 * Processor-based VM-execution) control fields in the VMCS.
2809 *
2810 * @returns VBox status code.
2811 * @param pVCpu The cross context virtual CPU structure.
2812 * @param pVmcsInfo The VMCS info. object.
2813 */
2814static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2815{
2816#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2817 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
2818 {
2819 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
2820 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
2821 }
2822#endif
2823
2824 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2825 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2826 AssertRC(rc);
2827
2828 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2829 if (RT_SUCCESS(rc))
2830 {
2831 uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
2832 uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
2833
2834 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
2835 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
2836
2837 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
2838 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
2839
2840 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
2841 {
2842 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
2843 AssertRC(rc);
2844 }
2845 return VINF_SUCCESS;
2846 }
2847 else
2848 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2849 return rc;
2850}
2851
2852
2853/**
2854 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2855 *
2856  * We shall set up those exception intercepts that don't change during the
2857 * lifetime of the VM here. The rest are done dynamically while loading the
2858 * guest state.
2859 *
2860 * @param pVCpu The cross context virtual CPU structure.
2861 * @param pVmcsInfo The VMCS info. object.
2862 */
2863static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2864{
2865 /*
2866 * The following exceptions are always intercepted:
2867 *
2868 * #AC - To prevent the guest from hanging the CPU and for dealing with
2869 * split-lock detecting host configs.
2870 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
2871 * recursive #DBs can cause a CPU hang.
2872 * #PF - To sync our shadow page tables when nested-paging is not used.
2873 */
2874 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
2875 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
2876 | RT_BIT(X86_XCPT_DB)
2877 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
2878
2879 /* Commit it to the VMCS. */
2880 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
2881 AssertRC(rc);
2882
2883 /* Update our cache of the exception bitmap. */
2884 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
2885}
2886
2887
2888#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2889/**
2890 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
2891 *
2892 * @returns VBox status code.
2893 * @param pVmcsInfo The VMCS info. object.
2894 */
2895static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
2896{
2897 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2898 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2899 AssertRC(rc);
2900
2901 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2902 if (RT_SUCCESS(rc))
2903 {
2904 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2905 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2906
2907 /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
2908 Assert(!pVmcsInfo->u64Cr0Mask);
2909 Assert(!pVmcsInfo->u64Cr4Mask);
2910 return VINF_SUCCESS;
2911 }
2912     LogRelFunc(("Failed to set up the auto-load/store MSR addresses in the nested-guest VMCS. rc=%Rrc\n", rc));
2913 return rc;
2914}
2915#endif
2916
2917
2918/**
2919  * Selector FNHMVMXSTARTVM implementation.
2920 */
2921static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
2922{
2923 hmR0VmxUpdateStartVmFunction(pVCpu);
2924 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
2925}
2926
2927
2928/**
2929 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
2930 * VMX.
2931 *
2932 * @returns VBox status code.
2933 * @param pVCpu The cross context virtual CPU structure.
2934 * @param pVmcsInfo The VMCS info. object.
2935 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2936 */
2937static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2938{
2939 Assert(pVmcsInfo->pvVmcs);
2940 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2941
2942 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
2943 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2944 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
2945
2946 LogFlowFunc(("\n"));
2947
2948 /*
2949 * Initialize the VMCS using VMCLEAR before loading the VMCS.
2950 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
2951 */
2952 int rc = hmR0VmxClearVmcs(pVmcsInfo);
2953 if (RT_SUCCESS(rc))
2954 {
2955 rc = hmR0VmxLoadVmcs(pVmcsInfo);
2956 if (RT_SUCCESS(rc))
2957 {
2958 /*
2959 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
2960 * The host is always 64-bit since we no longer support 32-bit hosts.
2961 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
2962 */
2963 if (!fIsNstGstVmcs)
2964 {
2965 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
2966 if (RT_SUCCESS(rc))
2967 {
2968 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
2969 if (RT_SUCCESS(rc))
2970 {
2971 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
2972 if (RT_SUCCESS(rc))
2973 {
2974 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
2975#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2976 /*
2977 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
2978 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
2979 * making it fit for use when VMCS shadowing is later enabled.
2980 */
2981 if (pVmcsInfo->pvShadowVmcs)
2982 {
2983 VMXVMCSREVID VmcsRevId;
2984 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2985 VmcsRevId.n.fIsShadowVmcs = 1;
2986 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
2987 rc = vmxHCClearShadowVmcs(pVmcsInfo);
2988 if (RT_SUCCESS(rc))
2989 { /* likely */ }
2990 else
2991 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
2992 }
2993#endif
2994 }
2995 else
2996 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
2997 }
2998 else
2999 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
3000 }
3001 else
3002 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
3003 }
3004 else
3005 {
3006#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3007 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
3008 if (RT_SUCCESS(rc))
3009 { /* likely */ }
3010 else
3011 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
3012#else
3013 AssertFailed();
3014#endif
3015 }
3016 }
3017 else
3018 LogRelFunc(("Failed to load the %s. rc=%Rrc\n", pszVmcs, rc));
3019 }
3020 else
3021 LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", pszVmcs, rc));
3022
3023 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
3024 if (RT_SUCCESS(rc))
3025 {
3026 rc = hmR0VmxClearVmcs(pVmcsInfo);
3027 if (RT_SUCCESS(rc))
3028 { /* likely */ }
3029 else
3030 LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", rc, pszVmcs));
3031 }
3032
3033 /*
3034 * Update the last-error record both for failures and success, so we
3035 * can propagate the status code back to ring-3 for diagnostics.
3036 */
3037 hmR0VmxUpdateErrorRecord(pVCpu, rc);
3038 NOREF(pszVmcs);
3039 return rc;
3040}
3041
3042
3043/**
3044 * Does global VT-x initialization (called during module initialization).
3045 *
3046 * @returns VBox status code.
3047 */
3048VMMR0DECL(int) VMXR0GlobalInit(void)
3049{
3050#ifdef HMVMX_USE_FUNCTION_TABLE
3051 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
3052# ifdef VBOX_STRICT
3053 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
3054 Assert(g_aVMExitHandlers[i].pfn);
3055# endif
3056#endif
3057
3058 /*
3059 * For detecting whether DR6.RTM is writable or not (done in VMXR0InitVM).
3060 */
3061 RTTHREADPREEMPTSTATE Preempt = RTTHREADPREEMPTSTATE_INITIALIZER;
3062 RTThreadPreemptDisable(&Preempt);
3063 RTCCUINTXREG const fSavedDr6 = ASMGetDR6();
3064 ASMSetDR6(0);
3065 RTCCUINTXREG const fZeroDr6 = ASMGetDR6();
3066 ASMSetDR6(fSavedDr6);
3067 RTThreadPreemptRestore(&Preempt);
3068
3069 g_fDr6Zeroed = fZeroDr6;
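/* Note: if the probe above reads back anything other than X86_DR6_RA1_MASK (which can
   happen when DR6.RTM is writable), VMXR0InitVM() will by default switch to always
   intercepting MOV DRx; see the fAlwaysInterceptMovDRx handling there. */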
3070
3071 return VINF_SUCCESS;
3072}
3073
3074
3075/**
3076 * Does global VT-x termination (called during module termination).
3077 */
3078VMMR0DECL(void) VMXR0GlobalTerm()
3079{
3080 /* Nothing to do currently. */
3081}
3082
3083
3084/**
3085 * Sets up and activates VT-x on the current CPU.
3086 *
3087 * @returns VBox status code.
3088 * @param pHostCpu The HM physical-CPU structure.
3089 * @param pVM The cross context VM structure. Can be
3090 * NULL after a host resume operation.
3091 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
3092 * fEnabledByHost is @c true).
3093 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
3094 * @a fEnabledByHost is @c true).
3095 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
3096 * enable VT-x on the host.
3097 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
3098 */
3099VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
3100 PCSUPHWVIRTMSRS pHwvirtMsrs)
3101{
3102 AssertPtr(pHostCpu);
3103 AssertPtr(pHwvirtMsrs);
3104 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3105
3106 /* Enable VT-x if it's not already enabled by the host. */
3107 if (!fEnabledByHost)
3108 {
3109 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
3110 if (RT_FAILURE(rc))
3111 return rc;
3112 }
3113
3114 /*
3115 * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
3116 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
3117 * invalidated when flushing by VPID.
3118 */
3119 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3120 {
3121 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
3122 pHostCpu->fFlushAsidBeforeUse = false;
3123 }
3124 else
3125 pHostCpu->fFlushAsidBeforeUse = true;
3126
3127 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
3128 ++pHostCpu->cTlbFlushes;
3129
3130 return VINF_SUCCESS;
3131}
3132
3133
3134/**
3135 * Deactivates VT-x on the current CPU.
3136 *
3137 * @returns VBox status code.
3138 * @param pHostCpu The HM physical-CPU structure.
3139 * @param pvCpuPage Pointer to the VMXON region.
3140 * @param HCPhysCpuPage Physical address of the VMXON region.
3141 *
3142 * @remarks This function should never be called when SUPR0EnableVTx() or
3143 * similar was used to enable VT-x on the host.
3144 */
3145VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
3146{
3147 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
3148
3149 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3150 return hmR0VmxLeaveRootMode(pHostCpu);
3151}
3152
3153
3154/**
3155 * Does per-VM VT-x initialization.
3156 *
3157 * @returns VBox status code.
3158 * @param pVM The cross context VM structure.
3159 */
3160VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
3161{
3162 AssertPtr(pVM);
3163 LogFlowFunc(("pVM=%p\n", pVM));
3164
3165 hmR0VmxStructsInit(pVM);
3166 int rc = hmR0VmxStructsAlloc(pVM);
3167 if (RT_FAILURE(rc))
3168 {
3169 LogRelFunc(("Failed to allocate VMX structures. rc=%Rrc\n", rc));
3170 return rc;
3171 }
3172
3173 /* Setup the crash dump page. */
3174#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3175 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
3176 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
3177#endif
3178
3179 /*
3180 * Copy out stuff that's for ring-3 and determine the default configuration.
3181 */
3182 pVM->hm.s.ForR3.vmx.u64HostDr6Zeroed = g_fDr6Zeroed;
3183
3184 /* Since we do not emulate RTM, make sure DR6.RTM cannot be cleared by the
3185 guest and cause confusion there. It appears that the DR6.RTM bit can be
3186 cleared even if TSX-NI is disabled (microcode update / system / whatever). */
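/* Rough summary of the config semantics as implemented below: 0 = auto-detect from the
   DR6 probe done in VMXR0GlobalInit(), positive = force the MOV DRx intercepts on,
   negative = force them off. */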
3187#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3188 if (pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg == 0)
3189 pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = g_fDr6Zeroed != X86_DR6_RA1_MASK;
3190 else
3191#endif
3192 pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg > 0;
3193 pVM->hm.s.ForR3.vmx.fAlwaysInterceptMovDRx = pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx;
3194
3195 return VINF_SUCCESS;
3196}
3197
3198
3199/**
3200 * Does per-VM VT-x termination.
3201 *
3202 * @returns VBox status code.
3203 * @param pVM The cross context VM structure.
3204 */
3205VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
3206{
3207 AssertPtr(pVM);
3208 LogFlowFunc(("pVM=%p\n", pVM));
3209
3210#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3211 if (pVM->hmr0.s.vmx.pbScratch)
3212 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
3213#endif
3214 hmR0VmxStructsFree(pVM);
3215 return VINF_SUCCESS;
3216}
3217
3218
3219/**
3220 * Sets up the VM for execution using hardware-assisted VMX.
3221 * This function is only called once per-VM during initialization.
3222 *
3223 * @returns VBox status code.
3224 * @param pVM The cross context VM structure.
3225 */
3226VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
3227{
3228 AssertPtr(pVM);
3229 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3230
3231 LogFlowFunc(("pVM=%p\n", pVM));
3232
3233 /*
3234 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
3235 * without causing a #GP.
3236 */
3237 RTCCUINTREG const uHostCr4 = ASMGetCR4();
3238 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
3239 { /* likely */ }
3240 else
3241 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
3242
3243 /*
3244 * Check that nested paging is supported if enabled and copy over the flag to the
3245 * ring-0 only structure.
3246 */
3247 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
3248 AssertReturn( !fNestedPaging
3249 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
3250 VERR_INCOMPATIBLE_CONFIG);
3251 pVM->hmr0.s.fNestedPaging = fNestedPaging;
3252 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
3253
3254 /*
3255 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
3256 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
3257 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
3258 */
3259 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
3260 AssertReturn( !fUnrestrictedGuest
3261 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
3262 && fNestedPaging),
3263 VERR_INCOMPATIBLE_CONFIG);
3264 if ( !fUnrestrictedGuest
3265 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
3266 || !pVM->hm.s.vmx.pRealModeTSS))
3267 {
3268 LogRelFunc(("Invalid real-on-v86 state.\n"));
3269 return VERR_INTERNAL_ERROR;
3270 }
3271 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
3272
3273 /* Initialize these always, see hmR3InitFinalizeR0(). */
3274 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
3275 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
3276
3277 /* Setup the tagged-TLB flush handlers. */
3278 int rc = hmR0VmxSetupTaggedTlb(pVM);
3279 if (RT_FAILURE(rc))
3280 {
3281 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
3282 return rc;
3283 }
3284
3285 /* Determine LBR capabilities. */
3286 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
3287 if (pVM->hmr0.s.vmx.fLbr)
3288 {
3289 rc = hmR0VmxSetupLbrMsrRange(pVM);
3290 if (RT_FAILURE(rc))
3291 {
3292 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
3293 return rc;
3294 }
3295 }
3296
3297#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3298 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
3299 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
3300 {
3301 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
3302 if (RT_SUCCESS(rc))
3303 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
3304 else
3305 {
3306 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
3307 return rc;
3308 }
3309 }
3310#endif
3311
3312 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3313 {
3314 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
3315 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
3316
3317 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
3318
3319 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
3320 if (RT_SUCCESS(rc))
3321 {
3322#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3323 if (pVM->cpum.ro.GuestFeatures.fVmx)
3324 {
3325 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
3326 if (RT_SUCCESS(rc))
3327 { /* likely */ }
3328 else
3329 {
3330 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
3331 return rc;
3332 }
3333 }
3334#endif
3335 }
3336 else
3337 {
3338 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
3339 return rc;
3340 }
3341 }
3342
3343 return VINF_SUCCESS;
3344}
3345
3346
3347/**
3348 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
3349 * the VMCS.
3350 * @returns The host CR4 value, for passing along to hmR0VmxExportHostSegmentRegs.
3351 */
3352static uint64_t hmR0VmxExportHostControlRegs(void)
3353{
3354 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
3355 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
3356 uint64_t uHostCr4 = ASMGetCR4();
3357 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
3358 return uHostCr4;
3359}
3360
3361
3362/**
3363 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
3364 * the host-state area in the VMCS.
3365 *
3366 * @returns VBox status code.
3367 * @param pVCpu The cross context virtual CPU structure.
3368 * @param uHostCr4 The host CR4 value.
3369 */
3370static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
3371{
3372 /*
3373 * If we've executed guest code using hardware-assisted VMX, the host-state bits
3374 * will be messed up. We should -not- save the messed up state without restoring
3375 * the original host-state, see @bugref{7240}.
3376 *
3377 * This apparently can happen (most likely the FPU changes), deal with it rather than
3378 * asserting. Was observed booting Solaris 10u10 32-bit guest.
3379 */
3380 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
3381 {
3382 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
3383 pVCpu->idCpu));
3384 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
3385 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
3386 }
3387
3388 /*
3389 * Get all the host info.
3390 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
3391 * without also checking the cpuid bit.
3392 */
3393 uint32_t fRestoreHostFlags;
3394#if RT_INLINE_ASM_EXTERNAL
3395 if (uHostCr4 & X86_CR4_FSGSBASE)
3396 {
3397 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
3398 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3399 }
3400 else
3401 {
3402 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
3403 fRestoreHostFlags = 0;
3404 }
3405 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
3406 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
3407 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
3408 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
3409#else
3410 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
3411 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
3412 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
3413 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
3414 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
3415 if (uHostCr4 & X86_CR4_FSGSBASE)
3416 {
3417 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
3418 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
3419 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3420 }
3421 else
3422 {
3423 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
3424 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
3425 fRestoreHostFlags = 0;
3426 }
3427 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
3428 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
3429 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
3430 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
3431 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
3432#endif
3433
3434 /*
3435 * Determine if the host segment registers are suitable for VT-x; otherwise load zero
3436 * selectors so that VM-entry succeeds, and restore the real values before we get preempted.
3437 *
3438 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
3439 */
3440 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
3441 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
3442 {
3443 if (!(uSelAll & X86_SEL_LDT))
3444 {
3445#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3446 do { \
3447 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3448 if ((a_uVmcsVar) & X86_SEL_RPL) \
3449 { \
3450 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3451 (a_uVmcsVar) = 0; \
3452 } \
3453 } while (0)
3454 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3455 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3456 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3457 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3458#undef VMXLOCAL_ADJUST_HOST_SEG
3459 }
3460 else
3461 {
3462#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3463 do { \
3464 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3465 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
3466 { \
3467 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
3468 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3469 else \
3470 { \
3471 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
3472 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
3473 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3474 } \
3475 (a_uVmcsVar) = 0; \
3476 } \
3477 } while (0)
3478 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3479 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3480 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3481 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3482#undef VMXLOCAL_ADJUST_HOST_SEG
3483 }
3484 }
3485
3486 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
3487 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
3488 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
3489 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
3490 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
3491 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
3492 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
3493 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
3494
3495 /*
3496 * Determine if we need to manually restore the GDTR and IDTR limits, as VT-x zaps
3497 * them to the maximum limit (0xffff) on every VM-exit.
3498 */
3499 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
3500 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
3501
3502 /*
3503 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
3504 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
3505 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
3506 * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
3507 * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
3508 * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
3509 * at 0xffff on hosts where we are sure it won't cause trouble.
3510 */
3511#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
3512 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
3513#else
3514 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
3515#endif
3516 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
3517
3518 /*
3519 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
3520 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
3521 * RPL should be too in most cases.
3522 */
3523 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
3524 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
3525 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
3526 VERR_VMX_INVALID_HOST_STATE);
3527
3528 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
3529 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
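/* Note: on 64-bit hosts the TR descriptor is the 16-byte system-descriptor form, so
   X86DESC64_BASE() assembles the full 64-bit TSS base from both descriptor halves;
   e.g. TR=0x40 references the descriptor at GDT base + 0x40. */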
3530
3531 /*
3532 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
3533 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
3534 * restoration if the host has something else. Task switching is not supported in 64-bit
3535 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
3536 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
3537 *
3538 * [1] See Intel spec. 3.5 "System Descriptor Types".
3539 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
3540 */
3541 Assert(pDesc->System.u4Type == 11);
3542 if ( pDesc->System.u16LimitLow != 0x67
3543 || pDesc->System.u4LimitHigh)
3544 {
3545 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
3546
3547 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
3548 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
3549 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
3550 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
3551 {
3552 /* The GDT is read-only but the writable GDT is available. */
3553 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
3554 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
3555 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
3556 AssertRCReturn(rc, rc);
3557 }
3558 }
3559
3560 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
3561
3562 /*
3563 * Do all the VMCS updates in one block to assist nested virtualization.
3564 */
3565 int rc;
3566 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
3567 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
3568 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
3569 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
3570 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
3571 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
3572 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
3573 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
3574 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
3575 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
3576 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
3577 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
3578
3579 return VINF_SUCCESS;
3580}
3581
3582
3583/**
3584 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
3585 * host-state area of the VMCS.
3586 *
3587 * These MSRs will be automatically restored on the host after every successful
3588 * VM-exit.
3589 *
3590 * @param pVCpu The cross context virtual CPU structure.
3591 *
3592 * @remarks No-long-jump zone!!!
3593 */
3594static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
3595{
3596 AssertPtr(pVCpu);
3597
3598 /*
3599 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
3600 * rather than swapping them on every VM-entry.
3601 */
3602 hmR0VmxLazySaveHostMsrs(pVCpu);
3603
3604 /*
3605 * Host Sysenter MSRs.
3606 */
3607 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
3608 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
3609 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
3610
3611 /*
3612 * Host EFER MSR.
3613 *
3614 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
3615 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
3616 */
3617 if (g_fHmVmxSupportsVmcsEfer)
3618 {
3619 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
3620 AssertRC(rc);
3621 }
3622
3623 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3624 * hmR0VmxExportGuestEntryExitCtls(). */
3625}
3626
3627
3628/**
3629 * Figures out if we need to swap the EFER MSR which is particularly expensive.
3630 *
3631 * We check all relevant bits. For now, that's everything besides LMA/LME, as
3632 * these two bits are handled by VM-entry, see hmR0VmxExportGuestEntryExitCtls().
3633 *
3634 * @returns true if we need to load guest EFER, false otherwise.
3635 * @param pVCpu The cross context virtual CPU structure.
3636 * @param pVmxTransient The VMX-transient structure.
3637 *
3638 * @remarks Requires EFER, CR4.
3639 * @remarks No-long-jump zone!!!
3640 */
3641static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3642{
3643#ifdef HMVMX_ALWAYS_SWAP_EFER
3644 RT_NOREF2(pVCpu, pVmxTransient);
3645 return true;
3646#else
3647 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3648 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
3649 uint64_t const u64GuestEfer = pCtx->msrEFER;
3650
3651# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3652 /*
3653 * For nested-guests, we shall honor swapping the EFER MSR when requested by
3654 * the nested-guest.
3655 */
3656 if ( pVmxTransient->fIsNestedGuest
3657 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
3658 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
3659 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
3660 return true;
3661# else
3662 RT_NOREF(pVmxTransient);
3663# endif
3664
3665 /*
3666 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
3667 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
3668 */
3669 if ( CPUMIsGuestInLongModeEx(pCtx)
3670 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3671 return true;
3672
3673 /*
3674 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
3675 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
3676 *
3677 * See Intel spec. 4.5 "IA-32e Paging".
3678 * See Intel spec. 4.1.1 "Three Paging Modes".
3679 *
3680 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
3681 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
3682 */
3683 Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
3684 Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
3685 if ( (pCtx->cr4 & X86_CR4_PAE)
3686 && (pCtx->cr0 & X86_CR0_PG))
3687 {
3688 /*
3689 * If nested paging is not used, verify that the guest paging mode matches the
3690 * shadow paging mode which is/will be placed in the VMCS (which is what will
3691 * actually be used while executing the guest and not the CR4 shadow value).
3692 */
3693 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
3694 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
3695 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
3696 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
3697 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
3698 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
3699 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3700 {
3701 /* Verify that the host is NX capable. */
3702 Assert(g_CpumHostFeatures.s.fNoExecute);
3703 return true;
3704 }
3705 }
3706
3707 return false;
3708#endif
3709}
3710
3711
3712/**
3713 * Exports the guest's RSP into the guest-state area in the VMCS.
3714 *
3715 * @param pVCpu The cross context virtual CPU structure.
3716 *
3717 * @remarks No-long-jump zone!!!
3718 */
3719static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
3720{
3721 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
3722 {
3723 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
3724
3725 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
3726 AssertRC(rc);
3727
3728 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
3729 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
3730 }
3731}
3732
3733
3734/**
3735 * Exports the guest hardware-virtualization state.
3736 *
3737 * @returns VBox status code.
3738 * @param pVCpu The cross context virtual CPU structure.
3739 * @param pVmxTransient The VMX-transient structure.
3740 *
3741 * @remarks No-long-jump zone!!!
3742 */
3743static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3744{
3745 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
3746 {
3747#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3748 /*
3749 * Check if the VMX feature is exposed to the guest and if the host CPU supports
3750 * VMCS shadowing.
3751 */
3752 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
3753 {
3754 /*
3755 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
3756 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
3757 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
3758 *
3759 * We check for VMX root mode here in case the guest executes VMXOFF without
3760 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
3761 * not clear the current VMCS pointer.
3762 */
3763 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3764 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
3765 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
3766 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
3767 {
3768 /* Paranoia. */
3769 Assert(!pVmxTransient->fIsNestedGuest);
3770
3771 /*
3772 * For performance reasons, also check if the nested hypervisor's current VMCS
3773 * was newly loaded or modified before copying it to the shadow VMCS.
3774 */
3775 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
3776 {
3777 int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
3778 AssertRCReturn(rc, rc);
3779 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
3780 }
3781 vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
3782 }
3783 else
3784 vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
3785 }
3786#else
3787 NOREF(pVmxTransient);
3788#endif
3789 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
3790 }
3791 return VINF_SUCCESS;
3792}
3793
3794
3795/**
3796 * Exports the guest debug registers into the guest-state area in the VMCS.
3797 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3798 *
3799 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
3800 *
3801 * @returns VBox status code.
3802 * @param pVCpu The cross context virtual CPU structure.
3803 * @param pVmxTransient The VMX-transient structure.
3804 *
3805 * @remarks No-long-jump zone!!!
3806 */
3807static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
3808{
3809 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3810
3811 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
3812 * stepping. */
3813 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3814 if (pVmxTransient->fIsNestedGuest)
3815 {
3816 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
3817 AssertRC(rc);
3818
3819 /*
3820 * We don't want to always intercept MOV DRx for nested-guests as it causes
3821 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
3822 * Instead, they are strictly only requested when the nested hypervisor intercepts
3823 * them -- handled while merging VMCS controls.
3824 *
3825 * If neither the outer nor the nested-hypervisor is intercepting MOV DRx,
3826 * then the nested-guest debug state should be actively loaded on the host so that
3827 * nested-guest reads its own debug registers without causing VM-exits.
3828 */
3829 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
3830 && !CPUMIsGuestDebugStateActive(pVCpu))
3831 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3832 return VINF_SUCCESS;
3833 }
3834
3835#ifdef VBOX_STRICT
3836 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3837 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
3838 {
3839 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3840 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
3841 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
3842 }
3843#endif
3844
3845 bool fSteppingDB = false;
3846 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
3847 if (pVCpu->hm.s.fSingleInstruction)
3848 {
3849 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3850 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
3851 {
3852 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
3853 Assert(fSteppingDB == false);
3854 }
3855 else
3856 {
3857 pVCpu->cpum.GstCtx.eflags.u |= X86_EFL_TF;
3858 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
3859 pVCpu->hmr0.s.fClearTrapFlag = true;
3860 fSteppingDB = true;
3861 }
3862 }
3863
3864#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3865 bool fInterceptMovDRx = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx;
3866#else
3867 bool fInterceptMovDRx = false;
3868#endif
3869 uint64_t u64GuestDr7;
3870 if ( fSteppingDB
3871 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3872 {
3873 /*
3874 * Use the combined guest and host DRx values found in the hypervisor register set
3875 * because the hypervisor debugger has breakpoints active or someone is single stepping
3876 * on the host side without a monitor trap flag.
3877 *
3878 * Note! DBGF expects a clean DR6 state before executing guest code.
3879 */
3880 if (!CPUMIsHyperDebugStateActive(pVCpu))
3881 {
3882 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3883 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3884 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3885 }
3886
3887 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
3888 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
3889 pVCpu->hmr0.s.fUsingHyperDR7 = true;
3890 fInterceptMovDRx = true;
3891 }
3892 else
3893 {
3894 /*
3895 * If the guest has enabled debug registers, we need to load them prior to
3896 * executing guest code so they'll trigger at the right time.
3897 */
3898 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
3899 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
3900 {
3901 if (!CPUMIsGuestDebugStateActive(pVCpu))
3902 {
3903 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3904 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3905 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3906 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3907 }
3908#ifndef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3909 Assert(!fInterceptMovDRx);
3910#endif
3911 }
3912 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3913 {
3914 /*
3915 * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
3916 * must intercept #DB in order to maintain a correct DR6 guest value, and
3917 * because we need to intercept it to prevent nested #DBs from hanging the
3918 * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
3919 */
3920 fInterceptMovDRx = true;
3921 }
3922
3923 /* Update DR7 with the actual guest value. */
3924 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
3925 pVCpu->hmr0.s.fUsingHyperDR7 = false;
3926 }
3927
3928 if (fInterceptMovDRx)
3929 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
3930 else
3931 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
3932
3933 /*
3934 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
3935 * monitor-trap flag and update our cache.
3936 */
3937 if (uProcCtls != pVmcsInfo->u32ProcCtls)
3938 {
3939 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
3940 AssertRC(rc);
3941 pVmcsInfo->u32ProcCtls = uProcCtls;
3942 }
3943
3944 /*
3945 * Update guest DR7.
3946 */
3947 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
3948 AssertRC(rc);
3949
3950 /*
3951 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
3952 * we need to clear any interrupt inhibition, as otherwise it causes a VM-entry failure.
3953 *
3954 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
3955 */
3956 if (fSteppingDB)
3957 {
3958 Assert(pVCpu->hm.s.fSingleInstruction);
3959 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
3960
3961 uint32_t fIntrState = 0;
3962 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
3963 AssertRC(rc);
3964
3965 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
3966 {
3967 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
3968 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
3969 AssertRC(rc);
3970 }
3971 }
3972
3973 return VINF_SUCCESS;
3974}
3975
3976
3977/**
3978 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3979 * areas.
3980 *
3981 * These MSRs will automatically be loaded to the host CPU on every successful
3982 * VM-entry and stored from the host CPU on every successful VM-exit.
3983 *
3984 * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
3985 * actual host MSR values are not updated here for performance reasons. See
3986 * hmR0VmxExportHostMsrs().
3987 *
3988 * We also export the guest sysenter MSRs into the guest-state area in the VMCS.
3989 *
3990 * @returns VBox status code.
3991 * @param pVCpu The cross context virtual CPU structure.
3992 * @param pVmxTransient The VMX-transient structure.
3993 *
3994 * @remarks No-long-jump zone!!!
3995 */
3996static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3997{
3998 AssertPtr(pVCpu);
3999 AssertPtr(pVmxTransient);
4000
4001 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4002 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
4003
4004 /*
4005 * MSRs for which we use the auto-load/store MSR area in the VMCS.
4006 * For 64-bit hosts, we load/restore them lazily (see hmR0VmxLazyLoadGuestMsrs()),
4007 * so there is nothing to do here. The host MSR values are updated when it's safe in
4008 * hmR0VmxLazySaveHostMsrs().
4009 *
4010 * For nested-guests, the guest MSRs from the VM-entry MSR-load area are already
4011 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
4012 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
4013 * for any MSRs that are not part of the lazy MSRs, so we do not need to place
4014 * those MSRs into the auto-load/store MSR area. Nothing to do here.
4015 */
4016 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
4017 {
4018 /* No auto-load/store MSRs currently. */
4019 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
4020 }
4021
4022 /*
4023 * Guest Sysenter MSRs.
4024 */
4025 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
4026 {
4027 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
4028
4029 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
4030 {
4031 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
4032 AssertRC(rc);
4033 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
4034 }
4035
4036 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
4037 {
4038 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
4039 AssertRC(rc);
4040 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
4041 }
4042
4043 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
4044 {
4045 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
4046 AssertRC(rc);
4047 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
4048 }
4049 }
4050
4051 /*
4052 * Guest/host EFER MSR.
4053 */
4054 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
4055 {
4056 /* Whether we are using the VMCS to swap the EFER MSR must have been
4057 determined earlier while exporting VM-entry/VM-exit controls. */
4058 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
4059 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
4060
4061 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
4062 {
4063 /*
4064 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
4065 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
4066 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
4067 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
4068 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
4069 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
4070 * during VM-entry.
4071 */
4072 uint64_t uGuestEferMsr = pCtx->msrEFER;
4073 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
4074 {
4075 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
4076 uGuestEferMsr &= ~MSR_K6_EFER_LME;
4077 else
4078 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
4079 }
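/* E.g. without unrestricted guest execution, a guest that has set EFER.LME=1 while
   CR0.PG is still 0 runs with LME masked off here until it actually enables paging,
   at which point LMA/LME are passed through unmodified again. */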
4080
4081 /*
4082 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4083 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4084 */
4085 if (g_fHmVmxSupportsVmcsEfer)
4086 {
4087 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
4088 AssertRC(rc);
4089 }
4090 else
4091 {
4092 /*
4093 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
4094 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
4095 */
4096 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
4097 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4098 AssertRCReturn(rc, rc);
4099 }
4100
4101 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
4102 }
4103 else if (!g_fHmVmxSupportsVmcsEfer)
4104 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
4105
4106 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
4107 }
4108
4109 /*
4110 * Other MSRs.
4111 */
4112 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
4113 {
4114 /* Speculation Control (R/W). */
4115 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
4116 if (pVM->cpum.ro.GuestFeatures.fIbrs)
4117 {
4118 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
4119 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4120 AssertRCReturn(rc, rc);
4121 }
4122
4123 /* Last Branch Record. */
4124 if (pVM->hmr0.s.vmx.fLbr)
4125 {
4126 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
4127 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
4128 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
4129 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
4130 Assert(cLbrStack <= 32);
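/* E.g. a 32-deep LBR stack adds up to 32 From-IP slots, 32 To-IP slots (when the CPU
   has To-IP MSRs) and one TOS slot below, i.e. at most 65 auto-load/store entries for
   LBR state alone. */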
4131 for (uint32_t i = 0; i < cLbrStack; i++)
4132 {
4133 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
4134 pVmcsInfoShared->au64LbrFromIpMsr[i],
4135 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4136 AssertRCReturn(rc, rc);
4137
4138 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
4139 if (idToIpMsrStart != 0)
4140 {
4141 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
4142 pVmcsInfoShared->au64LbrToIpMsr[i],
4143 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4144 AssertRCReturn(rc, rc);
4145 }
4146 }
4147
4148 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
4149 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
4150 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
4151 false /* fUpdateHostMsr */);
4152 AssertRCReturn(rc, rc);
4153 }
4154
4155 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
4156 }
4157
4158 return VINF_SUCCESS;
4159}
4160
4161
4162/**
4163 * Wrapper for running the guest code in VT-x.
4164 *
4165 * @returns VBox status code, no informational status codes.
4166 * @param pVCpu The cross context virtual CPU structure.
4167 * @param pVmxTransient The VMX-transient structure.
4168 *
4169 * @remarks No-long-jump zone!!!
4170 */
4171DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4172{
4173 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
4174 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
4175
4176 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
4177 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
4178#ifdef VBOX_WITH_STATISTICS
4179 if (fResumeVM)
4180 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
4181 else
4182 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
4183#endif
4184 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
4185 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
4186 return rc;
4187}
4188
4189
4190/**
4191 * Reports world-switch error and dumps some useful debug info.
4192 *
4193 * @param pVCpu The cross context virtual CPU structure.
4194 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4195 * @param pVmxTransient The VMX-transient structure (only
4196 * exitReason updated).
4197 */
4198static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
4199{
4200 Assert(pVCpu);
4201 Assert(pVmxTransient);
4202 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4203
4204 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
4205 switch (rcVMRun)
4206 {
4207 case VERR_VMX_INVALID_VMXON_PTR:
4208 AssertFailed();
4209 break;
4210 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4211 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4212 {
4213 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4214 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4215 AssertRC(rc);
4216 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
4217
4218 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4219 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4220 Cannot do it here as we may have been long preempted. */
4221
4222#ifdef VBOX_STRICT
4223 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4224 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4225 pVmxTransient->uExitReason));
4226 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
4227 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4228 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4229 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4230 else
4231 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4232 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4233 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4234
4235 static struct
4236 {
4237 /** Name of the field to log. */
4238 const char *pszName;
4239 /** The VMCS field. */
4240 uint32_t uVmcsField;
4241 /** Whether host support of this field needs to be checked. */
4242 bool fCheckSupport;
4243 } const s_aVmcsFields[] =
4244 {
4245 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
4246 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
4247 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
4248 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
4249 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
4250 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
4251 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
4252 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
4253 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
4254 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
4255 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
4256 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
4257 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
4258 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
4259 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
4260 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
4261 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
4262 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
4263 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
4264 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
4265 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
4266 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
4267 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
4268 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
4269 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
4270 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
4271 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
4272 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
4273 /* The order of selector fields below are fixed! */
4274 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
4275 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
4276 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
4277 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
4278 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
4279 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
4280 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
4281 /* End of ordered selector fields. */
4282 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
4283 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
4284 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
4285 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
4286 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
4287 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
4288 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
4289 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
4290 };
4291
4292 RTGDTR HostGdtr;
4293 ASMGetGDTR(&HostGdtr);
4294
4295 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
4296 for (uint32_t i = 0; i < cVmcsFields; i++)
4297 {
4298 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
4299
4300 bool fSupported;
4301 if (!s_aVmcsFields[i].fCheckSupport)
4302 fSupported = true;
4303 else
4304 {
4305 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4306 switch (uVmcsField)
4307 {
4308 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
4309 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
4310 case VMX_VMCS32_CTRL_PROC_EXEC2:
4311 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
4312 break;
4313 default:
4314 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
4315 }
4316 }
4317
4318 if (fSupported)
4319 {
4320 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
4321 switch (uWidth)
4322 {
4323 case VMX_VMCSFIELD_WIDTH_16BIT:
4324 {
4325 uint16_t u16Val;
4326 rc = VMXReadVmcs16(uVmcsField, &u16Val);
4327 AssertRC(rc);
4328 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
4329
4330 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
4331 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
4332 {
4333 if (u16Val < HostGdtr.cbGdt)
4334 {
4335 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
4336 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
4337 "Host FS", "Host GS", "Host TR" };
4338 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
4339 Assert(idxSel < RT_ELEMENTS(s_apszSel));
4340 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
4341 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
4342 }
4343 else
4344 Log4((" Selector value exceeds GDT limit!\n"));
4345 }
4346 break;
4347 }
4348
4349 case VMX_VMCSFIELD_WIDTH_32BIT:
4350 {
4351 uint32_t u32Val;
4352 rc = VMXReadVmcs32(uVmcsField, &u32Val);
4353 AssertRC(rc);
4354 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
4355 break;
4356 }
4357
4358 case VMX_VMCSFIELD_WIDTH_64BIT:
4359 case VMX_VMCSFIELD_WIDTH_NATURAL:
4360 {
4361 uint64_t u64Val;
4362 rc = VMXReadVmcs64(uVmcsField, &u64Val);
4363 AssertRC(rc);
4364 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
4365 break;
4366 }
4367 }
4368 }
4369 }
4370
4371 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4372 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4373 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4374 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4375 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4376 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4377#endif /* VBOX_STRICT */
4378 break;
4379 }
4380
4381 default:
4382 /* Impossible */
4383 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4384 break;
4385 }
4386}
4387
4388
4389/**
4390 * Sets up the usage of TSC-offsetting and updates the VMCS.
4391 *
4392 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
4393 * VMX-preemption timer.
4394 *
4395 * @returns VBox status code.
4396 * @param pVCpu The cross context virtual CPU structure.
4397 * @param pVmxTransient The VMX-transient structure.
4398 * @param idCurrentCpu The current CPU number.
4399 *
4400 * @remarks No-long-jump zone!!!
4401 */
4402static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
4403{
4404 bool fOffsettedTsc;
4405 bool fParavirtTsc;
4406 uint64_t uTscOffset;
4407 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4408 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4409
4410 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
4411 {
4412 /* The TMCpuTickGetDeadlineAndTscOffset function is expensive (calling it on
4413 every entry slowed down the bs2-test1 CPUID testcase by ~33% on a 10980xe). */
4414 uint64_t cTicksToDeadline;
4415 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
4416 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
4417 {
4418 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
4419 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4420 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
4421 if ((int64_t)cTicksToDeadline > 0)
4422 { /* hopefully */ }
4423 else
4424 {
4425 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
4426 cTicksToDeadline = 0;
4427 }
4428 }
4429 else
4430 {
4431 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
4432 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
4433 &pVCpu->hmr0.s.vmx.uTscDeadline,
4434 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
4435 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
4436 if (cTicksToDeadline >= 128)
4437 { /* hopefully */ }
4438 else
4439 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
4440 }
4441
4442 /* Make sure the returned values have sane upper and lower boundaries. */
4443 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
4444 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
4445         cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768);      /* 1/32768th of a second, ~30us. */
4446 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
4447
4448 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
4449 * preemption timers here. We probably need to clamp the preemption timer,
4450 * after converting the timer value to the host. */
4451 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4452 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
4453 AssertRC(rc);
4454 }
4455 else
4456 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4457
4458 if (fParavirtTsc)
4459 {
4460         /* Currently neither Hyper-V nor KVM needs to update their paravirt. TSC
4461            information before every VM-entry, hence this is disabled for performance's sake. */
4462#if 0
4463 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
4464 AssertRC(rc);
4465#endif
4466 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
4467 }
4468
4469 if ( fOffsettedTsc
4470 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
4471 {
4472 if (pVmxTransient->fIsNestedGuest)
4473 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
4474 hmR0VmxSetTscOffsetVmcs(pVmcsInfo, uTscOffset);
4475 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4476 }
4477 else
4478 {
4479 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4480 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4481 }
4482}
4483
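/*
 * Illustrative sketch (not part of the build): how the clamping and shifting above turn a raw
 * TSC deadline into the 32-bit value written to VMX_VMCS32_PREEMPT_TIMER_VALUE.  The host
 * frequency, deadline and timer shift below are assumptions for the example only; the real
 * values come from SUPGetCpuHzFromGipBySetIndex, TM and the VMX preemption-timer shift.
 */
#if 0
static uint32_t hmR0VmxExamplePreemptTimerValue(void)
{
    uint64_t const u64CpuHz         = UINT64_C(3200000000);          /* Assumed 3.2 GHz host TSC. */
    uint64_t       cTicksToDeadline = UINT64_C(640000000);           /* Assumed deadline ~200 ms away. */
    cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64);      /* Upper bound, 1/64th sec: 50 000 000 ticks. */
    cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768);   /* Lower bound, 1/32768th sec: ~97 656 ticks. */
    cTicksToDeadline >>= 5;                                          /* Assumed preemption-timer shift of 5: 1 562 500. */
    return (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);      /* Fits easily into the 32-bit VMCS field. */
}
#endif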
4484
4485/**
4486 * Saves the guest state from the VMCS into the guest-CPU context.
4487 *
4488 * @returns VBox status code.
4489 * @param pVCpu The cross context virtual CPU structure.
4490 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4491 */
4492VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
4493{
4494 AssertPtr(pVCpu);
4495 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4496 return vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, fWhat);
4497}
4498
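/*
 * Illustrative sketch (not part of the build): a ring-0 caller that only needs the guest RIP
 * and RFLAGS up to date in the guest-CPU context could import just those bits on demand.  The
 * helper name and the chosen CPUMCTX_EXTRN_XXX combination are assumptions for the example.
 */
#if 0
static int hmR0VmxExampleImportRipRflags(PVMCPUCC pVCpu)
{
    return VMXR0ImportStateOnDemand(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
}
#endif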
4499
4500/**
4501 * Gets VMX VM-exit auxiliary information.
4502 *
4503 * @returns VBox status code.
4504 * @param pVCpu The cross context virtual CPU structure.
4505 * @param pVmxExitAux Where to store the VM-exit auxiliary info.
4506 * @param fWhat What to fetch, HMVMX_READ_XXX.
4507 */
4508VMMR0DECL(int) VMXR0GetExitAuxInfo(PVMCPUCC pVCpu, PVMXEXITAUX pVmxExitAux, uint32_t fWhat)
4509{
4510 PVMXTRANSIENT pVmxTransient = pVCpu->hmr0.s.vmx.pVmxTransient;
4511 if (RT_LIKELY(pVmxTransient))
4512 {
4513 AssertCompile(sizeof(fWhat) == sizeof(pVmxTransient->fVmcsFieldsRead));
4514
4515 /* The exit reason is always available. */
4516 pVmxExitAux->uReason = pVmxTransient->uExitReason;
4517
4518
4519 if (fWhat & HMVMX_READ_EXIT_QUALIFICATION)
4520 {
4521 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
4522 pVmxExitAux->u64Qual = pVmxTransient->uExitQual;
4523#ifdef VBOX_STRICT
4524 fWhat &= ~HMVMX_READ_EXIT_QUALIFICATION;
4525#endif
4526 }
4527
4528 if (fWhat & HMVMX_READ_IDT_VECTORING_INFO)
4529 {
4530 vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_INFO>(pVCpu, pVmxTransient);
4531 pVmxExitAux->uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
4532#ifdef VBOX_STRICT
4533 fWhat &= ~HMVMX_READ_IDT_VECTORING_INFO;
4534#endif
4535 }
4536
4537 if (fWhat & HMVMX_READ_IDT_VECTORING_ERROR_CODE)
4538 {
4539 vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_ERROR_CODE>(pVCpu, pVmxTransient);
4540 pVmxExitAux->uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
4541#ifdef VBOX_STRICT
4542 fWhat &= ~HMVMX_READ_IDT_VECTORING_ERROR_CODE;
4543#endif
4544 }
4545
4546 if (fWhat & HMVMX_READ_EXIT_INSTR_LEN)
4547 {
4548 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_LEN>(pVCpu, pVmxTransient);
4549 pVmxExitAux->cbInstr = pVmxTransient->cbExitInstr;
4550#ifdef VBOX_STRICT
4551 fWhat &= ~HMVMX_READ_EXIT_INSTR_LEN;
4552#endif
4553 }
4554
4555 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_INFO)
4556 {
4557 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_INFO>(pVCpu, pVmxTransient);
4558 pVmxExitAux->uExitIntInfo = pVmxTransient->uExitIntInfo;
4559#ifdef VBOX_STRICT
4560 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_INFO;
4561#endif
4562 }
4563
4564 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE)
4565 {
4566 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE>(pVCpu, pVmxTransient);
4567 pVmxExitAux->uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
4568#ifdef VBOX_STRICT
4569 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
4570#endif
4571 }
4572
4573 if (fWhat & HMVMX_READ_EXIT_INSTR_INFO)
4574 {
4575 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_INFO>(pVCpu, pVmxTransient);
4576 pVmxExitAux->InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
4577#ifdef VBOX_STRICT
4578 fWhat &= ~HMVMX_READ_EXIT_INSTR_INFO;
4579#endif
4580 }
4581
4582 if (fWhat & HMVMX_READ_GUEST_LINEAR_ADDR)
4583 {
4584 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_LINEAR_ADDR>(pVCpu, pVmxTransient);
4585 pVmxExitAux->u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
4586#ifdef VBOX_STRICT
4587 fWhat &= ~HMVMX_READ_GUEST_LINEAR_ADDR;
4588#endif
4589 }
4590
4591 if (fWhat & HMVMX_READ_GUEST_PHYSICAL_ADDR)
4592 {
4593 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PHYSICAL_ADDR>(pVCpu, pVmxTransient);
4594 pVmxExitAux->u64GuestPhysAddr = pVmxTransient->uGuestPhysicalAddr;
4595#ifdef VBOX_STRICT
4596 fWhat &= ~HMVMX_READ_GUEST_PHYSICAL_ADDR;
4597#endif
4598 }
4599
4600 if (fWhat & HMVMX_READ_GUEST_PENDING_DBG_XCPTS)
4601 {
4602#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4603 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PENDING_DBG_XCPTS>(pVCpu, pVmxTransient);
4604 pVmxExitAux->u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
4605#else
4606 pVmxExitAux->u64GuestPendingDbgXcpts = 0;
4607#endif
4608#ifdef VBOX_STRICT
4609 fWhat &= ~HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
4610#endif
4611 }
4612
4613 AssertMsg(!fWhat, ("fWhat=%#RX32 fVmcsFieldsRead=%#RX32\n", fWhat, pVmxTransient->fVmcsFieldsRead));
4614 return VINF_SUCCESS;
4615 }
4616 return VERR_NOT_AVAILABLE;
4617}
4618
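/*
 * Illustrative sketch (not part of the build): how a caller might fetch a couple of VM-exit
 * auxiliary fields with the API above.  The flag combination and the logging are assumptions
 * for the example; callers request whatever HMVMX_READ_XXX fields they actually need.
 */
#if 0
static int hmR0VmxExampleGetExitAux(PVMCPUCC pVCpu)
{
    VMXEXITAUX ExitAux;
    int rc = VMXR0GetExitAuxInfo(pVCpu, &ExitAux, HMVMX_READ_EXIT_QUALIFICATION | HMVMX_READ_EXIT_INSTR_LEN);
    if (RT_SUCCESS(rc))
        Log4(("Exit %u: uExitQual=%#RX64 cbInstr=%u\n", ExitAux.uReason, ExitAux.u64Qual, ExitAux.cbInstr));
    return rc;
}
#endif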
4619
4620/**
4621 * Does the necessary state syncing before returning to ring-3 for any reason
4622 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
4623 *
4624 * @returns VBox status code.
4625 * @param pVCpu The cross context virtual CPU structure.
4626 * @param fImportState Whether to import the guest state from the VMCS back
4627 * to the guest-CPU context.
4628 *
4629 * @remarks No-long-jmp zone!!!
4630 */
4631static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
4632{
4633 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4634 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4635
4636 RTCPUID const idCpu = RTMpCpuId();
4637 Log4Func(("HostCpuId=%u\n", idCpu));
4638
4639 /*
4640 * !!! IMPORTANT !!!
4641 * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
4642 */
4643
4644 /* Save the guest state if necessary. */
4645 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4646 if (fImportState)
4647 {
4648 int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4649 AssertRCReturn(rc, rc);
4650 }
4651
4652 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
4653 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4654 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
4655
4656 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
4657#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
4658 Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
4659 || (!CPUMIsHyperDebugStateActive(pVCpu) && !pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx));
4660#else
4661 Assert((pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT) || !CPUMIsHyperDebugStateActive(pVCpu));
4662#endif
4663 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4664 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
4665 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
4666
4667 /* Restore host-state bits that VT-x only restores partially. */
4668 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4669 {
4670 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
4671 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4672 }
4673 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4674
4675 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4676 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4677 {
4678 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
4679 if (!fImportState)
4680 {
4681 int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
4682 AssertRCReturn(rc, rc);
4683 }
4684 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4685 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
4686 }
4687 else
4688 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
4689
4690 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4691 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4692
4693 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
4694 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
4695 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
4696 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
4697 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
4698 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
4699 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
4700 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
4701 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
4702 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4703
4704 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4705
4706 /** @todo This partially defeats the purpose of having preemption hooks.
4707      * The problem is that deregistering the hooks should be moved to a place that
4708      * lasts until the EMT is about to be destroyed, not done every time we leave HM
4709 * context.
4710 */
4711 int rc = hmR0VmxClearVmcs(pVmcsInfo);
4712 AssertRCReturn(rc, rc);
4713
4714#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4715 /*
4716 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
4717 * clear a shadow VMCS before allowing that VMCS to become active on another
4718 * logical processor. We may or may not be importing guest state which clears
4719 * it, so cover for it here.
4720 *
4721 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
4722 */
4723 if ( pVmcsInfo->pvShadowVmcs
4724 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
4725 {
4726 rc = vmxHCClearShadowVmcs(pVmcsInfo);
4727 AssertRCReturn(rc, rc);
4728 }
4729
4730 /*
4731 * Flag that we need to re-export the host state if we switch to this VMCS before
4732 * executing guest or nested-guest code.
4733 */
4734 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
4735#endif
4736
4737 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
4738 NOREF(idCpu);
4739 return VINF_SUCCESS;
4740}
4741
4742
4743/**
4744 * Leaves the VT-x session.
4745 *
4746 * @returns VBox status code.
4747 * @param pVCpu The cross context virtual CPU structure.
4748 *
4749 * @remarks No-long-jmp zone!!!
4750 */
4751static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
4752{
4753 HM_DISABLE_PREEMPT(pVCpu);
4754 HMVMX_ASSERT_CPU_SAFE(pVCpu);
4755 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4756 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4757
4758     /* When thread-context hooks are used, we can avoid doing the leave again if we were preempted earlier
4759        and already did this from the VMXR0ThreadCtxCallback(). */
4760 if (!pVCpu->hmr0.s.fLeaveDone)
4761 {
4762 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
4763 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
4764 pVCpu->hmr0.s.fLeaveDone = true;
4765 }
4766 Assert(!pVCpu->cpum.GstCtx.fExtrn);
4767
4768 /*
4769 * !!! IMPORTANT !!!
4770 * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
4771 */
4772
4773 /* Deregister hook now that we've left HM context before re-enabling preemption. */
4774 /** @todo Deregistering here means we need to VMCLEAR always
4775 * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need
4776 * for calling VMMR0ThreadCtxHookDisable here! */
4777 VMMR0ThreadCtxHookDisable(pVCpu);
4778
4779 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
4780 int rc = HMR0LeaveCpu(pVCpu);
4781 HM_RESTORE_PREEMPT();
4782 return rc;
4783}
4784
4785
4786/**
4787  * Takes the necessary actions before going back to ring-3.
4788  *
4789  * Some action requires us to go back to ring-3. This function does the necessary
4790  * steps before we can safely return to ring-3. This is not the same as a longjmp
4791  * to ring-3; this is voluntary and prepares the guest so it may continue
4792  * executing outside HM (recompiler/IEM).
4793 *
4794 * @returns VBox status code.
4795 * @param pVCpu The cross context virtual CPU structure.
4796 * @param rcExit The reason for exiting to ring-3. Can be
4797 * VINF_VMM_UNKNOWN_RING3_CALL.
4798 */
4799static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
4800{
4801 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4802
4803 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4804 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
4805 {
4806 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
4807 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
4808 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4809 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
4810 }
4811
4812     /* Please, no longjumps here (any logging shouldn't flush and thereby longjmp back to ring-3). NO LOGGING BEFORE THIS POINT! */
4813 VMMRZCallRing3Disable(pVCpu);
4814 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
4815
4816 /*
4817 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
4818 * We need to do this only on returns to ring-3 and not for longjmps to ring3.
4819 *
4820 * This is because execution may continue from ring-3 and we would need to inject
4821 * the event from there (hence place it back in TRPM).
4822 */
4823 if (pVCpu->hm.s.Event.fPending)
4824 {
4825 vmxHCPendingEventToTrpmTrap(pVCpu);
4826 Assert(!pVCpu->hm.s.Event.fPending);
4827
4828 /* Clear the events from the VMCS. */
4829 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
4830 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
4831 }
4832#ifdef VBOX_STRICT
4833 /*
4834 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
4835 * fatal), we don't care about verifying duplicate injection of events. Errors like
4836 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
4837 * function so those should and will be checked below.
4838 */
4839 else if (RT_SUCCESS(rcExit))
4840 {
4841 /*
4842 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
4843 * This can be pretty hard to debug otherwise, interrupts might get injected twice
4844 * occasionally, see @bugref{9180#c42}.
4845 *
4846 * However, if the VM-entry failed, any VM entry-interruption info. field would
4847 * be left unmodified as the event would not have been injected to the guest. In
4848 * such cases, don't assert, we're not going to continue guest execution anyway.
4849 */
4850 uint32_t uExitReason;
4851 uint32_t uEntryIntInfo;
4852 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
4853 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
4854 AssertRC(rc);
4855 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
4856 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
4857 }
4858#endif
4859
4860 /*
4861 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
4862 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
4863 * (e.g. TPR below threshold).
4864 */
4865 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
4866 {
4867 vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
4868 vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
4869 }
4870
4871 /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
4872 and if we're injecting an event we should have a TRPM trap pending. */
4873 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
4874#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
4875 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
4876#endif
4877
4878 /* Save guest state and restore host state bits. */
4879 int rc = hmR0VmxLeaveSession(pVCpu);
4880 AssertRCReturn(rc, rc);
4881 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4882
4883 /* Thread-context hooks are unregistered at this point!!! */
4884 /* Ring-3 callback notifications are unregistered at this point!!! */
4885
4886 /* Sync recompiler state. */
4887 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4888 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
4889 | CPUM_CHANGED_LDTR
4890 | CPUM_CHANGED_GDTR
4891 | CPUM_CHANGED_IDTR
4892 | CPUM_CHANGED_TR
4893 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4894 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
4895 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
4896 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
4897
4898 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
4899
4900 /* Update the exit-to-ring 3 reason. */
4901 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
4902
4903 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
4904 if ( rcExit != VINF_EM_RAW_INTERRUPT
4905 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
4906 {
4907 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
4908 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
4909 }
4910
4911 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
4912 VMMRZCallRing3Enable(pVCpu);
4913 return rc;
4914}
4915
4916
4917/**
4918 * VMMRZCallRing3() callback wrapper which saves the guest state before we
4919 * longjump due to a ring-0 assertion.
4920 *
4921 * @returns VBox status code.
4922 * @param pVCpu The cross context virtual CPU structure.
4923 */
4924VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
4925{
4926 /*
4927 * !!! IMPORTANT !!!
4928 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
4929 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
4930 */
4931 VMMR0AssertionRemoveNotification(pVCpu);
4932 VMMRZCallRing3Disable(pVCpu);
4933 HM_DISABLE_PREEMPT(pVCpu);
4934
4935 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4936 vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4937 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4938 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4939
4940 /* Restore host-state bits that VT-x only restores partially. */
4941 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4942 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4943 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4944
4945 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4946 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4947 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4948
4949 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4950 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4951 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4952
4953     /* Clear the current VMCS data back to memory (the shadow VMCS, if any, would have been
4954        cleared as part of importing the guest state above). */
4955 hmR0VmxClearVmcs(pVmcsInfo);
4956
4957 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
4958 VMMR0ThreadCtxHookDisable(pVCpu);
4959
4960 /* Leave HM context. This takes care of local init (term). */
4961 HMR0LeaveCpu(pVCpu);
4962 HM_RESTORE_PREEMPT();
4963 return VINF_SUCCESS;
4964}
4965
4966
4967/**
4968 * Enters the VT-x session.
4969 *
4970 * @returns VBox status code.
4971 * @param pVCpu The cross context virtual CPU structure.
4972 */
4973VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
4974{
4975 AssertPtr(pVCpu);
4976 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
4977 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4978
4979 LogFlowFunc(("pVCpu=%p\n", pVCpu));
4980 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
4981 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
4982
4983#ifdef VBOX_STRICT
4984 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
4985 RTCCUINTREG uHostCr4 = ASMGetCR4();
4986 if (!(uHostCr4 & X86_CR4_VMXE))
4987 {
4988 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
4989 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4990 }
4991#endif
4992
4993 /*
4994 * Do the EMT scheduled L1D and MDS flush here if needed.
4995 */
4996 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
4997 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
4998 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
4999 hmR0MdsClear();
5000
5001 /*
5002 * Load the appropriate VMCS as the current and active one.
5003 */
5004 PVMXVMCSINFO pVmcsInfo;
5005 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
5006 if (!fInNestedGuestMode)
5007 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
5008 else
5009 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5010 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
5011 if (RT_SUCCESS(rc))
5012 {
5013 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
5014 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
5015 pVCpu->hmr0.s.fLeaveDone = false;
5016 Log4Func(("Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5017 }
5018 return rc;
5019}
5020
5021
5022/**
5023 * The thread-context callback.
5024 *
5025 * This is used together with RTThreadCtxHookCreate() on platforms which
5026  * support it, and directly from VMMR0EmtPrepareForBlocking() and
5027 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
5028 *
5029 * @param enmEvent The thread-context event.
5030 * @param pVCpu The cross context virtual CPU structure.
5031 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
5032 * @thread EMT(pVCpu)
5033 */
5034VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
5035{
5036 AssertPtr(pVCpu);
5037 RT_NOREF1(fGlobalInit);
5038
5039 switch (enmEvent)
5040 {
5041 case RTTHREADCTXEVENT_OUT:
5042 {
5043 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5044 VMCPU_ASSERT_EMT(pVCpu);
5045
5046 /* No longjmps (logger flushes, locks) in this fragile context. */
5047 VMMRZCallRing3Disable(pVCpu);
5048 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
5049
5050 /* Restore host-state (FPU, debug etc.) */
5051 if (!pVCpu->hmr0.s.fLeaveDone)
5052 {
5053 /*
5054 * Do -not- import the guest-state here as we might already be in the middle of importing
5055              * it, which is especially bad if we're holding the PGM lock; see the comment in hmR0VmxImportGuestState().
5056 */
5057 hmR0VmxLeave(pVCpu, false /* fImportState */);
5058 pVCpu->hmr0.s.fLeaveDone = true;
5059 }
5060
5061 /* Leave HM context, takes care of local init (term). */
5062 int rc = HMR0LeaveCpu(pVCpu);
5063 AssertRC(rc);
5064
5065 /* Restore longjmp state. */
5066 VMMRZCallRing3Enable(pVCpu);
5067 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
5068 break;
5069 }
5070
5071 case RTTHREADCTXEVENT_IN:
5072 {
5073 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5074 VMCPU_ASSERT_EMT(pVCpu);
5075
5076 /* Do the EMT scheduled L1D and MDS flush here if needed. */
5077 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5078 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5079 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5080 hmR0MdsClear();
5081
5082 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
5083 VMMRZCallRing3Disable(pVCpu);
5084 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
5085
5086 /* Initialize the bare minimum state required for HM. This takes care of
5087 initializing VT-x if necessary (onlined CPUs, local init etc.) */
5088 int rc = hmR0EnterCpu(pVCpu);
5089 AssertRC(rc);
5090 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5091 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5092
5093 /* Load the active VMCS as the current one. */
5094 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5095 rc = hmR0VmxLoadVmcs(pVmcsInfo);
5096 AssertRC(rc);
5097 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5098 pVCpu->hmr0.s.fLeaveDone = false;
5099
5100 /* Restore longjmp state. */
5101 VMMRZCallRing3Enable(pVCpu);
5102 break;
5103 }
5104
5105 default:
5106 break;
5107 }
5108}
5109
5110
5111/**
5112 * Exports the host state into the VMCS host-state area.
5113 * Sets up the VM-exit MSR-load area.
5114 *
5115 * The CPU state will be loaded from these fields on every successful VM-exit.
5116 *
5117 * @returns VBox status code.
5118 * @param pVCpu The cross context virtual CPU structure.
5119 *
5120 * @remarks No-long-jump zone!!!
5121 */
5122static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
5123{
5124 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5125
5126 int rc = VINF_SUCCESS;
5127 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
5128 {
5129 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
5130
5131 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
5132 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5133
5134 hmR0VmxExportHostMsrs(pVCpu);
5135
5136 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
5137 }
5138 return rc;
5139}
5140
5141
5142/**
5143 * Saves the host state in the VMCS host-state.
5144 *
5145 * @returns VBox status code.
5146 * @param pVCpu The cross context virtual CPU structure.
5147 *
5148 * @remarks No-long-jump zone!!!
5149 */
5150VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
5151{
5152 AssertPtr(pVCpu);
5153 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5154
5155 /*
5156 * Export the host state here while entering HM context.
5157 * When thread-context hooks are used, we might get preempted and have to re-save the host
5158 * state but most of the time we won't be, so do it here before we disable interrupts.
5159 */
5160 return hmR0VmxExportHostState(pVCpu);
5161}
5162
5163
5164/**
5165 * Exports the guest state into the VMCS guest-state area.
5166 *
5167  * This will typically be done before VM-entry when the guest-CPU state and the
5168 * VMCS state may potentially be out of sync.
5169 *
5170 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
5171 * VM-entry controls.
5172 * Sets up the appropriate VMX non-root function to execute guest code based on
5173 * the guest CPU mode.
5174 *
5175 * @returns VBox strict status code.
5176 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5177 * without unrestricted guest execution and the VMMDev is not presently
5178 * mapped (e.g. EFI32).
5179 *
5180 * @param pVCpu The cross context virtual CPU structure.
5181 * @param pVmxTransient The VMX-transient structure.
5182 *
5183 * @remarks No-long-jump zone!!!
5184 */
5185static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5186{
5187 AssertPtr(pVCpu);
5188 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5189 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5190
5191 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
5192
5193 /*
5194 * Determine real-on-v86 mode.
5195 * Used when the guest is in real-mode and unrestricted guest execution is not used.
5196 */
5197 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
5198 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
5199 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
5200 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
5201 else
5202 {
5203 Assert(!pVmxTransient->fIsNestedGuest);
5204 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
5205 }
5206
5207 /*
5208 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
5209 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
5210 */
5211 int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
5212 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5213
5214 rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
5215 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5216
5217 VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
5218 if (rcStrict == VINF_SUCCESS)
5219 { /* likely */ }
5220 else
5221 {
5222 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
5223 return rcStrict;
5224 }
5225
5226 rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
5227 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5228
5229 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
5230 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5231
5232 vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
5233 vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
5234 vmxHCExportGuestRip(pVCpu);
5235 hmR0VmxExportGuestRsp(pVCpu);
5236 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5237
5238 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5239 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5240
5241 /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
5242 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
5243 | HM_CHANGED_GUEST_CR2
5244 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
5245 | HM_CHANGED_GUEST_X87
5246 | HM_CHANGED_GUEST_SSE_AVX
5247 | HM_CHANGED_GUEST_OTHER_XSAVE
5248 | HM_CHANGED_GUEST_XCRx
5249 | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
5250 | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
5251 | HM_CHANGED_GUEST_TSC_AUX
5252 | HM_CHANGED_GUEST_OTHER_MSRS
5253 | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
5254
5255 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
5256 return rc;
5257}
5258
5259
5260/**
5261 * Exports the state shared between the host and guest into the VMCS.
5262 *
5263 * @param pVCpu The cross context virtual CPU structure.
5264 * @param pVmxTransient The VMX-transient structure.
5265 *
5266 * @remarks No-long-jump zone!!!
5267 */
5268static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5269{
5270 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5271 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5272
5273 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
5274 {
5275 int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
5276 AssertRC(rc);
5277 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
5278
5279 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
5280 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
5281 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5282 }
5283
5284 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
5285 {
5286 hmR0VmxLazyLoadGuestMsrs(pVCpu);
5287 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
5288 }
5289
5290 AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
5291 ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
5292}
5293
5294
5295/**
5296 * Worker for loading the guest-state bits in the inner VT-x execution loop.
5297 *
5298 * @returns Strict VBox status code (i.e. informational status codes too).
5299 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5300 * without unrestricted guest execution and the VMMDev is not presently
5301 * mapped (e.g. EFI32).
5302 *
5303 * @param pVCpu The cross context virtual CPU structure.
5304 * @param pVmxTransient The VMX-transient structure.
5305 *
5306 * @remarks No-long-jump zone!!!
5307 */
5308static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5309{
5310 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5311 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5312
5313#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
5314 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5315#endif
5316
5317 /*
5318 * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
5319 * changes. First try to export only these without going through all other changed-flag checks.
5320 */
5321 VBOXSTRICTRC rcStrict;
5322 uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
5323 uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
5324 uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5325
5326 /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often).*/
5327 if ( (fCtxChanged & fMinimalMask)
5328 && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
5329 {
5330 vmxHCExportGuestRip(pVCpu);
5331 hmR0VmxExportGuestRsp(pVCpu);
5332 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5333 rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5334 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
5335 }
5336 /* If anything else also changed, go through the full export routine and export as required. */
5337 else if (fCtxChanged & fCtxMask)
5338 {
5339 rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
5340 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5341 { /* likely */}
5342 else
5343 {
5344 AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
5345 VBOXSTRICTRC_VAL(rcStrict)));
5346 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5347 return rcStrict;
5348 }
5349 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
5350 }
5351 /* Nothing changed, nothing to load here. */
5352 else
5353 rcStrict = VINF_SUCCESS;
5354
5355#ifdef VBOX_STRICT
5356 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
5357 uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5358 AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
5359#endif
5360 return rcStrict;
5361}
5362
5363
5364/**
5365 * Map the APIC-access page for virtualizing APIC accesses.
5366 *
5367  * This can cause a longjmp to R3 due to the acquisition of the PGM lock. Hence,
5368  * this is not done as part of exporting guest state, see @bugref{8721}.
5369 *
5370 * @returns VBox status code.
5371 * @param pVCpu The cross context virtual CPU structure.
5372 * @param GCPhysApicBase The guest-physical address of the APIC access page.
5373 */
5374static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu, RTGCPHYS GCPhysApicBase)
5375{
5376 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5377 Assert(GCPhysApicBase);
5378
5379 LogFunc(("Mapping HC APIC-access page at %#RGp\n", GCPhysApicBase));
5380
5381 /* Unalias the existing mapping. */
5382 int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
5383 AssertRCReturn(rc, rc);
5384
5385 /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
5386 Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
5387 rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
5388 AssertRCReturn(rc, rc);
5389
5390 return VINF_SUCCESS;
5391}
5392
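/*
 * Illustrative sketch (not part of the build): the caller derives the APIC-access page address
 * from the guest APIC base MSR by masking off the page offset, as done in hmR0VmxPreRunGuest()
 * further down.  The helper name is an assumption for the example.
 */
#if 0
static int hmR0VmxExampleMapApicAccessPage(PVMCPUCC pVCpu)
{
    uint64_t const uApicBaseMsr = APICGetBaseMsrNoCheck(pVCpu);
    return hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
}
#endif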
5393
5394/**
5395 * Worker function passed to RTMpOnSpecific() that is to be called on the target
5396 * CPU.
5397 *
5398 * @param idCpu The ID for the CPU the function is called on.
5399 * @param pvUser1 Null, not used.
5400 * @param pvUser2 Null, not used.
5401 */
5402static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
5403{
5404 RT_NOREF3(idCpu, pvUser1, pvUser2);
5405 VMXDispatchHostNmi();
5406}
5407
5408
5409/**
5410  * Dispatches an NMI on the host CPU that received it.
5411 *
5412 * @returns VBox status code.
5413 * @param pVCpu The cross context virtual CPU structure.
5414 * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
5415 * executing when receiving the host NMI in VMX non-root
5416 * operation.
5417 */
5418static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
5419{
5420 RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
5421 Assert(idCpu != NIL_RTCPUID);
5422
5423 /*
5424 * We don't want to delay dispatching the NMI any more than we have to. However,
5425 * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
5426 * after executing guest or nested-guest code for the following reasons:
5427 *
5428      *   - We would need to perform VMREADs with interrupts disabled, which is orders of
5429      *     magnitude worse when we run as a nested hypervisor without VMCS shadowing
5430      *     support from the host hypervisor.
5431 *
5432 * - It affects the common VM-exit scenario and keeps interrupts disabled for a
5433 * longer period of time just for handling an edge case like host NMIs which do
5434 * not occur nearly as frequently as other VM-exits.
5435 *
5436 * Let's cover the most likely scenario first. Check if we are on the target CPU
5437 * and dispatch the NMI right away. This should be much faster than calling into
5438 * RTMpOnSpecific() machinery.
5439 */
5440 bool fDispatched = false;
5441 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
5442 if (idCpu == RTMpCpuId())
5443 {
5444 VMXDispatchHostNmi();
5445 fDispatched = true;
5446 }
5447 ASMSetFlags(fEFlags);
5448 if (fDispatched)
5449 {
5450 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
5451 return VINF_SUCCESS;
5452 }
5453
5454 /*
5455 * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
5456 * there should be no race or recursion even if we are unlucky enough to be preempted
5457 * (to the target CPU) without dispatching the host NMI above.
5458 */
5459 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
5460 return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
5461}
5462
5463
5464#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5465/**
5466 * Merges the guest with the nested-guest MSR bitmap in preparation of executing the
5467 * nested-guest using hardware-assisted VMX.
5468 *
5469 * @param pVCpu The cross context virtual CPU structure.
5470 * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
5471 * @param pVmcsInfoGst The guest VMCS info. object.
5472 */
5473static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
5474{
5475 uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
5476 uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
5477 Assert(pu64MsrBitmap);
5478
5479 /*
5480 * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
5481 * MSR that is intercepted by the guest is also intercepted while executing the
5482 * nested-guest using hardware-assisted VMX.
5483 *
5484 * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
5485 * nested-guest VM-exit even if the outer guest is not intercepting some
5486 * MSRs. We cannot assume the caller has initialized the nested-guest
5487 * MSR bitmap in this case.
5488 *
5489 * The nested hypervisor may also switch whether it uses MSR bitmaps for
5490      *       each of its VM-entries, hence initializing it once per-VM while setting
5491 * up the nested-guest VMCS is not sufficient.
5492 */
5493 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5494 if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5495 {
5496 uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
5497 uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
5498 Assert(pu64MsrBitmapNstGst);
5499 Assert(pu64MsrBitmapGst);
5500
5501 /** @todo Detect and use EVEX.POR? */
5502 uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
5503 for (uint32_t i = 0; i < cFrags; i++)
5504 pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
5505 }
5506 else
5507 ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
5508}
5509
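/*
 * Worked example for hmR0VmxMergeMsrBitmapNested above (made-up bitmap fragments): an MSR access
 * is intercepted while running the nested-guest if either the outer guest or the nested
 * hypervisor intercepts it, i.e. the corresponding fragments are simply OR'ed together:
 *
 *      guest fragment        : 0x0000000000000001
 *      nested-guest fragment : 0x8000000000000000
 *      merged fragment       : 0x8000000000000001
 */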
5510
5511/**
5512 * Merges the guest VMCS in to the nested-guest VMCS controls in preparation of
5513 * hardware-assisted VMX execution of the nested-guest.
5514 *
5515 * For a guest, we don't modify these controls once we set up the VMCS and hence
5516 * this function is never called.
5517 *
5518  * For nested-guests, the nested hypervisor provides these controls on every
5519  * nested-guest VM-entry and could potentially change them every time, so we need
5520  * to merge them before every nested-guest VM-entry.
5521 *
5522 * @returns VBox status code.
5523 * @param pVCpu The cross context virtual CPU structure.
5524 */
5525static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
5526{
5527 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5528 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
5529 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5530
5531 /*
5532 * Merge the controls with the requirements of the guest VMCS.
5533 *
5534 * We do not need to validate the nested-guest VMX features specified in the nested-guest
5535 * VMCS with the features supported by the physical CPU as it's already done by the
5536 * VMLAUNCH/VMRESUME instruction emulation.
5537 *
5538 * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
5539 * derived from the VMX features supported by the physical CPU.
5540 */
5541
5542 /* Pin-based VM-execution controls. */
5543 uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
5544
5545 /* Processor-based VM-execution controls. */
5546 uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
5547 | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
5548 | VMX_PROC_CTLS_NMI_WINDOW_EXIT
5549 | VMX_PROC_CTLS_MOV_DR_EXIT /* hmR0VmxExportSharedDebugState makes
5550 sure guest DRx regs are loaded. */
5551 | VMX_PROC_CTLS_USE_TPR_SHADOW
5552 | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
5553
5554 /* Secondary processor-based VM-execution controls. */
5555 uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
5556 | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
5557 | VMX_PROC_CTLS2_INVPCID
5558 | VMX_PROC_CTLS2_VMCS_SHADOWING
5559 | VMX_PROC_CTLS2_RDTSCP
5560 | VMX_PROC_CTLS2_XSAVES_XRSTORS
5561 | VMX_PROC_CTLS2_APIC_REG_VIRT
5562 | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
5563 | VMX_PROC_CTLS2_VMFUNC));
5564
5565 /*
5566 * VM-entry controls:
5567      * These controls contain state that depends on the nested-guest state (primarily
5568      * the EFER MSR) and are thus not constant between VMLAUNCH/VMRESUME and the nested-guest
5569      * VM-exit. Although the nested hypervisor cannot change them, we need to in order to
5570      * properly continue executing the nested-guest if the EFER MSR changes but does not
5571      * cause a nested-guest VM-exit.
5572 *
5573 * VM-exit controls:
5574 * These controls specify the host state on return. We cannot use the controls from
5575 * the nested hypervisor state as is as it would contain the guest state rather than
5576 * the host state. Since the host state is subject to change (e.g. preemption, trips
5577 * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
5578 * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
5579 *
5580 * VM-entry MSR-load:
5581 * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
5582 * context by the VMLAUNCH/VMRESUME instruction emulation.
5583 *
5584 * VM-exit MSR-store:
5585 * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
5586 * back into the VM-exit MSR-store area.
5587 *
5588 * VM-exit MSR-load areas:
5589 * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
5590 * can entirely ignore what the nested hypervisor wants to load here.
5591 */
5592
5593 /*
5594 * Exception bitmap.
5595 *
5596 * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
5597 * here (and avoid doing anything while exporting nested-guest state), but to keep the
5598 * code more flexible if intercepting exceptions become more dynamic in the future we do
5599      * code more flexible if intercepting exceptions becomes more dynamic in the future, we do
5600 */
5601 uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
5602
5603 /*
5604 * CR0/CR4 guest/host mask.
5605 *
5606 * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
5607 * cause VM-exits, so we need to merge them here.
5608 */
5609 uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
5610 uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
5611
5612 /*
5613 * Page-fault error-code mask and match.
5614 *
5615 * Although we require unrestricted guest execution (and thereby nested-paging) for
5616 * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
5617 * normally intercept #PFs, it might intercept them for debugging purposes.
5618 *
5619 * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
5620 * If the outer guest is intercepting #PFs, we must intercept all #PFs.
5621 */
5622 uint32_t u32XcptPFMask;
5623 uint32_t u32XcptPFMatch;
5624 if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
5625 {
5626 u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
5627 u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
5628 }
5629 else
5630 {
5631 u32XcptPFMask = 0;
5632 u32XcptPFMatch = 0;
5633 }
5634
5635 /*
5636 * Pause-Loop exiting.
5637 */
5638 /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
5639  *    pVM->hm.s.vmx.cPleWindowTicks default to zero, I cannot see how
5640 * this will work... */
5641 uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
5642 uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
5643
5644 /*
5645 * Pending debug exceptions.
5646 * Currently just copy whatever the nested-guest provides us.
5647 */
5648 uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
5649
5650 /*
5651 * I/O Bitmap.
5652 *
5653 * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
5654 * intercept all I/O port accesses.
5655 */
5656 Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
5657 Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
5658
5659 /*
5660 * VMCS shadowing.
5661 *
5662 * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
5663 * enabled while executing the nested-guest.
5664 */
5665 Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
5666
5667 /*
5668 * APIC-access page.
5669 */
5670 RTHCPHYS HCPhysApicAccess;
5671 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5672 {
5673 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
5674 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
5675
5676 void *pvPage;
5677 PGMPAGEMAPLOCK PgLockApicAccess;
5678 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
5679 if (RT_SUCCESS(rc))
5680 {
5681 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
5682 AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
5683
5684 /** @todo Handle proper releasing of page-mapping lock later. */
5685 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
5686 }
5687 else
5688 return rc;
5689 }
5690 else
5691 HCPhysApicAccess = 0;
5692
5693 /*
5694 * Virtual-APIC page and TPR threshold.
5695 */
5696 RTHCPHYS HCPhysVirtApic;
5697 uint32_t u32TprThreshold;
5698 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
5699 {
5700 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
5701 RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
5702
5703 void *pvPage;
5704 PGMPAGEMAPLOCK PgLockVirtApic;
5705 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
5706 if (RT_SUCCESS(rc))
5707 {
5708 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
5709 AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
5710
5711 /** @todo Handle proper releasing of page-mapping lock later. */
5712 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
5713 }
5714 else
5715 return rc;
5716
5717 u32TprThreshold = pVmcsNstGst->u32TprThreshold;
5718 }
5719 else
5720 {
5721 HCPhysVirtApic = 0;
5722 u32TprThreshold = 0;
5723
5724 /*
5725          * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not
5726 * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC will
5727 * be taken care of by EPT/shadow paging.
5728 */
5729 if (pVM->hmr0.s.fAllow64BitGuests)
5730 u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
5731 | VMX_PROC_CTLS_CR8_LOAD_EXIT;
5732 }
5733
5734 /*
5735 * Validate basic assumptions.
5736 */
5737 PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5738 Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
5739 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
5740 Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
5741
5742 /*
5743 * Commit it to the nested-guest VMCS.
5744 */
5745 int rc = VINF_SUCCESS;
5746 if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
5747 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
5748 if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
5749 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
5750 if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
5751 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
5752 if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
5753 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
5754 if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
5755 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
5756 if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
5757 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
5758 if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
5759 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
5760 if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
5761 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
5762 if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
5763 && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
5764 {
5765 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
5766 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
5767 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
5768 }
5769 if (pVmcsInfoNstGst->HCPhysVirtApic != HCPhysVirtApic)
5770 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
5771 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
5772 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5773 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
5774 rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
5775 AssertRC(rc);
5776
5777 /*
5778 * Update the nested-guest VMCS cache.
5779 */
5780 pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
5781 pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
5782 pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
5783 pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
5784 pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
5785 pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
5786 pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
5787 pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
5788 pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
5789
5790 /*
5791 * We need to flush the TLB if we are switching the APIC-access page address.
5792 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
5793 */
5794 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5795 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
5796
5797 /*
5798 * MSR bitmap.
5799 *
5800 * The MSR bitmap address has already been initialized while setting up the nested-guest
5801      * VMCS; here we need to merge the MSR bitmaps.
5802 */
5803 if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5804 hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
5805
5806 return VINF_SUCCESS;
5807}
5808#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
5809
5810
5811/**
5812 * Does the preparations before executing guest code in VT-x.
5813 *
5814 * This may cause longjmps to ring-3 and may even result in rescheduling to the
5815  * recompiler/IEM. We must be cautious about what we do here regarding committing
5816  * guest-state information into the VMCS, as that assumes we will assuredly execute
5817  * the guest in VT-x mode.
5818 *
5819 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
5820 * the common-state (TRPM/forceflags), we must undo those changes so that the
5821 * recompiler/IEM can (and should) use them when it resumes guest execution.
5822 * Otherwise such operations must be done when we can no longer exit to ring-3.
5823 *
5824 * @returns Strict VBox status code (i.e. informational status codes too).
5825 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
5826 * have been disabled.
5827 * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
5828 * pending events).
5829 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
5830 * double-fault into the guest.
5831 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
5832 * dispatched directly.
5833 * @retval VINF_* scheduling changes, we have to go back to ring-3.
5834 *
5835 * @param pVCpu The cross context virtual CPU structure.
5836 * @param pVmxTransient The VMX-transient structure.
5837 * @param fStepping Whether we are single-stepping the guest in the
5838 * hypervisor debugger. Makes us ignore some of the reasons
5839 * for returning to ring-3, and return VINF_EM_DBG_STEPPED
5840 * if event dispatching took place.
5841 */
5842static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
5843{
5844 Assert(VMMRZCallRing3IsEnabled(pVCpu));
5845
5846 Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
5847
5848#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
5849 if (pVmxTransient->fIsNestedGuest)
5850 {
5851 RT_NOREF2(pVCpu, fStepping);
5852 Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
5853 return VINF_EM_RESCHEDULE_REM;
5854 }
5855#endif
5856
5857 /*
5858 * Check and process force flag actions, some of which might require us to go back to ring-3.
5859 */
5860 VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping);
5861 if (rcStrict == VINF_SUCCESS)
5862 {
5863 /* FFs don't get set all the time. */
5864#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5865 if ( pVmxTransient->fIsNestedGuest
5866 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5867 {
5868 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
5869 return VINF_VMX_VMEXIT;
5870 }
5871#endif
5872 }
5873 else
5874 return rcStrict;
5875
5876 /*
5877 * Virtualize memory-mapped accesses to the physical APIC (may take locks).
5878 */
5879 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5880 if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
5881 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5882 && PDMHasApic(pVM))
5883 {
5884 /* Get the APIC base MSR from the virtual APIC device. */
5885 uint64_t const uApicBaseMsr = APICGetBaseMsrNoCheck(pVCpu);
5886
5887 /* Map the APIC access page. */
5888 int rc = hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
5889 AssertRCReturn(rc, rc);
5890
5891 /* Update the per-VCPU cache of the APIC base MSR corresponding to the mapped APIC access page. */
5892 pVCpu->hm.s.vmx.u64GstMsrApicBase = uApicBaseMsr;
5893 }
5894
5895#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5896 /*
5897 * Merge guest VMCS controls with the nested-guest VMCS controls.
5898 *
5899 * Even if we have not executed the guest prior to this (e.g. when resuming from a
5900 * saved state), we should be okay with merging controls as we initialize the
5901     * guest VMCS controls as part of the VM setup phase.
5902 */
5903 if ( pVmxTransient->fIsNestedGuest
5904 && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
5905 {
5906 int rc = hmR0VmxMergeVmcsNested(pVCpu);
5907 AssertRCReturn(rc, rc);
5908 pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
5909 }
5910#endif
5911
5912 /*
5913 * Evaluate events to be injected into the guest.
5914 *
5915 * Events in TRPM can be injected without inspecting the guest state.
5916 * If any new events (interrupts/NMI) are pending currently, we try to set up the
5917 * guest to cause a VM-exit the next time they are ready to receive the event.
5918 */
5919 if (TRPMHasTrap(pVCpu))
5920 vmxHCTrpmTrapToPendingEvent(pVCpu);
5921
5922 uint32_t fIntrState;
5923 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
5924 &fIntrState);
5925
5926#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5927 /*
5928     * If something failed while evaluating pending events (unlikely), or if we were
5929     * preparing to run a nested-guest but performed a nested-guest VM-exit, we should bail.
5930 */
5931 if (rcStrict != VINF_SUCCESS)
5932 return rcStrict;
5933 if ( pVmxTransient->fIsNestedGuest
5934 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5935 {
5936 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
5937 return VINF_VMX_VMEXIT;
5938 }
5939#else
5940 Assert(rcStrict == VINF_SUCCESS);
5941#endif
5942
5943 /*
5944     * Event injection may take locks (currently the PGM lock for the real-on-v86 case) and thus
5945 * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
5946 * also result in triple-faulting the VM.
5947 *
5948 * With nested-guests, the above does not apply since unrestricted guest execution is a
5949 * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
5950 */
5951 rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
5952 fIntrState, fStepping);
5953 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5954 { /* likely */ }
5955 else
5956 {
5957 AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
5958 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
5959 return rcStrict;
5960 }
5961
5962 /*
5963 * A longjump might result in importing CR3 even for VM-exits that don't necessarily
5964     * import CR3 themselves. We will need to update them here, as even the
5965     * vmxHCInjectPendingEvent() call above may lazily import guest-CPU state on demand,
5966     * causing the force flags below to be set.
5967 */
5968 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
5969 {
5970 Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
5971 int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
5972 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
5973 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
5974 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
5975 }
5976
5977#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5978 /* Paranoia. */
5979 Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
5980#endif
5981
5982 /*
5983 * No longjmps to ring-3 from this point on!!!
5984 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
5985 * This also disables flushing of the R0-logger instance (if any).
5986 */
5987 VMMRZCallRing3Disable(pVCpu);
5988
5989 /*
5990 * Export the guest state bits.
5991 *
5992 * We cannot perform longjmps while loading the guest state because we do not preserve the
5993 * host/guest state (although the VMCS will be preserved) across longjmps which can cause
5994 * CPU migration.
5995 *
5996 * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
5997 * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
5998 */
5999 rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
6000 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6001 { /* likely */ }
6002 else
6003 {
6004 VMMRZCallRing3Enable(pVCpu);
6005 return rcStrict;
6006 }
6007
6008 /*
6009 * We disable interrupts so that we don't miss any interrupts that would flag preemption
6010 * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
6011 * preemption disabled for a while. Since this is purely to aid the
6012 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
6013     * disable interrupts on NT.
6014 *
6015     * We need to check for force-flags that could've possibly been altered since we last
6016 * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
6017 * see @bugref{6398}).
6018 *
6019 * We also check a couple of other force-flags as a last opportunity to get the EMT back
6020 * to ring-3 before executing guest code.
6021 */
6022 pVmxTransient->fEFlags = ASMIntDisableFlags();
6023
6024 if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
6025 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6026 || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
6027 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
6028 {
6029 if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
6030 {
6031#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6032 /*
6033 * If we are executing a nested-guest make sure that we should intercept subsequent
6034 * events. The one we are injecting might be part of VM-entry. This is mainly to keep
6035 * the VM-exit instruction emulation happy.
6036 */
6037 if (pVmxTransient->fIsNestedGuest)
6038 CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
6039#endif
6040
6041 /*
6042 * We've injected any pending events. This is really the point of no return (to ring-3).
6043 *
6044 * Note! The caller expects to continue with interrupts & longjmps disabled on successful
6045 * returns from this function, so do -not- enable them here.
6046 */
6047 pVCpu->hm.s.Event.fPending = false;
6048 return VINF_SUCCESS;
6049 }
6050
6051 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
6052 rcStrict = VINF_EM_RAW_INTERRUPT;
6053 }
6054 else
6055 {
6056 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6057 rcStrict = VINF_EM_RAW_TO_R3;
6058 }
6059
6060 ASMSetFlags(pVmxTransient->fEFlags);
6061 VMMRZCallRing3Enable(pVCpu);
6062
6063 return rcStrict;
6064}
6065
6066
6067/**
6068 * Final preparations before executing guest code using hardware-assisted VMX.
6069 *
6070 * We can no longer get preempted to a different host CPU and there are no returns
6071 * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
6072 * failures), this function is not intended to fail sans unrecoverable hardware
6073 * errors.
6074 *
6075 * @param pVCpu The cross context virtual CPU structure.
6076 * @param pVmxTransient The VMX-transient structure.
6077 *
6078 * @remarks Called with preemption disabled.
6079 * @remarks No-long-jump zone!!!
6080 */
6081static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
6082{
6083 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6084 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6085 Assert(!pVCpu->hm.s.Event.fPending);
6086
6087 /*
6088 * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
6089 */
6090 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6091 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
6092
6093 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6094 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6095 PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
6096 RTCPUID const idCurrentCpu = pHostCpu->idCpu;
6097
6098 if (!CPUMIsGuestFPUStateActive(pVCpu))
6099 {
6100 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6101 if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
6102 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
6103 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6104 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
6105 }
6106
6107 /*
6108 * Re-export the host state bits as we may've been preempted (only happens when
6109 * thread-context hooks are used or when the VM start function changes) or if
6110 * the host CR0 is modified while loading the guest FPU state above.
6111 *
6112 * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
6113 * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
6114 * see @bugref{8432}.
6115 *
6116 * This may also happen when switching to/from a nested-guest VMCS without leaving
6117 * ring-0.
6118 */
6119 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
6120 {
6121 hmR0VmxExportHostState(pVCpu);
6122 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
6123 }
6124 Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
6125
6126 /*
6127 * Export the state shared between host and guest (FPU, debug, lazy MSRs).
6128 */
6129 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
6130 hmR0VmxExportSharedState(pVCpu, pVmxTransient);
6131 AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
6132
6133 /*
6134 * Store status of the shared guest/host debug state at the time of VM-entry.
6135 */
6136 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
6137 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
6138
6139 /*
6140 * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
6141 * more than one conditional check. The post-run side of our code shall determine
6142     * if it needs to sync the virtual APIC TPR with the TPR-shadow.
6143 */
6144 if (pVmcsInfo->pbVirtApic)
6145 pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
6146
6147 /*
6148     * Update the host MSR values in the VM-exit MSR-load area.
6149 */
6150 if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
6151 {
6152 if (pVmcsInfo->cExitMsrLoad > 0)
6153 hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
6154 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
6155 }
6156
6157 /*
6158 * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
6159 * VMX-preemption timer based on the next virtual sync clock deadline.
6160 */
6161 if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
6162 || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
6163 {
6164 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
6165 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
6166 }
6167
6168 /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
6169 bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
6170 if (!fIsRdtscIntercepted)
6171 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
6172 else
6173 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
6174
6175 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
6176 hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
6177 Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
6178 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
6179 pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
6180 pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
6181
6182 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
6183
6184 TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
6185 as we're about to start executing the guest. */
6186
6187 /*
6188 * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
6189 *
6190 * This is done this late as updating the TSC offsetting/preemption timer above
6191 * figures out if we can skip intercepting RDTSCP by calculating the number of
6192 * host CPU ticks till the next virtual sync deadline (for the dynamic case).
6193 */
6194 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
6195 && !fIsRdtscIntercepted)
6196 {
6197 vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
6198
6199 /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
6200 it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
6201 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
6202 true /* fSetReadWrite */, true /* fUpdateHostMsr */);
6203 AssertRC(rc);
6204 Assert(!pVmxTransient->fRemoveTscAuxMsr);
6205 pVmxTransient->fRemoveTscAuxMsr = true;
6206 }
6207
6208#ifdef VBOX_STRICT
6209 Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
6210 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
6211 hmR0VmxCheckHostEferMsr(pVmcsInfo);
6212 AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
6213#endif
6214
6215#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
6216 /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
6217 * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
6218 * see @bugref{9180#c54}. */
6219 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
6220 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
6221 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
6222#endif
6223}
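/*
 * Illustrative sketch (not part of the build): the TPR-shadow handling above and in
 * hmR0VmxPostRunGuest() below is a simple cache-and-compare pattern: snapshot the TPR byte
 * of the virtual-APIC page before VM-entry and, after VM-exit, push it back into the
 * virtual APIC device if the guest changed it.  Every name below is hypothetical; the
 * callback merely stands in for APICSetTpr().
 */
#if 0 /* illustrative example, not built */
# include <stdint.h>
typedef void FNEXAMPLESETTPR(uint8_t u8Tpr);               /* stand-in for APICSetTpr() */

static uint8_t exampleCacheTprBeforeRun(const uint8_t *pbVirtApic)
{
    return pbVirtApic[0x80];                               /* xAPIC TPR register lives at offset 0x80. */
}

static void exampleSyncTprAfterRun(const uint8_t *pbVirtApic, uint8_t u8TprBeforeRun,
                                   FNEXAMPLESETTPR *pfnSetTpr)
{
    uint8_t const u8TprNow = pbVirtApic[0x80];
    if (u8TprNow != u8TprBeforeRun)                        /* Guest updated CR8/TPR via the shadow. */
        pfnSetTpr(u8TprNow);                               /* Propagate to the virtual APIC. */
}
#endif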
6224
6225
6226/**
6227 * First C routine invoked after running guest code using hardware-assisted VMX.
6228 *
6229 * @param pVCpu The cross context virtual CPU structure.
6230 * @param pVmxTransient The VMX-transient structure.
6231 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
6232 *
6233 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
6234 *
6235 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
6236 * unconditionally when it is safe to do so.
6237 */
6238static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
6239{
6240 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
6241 ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
6242 pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
6243 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
6244 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
6245 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
6246
6247 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6248 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
6249 {
6250 uint64_t uGstTsc;
6251 if (!pVmxTransient->fIsNestedGuest)
6252 uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6253 else
6254 {
6255 uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6256 uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
6257 }
6258 TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
6259 }
6260
6261 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
6262 TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
6263 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6264
6265 pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
6266 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
6267#ifdef VBOX_STRICT
6268 hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
6269#endif
6270 Assert(!ASMIntAreEnabled());
6271 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
6272 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6273
6274#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
6275 /*
6276 * Clean all the VMCS fields in the transient structure before reading
6277 * anything from the VMCS.
6278 */
6279 pVmxTransient->uExitReason = 0;
6280 pVmxTransient->uExitIntErrorCode = 0;
6281 pVmxTransient->uExitQual = 0;
6282 pVmxTransient->uGuestLinearAddr = 0;
6283 pVmxTransient->uExitIntInfo = 0;
6284 pVmxTransient->cbExitInstr = 0;
6285 pVmxTransient->ExitInstrInfo.u = 0;
6286 pVmxTransient->uEntryIntInfo = 0;
6287 pVmxTransient->uEntryXcptErrorCode = 0;
6288 pVmxTransient->cbEntryInstr = 0;
6289 pVmxTransient->uIdtVectoringInfo = 0;
6290 pVmxTransient->uIdtVectoringErrorCode = 0;
6291#endif
6292
6293 /*
6294 * Save the basic VM-exit reason and check if the VM-entry failed.
6295 * See Intel spec. 24.9.1 "Basic VM-exit Information".
6296 */
6297 uint32_t uExitReason;
6298 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
6299 AssertRC(rc);
6300 pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
6301 pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
6302
6303 /*
6304 * Log the VM-exit before logging anything else as otherwise it might be a
6305 * tad confusing what happens before and after the world-switch.
6306 */
6307 HMVMX_LOG_EXIT(pVCpu, uExitReason);
6308
6309 /*
6310 * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
6311 * bitmap permissions, if it was added before VM-entry.
6312 */
6313 if (pVmxTransient->fRemoveTscAuxMsr)
6314 {
6315 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
6316 pVmxTransient->fRemoveTscAuxMsr = false;
6317 }
6318
6319 /*
6320 * Check if VMLAUNCH/VMRESUME succeeded.
6321 * If this failed, we cause a guru meditation and cease further execution.
6322 */
6323 if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
6324 {
6325 /*
6326 * Update the VM-exit history array here even if the VM-entry failed due to:
6327 * - Invalid guest state.
6328 * - MSR loading.
6329 * - Machine-check event.
6330 *
6331 * In any of the above cases we will still have a "valid" VM-exit reason
6332             * despite @a fVMEntryFailed being true.
6333 *
6334 * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
6335 *
6336 * Note! We don't have CS or RIP at this point. Will probably address that later
6337 * by amending the history entry added here.
6338 */
6339 EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
6340 UINT64_MAX, pVCpu->hmr0.s.uTscExit);
6341
6342 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
6343 {
6344 VMMRZCallRing3Enable(pVCpu);
6345 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6346
6347#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
6348 vmxHCReadAllRoFieldsVmcs(pVCpu, pVmxTransient);
6349#endif
6350
6351 /*
6352             * Always import the guest-interruptibility state as we need it while evaluating
6353             * and injecting events on re-entry. We could in *theory* postpone reading it for
6354             * exits that do not involve instruction emulation, but since most exits are
6355 * for instruction emulation (exceptions being external interrupts, shadow
6356 * paging building page faults and EPT violations, and interrupt window stuff)
6357 * this is a reasonable simplification.
6358 *
6359 * We don't import CR0 (when unrestricted guest execution is unavailable) despite
6360 * checking for real-mode while exporting the state because all bits that cause
6361 * mode changes wrt CR0 are intercepted.
6362 *
6363             * Note! This mask _must_ match the default a_fDonePostExit
6364 * value for the vmxHCImportGuestState template!
6365 */
6366 /** @todo r=bird: consider dropping the INHIBIT_XXX and fetch the state
6367 * explicitly in the exit handlers and injection function. That way we have
6368 * fewer clusters of vmread spread around the code, because the EM history
6369 * executor won't execute very many non-exiting instructions before stopping. */
6370 rc = vmxHCImportGuestState< CPUMCTX_EXTRN_INHIBIT_INT
6371 | CPUMCTX_EXTRN_INHIBIT_NMI
6372#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
6373 | HMVMX_CPUMCTX_EXTRN_ALL
6374#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
6375 | CPUMCTX_EXTRN_RFLAGS
6376#endif
6377 , 0 /*a_fDoneLocal*/, 0 /*a_fDonePostExit*/>(pVCpu, pVmcsInfo, __FUNCTION__);
6378 AssertRC(rc);
6379
6380 /*
6381 * Sync the TPR shadow with our APIC state.
6382 */
6383 if ( !pVmxTransient->fIsNestedGuest
6384 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
6385 {
6386 Assert(pVmcsInfo->pbVirtApic);
6387 if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
6388 {
6389 rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
6390 AssertRC(rc);
6391 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
6392 }
6393 }
6394
6395 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6396 Assert( pVmxTransient->fWasGuestDebugStateActive == false
6397 || pVmxTransient->fWasHyperDebugStateActive == false);
6398 return;
6399 }
6400 }
6401#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6402 else if (pVmxTransient->fIsNestedGuest)
6403 AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
6404#endif
6405 else
6406 Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
6407
6408 VMMRZCallRing3Enable(pVCpu);
6409}
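/*
 * Illustrative sketch (not part of the build): two bits of arithmetic from
 * hmR0VmxPostRunGuest() above, written out standalone.  The field layout follows the
 * Intel SDM ("Basic VM-exit Information"): bits 15:0 of the exit-reason field hold the
 * basic reason and bit 31 is set when the VM-entry failed.  The TSC math assumes TSC
 * offsetting is active and RDTSC is not intercepted, so the guest sees host TSC plus the
 * offset; for a nested guest the nested offset is then subtracted again to recover the
 * outer guest's view (a simplification of CPUMRemoveNestedGuestTscOffset).  All names
 * here are hypothetical.
 */
#if 0 /* illustrative example, not built */
# include <stdint.h>
static void exampleDecodeExitReason(uint32_t uExitReasonField, uint16_t *puBasic, int *pfEntryFailed)
{
    *puBasic       = (uint16_t)(uExitReasonField & 0xffff);    /* bits 15:0: basic exit reason */
    *pfEntryFailed = (uExitReasonField >> 31) & 1;              /* bit 31: VM-entry failure */
}

static uint64_t exampleGuestTscAtExit(uint64_t uHostTscAtExit, uint64_t uTscOffset,
                                      int fNestedGuest, uint64_t uNstGstTscOffset)
{
    uint64_t uGstTsc = uHostTscAtExit + uTscOffset;             /* what the (nested) guest last saw */
    if (fNestedGuest)
        uGstTsc -= uNstGstTscOffset;                            /* strip the nested-guest portion */
    return uGstTsc;
}
#endif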
6410
6411
6412/**
6413 * Runs the guest code using hardware-assisted VMX the normal way.
6414 *
6415 * @returns VBox status code.
6416 * @param pVCpu The cross context virtual CPU structure.
6417 * @param pcLoops Pointer to the number of executed loops.
6418 */
6419static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
6420{
6421 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6422 Assert(pcLoops);
6423 Assert(*pcLoops <= cMaxResumeLoops);
6424 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6425
6426#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6427 /*
6428 * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
6429 * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
6430 * guest VMCS while entering the VMX ring-0 session.
6431 */
6432 if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6433 {
6434 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
6435 if (RT_SUCCESS(rc))
6436 { /* likely */ }
6437 else
6438 {
6439 LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
6440 return rc;
6441 }
6442 }
6443#endif
6444
6445 VMXTRANSIENT VmxTransient;
6446 RT_ZERO(VmxTransient);
6447 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6448
6449 /* Paranoia. */
6450 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
6451
6452 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6453 for (;;)
6454 {
6455 Assert(!HMR0SuspendPending());
6456 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6457 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6458
6459 /*
6460         * Preparatory work for running guest code; this may force us to
6461 * return to ring-3.
6462 *
6463 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6464 */
6465 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6466 if (rcStrict != VINF_SUCCESS)
6467 break;
6468
6469 /* Interrupts are disabled at this point! */
6470 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6471 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6472 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6473 /* Interrupts are re-enabled at this point! */
6474
6475 /*
6476 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6477 */
6478 if (RT_SUCCESS(rcRun))
6479 { /* very likely */ }
6480 else
6481 {
6482 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6483 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6484 return rcRun;
6485 }
6486
6487 /*
6488 * Profile the VM-exit.
6489 */
6490 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6491 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
6492 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6493 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6494 HMVMX_START_EXIT_DISPATCH_PROF();
6495
6496 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6497
6498 /*
6499 * Handle the VM-exit.
6500 */
6501#ifdef HMVMX_USE_FUNCTION_TABLE
6502 rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
6503#else
6504 rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
6505#endif
6506 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6507 if (rcStrict == VINF_SUCCESS)
6508 {
6509 if (++(*pcLoops) <= cMaxResumeLoops)
6510 continue;
6511 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6512 rcStrict = VINF_EM_RAW_INTERRUPT;
6513 }
6514 break;
6515 }
6516
6517 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6518 return rcStrict;
6519}
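/*
 * Illustrative sketch (not part of the build): the ring-0 loop above never runs unbounded.
 * After cMaxResumeLoops back-to-back VM-entries it returns VINF_EM_RAW_INTERRUPT so the EMT
 * periodically drops back to ring-3 and lets pending work there run.  The skeleton below
 * shows just that control flow with hypothetical names and stand-in status codes; the real
 * loop of course also prepares state, handles the VM-exit, and so on.
 */
#if 0 /* illustrative example, not built */
# include <stdint.h>
static int exampleBoundedRunLoop(uint32_t *pcLoops, uint32_t cMaxResumeLoops,
                                 int (*pfnRunOnce)(void *pvUser), void *pvUser)
{
    for (;;)
    {
        int rc = pfnRunOnce(pvUser);            /* one VM-entry/VM-exit round trip */
        if (rc != 0)                            /* non-zero: the exit handler wants ring-3 (or failed) */
            return rc;
        if (++(*pcLoops) > cMaxResumeLoops)     /* cap reached: rendezvous with ring-3 */
            return -1;                          /* stand-in for VINF_EM_RAW_INTERRUPT */
    }
}
#endif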
6520
6521
6522#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6523/**
6524 * Runs the nested-guest code using hardware-assisted VMX.
6525 *
6526 * @returns VBox status code.
6527 * @param pVCpu The cross context virtual CPU structure.
6528 * @param pcLoops Pointer to the number of executed loops.
6529 *
6530 * @sa hmR0VmxRunGuestCodeNormal.
6531 */
6532static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
6533{
6534 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6535 Assert(pcLoops);
6536 Assert(*pcLoops <= cMaxResumeLoops);
6537 Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6538
6539 /*
6540 * Switch to the nested-guest VMCS as we may have transitioned from executing the
6541 * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
6542 * loaded the nested-guest VMCS while entering the VMX ring-0 session.
6543 */
6544 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6545 {
6546 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
6547 if (RT_SUCCESS(rc))
6548 { /* likely */ }
6549 else
6550 {
6551 LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
6552 return rc;
6553 }
6554 }
6555
6556 VMXTRANSIENT VmxTransient;
6557 RT_ZERO(VmxTransient);
6558 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6559 VmxTransient.fIsNestedGuest = true;
6560
6561 /* Paranoia. */
6562 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);
6563
6564 /* Setup pointer so PGM/IEM can query VM-exit auxiliary info on demand in ring-0. */
6565 pVCpu->hmr0.s.vmx.pVmxTransient = &VmxTransient;
6566
6567 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6568 for (;;)
6569 {
6570 Assert(!HMR0SuspendPending());
6571 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6572 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6573
6574 /*
6575         * Preparatory work for running nested-guest code; this may force us to
6576 * return to ring-3.
6577 *
6578 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6579 */
6580 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6581 if (rcStrict != VINF_SUCCESS)
6582 break;
6583
6584 /* Interrupts are disabled at this point! */
6585 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6586 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6587 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6588 /* Interrupts are re-enabled at this point! */
6589
6590 /*
6591 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6592 */
6593 if (RT_SUCCESS(rcRun))
6594 { /* very likely */ }
6595 else
6596 {
6597 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6598 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6599 rcStrict = rcRun;
6600 break;
6601 }
6602
6603 /*
6604 * Profile the VM-exit.
6605 */
6606 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6607 STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
6608 STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6609 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6610 HMVMX_START_EXIT_DISPATCH_PROF();
6611
6612 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6613
6614 /*
6615 * Handle the VM-exit.
6616 */
6617 rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient);
6618 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6619 if (rcStrict == VINF_SUCCESS)
6620 {
6621 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6622 {
6623 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6624 rcStrict = VINF_VMX_VMEXIT;
6625 }
6626 else
6627 {
6628 if (++(*pcLoops) <= cMaxResumeLoops)
6629 continue;
6630 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6631 rcStrict = VINF_EM_RAW_INTERRUPT;
6632 }
6633 }
6634 else
6635 Assert(rcStrict != VINF_VMX_VMEXIT);
6636 break;
6637 }
6638
6639 /* Ensure VM-exit auxiliary info. is no longer available. */
6640 pVCpu->hmr0.s.vmx.pVmxTransient = NULL;
6641
6642 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6643 return rcStrict;
6644}
6645#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6646
6647
6648/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
6649 * probes.
6650 *
6651 * The following few functions and associated structure contain the bloat
6652 * necessary for providing detailed debug events and dtrace probes as well as
6653 * reliable host-side single stepping. This works on the principle of
6654 * "subclassing" the normal execution loop and workers. We replace the loop
6655 * method completely and override selected helpers to add necessary adjustments
6656 * to their core operation.
6657 *
6658 * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
6659 * any performance for debug and analysis features.
6660 *
6661 * @{
6662 */
6663
6664/**
6665 * Single steps guest code using hardware-assisted VMX.
6666 *
6667 * This is -not- the same as the guest single-stepping itself (say using EFLAGS.TF)
6668 * but single-stepping through the hypervisor debugger.
6669 *
6670 * @returns Strict VBox status code (i.e. informational status codes too).
6671 * @param pVCpu The cross context virtual CPU structure.
6672 * @param pcLoops Pointer to the number of executed loops.
6673 *
6674 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
6675 */
6676static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
6677{
6678 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6679 Assert(pcLoops);
6680 Assert(*pcLoops <= cMaxResumeLoops);
6681
6682 VMXTRANSIENT VmxTransient;
6683 RT_ZERO(VmxTransient);
6684 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6685
6686 /* Set HMCPU indicators. */
6687 bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
6688 pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
6689 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
6690 pVCpu->hmr0.s.fUsingDebugLoop = true;
6691
6692 /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
6693 VMXRUNDBGSTATE DbgState;
6694 vmxHCRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
6695 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
6696
6697 /*
6698 * The loop.
6699 */
6700 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6701 for (;;)
6702 {
6703 Assert(!HMR0SuspendPending());
6704 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6705 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6706 bool fStepping = pVCpu->hm.s.fSingleInstruction;
6707
6708 /* Set up VM-execution controls the next two can respond to. */
6709 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
6710
6711 /*
6712         * Preparatory work for running guest code; this may force us to
6713 * return to ring-3.
6714 *
6715 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6716 */
6717 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
6718 if (rcStrict != VINF_SUCCESS)
6719 break;
6720
6721 /* Interrupts are disabled at this point! */
6722 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6723
6724 /* Override any obnoxious code in the above two calls. */
6725 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
6726
6727 /*
6728 * Finally execute the guest.
6729 */
6730 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6731
6732 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6733 /* Interrupts are re-enabled at this point! */
6734
6735 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
6736 if (RT_SUCCESS(rcRun))
6737 { /* very likely */ }
6738 else
6739 {
6740 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6741 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6742 return rcRun;
6743 }
6744
6745 /* Profile the VM-exit. */
6746 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6747 STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll);
6748 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6749 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6750 HMVMX_START_EXIT_DISPATCH_PROF();
6751
6752 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6753
6754 /*
6755         * Handle the VM-exit - we quit earlier on certain VM-exits, see vmxHCRunDebugHandleExit().
6756 */
6757 rcStrict = vmxHCRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
6758 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6759 if (rcStrict != VINF_SUCCESS)
6760 break;
6761 if (++(*pcLoops) > cMaxResumeLoops)
6762 {
6763 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6764 rcStrict = VINF_EM_RAW_INTERRUPT;
6765 break;
6766 }
6767
6768 /*
6769         * Stepping: Did the RIP change? If so, consider it a single step.
6770 * Otherwise, make sure one of the TFs gets set.
6771 */
6772 if (fStepping)
6773 {
6774 int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
6775 AssertRC(rc);
6776 if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
6777 || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
6778 {
6779 rcStrict = VINF_EM_DBG_STEPPED;
6780 break;
6781 }
6782 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
6783 }
6784
6785 /*
6786 * Update when dtrace settings changes (DBGF kicks us, so no need to check).
6787 */
6788 if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
6789 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
6790
6791         /* Restore all controls applied by vmxHCPreRunGuestDebugStateApply above. */
6792 rcStrict = vmxHCRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
6793 Assert(rcStrict == VINF_SUCCESS);
6794 }
6795
6796 /*
6797 * Clear the X86_EFL_TF if necessary.
6798 */
6799 if (pVCpu->hmr0.s.fClearTrapFlag)
6800 {
6801 int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
6802 AssertRC(rc);
6803 pVCpu->hmr0.s.fClearTrapFlag = false;
6804 pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
6805 }
6806     /** @todo there seem to be issues with the resume flag when the monitor trap
6807 * flag is pending without being used. Seen early in bios init when
6808 * accessing APIC page in protected mode. */
6809
6810/** @todo we need to do vmxHCRunDebugStateRevert here too, in case we broke
6811 * out of the above loop. */
6812
6813 /* Restore HMCPU indicators. */
6814 pVCpu->hmr0.s.fUsingDebugLoop = false;
6815 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
6816 pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
6817
6818 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6819 return rcStrict;
6820}
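/*
 * Illustrative sketch (not part of the build): the stepping logic above decides whether a
 * single step happened purely by comparing CS:RIP after the VM-exit against the values
 * recorded when stepping was armed; if they are unchanged, the trap flag has to stay armed
 * for the next iteration.  The structure and function names below are hypothetical.
 */
#if 0 /* illustrative example, not built */
# include <stdint.h>
typedef struct EXAMPLESTEPSTATE
{
    uint64_t uRipStart;     /* RIP when single-stepping was armed */
    uint16_t uCsStart;      /* CS selector when single-stepping was armed */
} EXAMPLESTEPSTATE;

/* Returns 1 if the guest moved to a new CS:RIP (report VINF_EM_DBG_STEPPED to the debugger),
   0 if it has not and the trap flag must remain pending. */
static int exampleHasSteppedTo(const EXAMPLESTEPSTATE *pState, uint64_t uRipNow, uint16_t uCsNow)
{
    return uRipNow != pState->uRipStart
        || uCsNow  != pState->uCsStart;
}
#endif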
6821
6822/** @} */
6823
6824
6825/**
6826 * Checks if any expensive dtrace probes are enabled and we should go to the
6827 * debug loop.
6828 *
6829 * @returns true if we should use debug loop, false if not.
6830 */
6831static bool hmR0VmxAnyExpensiveProbesEnabled(void)
6832{
6833 /* It's probably faster to OR the raw 32-bit counter variables together.
6834 Since the variables are in an array and the probes are next to one
6835       another (more or less), we have good locality. So, better to read
6836       eight or nine cache lines every time and have only one conditional than
6837       128+ conditionals, right? */
6838 return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
6839 | VBOXVMM_XCPT_DE_ENABLED_RAW()
6840 | VBOXVMM_XCPT_DB_ENABLED_RAW()
6841 | VBOXVMM_XCPT_BP_ENABLED_RAW()
6842 | VBOXVMM_XCPT_OF_ENABLED_RAW()
6843 | VBOXVMM_XCPT_BR_ENABLED_RAW()
6844 | VBOXVMM_XCPT_UD_ENABLED_RAW()
6845 | VBOXVMM_XCPT_NM_ENABLED_RAW()
6846 | VBOXVMM_XCPT_DF_ENABLED_RAW()
6847 | VBOXVMM_XCPT_TS_ENABLED_RAW()
6848 | VBOXVMM_XCPT_NP_ENABLED_RAW()
6849 | VBOXVMM_XCPT_SS_ENABLED_RAW()
6850 | VBOXVMM_XCPT_GP_ENABLED_RAW()
6851 | VBOXVMM_XCPT_PF_ENABLED_RAW()
6852 | VBOXVMM_XCPT_MF_ENABLED_RAW()
6853 | VBOXVMM_XCPT_AC_ENABLED_RAW()
6854 | VBOXVMM_XCPT_XF_ENABLED_RAW()
6855 | VBOXVMM_XCPT_VE_ENABLED_RAW()
6856 | VBOXVMM_XCPT_SX_ENABLED_RAW()
6857 | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
6858 | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
6859 ) != 0
6860 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
6861 | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
6862 | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
6863 | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
6864 | VBOXVMM_INSTR_INVD_ENABLED_RAW()
6865 | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
6866 | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
6867 | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
6868 | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
6869 | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
6870 | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
6871 | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
6872 | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
6873 | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
6874 | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
6875 | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
6876 | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
6877 | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
6878 | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
6879 | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
6880 | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
6881 | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
6882 | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
6883 | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
6884 | VBOXVMM_INSTR_STR_ENABLED_RAW()
6885 | VBOXVMM_INSTR_LTR_ENABLED_RAW()
6886 | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
6887 | VBOXVMM_INSTR_RSM_ENABLED_RAW()
6888 | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
6889 | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
6890 | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
6891 | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
6892 | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
6893 | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
6894 | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
6895 | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
6896 | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
6897 | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
6898 | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
6899 | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
6900 | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
6901 | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
6902 | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
6903 | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
6904 | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
6905 | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
6906 ) != 0
6907 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
6908 | VBOXVMM_EXIT_HALT_ENABLED_RAW()
6909 | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
6910 | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
6911 | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
6912 | VBOXVMM_EXIT_INVD_ENABLED_RAW()
6913 | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
6914 | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
6915 | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
6916 | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
6917 | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
6918 | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
6919 | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
6920 | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
6921 | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
6922 | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
6923 | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
6924 | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
6925 | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
6926 | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
6927 | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
6928 | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
6929 | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
6930 | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
6931 | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
6932 | VBOXVMM_EXIT_STR_ENABLED_RAW()
6933 | VBOXVMM_EXIT_LTR_ENABLED_RAW()
6934 | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
6935 | VBOXVMM_EXIT_RSM_ENABLED_RAW()
6936 | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
6937 | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
6938 | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
6939 | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
6940 | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
6941 | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
6942 | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
6943 | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
6944 | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
6945 | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
6946 | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
6947 | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
6948 | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
6949 | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
6950 | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
6951 | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
6952 | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
6953 | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
6954 | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
6955 | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
6956 | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
6957 | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
6958 ) != 0;
6959}
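/*
 * Illustrative sketch (not part of the build): the reasoning in the comment at the top of
 * hmR0VmxAnyExpensiveProbesEnabled(), OR all the raw enable words together and branch once
 * instead of testing 128+ probes individually, shown in miniature with a hypothetical flag
 * array standing in for the dtrace probe enable counters.
 */
#if 0 /* illustrative example, not built */
# include <stdint.h>
# include <stddef.h>
static int exampleAnyProbeEnabled(const uint32_t *pau32EnableCounts, size_t cProbes)
{
    uint32_t fAny = 0;
    for (size_t i = 0; i < cProbes; i++)    /* sequential reads: good cache locality */
        fAny |= pau32EnableCounts[i];
    return fAny != 0;                       /* a single conditional at the end */
}
#endif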
6960
6961
6962/**
6963 * Runs the guest using hardware-assisted VMX.
6964 *
6965 * @returns Strict VBox status code (i.e. informational status codes too).
6966 * @param pVCpu The cross context virtual CPU structure.
6967 */
6968VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
6969{
6970 AssertPtr(pVCpu);
6971 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
6972 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6973 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
6974 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
6975
6976 VBOXSTRICTRC rcStrict;
6977 uint32_t cLoops = 0;
6978 for (;;)
6979 {
6980#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6981 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
6982#else
6983 NOREF(pCtx);
6984 bool const fInNestedGuestMode = false;
6985#endif
6986 if (!fInNestedGuestMode)
6987 {
6988 if ( !pVCpu->hm.s.fUseDebugLoop
6989 && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
6990 && !DBGFIsStepping(pVCpu)
6991 && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
6992 rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
6993 else
6994 rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
6995 }
6996#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6997 else
6998 rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
6999
7000 if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
7001 {
7002 Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
7003 continue;
7004 }
7005 if (rcStrict == VINF_VMX_VMEXIT)
7006 {
7007 Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
7008 continue;
7009 }
7010#endif
7011 break;
7012 }
7013
7014 int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
7015 switch (rcLoop)
7016 {
7017 case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
7018 case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
7019 }
7020
7021 int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
7022 if (RT_FAILURE(rc2))
7023 {
7024 pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
7025 rcStrict = rc2;
7026 }
7027 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7028 Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
7029 return rcStrict;
7030}
7031