VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@38845

Last change on this file since 38845 was 38707, checked in by vboxsync, 13 years ago

VMM/VT-x: Fix for PAE guests running on 32-bit hosts or 64-bit hosts where VBoxInternal/PGM/MaxRing3Chunks is used.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 200.3 KB
1/* $Id: HWVMXR0.cpp 38707 2011-09-09 14:10:18Z vboxsync $ */
2/** @file
3 * HM VMX (VT-x) - Host Context Ring 0.
4 */
5
6/*
7 * Copyright (C) 2006-2011 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_HWACCM
23#include <iprt/asm-amd64-x86.h>
24#include <VBox/vmm/hwaccm.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/dbgf.h>
27#include <VBox/vmm/selm.h>
28#include <VBox/vmm/iom.h>
29#include <VBox/vmm/rem.h>
30#include <VBox/vmm/tm.h>
31#include "HWACCMInternal.h"
32#include <VBox/vmm/vm.h>
33#include <VBox/vmm/pdmapi.h>
34#include <VBox/err.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/param.h>
38#include <iprt/string.h>
39#include <iprt/time.h>
40#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
41# include <iprt/thread.h>
42#endif
43#include <iprt/x86.h>
44#include "HWVMXR0.h"
45
46/*******************************************************************************
47* Defined Constants And Macros *
48*******************************************************************************/
49#if defined(RT_ARCH_AMD64)
50# define VMX_IS_64BIT_HOST_MODE() (true)
51#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
52# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
53#else
54# define VMX_IS_64BIT_HOST_MODE() (false)
55#endif
56
57/*******************************************************************************
58* Global Variables *
59*******************************************************************************/
60/* IO operation lookup arrays. */
61static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
62static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
63
64#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
65/** See HWACCMR0A.asm. */
66extern "C" uint32_t g_fVMXIs64bitHost;
67#endif
68
69/*******************************************************************************
70* Local Functions *
71*******************************************************************************/
72static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx);
73static void hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu);
74static void hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu);
75static void hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu);
76static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys);
77static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr);
78static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
79#ifdef VBOX_STRICT
80static bool hmR0VmxIsValidReadField(uint32_t idxField);
81static bool hmR0VmxIsValidWriteField(uint32_t idxField);
82#endif
83static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
84
85
86static void hmR0VmxCheckError(PVM pVM, PVMCPU pVCpu, int rc)
87{
88 if (rc == VERR_VMX_GENERIC)
89 {
90 RTCCUINTREG instrError;
91
92 VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
93 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
94 }
95 pVM->hwaccm.s.lLastError = rc;
96}
97
98/**
99 * Sets up and activates VT-x on the current CPU
100 *
101 * @returns VBox status code.
102 * @param pCpu CPU info struct
103 * @param pVM The VM to operate on. (can be NULL after a resume!!)
104 * @param pvCpuPage Pointer to the global cpu page.
105 * @param HCPhysCpuPage Physical address of the global cpu page.
106 */
107VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
108{
109 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
110 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
111
112 if (pVM)
113 {
114 /* Set revision dword at the beginning of the VMXON structure. */
115 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
116 }
117
118 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
119 * (which can have very bad consequences!!!)
120 */
121
122 if (ASMGetCR4() & X86_CR4_VMXE)
123 return VERR_VMX_IN_VMX_ROOT_MODE;
124
125 /* Make sure the VMX instructions don't cause #UD faults. */
126 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
127
128 /* Enter VMX Root Mode. */
129 int rc = VMXEnable(HCPhysCpuPage);
130 if (RT_FAILURE(rc))
131 {
132 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
133 return VERR_VMX_VMXON_FAILED;
134 }
135 return VINF_SUCCESS;
136}
137
138/**
139 * Deactivates VT-x on the current CPU
140 *
141 * @returns VBox status code.
142 * @param pCpu CPU info struct
143 * @param pvCpuPage Pointer to the global cpu page.
144 * @param HCPhysCpuPage Physical address of the global cpu page.
145 */
146VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
147{
148 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
149 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
150
151 /* If we're somehow not in VMX root mode, then we shouldn't dare leaving it. */
152 if (!(ASMGetCR4() & X86_CR4_VMXE))
153 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
154
155 /* Leave VMX Root Mode. */
156 VMXDisable();
157
158 /* And clear the X86_CR4_VMXE bit. */
159 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
160 return VINF_SUCCESS;
161}
162
163/**
164 * Does Ring-0 per VM VT-x init.
165 *
166 * @returns VBox status code.
167 * @param pVM The VM to operate on.
168 */
169VMMR0DECL(int) VMXR0InitVM(PVM pVM)
170{
171 int rc;
172
173#ifdef LOG_ENABLED
174 SUPR0Printf("VMXR0InitVM %x\n", pVM);
175#endif
176
177 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
178
179 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
180 {
181 /* Allocate one page for the APIC physical page (serves for filtering accesses). */
182 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, PAGE_SIZE, true /* executable R0 mapping */);
183 AssertRC(rc);
184 if (RT_FAILURE(rc))
185 return rc;
186
187 pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
188 pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
189 ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE);
190 }
191 else
192 {
193 pVM->hwaccm.s.vmx.pMemObjAPIC = 0;
194 pVM->hwaccm.s.vmx.pAPIC = 0;
195 pVM->hwaccm.s.vmx.pAPICPhys = 0;
196 }
197
198#ifdef VBOX_WITH_CRASHDUMP_MAGIC
199 {
200 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjScratch, PAGE_SIZE, true /* executable R0 mapping */);
201 AssertRC(rc);
202 if (RT_FAILURE(rc))
203 return rc;
204
205 pVM->hwaccm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjScratch);
206 pVM->hwaccm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjScratch, 0);
207
208 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
209 strcpy((char *)pVM->hwaccm.s.vmx.pScratch, "SCRATCH Magic");
210 *(uint64_t *)(pVM->hwaccm.s.vmx.pScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
211 }
212#endif
213
214 /* Allocate VMCSs for all guest CPUs. */
215 for (VMCPUID i = 0; i < pVM->cCpus; i++)
216 {
217 PVMCPU pVCpu = &pVM->aCpus[i];
218
219 pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
220
221 /* Allocate one page for the VM control structure (VMCS). */
222 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVMCS, PAGE_SIZE, true /* executable R0 mapping */);
223 AssertRC(rc);
224 if (RT_FAILURE(rc))
225 return rc;
226
227 pVCpu->hwaccm.s.vmx.pvVMCS = RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVMCS);
228 pVCpu->hwaccm.s.vmx.HCPhysVMCS = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVMCS, 0);
229 ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pvVMCS);
230
231 pVCpu->hwaccm.s.vmx.cr0_mask = 0;
232 pVCpu->hwaccm.s.vmx.cr4_mask = 0;
233
234 /* Allocate one page for the virtual APIC page for TPR caching. */
235 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVAPIC, PAGE_SIZE, true /* executable R0 mapping */);
236 AssertRC(rc);
237 if (RT_FAILURE(rc))
238 return rc;
239
240 pVCpu->hwaccm.s.vmx.pbVAPIC = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVAPIC);
241 pVCpu->hwaccm.s.vmx.HCPhysVAPIC = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, 0);
242 ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pbVAPIC);
243
244 /* Allocate the MSR bitmap if this feature is supported. */
245 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
246 {
247 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, PAGE_SIZE, true /* executable R0 mapping */);
248 AssertRC(rc);
249 if (RT_FAILURE(rc))
250 return rc;
251
252 pVCpu->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap);
253 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
254 memset(pVCpu->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
255 }
256
257#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
258 /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */
259 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, PAGE_SIZE, true /* executable R0 mapping */);
260 AssertRC(rc);
261 if (RT_FAILURE(rc))
262 return rc;
263
264 pVCpu->hwaccm.s.vmx.pGuestMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR);
265 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, 0);
266 memset(pVCpu->hwaccm.s.vmx.pGuestMSR, 0, PAGE_SIZE);
267
268 /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */
269 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjHostMSR, PAGE_SIZE, true /* executable R0 mapping */);
270 AssertRC(rc);
271 if (RT_FAILURE(rc))
272 return rc;
273
274 pVCpu->hwaccm.s.vmx.pHostMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjHostMSR);
275 pVCpu->hwaccm.s.vmx.pHostMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, 0);
276 memset(pVCpu->hwaccm.s.vmx.pHostMSR, 0, PAGE_SIZE);
277#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
278
279 /* Current guest paging mode. */
280 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
281
282#ifdef LOG_ENABLED
283 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hwaccm.s.vmx.pvVMCS, (uint32_t)pVCpu->hwaccm.s.vmx.HCPhysVMCS);
284#endif
285 }
286
287 return VINF_SUCCESS;
288}
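/*
 * Editor's note: the sketch below is an illustration added for this write-up and is not
 * part of the original file. It shows the allocation pattern VMXR0InitVM() repeats for
 * each per-VM/per-VCPU page: allocate one contiguous page, then cache both its ring-0
 * virtual address and its physical address for later use in VMCS fields. The helper
 * name and the #if 0 guard are additions for illustration only.
 */
#if 0
static int vmxExampleAllocPage(RTR0MEMOBJ *phMemObj, void **ppv, RTHCPHYS *pHCPhys)
{
    int rc = RTR0MemObjAllocCont(phMemObj, PAGE_SIZE, true /* executable R0 mapping */);
    if (RT_FAILURE(rc))
        return rc;
    *ppv     = RTR0MemObjAddress(*phMemObj);              /* ring-0 mapping of the page */
    *pHCPhys = RTR0MemObjGetPagePhysAddr(*phMemObj, 0);   /* physical address for VMCS fields */
    ASMMemZeroPage(*ppv);
    return VINF_SUCCESS;
}
#endif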
289
290/**
291 * Does Ring-0 per VM VT-x termination.
292 *
293 * @returns VBox status code.
294 * @param pVM The VM to operate on.
295 */
296VMMR0DECL(int) VMXR0TermVM(PVM pVM)
297{
298 for (VMCPUID i = 0; i < pVM->cCpus; i++)
299 {
300 PVMCPU pVCpu = &pVM->aCpus[i];
301
302 if (pVCpu->hwaccm.s.vmx.hMemObjVMCS != NIL_RTR0MEMOBJ)
303 {
304 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVMCS, false);
305 pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
306 pVCpu->hwaccm.s.vmx.pvVMCS = 0;
307 pVCpu->hwaccm.s.vmx.HCPhysVMCS = 0;
308 }
309 if (pVCpu->hwaccm.s.vmx.hMemObjVAPIC != NIL_RTR0MEMOBJ)
310 {
311 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, false);
312 pVCpu->hwaccm.s.vmx.hMemObjVAPIC = NIL_RTR0MEMOBJ;
313 pVCpu->hwaccm.s.vmx.pbVAPIC = 0;
314 pVCpu->hwaccm.s.vmx.HCPhysVAPIC = 0;
315 }
316 if (pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
317 {
318 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, false);
319 pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
320 pVCpu->hwaccm.s.vmx.pMSRBitmap = 0;
321 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = 0;
322 }
323#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
324 if (pVCpu->hwaccm.s.vmx.pMemObjHostMSR != NIL_RTR0MEMOBJ)
325 {
326 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, false);
327 pVCpu->hwaccm.s.vmx.pMemObjHostMSR = NIL_RTR0MEMOBJ;
328 pVCpu->hwaccm.s.vmx.pHostMSR = 0;
329 pVCpu->hwaccm.s.vmx.pHostMSRPhys = 0;
330 }
331 if (pVCpu->hwaccm.s.vmx.pMemObjGuestMSR != NIL_RTR0MEMOBJ)
332 {
333 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, false);
334 pVCpu->hwaccm.s.vmx.pMemObjGuestMSR = NIL_RTR0MEMOBJ;
335 pVCpu->hwaccm.s.vmx.pGuestMSR = 0;
336 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = 0;
337 }
338#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
339 }
340 if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
341 {
342 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
343 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
344 pVM->hwaccm.s.vmx.pAPIC = 0;
345 pVM->hwaccm.s.vmx.pAPICPhys = 0;
346 }
347#ifdef VBOX_WITH_CRASHDUMP_MAGIC
348 if (pVM->hwaccm.s.vmx.pMemObjScratch != NIL_RTR0MEMOBJ)
349 {
350 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
351 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjScratch, false);
352 pVM->hwaccm.s.vmx.pMemObjScratch = NIL_RTR0MEMOBJ;
353 pVM->hwaccm.s.vmx.pScratch = 0;
354 pVM->hwaccm.s.vmx.pScratchPhys = 0;
355 }
356#endif
357 return VINF_SUCCESS;
358}
359
360/**
361 * Sets up VT-x for the specified VM
362 *
363 * @returns VBox status code.
364 * @param pVM The VM to operate on.
365 */
366VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
367{
368 int rc = VINF_SUCCESS;
369 uint32_t val;
370
371 AssertReturn(pVM, VERR_INVALID_PARAMETER);
372
373 for (VMCPUID i = 0; i < pVM->cCpus; i++)
374 {
375 PVMCPU pVCpu = &pVM->aCpus[i];
376
377 AssertPtr(pVCpu->hwaccm.s.vmx.pvVMCS);
378
379 /* Set revision dword at the beginning of the VMCS structure. */
380 *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
381
382 /* Clear VM Control Structure. */
383 Log(("HCPhysVMCS = %RHp\n", pVCpu->hwaccm.s.vmx.HCPhysVMCS));
384 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
385 if (RT_FAILURE(rc))
386 goto vmx_end;
387
388 /* Activate the VM Control Structure. */
389 rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
390 if (RT_FAILURE(rc))
391 goto vmx_end;
392
393 /* VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
394 * Set required bits to one and zero according to the MSR capabilities.
395 */
396 val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
397 /* External and non-maskable interrupts cause VM-exits. */
398 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT;
399 /* Enable the preemption timer. */
400 if (pVM->hwaccm.s.vmx.fUsePreemptTimer)
401 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_PREEMPT_TIMER;
402 val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
403
404 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
405 AssertRC(rc);
406
407 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
408 * Set required bits to one and zero according to the MSR capabilities.
409 */
410 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
411 /* Program which events cause VM-exits and which features we want to use. */
412 val = val | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
413 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
414 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
415 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
416 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
417 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT
418 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside the guest (host thinks the cpu load is high) */
419
420 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
421 if (!pVM->hwaccm.s.fNestedPaging)
422 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
423 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
424 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
425
426 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch failure with an invalid control fields error. (combined with some other exit reasons) */
427 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
428 {
429 /* CR8 reads from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold. */
430 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
431 Assert(pVM->hwaccm.s.vmx.pAPIC);
432 }
433 else
434 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
435 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
436
437 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
438 {
439 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
440 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
441 }
442
443 /* We will use the secondary control if it's present. */
444 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
445
446 /* Mask away the bits that the CPU doesn't support */
447 /** @todo make sure they don't conflict with the above requirements. */
448 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
449 pVCpu->hwaccm.s.vmx.proc_ctls = val;
450
451 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val);
452 AssertRC(rc);
453
454 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
455 {
456 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
457 * Set required bits to one and zero according to the MSR capabilities.
458 */
459 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
460 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
461
462#ifdef HWACCM_VTX_WITH_EPT
463 if (pVM->hwaccm.s.fNestedPaging)
464 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
465#endif /* HWACCM_VTX_WITH_EPT */
466#ifdef HWACCM_VTX_WITH_VPID
467 else
468 if (pVM->hwaccm.s.vmx.fVPID)
469 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
470#endif /* HWACCM_VTX_WITH_VPID */
471
472 if (pVM->hwaccm.s.fHasIoApic)
473 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC;
474
475 if (pVM->hwaccm.s.vmx.fUnrestrictedGuest)
476 val |= VMX_VMCS_CTRL_PROC_EXEC2_REAL_MODE;
477
478 /* Mask away the bits that the CPU doesn't support */
479 /** @todo make sure they don't conflict with the above requirements. */
480 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
481 pVCpu->hwaccm.s.vmx.proc_ctls2 = val;
482 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val);
483 AssertRC(rc);
484 }
485
486 /* VMX_VMCS_CTRL_CR3_TARGET_COUNT
487 * Set required bits to one and zero according to the MSR capabilities.
488 */
489 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0);
490 AssertRC(rc);
491
492 /* Forward all exceptions except #NM & #PF to the guest.
493 * We always need to check pagefaults since our shadow page table can be out of sync.
494 * And we always lazily sync the FPU & XMM state.
495 */
496
497 /** @todo Possible optimization:
498 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
499 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
500 * registers ourselves of course.
501 *
502 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
503 */
504
505 /* Don't filter page faults; all of them should cause a switch. */
506 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0);
507 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0);
508 AssertRC(rc);
509
510 /* Init TSC offset to zero. */
511 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0);
512 AssertRC(rc);
513
514 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0);
515 AssertRC(rc);
516
517 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0);
518 AssertRC(rc);
519
520 /* Set the MSR bitmap address. */
521 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
522 {
523 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
524
525 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
526 AssertRC(rc);
527
528 /* Allow the guest to directly modify these MSRs; they are restored and saved automatically. */
529 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
530 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
531 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
532 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
533 hmR0VmxSetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
534 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
535 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
536 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
537 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
538 }
539
540#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
541 /* Set the guest & host MSR load/store physical addresses. */
542 Assert(pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
543 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
544 AssertRC(rc);
545 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
546 AssertRC(rc);
547
548 Assert(pVCpu->hwaccm.s.vmx.pHostMSRPhys);
549 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pHostMSRPhys);
550 AssertRC(rc);
551#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
552
553 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
554 AssertRC(rc);
555
556 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0);
557 AssertRC(rc);
558
559 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
560 {
561 Assert(pVM->hwaccm.s.vmx.pMemObjAPIC);
562 /* Optional */
563 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0);
564 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hwaccm.s.vmx.HCPhysVAPIC);
565
566 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
567 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys);
568
569 AssertRC(rc);
570 }
571
572 /* Set link pointer to -1. Not currently used. */
573 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
574 AssertRC(rc);
575
576 /* Clear VM Control Structure. Marking it inactive, clearing implementation specific data and writing back VMCS data to memory. */
577 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
578 AssertRC(rc);
579
580 /* Configure the VMCS read cache. */
581 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
582
583 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RIP);
584 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RSP);
585 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS);
586 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
587 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
588 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR0);
589 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
590 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR4);
591 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_DR7);
592 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
593 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_EIP);
594 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_ESP);
595 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
596 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_GDTR_BASE);
597 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
598 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_IDTR_BASE);
599
600 VMX_SETUP_SELREG(ES, pCache);
601 VMX_SETUP_SELREG(SS, pCache);
602 VMX_SETUP_SELREG(CS, pCache);
603 VMX_SETUP_SELREG(DS, pCache);
604 VMX_SETUP_SELREG(FS, pCache);
605 VMX_SETUP_SELREG(GS, pCache);
606 VMX_SETUP_SELREG(LDTR, pCache);
607 VMX_SETUP_SELREG(TR, pCache);
608
609 /* Status code VMCS reads. */
610 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON);
611 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
612 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
613 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE);
614 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
615 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
616 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
617 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO);
618 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE);
619
620 if (pVM->hwaccm.s.fNestedPaging)
621 {
622 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR3);
623 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_EXIT_PHYS_ADDR_FULL);
624 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
625 }
626 else
627 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
628 } /* for each VMCPU */
629
630 /* Choose the right TLB setup function. */
631 if (pVM->hwaccm.s.fNestedPaging)
632 {
633 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBEPT;
634
635 /* Default values for flushing. */
636 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
637 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
638
639 /* If the capabilities specify we can do more, then make use of it. */
640 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_INDIV)
641 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
642 else
643 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
644 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
645
646 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
647 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
648 }
649#ifdef HWACCM_VTX_WITH_VPID
650 else
651 if (pVM->hwaccm.s.vmx.fVPID)
652 {
653 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBVPID;
654
655 /* Default values for flushing. */
656 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
657 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
658
659 /* If the capabilities specify we can do more, then make use of it. */
660 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV)
661 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
662 else
663 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
664 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
665
666 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
667 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
668 }
669#endif /* HWACCM_VTX_WITH_VPID */
670 else
671 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBDummy;
672
673vmx_end:
674 hmR0VmxCheckError(pVM, &pVM->aCpus[0], rc);
675 return rc;
676}
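/*
 * Editor's note: the sketch below is an illustration added for this write-up and is not
 * part of the original file. It spells out the "set required bits to one and zero
 * according to the MSR capabilities" pattern that VMXR0SetupVM() repeats for the
 * pin-based, processor-based and secondary execution controls: start from the bits the
 * capability MSR forces to one (disallowed0), OR in the features we want, then mask
 * with the bits the CPU actually allows (allowed1). The helper name and the #if 0
 * guard are inventions for illustration only.
 */
#if 0
static uint32_t vmxExampleBuildCtls(uint32_t fDisallowed0, uint32_t fAllowed1, uint32_t fDesired)
{
    uint32_t val = fDisallowed0;    /* bits the capability MSR forces to 1 */
    val |= fDesired;                /* features we would like to enable */
    val &= fAllowed1;               /* drop anything the CPU doesn't support */
    return val;
}
#endif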
677
678/**
679 * Sets the permission bits for the specified MSR
680 *
681 * @param pVCpu The VMCPU to operate on.
682 * @param ulMSR MSR value
683 * @param fRead Reading allowed/disallowed
684 * @param fWrite Writing allowed/disallowed
685 */
686static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
687{
688 unsigned ulBit;
689 uint8_t *pMSRBitmap = (uint8_t *)pVCpu->hwaccm.s.vmx.pMSRBitmap;
690
691 /* Layout:
692 * 0x000 - 0x3ff - Low MSR read bits
693 * 0x400 - 0x7ff - High MSR read bits
694 * 0x800 - 0xbff - Low MSR write bits
695 * 0xc00 - 0xfff - High MSR write bits
696 */
697 if (ulMSR <= 0x00001FFF)
698 {
699 /* Pentium-compatible MSRs */
700 ulBit = ulMSR;
701 }
702 else
703 if ( ulMSR >= 0xC0000000
704 && ulMSR <= 0xC0001FFF)
705 {
706 /* AMD Sixth Generation x86 Processor MSRs */
707 ulBit = (ulMSR - 0xC0000000);
708 pMSRBitmap += 0x400;
709 }
710 else
711 {
712 AssertFailed();
713 return;
714 }
715
716 Assert(ulBit <= 0x1fff);
717 if (fRead)
718 ASMBitClear(pMSRBitmap, ulBit);
719 else
720 ASMBitSet(pMSRBitmap, ulBit);
721
722 if (fWrite)
723 ASMBitClear(pMSRBitmap + 0x800, ulBit);
724 else
725 ASMBitSet(pMSRBitmap + 0x800, ulBit);
726}
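/*
 * Editor's note: illustrative sketch, not part of the original file. It works through
 * the bitmap layout documented in hmR0VmxSetMSRPermission() for one concrete MSR:
 * MSR_K8_LSTAR (0xC0000082) falls in the high range, so its read bit lives at page
 * offset 0x400 + bit 0x82 and its write bit sits another 0x800 bytes into the page.
 * The helper name and the #if 0 guard are additions for clarity only.
 */
#if 0
static void vmxExampleMsrBitmapSlot(uint32_t uMsr, uint32_t *poffRead, uint32_t *poffWrite, uint32_t *piBit)
{
    if (uMsr <= 0x00001FFF)
    {
        *poffRead = 0x000;              /* low MSR read bits */
        *piBit    = uMsr;
    }
    else
    {
        Assert(uMsr >= 0xC0000000 && uMsr <= 0xC0001FFF);
        *poffRead = 0x400;              /* high MSR read bits */
        *piBit    = uMsr - 0xC0000000;  /* e.g. 0x82 for MSR_K8_LSTAR */
    }
    *poffWrite = *poffRead + 0x800;     /* write bits mirror the read layout */
}
#endif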
727
728
729/**
730 * Injects an event (trap or external interrupt)
731 *
732 * @returns VBox status code. Note that it may return VINF_EM_RESET to
733 * indicate a triple fault when injecting X86_XCPT_DF.
734 *
735 * @param pVM The VM to operate on.
736 * @param pVCpu The VMCPU to operate on.
737 * @param pCtx CPU Context
738 * @param intInfo VMX interrupt info
739 * @param cbInstr Opcode length of faulting instruction
740 * @param errCode Error code (optional)
741 */
742static int hmR0VmxInjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
743{
744 int rc;
745 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
746
747#ifdef VBOX_WITH_STATISTICS
748 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]);
749#endif
750
751#ifdef VBOX_STRICT
752 if (iGate == 0xE)
753 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
754 else
755 if (iGate < 0x20)
756 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode));
757 else
758 {
759 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
760 Assert(VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
761 Assert(VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW || pCtx->eflags.u32 & X86_EFL_IF);
762 }
763#endif
764
765 if ( CPUMIsGuestInRealModeEx(pCtx)
766 && pVM->hwaccm.s.vmx.pRealModeTSS)
767 {
768 RTGCPHYS GCPhysHandler;
769 uint16_t offset, ip;
770 RTSEL sel;
771
772 /* Injecting events doesn't work right with real mode emulation.
773 * (#GP if we try to inject external hardware interrupts)
774 * Inject the interrupt or trap directly instead.
775 *
776 * ASSUMES no access handlers for the bits we read or write below (should be safe).
777 */
778 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
779
780 /* Check if the interrupt handler is present. */
781 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
782 {
783 Log(("IDT cbIdt violation\n"));
784 if (iGate != X86_XCPT_DF)
785 {
786 uint32_t intInfo2;
787
788 intInfo2 = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
789 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
790 intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
791 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
792
793 return hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */);
794 }
795 Log(("Triple fault -> reset the VM!\n"));
796 return VINF_EM_RESET;
797 }
798 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
799 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
800 || iGate == 4)
801 {
802 ip = pCtx->ip + cbInstr;
803 }
804 else
805 ip = pCtx->ip;
806
807 /* Read the selector:offset pair of the interrupt handler. */
808 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
809 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
810 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
811
812 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
813
814 /* Construct the stack frame. */
815 /** @todo should check stack limit. */
816 pCtx->sp -= 2;
817 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss, pCtx->sp, pCtx->eflags.u));
818 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
819 pCtx->sp -= 2;
820 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss, pCtx->sp, pCtx->cs));
821 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
822 pCtx->sp -= 2;
823 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss, pCtx->sp, ip));
824 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
825
826 /* Update the CPU state for executing the handler. */
827 pCtx->rip = offset;
828 pCtx->cs = sel;
829 pCtx->csHid.u64Base = sel << 4;
830 pCtx->eflags.u &= ~(X86_EFL_IF|X86_EFL_TF|X86_EFL_RF|X86_EFL_AC);
831
832 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS;
833 return VINF_SUCCESS;
834 }
835
836 /* Set event injection state. */
837 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
838
839 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
840 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
841
842 AssertRC(rc);
843 return rc;
844}
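/*
 * Editor's note: illustrative sketch, not part of the original file. It shows how
 * callers of hmR0VmxInjectEvent() in this file assemble the VMX interruption-info
 * word: vector in the low bits, valid bit and type OR'ed in, plus the error-code
 * valid flag for exceptions that push one. A #PF (vector 14) is used as the example;
 * the helper name and the #if 0 guard are additions for illustration only.
 */
#if 0
static int vmxExampleInjectPageFault(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t uErrCode)
{
    uint32_t intInfo = X86_XCPT_PF;                                         /* vector 14 */
    intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);               /* mark the field valid */
    intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
    intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;                 /* #PF pushes an error code */
    return hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0 /* cbInstr */, uErrCode);
}
#endif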
845
846
847/**
848 * Checks for pending guest interrupts and injects them
849 *
850 * @returns VBox status code.
851 * @param pVM The VM to operate on.
852 * @param pVCpu The VMCPU to operate on.
853 * @param pCtx CPU Context
854 */
855static int hmR0VmxCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
856{
857 int rc;
858
859 /* Dispatch any pending interrupts. (injected before, but a VM exit occurred prematurely) */
860 if (pVCpu->hwaccm.s.Event.fPending)
861 {
862 Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hwaccm.s.Event.intInfo, pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
863 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject);
864 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode);
865 AssertRC(rc);
866
867 pVCpu->hwaccm.s.Event.fPending = false;
868 return VINF_SUCCESS;
869 }
870
871 /* If an active trap is already pending, then we must forward it first! */
872 if (!TRPMHasTrap(pVCpu))
873 {
874 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
875 {
876 RTGCUINTPTR intInfo;
877
878 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
879
880 intInfo = X86_XCPT_NMI;
881 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
882 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
883
884 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
885 AssertRC(rc);
886
887 return VINF_SUCCESS;
888 }
889
890 /* @todo SMI interrupts. */
891
892 /* When external interrupts are pending, we should exit the VM when IF is set. */
893 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
894 {
895 if (!(pCtx->eflags.u32 & X86_EFL_IF))
896 {
897 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
898 {
899 LogFlow(("Enable irq window exit!\n"));
900 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
901 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
902 AssertRC(rc);
903 }
904 /* else nothing to do but wait */
905 }
906 else
907 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
908 {
909 uint8_t u8Interrupt;
910
911 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
912 Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu, u8Interrupt, u8Interrupt, rc, pCtx->cs, (RTGCPTR)pCtx->rip));
913 if (RT_SUCCESS(rc))
914 {
915 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
916 AssertRC(rc);
917 }
918 else
919 {
920 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
921 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
922 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq);
923 /* Just continue */
924 }
925 }
926 else
927 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
928 }
929 }
930
931#ifdef VBOX_STRICT
932 if (TRPMHasTrap(pVCpu))
933 {
934 uint8_t u8Vector;
935 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
936 AssertRC(rc);
937 }
938#endif
939
940 if ( (pCtx->eflags.u32 & X86_EFL_IF)
941 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
942 && TRPMHasTrap(pVCpu)
943 )
944 {
945 uint8_t u8Vector;
946 TRPMEVENT enmType;
947 RTGCUINTPTR intInfo;
948 RTGCUINT errCode;
949
950 /* If a new event is pending, then dispatch it now. */
951 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
952 AssertRC(rc);
953 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
954 Assert(enmType != TRPM_SOFTWARE_INT);
955
956 /* Clear the pending trap. */
957 rc = TRPMResetTrap(pVCpu);
958 AssertRC(rc);
959
960 intInfo = u8Vector;
961 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
962
963 if (enmType == TRPM_TRAP)
964 {
965 switch (u8Vector) {
966 case 8:
967 case 10:
968 case 11:
969 case 12:
970 case 13:
971 case 14:
972 case 17:
973 /* Valid error codes. */
974 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
975 break;
976 default:
977 break;
978 }
979 if (u8Vector == X86_XCPT_BP || u8Vector == X86_XCPT_OF)
980 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
981 else
982 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
983 }
984 else
985 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
986
987 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject);
988 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
989 AssertRC(rc);
990 } /* if (interrupts can be dispatched) */
991
992 return VINF_SUCCESS;
993}
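/*
 * Editor's note: illustrative sketch, not part of the original file. It condenses the
 * dispatch condition used by hmR0VmxCheckPendingInterrupt() above: an interrupt can
 * only be injected when the guest has IF set and no interrupt shadow (MOV SS/STI
 * blocking) is active; otherwise the code requests an interrupt-window exit and waits.
 * The helper name and the #if 0 guard are additions for illustration only.
 */
#if 0
static bool vmxExampleCanInjectInterrupt(PVMCPU pVCpu, PCPUMCTX pCtx)
{
    return (pCtx->eflags.u32 & X86_EFL_IF)
        && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
}
#endif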
994
995/**
996 * Save the host state
997 *
998 * @returns VBox status code.
999 * @param pVM The VM to operate on.
1000 * @param pVCpu The VMCPU to operate on.
1001 */
1002VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
1003{
1004 int rc = VINF_SUCCESS;
1005
1006 /*
1007 * Host CPU Context
1008 */
1009 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
1010 {
1011 RTIDTR idtr;
1012 RTGDTR gdtr;
1013 RTSEL SelTR;
1014 PCX86DESCHC pDesc;
1015 uintptr_t trBase;
1016 RTSEL cs;
1017 RTSEL ss;
1018 uint64_t cr3;
1019
1020 /* Control registers */
1021 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
1022#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1023 if (VMX_IS_64BIT_HOST_MODE())
1024 {
1025 cr3 = hwaccmR0Get64bitCR3();
1026 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3);
1027 }
1028 else
1029#endif
1030 {
1031 cr3 = ASMGetCR3();
1032 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3);
1033 }
1034 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
1035 AssertRC(rc);
1036 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
1037 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
1038 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
1039
1040 /* Selector registers. */
1041#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1042 if (VMX_IS_64BIT_HOST_MODE())
1043 {
1044 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
1045 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
1046 }
1047 else
1048 {
1049 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
1050 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
1051 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
1052 }
1053#else
1054 cs = ASMGetCS();
1055 ss = ASMGetSS();
1056#endif
1057 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
1058 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
1059 rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs);
1060 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
1061 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0);
1062 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0);
1063#if HC_ARCH_BITS == 32
1064 if (!VMX_IS_64BIT_HOST_MODE())
1065 {
1066 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0);
1067 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0);
1068 }
1069#endif
1070 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss);
1071 SelTR = ASMGetTR();
1072 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR);
1073 AssertRC(rc);
1074 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS()));
1075 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
1076 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
1077 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
1078 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
1079 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
1080 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
1081
1082 /* GDTR & IDTR */
1083#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1084 if (VMX_IS_64BIT_HOST_MODE())
1085 {
1086 X86XDTR64 gdtr64, idtr64;
1087 hwaccmR0Get64bitGDTRandIDTR(&gdtr64, &idtr64);
1088 rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
1089 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
1090 AssertRC(rc);
1091 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
1092 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
1093 gdtr.cbGdt = gdtr64.cb;
1094 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
1095 }
1096 else
1097#endif
1098 {
1099 ASMGetGDTR(&gdtr);
1100 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
1101 ASMGetIDTR(&idtr);
1102 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
1103 AssertRC(rc);
1104 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
1105 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
1106 }
1107
1108 /* Save the base address of the TR selector. */
1109 if (SelTR > gdtr.cbGdt)
1110 {
1111 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
1112 return VERR_VMX_INVALID_HOST_STATE;
1113 }
1114
1115 pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK));
1116#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1117 if (VMX_IS_64BIT_HOST_MODE())
1118 {
1119 uint64_t trBase64 = X86DESC64_BASE(*(PX86DESC64)pDesc);
1120 rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64);
1121 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
1122 AssertRC(rc);
1123 }
1124 else
1125#endif
1126 {
1127#if HC_ARCH_BITS == 64
1128 trBase = X86DESC64_BASE(*pDesc);
1129#else
1130 trBase = X86DESC_BASE(*pDesc);
1131#endif
1132 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
1133 AssertRC(rc);
1134 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
1135 }
1136
1137 /* FS and GS base. */
1138#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1139 if (VMX_IS_64BIT_HOST_MODE())
1140 {
1141 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
1142 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
1143 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
1144 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
1145 }
1146#endif
1147 AssertRC(rc);
1148
1149 /* Sysenter MSRs. */
1150 /** @todo expensive!! */
1151 rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1152 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1153#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1154 if (VMX_IS_64BIT_HOST_MODE())
1155 {
1156 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1157 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1158 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1159 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1160 }
1161 else
1162 {
1163 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1164 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1165 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1166 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1167 }
1168#elif HC_ARCH_BITS == 32
1169 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1170 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1171 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1172 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1173#else
1174 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1175 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1176 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1177 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1178#endif
1179 AssertRC(rc);
1180
1181#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1182 /* Store all host MSRs in the VM-Exit load area, so they will be reloaded after the world switch back to the host. */
1183 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pHostMSR;
1184 unsigned idxMsr = 0;
1185
1186 /* EFER MSR present? */
1187 if (ASMCpuId_EDX(0x80000001) & (X86_CPUID_AMD_FEATURE_EDX_NX|X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
1188 {
1189 if (ASMCpuId_EDX(0x80000001) & X86_CPUID_AMD_FEATURE_EDX_SEP)
1190 {
1191 pMsr->u32IndexMSR = MSR_K6_STAR;
1192 pMsr->u32Reserved = 0;
1193 pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
1194 pMsr++; idxMsr++;
1195 }
1196
1197 pMsr->u32IndexMSR = MSR_K6_EFER;
1198 pMsr->u32Reserved = 0;
1199# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1200 if (CPUMIsGuestInLongMode(pVCpu))
1201 {
1202 /* Must match the efer value in our 64 bits switcher. */
1203 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
1204 }
1205 else
1206# endif
1207 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER);
1208 pMsr++; idxMsr++;
1209 }
1210
1211# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1212 if (VMX_IS_64BIT_HOST_MODE())
1213 {
1214 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1215 pMsr->u32Reserved = 0;
1216 pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */
1217 pMsr++; idxMsr++;
1218 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1219 pMsr->u32Reserved = 0;
1220 pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
1221 pMsr++; idxMsr++;
1222 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1223 pMsr->u32Reserved = 0;
1224 pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
1225 pMsr++; idxMsr++;
1226 }
1227# endif
1228 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr);
1229 AssertRC(rc);
1230#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1231
1232 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT;
1233 }
1234 return rc;
1235}
1236
1237/**
1238 * Loads the 4 PDPEs into the guest state when nested paging is used and the
1239 * guest operates in PAE mode.
1240 *
1241 * @returns VINF_SUCCESS or fatal error.
1242 * @param pVCpu The VMCPU to operate on.
1243 * @param pCtx Guest context
1244 */
1245static int hmR0VmxLoadPaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1246{
1247 if (CPUMIsGuestInPAEModeEx(pCtx))
1248 {
1249 X86PDPE aPdpes[4];
1250 int rc = PGMGstGetPaePdpes(pVCpu, &aPdpes[0]);
1251 AssertRCReturn(rc, rc);
1252
1253 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, aPdpes[0].u); AssertRCReturn(rc, rc);
1254 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, aPdpes[1].u); AssertRCReturn(rc, rc);
1255 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, aPdpes[2].u); AssertRCReturn(rc, rc);
1256 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, aPdpes[3].u); AssertRCReturn(rc, rc);
1257 }
1258 return VINF_SUCCESS;
1259}
1260
1261/**
1262 * Saves the 4 PDPEs into the guest state when nested paging is used and the
1263 * guest operates in PAE mode.
1264 *
1265 * @returns VINF_SUCCESS or fatal error.
1266 * @param pVCpu The VMCPU to operate on.
1267 * @param pCtx Guest context
1268 *
1269 * @remarks Tell PGM about CR3 changes before calling this helper.
1270 */
1271static int hmR0VmxSavePaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1272{
1273 if (CPUMIsGuestInPAEModeEx(pCtx))
1274 {
1275 int rc;
1276 X86PDPE aPdpes[4];
1277 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, &aPdpes[0].u); AssertRCReturn(rc, rc);
1278 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, &aPdpes[1].u); AssertRCReturn(rc, rc);
1279 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, &aPdpes[2].u); AssertRCReturn(rc, rc);
1280 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, &aPdpes[3].u); AssertRCReturn(rc, rc);
1281
1282 rc = PGMGstUpdatePaePdpes(pVCpu, &aPdpes[0]);
1283 AssertRCReturn(rc, rc);
1284 }
1285 return VINF_SUCCESS;
1286}
1287
1288
1289/**
1290 * Update the exception bitmap according to the current CPU state
1291 *
1292 * @param pVM The VM to operate on.
1293 * @param pVCpu The VMCPU to operate on.
1294 * @param pCtx Guest context
1295 */
1296static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1297{
1298 uint32_t u32TrapMask;
1299 Assert(pCtx);
1300
1301 u32TrapMask = HWACCM_VMX_TRAP_MASK;
1302#ifndef DEBUG
1303 if (pVM->hwaccm.s.fNestedPaging)
1304 u32TrapMask &= ~RT_BIT(X86_XCPT_PF); /* no longer need to intercept #PF. */
1305#endif
1306
1307 /* Also catch floating point exceptions as we need to report them to the guest in a different way. */
1308 if ( CPUMIsGuestFPUStateActive(pVCpu) == true
1309 && !(pCtx->cr0 & X86_CR0_NE)
1310 && !pVCpu->hwaccm.s.fFPUOldStyleOverride)
1311 {
1312 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1313 pVCpu->hwaccm.s.fFPUOldStyleOverride = true;
1314 }
1315
1316#ifdef VBOX_STRICT
1317 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1318#endif
1319
1320 /* Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise). */
1321 if ( CPUMIsGuestInRealModeEx(pCtx)
1322 && pVM->hwaccm.s.vmx.pRealModeTSS)
1323 u32TrapMask |= HWACCM_VMX_TRAP_MASK_REALMODE;
1324
1325 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1326 AssertRC(rc);
1327}
1328
1329/**
1330 * Loads a minimal guest state
1331 *
1332 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1333 *
1334 * @param pVM The VM to operate on.
1335 * @param pVCpu The VMCPU to operate on.
1336 * @param pCtx Guest context
1337 */
1338VMMR0DECL(void) VMXR0LoadMinimalGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1339{
1340 int rc;
1341 X86EFLAGS eflags;
1342
1343 Assert(!(pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_ALL_GUEST));
1344
1345 /* EIP, ESP and EFLAGS */
1346 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip);
1347 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp);
1348 AssertRC(rc);
1349
1350 /* Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1. */
1351 eflags = pCtx->eflags;
1352 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1353 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1354
1355 /* Real mode emulation using v86 mode. */
1356 if ( CPUMIsGuestInRealModeEx(pCtx)
1357 && pVM->hwaccm.s.vmx.pRealModeTSS)
1358 {
1359 pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags;
1360
1361 eflags.Bits.u1VM = 1;
1362 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1363 }
1364 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1365 AssertRC(rc);
1366}
1367
1368/**
1369 * Loads the guest state
1370 *
1371 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1372 *
1373 * @returns VBox status code.
1374 * @param pVM The VM to operate on.
1375 * @param pVCpu The VMCPU to operate on.
1376 * @param pCtx Guest context
1377 */
1378VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1379{
1380 int rc = VINF_SUCCESS;
1381 RTGCUINTPTR val;
1382
1383 /* VMX_VMCS_CTRL_ENTRY_CONTROLS
1384 * Set required bits to one and zero according to the MSR capabilities.
1385 */
1386 val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0;
1387 /* Load guest debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1388 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1389 /* 64 bits guest mode? */
1390 if (CPUMIsGuestInLongModeEx(pCtx))
1391 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1392 /* else: Must be zero when AMD64 is not available. */
1393
1394 /* Mask away the bits that the CPU doesn't support */
1395 val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1;
1396 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val);
1397 AssertRC(rc);
1398
1399 /* VMX_VMCS_CTRL_EXIT_CONTROLS
1400 * Set required bits to one and zero according to the MSR capabilities.
1401 */
1402 val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0;
1403
1404 /* Save debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1405 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1406
1407#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1408 if (VMX_IS_64BIT_HOST_MODE())
1409 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
1410 /* else: Must be zero when AMD64 is not available. */
1411#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1412 if (CPUMIsGuestInLongModeEx(pCtx))
1413 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; /* our switcher goes to long mode */
1414 else
1415 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64));
1416#endif
1417 val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1;
1418 /* Don't acknowledge external interrupts on VM-exit. */
1419 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val);
1420 AssertRC(rc);
1421
1422 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1423 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
1424 {
1425 if (pVM->hwaccm.s.vmx.pRealModeTSS)
1426 {
1427 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1428 if (pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1429 {
1430 /* Correct weird requirements for switching to protected mode. */
1431 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1432 && enmGuestMode >= PGMMODE_PROTECTED)
1433 {
1434 /* Flush the recompiler code cache as it's not unlikely
1435 * the guest will rewrite code it will later execute in real
1436 * mode (OpenBSD 4.0 is one such example)
1437 */
1438 REMFlushTBs(pVM);
1439
1440 /* DPL of all hidden selector registers must match the current CPL (0). */
1441 pCtx->csHid.Attr.n.u2Dpl = 0;
1442 pCtx->csHid.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1443
1444 pCtx->dsHid.Attr.n.u2Dpl = 0;
1445 pCtx->esHid.Attr.n.u2Dpl = 0;
1446 pCtx->fsHid.Attr.n.u2Dpl = 0;
1447 pCtx->gsHid.Attr.n.u2Dpl = 0;
1448 pCtx->ssHid.Attr.n.u2Dpl = 0;
1449
1450 /* The limit must correspond to the 32 bits setting. */
1451 if (!pCtx->csHid.Attr.n.u1DefBig)
1452 pCtx->csHid.u32Limit &= 0xffff;
1453 if (!pCtx->dsHid.Attr.n.u1DefBig)
1454 pCtx->dsHid.u32Limit &= 0xffff;
1455 if (!pCtx->esHid.Attr.n.u1DefBig)
1456 pCtx->esHid.u32Limit &= 0xffff;
1457 if (!pCtx->fsHid.Attr.n.u1DefBig)
1458 pCtx->fsHid.u32Limit &= 0xffff;
1459 if (!pCtx->gsHid.Attr.n.u1DefBig)
1460 pCtx->gsHid.u32Limit &= 0xffff;
1461 if (!pCtx->ssHid.Attr.n.u1DefBig)
1462 pCtx->ssHid.u32Limit &= 0xffff;
1463 }
1464 else
1465 /* Switching from protected mode to real mode. */
1466 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode >= PGMMODE_PROTECTED
1467 && enmGuestMode == PGMMODE_REAL)
1468 {
1469 /* The limit must also be set to 0xffff. */
1470 pCtx->csHid.u32Limit = 0xffff;
1471 pCtx->dsHid.u32Limit = 0xffff;
1472 pCtx->esHid.u32Limit = 0xffff;
1473 pCtx->fsHid.u32Limit = 0xffff;
1474 pCtx->gsHid.u32Limit = 0xffff;
1475 pCtx->ssHid.u32Limit = 0xffff;
1476
1477 Assert(pCtx->csHid.u64Base <= 0xfffff);
1478 Assert(pCtx->dsHid.u64Base <= 0xfffff);
1479 Assert(pCtx->esHid.u64Base <= 0xfffff);
1480 Assert(pCtx->fsHid.u64Base <= 0xfffff);
1481 Assert(pCtx->gsHid.u64Base <= 0xfffff);
1482 }
1483 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1484 }
1485 else
1486 /* VT-x will fail with a guest invalid state otherwise... (CPU state after a reset) */
1487 if ( CPUMIsGuestInRealModeEx(pCtx)
1488 && pCtx->csHid.u64Base == 0xffff0000)
1489 {
1490 pCtx->csHid.u64Base = 0xf0000;
1491 pCtx->cs = 0xf000;
1492 }
1493 }
1494
1495 VMX_WRITE_SELREG(ES, es);
1496 AssertRC(rc);
1497
1498 VMX_WRITE_SELREG(CS, cs);
1499 AssertRC(rc);
1500
1501 VMX_WRITE_SELREG(SS, ss);
1502 AssertRC(rc);
1503
1504 VMX_WRITE_SELREG(DS, ds);
1505 AssertRC(rc);
1506
1507 VMX_WRITE_SELREG(FS, fs);
1508 AssertRC(rc);
1509
1510 VMX_WRITE_SELREG(GS, gs);
1511 AssertRC(rc);
1512 }
1513
1514 /* Guest CPU context: LDTR. */
1515 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
1516 {
1517 if (pCtx->ldtr == 0)
1518 {
1519 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1520 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1521 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, 0);
1522 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1523 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1524 }
1525 else
1526 {
1527 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr);
1528 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtrHid.u32Limit);
1529 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, pCtx->ldtrHid.u64Base);
1530 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtrHid.Attr.u);
1531 }
1532 AssertRC(rc);
1533 }
1534 /* Guest CPU context: TR. */
1535 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
1536 {
1537 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1538 if ( CPUMIsGuestInRealModeEx(pCtx)
1539 && pVM->hwaccm.s.vmx.pRealModeTSS)
1540 {
1541 RTGCPHYS GCPhys;
1542
1543            /* We convert it here every time as PCI regions could be reconfigured. */
1544 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys);
1545 AssertRC(rc);
1546
1547 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0);
1548 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE);
1549 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1550
1551 X86DESCATTR attr;
1552
1553 attr.u = 0;
1554 attr.n.u1Present = 1;
1555 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1556 val = attr.u;
1557 }
1558 else
1559 {
1560 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr);
1561 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->trHid.u32Limit);
1562 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, pCtx->trHid.u64Base);
1563
1564 val = pCtx->trHid.Attr.u;
1565
1566 /* The TSS selector must be busy (REM bugs? see defect #XXXX). */
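            /* (VT-x requires the TR type to be a busy TSS: 11 (0xB) for a 32-bit TSS or 3 for a 16-bit TSS; see the assertion below.) */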
1567 if (!(val & X86_SEL_TYPE_SYS_TSS_BUSY_MASK))
1568 {
1569 if (val & 0xf)
1570 val |= X86_SEL_TYPE_SYS_TSS_BUSY_MASK;
1571 else
1572 /* Default if no TR selector has been set (otherwise vmlaunch will fail!) */
1573 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1574 }
1575 AssertMsg((val & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY || (val & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("%#x\n", val));
1576 }
1577 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1578 AssertRC(rc);
1579 }
1580 /* Guest CPU context: GDTR. */
1581 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
1582 {
1583 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1584 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1585 AssertRC(rc);
1586 }
1587 /* Guest CPU context: IDTR. */
1588 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
1589 {
1590 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1591 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1592 AssertRC(rc);
1593 }
1594
1595 /*
1596 * Sysenter MSRs
1597 */
1598 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR)
1599 {
1600 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1601 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1602 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1603 AssertRC(rc);
1604 }
1605
1606 /* Control registers */
1607 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
1608 {
1609 val = pCtx->cr0;
1610 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1611 Log2(("Guest CR0-shadow %08x\n", val));
1612 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1613 {
1614 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1615 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1616 }
1617 else
1618 {
1619 /** @todo check if we support the old style mess correctly. */
1620 if (!(val & X86_CR0_NE))
1621 Log(("Forcing X86_CR0_NE!!!\n"));
1622
1623 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1624 }
1625 /* Note: protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1626 if (!pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1627 val |= X86_CR0_PE | X86_CR0_PG;
1628
1629 if (pVM->hwaccm.s.fNestedPaging)
1630 {
1631 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1632 {
1633 /* Disable cr3 read/write monitoring as we don't need it for EPT. */
1634 pVCpu->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1635 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1636 }
1637 else
1638 {
1639 /* Reenable cr3 read/write monitoring as our identity mapped page table is active. */
1640 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1641 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1642 }
1643 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1644 AssertRC(rc);
1645 }
1646 else
1647 {
1648 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1649 val |= X86_CR0_WP;
1650 }
1651
1652 /* Always enable caching. */
1653 val &= ~(X86_CR0_CD|X86_CR0_NW);
1654
1655 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR0, val);
1656 Log2(("Guest CR0 %08x\n", val));
1657        /* CR0 flags owned by the host; if the guest attempts to change them, then
1658 * the VM will exit.
1659 */
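        /* (Guest reads of these bits return the CR0 read shadow written above; guest writes that would change them from the shadow value cause a VM-exit.) */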
1660 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1661 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1662 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1663 | X86_CR0_CD /* Bit not restored during VM-exit! */
1664 | X86_CR0_NW /* Bit not restored during VM-exit! */
1665 | X86_CR0_NE;
1666
1667 /* When the guest's FPU state is active, then we no longer care about
1668 * the FPU related bits.
1669 */
1670 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1671 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_MP;
1672
1673 pVCpu->hwaccm.s.vmx.cr0_mask = val;
1674
1675 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1676 Log2(("Guest CR0-mask %08x\n", val));
1677 AssertRC(rc);
1678 }
1679 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
1680 {
1681 /* CR4 */
1682 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1683 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1684 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1685 val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0;
1686
1687 if (!pVM->hwaccm.s.fNestedPaging)
1688 {
1689 switch(pVCpu->hwaccm.s.enmShadowMode)
1690 {
1691 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1692 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1693 case PGMMODE_32_BIT: /* 32-bit paging. */
1694 val &= ~X86_CR4_PAE;
1695 break;
1696
1697 case PGMMODE_PAE: /* PAE paging. */
1698 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1699                    /* Must use PAE paging as we could use physical memory > 4 GB. */
1700 val |= X86_CR4_PAE;
1701 break;
1702
1703 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1704 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1705#ifdef VBOX_ENABLE_64_BITS_GUESTS
1706 break;
1707#else
1708 AssertFailed();
1709 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1710#endif
1711 default: /* shut up gcc */
1712 AssertFailed();
1713 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1714 }
1715 }
1716 else
1717 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1718 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1719 {
1720 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
1721 val |= X86_CR4_PSE;
1722                /* Our identity mapping is a 32-bit page directory. */
1723 val &= ~X86_CR4_PAE;
1724 }
1725
1726 /* Turn off VME if we're in emulated real mode. */
1727 if ( CPUMIsGuestInRealModeEx(pCtx)
1728 && pVM->hwaccm.s.vmx.pRealModeTSS)
1729 val &= ~X86_CR4_VME;
1730
1731 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR4, val);
1732 Log2(("Guest CR4 %08x\n", val));
1733        /* CR4 flags owned by the host; if the guest attempts to change them, then
1734 * the VM will exit.
1735 */
1736 val = 0
1737 | X86_CR4_VME
1738 | X86_CR4_PAE
1739 | X86_CR4_PGE
1740 | X86_CR4_PSE
1741 | X86_CR4_VMXE;
1742 pVCpu->hwaccm.s.vmx.cr4_mask = val;
1743
1744 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
1745 Log2(("Guest CR4-mask %08x\n", val));
1746 AssertRC(rc);
1747 }
1748
1749 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
1750 {
1751 if (pVM->hwaccm.s.fNestedPaging)
1752 {
1753 Assert(PGMGetHyperCR3(pVCpu));
1754 pVCpu->hwaccm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
1755
1756 Assert(!(pVCpu->hwaccm.s.vmx.GCPhysEPTP & 0xfff));
1757 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
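            /* (EPTP layout: bits 2:0 = EPT memory type (6 = write-back), bits 5:3 = page-walk length minus one.) */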
1758 pVCpu->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
1759 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
1760
1761 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_EPTP_FULL, pVCpu->hwaccm.s.vmx.GCPhysEPTP);
1762 AssertRC(rc);
1763
1764 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1765 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1766 {
1767 RTGCPHYS GCPhys;
1768
1769                /* We convert it here every time as PCI regions could be reconfigured. */
1770 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
1771 AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable));
1772
1773 /* We use our identity mapping page table here as we need to map guest virtual to guest physical addresses; EPT will
1774 * take care of the translation to host physical addresses.
1775 */
1776 val = GCPhys;
1777 }
1778 else
1779 {
1780 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
1781 val = pCtx->cr3;
1782 rc = hmR0VmxLoadPaePdpes(pVCpu, pCtx);
1783 AssertRCReturn(rc, rc);
1784 }
1785 }
1786 else
1787 {
1788 val = PGMGetHyperCR3(pVCpu);
1789 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
1790 }
1791
1792 /* Save our shadow CR3 register. */
1793 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_CR3, val);
1794 AssertRC(rc);
1795 }
1796
1797 /* Debug registers. */
1798 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
1799 {
1800 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
1801 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
1802
1803 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
1804 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
1805 pCtx->dr[7] |= 0x400; /* must be one */
1806
1807 /* Resync DR7 */
1808 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
1809 AssertRC(rc);
1810
1811#ifdef DEBUG
1812 /* Sync the hypervisor debug state now if any breakpoint is armed. */
1813 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
1814 && !CPUMIsHyperDebugStateActive(pVCpu)
1815 && !DBGFIsStepping(pVCpu))
1816 {
1817 /* Save the host and load the hypervisor debug state. */
1818 rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
1819 AssertRC(rc);
1820
1821 /* DRx intercepts remain enabled. */
1822
1823 /* Override dr7 with the hypervisor value. */
1824 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, CPUMGetHyperDR7(pVCpu));
1825 AssertRC(rc);
1826 }
1827 else
1828#endif
1829 /* Sync the debug state now if any breakpoint is armed. */
1830 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
1831 && !CPUMIsGuestDebugStateActive(pVCpu)
1832 && !DBGFIsStepping(pVCpu))
1833 {
1834 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed);
1835
1836 /* Disable drx move intercepts. */
1837 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
1838 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1839 AssertRC(rc);
1840
1841 /* Save the host and load the guest debug state. */
1842 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
1843 AssertRC(rc);
1844 }
1845
1846 /* IA32_DEBUGCTL MSR. */
1847 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0);
1848 AssertRC(rc);
1849
1850 /** @todo do we really ever need this? */
1851 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
1852 AssertRC(rc);
1853 }
1854
1855    /* 64-bit guest mode? */
1856 if (CPUMIsGuestInLongModeEx(pCtx))
1857 {
1858#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
1859 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1860#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1861 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
1862#else
1863# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1864 if (!pVM->hwaccm.s.fAllow64BitGuests)
1865 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1866# endif
1867 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64;
1868#endif
1869 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR)
1870 {
1871 /* Update these as wrmsr might have changed them. */
1872 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fsHid.u64Base);
1873 AssertRC(rc);
1874 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gsHid.u64Base);
1875 AssertRC(rc);
1876 }
1877 }
1878 else
1879 {
1880 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32;
1881 }
1882
1883 hmR0VmxUpdateExceptionBitmap(pVM, pVCpu, pCtx);
1884
1885#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1886 /* Store all guest MSRs in the VM-Entry load area, so they will be loaded during the world switch. */
1887 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
1888 unsigned idxMsr = 0;
1889
1890 uint32_t ulEdx;
1891 uint32_t ulTemp;
1892 CPUMGetGuestCpuId(pVCpu, 0x80000001, &ulTemp, &ulTemp, &ulTemp, &ulEdx);
1893 /* EFER MSR present? */
1894 if (ulEdx & (X86_CPUID_AMD_FEATURE_EDX_NX|X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
1895 {
1896 pMsr->u32IndexMSR = MSR_K6_EFER;
1897 pMsr->u32Reserved = 0;
1898 pMsr->u64Value = pCtx->msrEFER;
1899 /* VT-x will complain if only MSR_K6_EFER_LME is set. */
1900 if (!CPUMIsGuestInLongModeEx(pCtx))
1901 pMsr->u64Value &= ~(MSR_K6_EFER_LMA|MSR_K6_EFER_LME);
1902 pMsr++; idxMsr++;
1903
1904 if (ulEdx & X86_CPUID_AMD_FEATURE_EDX_LONG_MODE)
1905 {
1906 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1907 pMsr->u32Reserved = 0;
1908 pMsr->u64Value = pCtx->msrLSTAR; /* 64 bits mode syscall rip */
1909 pMsr++; idxMsr++;
1910 pMsr->u32IndexMSR = MSR_K6_STAR;
1911 pMsr->u32Reserved = 0;
1912 pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
1913 pMsr++; idxMsr++;
1914 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1915 pMsr->u32Reserved = 0;
1916 pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */
1917 pMsr++; idxMsr++;
1918 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1919 pMsr->u32Reserved = 0;
1920 pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
1921 pMsr++; idxMsr++;
1922 }
1923 }
1924 pVCpu->hwaccm.s.vmx.cCachedMSRs = idxMsr;
1925
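    /* The same count is used for the VM-exit MSR-store list; VMXR0SaveGuestState reads the (possibly guest-modified) values back from this area after the world switch. */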
1926 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr);
1927 AssertRC(rc);
1928
1929 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, idxMsr);
1930 AssertRC(rc);
1931#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1932
1933 bool fOffsettedTsc;
1934 if (pVM->hwaccm.s.vmx.fUsePreemptTimer)
1935 {
1936 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hwaccm.s.vmx.u64TSCOffset);
1937
1938 /* Make sure the returned values have sane upper and lower boundaries. */
1939 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
1940
1941 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */
1942 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
1943
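        /* The preemption timer counts down at the TSC rate divided by 2^cPreemptTimerShift (as reported by IA32_VMX_MISC), so convert TSC ticks into timer ticks. */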
1944 cTicksToDeadline >>= pVM->hwaccm.s.vmx.cPreemptTimerShift;
1945 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
1946 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount);
1947 AssertRC(rc);
1948 }
1949 else
1950 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hwaccm.s.vmx.u64TSCOffset);
1951 if (fOffsettedTsc)
1952 {
1953 uint64_t u64CurTSC = ASMReadTSC();
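        /* With TSC offsetting the guest sees host TSC + offset; only apply the offset if it won't make the guest-visible TSC appear to go backwards. */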
1954 if (u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
1955 {
1956 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET */
1957 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, pVCpu->hwaccm.s.vmx.u64TSCOffset);
1958 AssertRC(rc);
1959
1960 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1961 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1962 AssertRC(rc);
1963 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset);
1964 }
1965 else
1966 {
1967 /* Fall back to rdtsc emulation as we would otherwise pass decreasing tsc values to the guest. */
1968 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC, pVCpu->hwaccm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset, TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hwaccm.s.vmx.u64TSCOffset, TMCpuTickGet(pVCpu)));
1969 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1970 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1971 AssertRC(rc);
1972 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow);
1973 }
1974 }
1975 else
1976 {
1977 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1978 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1979 AssertRC(rc);
1980 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept);
1981 }
1982
1983 /* Done with the major changes */
1984 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
1985
1986 /* Minimal guest state update (esp, eip, eflags mostly) */
1987 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
1988 return rc;
1989}
1990
1991/**
1992 * Syncs back the guest state
1993 *
1994 * @returns VBox status code.
1995 * @param pVM The VM to operate on.
1996 * @param pVCpu The VMCPU to operate on.
1997 * @param pCtx Guest context
1998 */
1999DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2000{
2001 RTGCUINTREG val, valShadow;
2002 RTGCUINTPTR uInterruptState;
2003 int rc;
2004
2005 /* Let's first sync back eip, esp, and eflags. */
2006 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RIP, &val);
2007 AssertRC(rc);
2008 pCtx->rip = val;
2009 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RSP, &val);
2010 AssertRC(rc);
2011 pCtx->rsp = val;
2012 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
2013 AssertRC(rc);
2014 pCtx->eflags.u32 = val;
2015
2016 /* Take care of instruction fusing (sti, mov ss) */
2017 rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
2018 uInterruptState = val;
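    /* (Interruptibility state: bit 0 = blocking by STI, bit 1 = blocking by MOV SS; hence the assertion below that the value is at most 2.) */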
2019 if (uInterruptState != 0)
2020 {
2021 Assert(uInterruptState <= 2); /* only sti & mov ss */
2022 Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip));
2023 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
2024 }
2025 else
2026 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2027
2028 /* Control registers. */
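    /* For host-owned bits (those set in cr0_mask/cr4_mask) the guest-visible value lives in the read shadow; take the remaining bits from the real CR0/CR4 fields. */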
2029 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
2030 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR0, &val);
2031 val = (valShadow & pVCpu->hwaccm.s.vmx.cr0_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr0_mask);
2032 CPUMSetGuestCR0(pVCpu, val);
2033
2034 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
2035 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR4, &val);
2036 val = (valShadow & pVCpu->hwaccm.s.vmx.cr4_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr4_mask);
2037 CPUMSetGuestCR4(pVCpu, val);
2038
2039 /* Note: no reason to sync back the CRx registers. They can't be changed by the guest. */
2040 /* Note: only in the nested paging case can CR3 & CR4 be changed by the guest. */
2041 if ( pVM->hwaccm.s.fNestedPaging
2042 && CPUMIsGuestInPagedProtectedModeEx(pCtx))
2043 {
2044 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
2045
2046 /* Can be updated behind our back in the nested paging case. */
2047 CPUMSetGuestCR2(pVCpu, pCache->cr2);
2048
2049 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR3, &val);
2050
2051 if (val != pCtx->cr3)
2052 {
2053 CPUMSetGuestCR3(pVCpu, val);
2054 PGMUpdateCR3(pVCpu, val);
2055 }
2056 rc = hmR0VmxSavePaePdpes(pVCpu, pCtx);
2057 AssertRCReturn(rc, rc);
2058 }
2059
2060 /* Sync back DR7 here. */
2061 VMXReadCachedVMCS(VMX_VMCS64_GUEST_DR7, &val);
2062 pCtx->dr[7] = val;
2063
2064 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
2065 VMX_READ_SELREG(ES, es);
2066 VMX_READ_SELREG(SS, ss);
2067 VMX_READ_SELREG(CS, cs);
2068 VMX_READ_SELREG(DS, ds);
2069 VMX_READ_SELREG(FS, fs);
2070 VMX_READ_SELREG(GS, gs);
2071
2072 /*
2073 * System MSRs
2074 */
2075 VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
2076 pCtx->SysEnter.cs = val;
2077 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_EIP, &val);
2078 pCtx->SysEnter.eip = val;
2079 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_ESP, &val);
2080 pCtx->SysEnter.esp = val;
2081
2082 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
2083 VMX_READ_SELREG(LDTR, ldtr);
2084
2085 VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
2086 pCtx->gdtr.cbGdt = val;
2087 VMXReadCachedVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val);
2088 pCtx->gdtr.pGdt = val;
2089
2090 VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
2091 pCtx->idtr.cbIdt = val;
2092 VMXReadCachedVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val);
2093 pCtx->idtr.pIdt = val;
2094
2095 /* Real mode emulation using v86 mode. */
2096 if ( CPUMIsGuestInRealModeEx(pCtx)
2097 && pVM->hwaccm.s.vmx.pRealModeTSS)
2098 {
2099 /* Hide our emulation flags */
2100 pCtx->eflags.Bits.u1VM = 0;
2101
2102 /* Restore original IOPL setting as we always use 0. */
2103 pCtx->eflags.Bits.u2IOPL = pVCpu->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL;
2104
2105 /* Force a TR resync every time in case we switch modes. */
2106 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR;
2107 }
2108 else
2109 {
2110 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
2111 VMX_READ_SELREG(TR, tr);
2112 }
2113
2114#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2115 /* Save the possibly changed MSRs that we automatically restore and save during a world switch. */
2116 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.cCachedMSRs; i++)
2117 {
2118 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
2119 pMsr += i;
2120
2121 switch (pMsr->u32IndexMSR)
2122 {
2123 case MSR_K8_LSTAR:
2124 pCtx->msrLSTAR = pMsr->u64Value;
2125 break;
2126 case MSR_K6_STAR:
2127 pCtx->msrSTAR = pMsr->u64Value;
2128 break;
2129 case MSR_K8_SF_MASK:
2130 pCtx->msrSFMASK = pMsr->u64Value;
2131 break;
2132 case MSR_K8_KERNEL_GS_BASE:
2133 pCtx->msrKERNELGSBASE = pMsr->u64Value;
2134 break;
2135 case MSR_K6_EFER:
2136 /* EFER can't be changed without causing a VM-exit. */
2137// Assert(pCtx->msrEFER == pMsr->u64Value);
2138 break;
2139 default:
2140 AssertFailed();
2141 return VERR_INTERNAL_ERROR;
2142 }
2143 }
2144#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2145 return VINF_SUCCESS;
2146}
2147
2148/**
2149 * Dummy placeholder
2150 *
2151 * @param pVM The VM to operate on.
2152 * @param pVCpu The VMCPU to operate on.
2153 */
2154static void hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu)
2155{
2156 NOREF(pVM);
2157 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
2158 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2159 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2160 return;
2161}
2162
2163/**
2164 * Sets up the tagged TLB for EPT.
2165 *
2167 * @param pVM The VM to operate on.
2168 * @param pVCpu The VMCPU to operate on.
2169 */
2170static void hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu)
2171{
2172 PHMGLOBLCPUINFO pCpu;
2173
2174 Assert(pVM->hwaccm.s.fNestedPaging);
2175 Assert(!pVM->hwaccm.s.vmx.fVPID);
2176
2177 /* Deal with tagged TLBs if VPID or EPT is supported. */
2178 pCpu = HWACCMR0GetCurrentCpu();
2179 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
2180 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
2181 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2182 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
2183 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2184 {
2185 /* Force a TLB flush on VM entry. */
2186 pVCpu->hwaccm.s.fForceTLBFlush = true;
2187 }
2188 /* Disabled because this has triggered every time I have suspended my
2189 * laptop with a VM running for the past three months or more. */
2190 // else
2191 // Assert(!pCpu->fFlushTLB);
2192
2193 /* Check for tlb shootdown flushes. */
2194 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2195 pVCpu->hwaccm.s.fForceTLBFlush = true;
2196
2197 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2198 pCpu->fFlushTLB = false;
2199
2200 if (pVCpu->hwaccm.s.fForceTLBFlush)
2201 {
2202 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushContext, 0);
2203 }
2204 else
2205 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2206 {
2207 /* Deal with pending TLB shootdown actions which were queued when we were not executing code. */
2208 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2209
2210        for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2211 {
2212 /* aTlbShootdownPages contains physical addresses in this case. */
2213 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2214 }
2215 }
2216    pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2217 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2218
2219#ifdef VBOX_WITH_STATISTICS
2220 if (pVCpu->hwaccm.s.fForceTLBFlush)
2221 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2222 else
2223 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2224#endif
2225}
2226
2227#ifdef HWACCM_VTX_WITH_VPID
2228/**
2229 * Sets up the tagged TLB for VPID.
2230 *
2232 * @param pVM The VM to operate on.
2233 * @param pVCpu The VMCPU to operate on.
2234 */
2235static void hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu)
2236{
2237 PHMGLOBLCPUINFO pCpu;
2238
2239 Assert(pVM->hwaccm.s.vmx.fVPID);
2240 Assert(!pVM->hwaccm.s.fNestedPaging);
2241
2242 /* Deal with tagged TLBs if VPID or EPT is supported. */
2243 pCpu = HWACCMR0GetCurrentCpu();
2244 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
2245 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
2246 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2247 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
2248 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2249 {
2250 /* Force a TLB flush on VM entry. */
2251 pVCpu->hwaccm.s.fForceTLBFlush = true;
2252 }
2253 else
2254 Assert(!pCpu->fFlushTLB);
2255
2256 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2257
2258 /* Check for tlb shootdown flushes. */
2259 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2260 pVCpu->hwaccm.s.fForceTLBFlush = true;
2261
2262 /* Make sure we flush the TLB when required. Switch ASID to achieve the same thing, but without actually flushing the whole TLB (which is expensive). */
2263 if (pVCpu->hwaccm.s.fForceTLBFlush)
2264 {
2265 if ( ++pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID
2266 || pCpu->fFlushTLB)
2267 {
2268 pCpu->fFlushTLB = false;
2269 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2270 pCpu->cTLBFlushes++;
2271 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_ALL_CONTEXTS, 0);
2272 }
2273 else
2274 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2275
2276 pVCpu->hwaccm.s.fForceTLBFlush = false;
2277 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2278 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2279 }
2280 else
2281 {
2282 Assert(!pCpu->fFlushTLB);
2283 Assert(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID);
2284
2285 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2286 {
2287 /* Deal with pending TLB shootdown actions which were queued when we were not executing code. */
2288 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2289 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2290 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2291 }
2292 }
2293 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2294 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2295
2296 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2297 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2298 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2299
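    /* Tag this VCPU's TLB entries with its current ASID (VPID 0 is reserved for the host). */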
2300 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2301 AssertRC(rc);
2302
2303 if (pVCpu->hwaccm.s.fForceTLBFlush)
2304 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushContext, 0);
2305
2306# ifdef VBOX_WITH_STATISTICS
2307 if (pVCpu->hwaccm.s.fForceTLBFlush)
2308 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2309 else
2310 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2311# endif
2312}
2313#endif /* HWACCM_VTX_WITH_VPID */
2314
2315/**
2316 * Runs guest code in a VT-x VM.
2317 *
2318 * @returns VBox status code.
2319 * @param pVM The VM to operate on.
2320 * @param pVCpu The VMCPU to operate on.
2321 * @param pCtx Guest context
2322 */
2323VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2324{
2325 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x);
2326 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit1);
2327 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit2);
2328
2329 VBOXSTRICTRC rc = VINF_SUCCESS;
2330 int rc2;
2331 RTGCUINTREG val;
2332 RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID;
2333 RTGCUINTREG instrError, cbInstr;
2334 RTGCUINTPTR exitQualification = 0;
2335 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
2336 RTGCUINTPTR errCode, instrInfo;
2337 bool fSetupTPRCaching = false;
2338 uint64_t u64OldLSTAR = 0;
2339 uint8_t u8LastTPR = 0;
2340 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
2341 unsigned cResume = 0;
2342#ifdef VBOX_STRICT
2343 RTCPUID idCpuCheck;
2344 bool fWasInLongMode = false;
2345#endif
2346#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2347 uint64_t u64LastTime = RTTimeMilliTS();
2348#endif
2349
2350 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || (pVCpu->hwaccm.s.vmx.pbVAPIC && pVM->hwaccm.s.vmx.pAPIC));
2351
2352 /* Check if we need to use TPR shadowing. */
2353 if ( CPUMIsGuestInLongModeEx(pCtx)
2354 || ( ((pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || pVM->hwaccm.s.fTRPPatchingAllowed)
2355 && pVM->hwaccm.s.fHasIoApic)
2356 )
2357 {
2358 fSetupTPRCaching = true;
2359 }
2360
2361 Log2(("\nE"));
2362
2363#ifdef VBOX_STRICT
2364 {
2365 RTCCUINTREG val2;
2366
2367 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val2);
2368 AssertRC(rc2);
2369 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2));
2370
2371 /* allowed zero */
2372 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
2373 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
2374
2375 /* allowed one */
2376 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
2377 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
2378
2379 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val2);
2380 AssertRC(rc2);
2381 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2));
2382
2383 /* Must be set according to the MSR, but can be cleared in case of EPT. */
2384 if (pVM->hwaccm.s.fNestedPaging)
2385 val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
2386 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2387 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2388
2389 /* allowed zero */
2390 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
2391 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
2392
2393 /* allowed one */
2394 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
2395 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
2396
2397 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val2);
2398 AssertRC(rc2);
2399 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2));
2400
2401 /* allowed zero */
2402 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
2403 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
2404
2405 /* allowed one */
2406 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
2407 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
2408
2409 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val2);
2410 AssertRC(rc2);
2411 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2));
2412
2413 /* allowed zero */
2414 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
2415 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2416
2417 /* allowed one */
2418 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2419 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2420 }
2421 fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx);
2422#endif /* VBOX_STRICT */
2423
2424#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2425 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2426#endif
2427
2428 /* We can jump to this point to resume execution after determining that a VM-exit is innocent.
2429 */
2430ResumeExecution:
2431 if (!STAM_REL_PROFILE_ADV_IS_RUNNING(&pVCpu->hwaccm.s.StatEntry))
2432 STAM_REL_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit2, &pVCpu->hwaccm.s.StatEntry, x);
2433 AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(),
2434 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
2435 (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
2436 Assert(!HWACCMR0SuspendPending());
2437 /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */
2438 Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx));
2439
2440 /* Safety precaution; looping for too long here can have a very bad effect on the host */
2441 if (RT_UNLIKELY(++cResume > pVM->hwaccm.s.cMaxResumeLoops))
2442 {
2443 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume);
2444 rc = VINF_EM_RAW_INTERRUPT;
2445 goto end;
2446 }
2447
2448 /* Check for irq inhibition due to instruction fusing (sti, mov ss). */
2449 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2450 {
2451 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
2452 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2453 {
2454 /* Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
2455 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
2456             * force a world switch again, possibly allowing a guest interrupt to be dispatched in the process. This could
2457             * break the guest. It sounds very unlikely, but such timing-sensitive problems are not as rare as you might think.
2458 */
2459 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2460 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2461 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2462 AssertRC(rc2);
2463 }
2464 }
2465 else
2466 {
2467 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2468 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2469 AssertRC(rc2);
2470 }
2471
2472#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2473 if (RT_UNLIKELY((cResume & 0xf) == 0))
2474 {
2475 uint64_t u64CurTime = RTTimeMilliTS();
2476
2477 if (RT_UNLIKELY(u64CurTime > u64LastTime))
2478 {
2479 u64LastTime = u64CurTime;
2480 TMTimerPollVoid(pVM, pVCpu);
2481 }
2482 }
2483#endif
2484
2485 /* Check for pending actions that force us to go back to ring 3. */
2486 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
2487 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
2488 {
2489 /* Check if a sync operation is pending. */
2490 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
2491 {
2492 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
2493 if (rc != VINF_SUCCESS)
2494 {
2495 AssertRC(VBOXSTRICTRC_VAL(rc));
2496 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc)));
2497 goto end;
2498 }
2499 }
2500
2501#ifdef DEBUG
2502 /* Intercept X86_XCPT_DB if stepping is enabled */
2503 if (!DBGFIsStepping(pVCpu))
2504#endif
2505 {
2506 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK)
2507 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK))
2508 {
2509 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3);
2510 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2511 goto end;
2512 }
2513 }
2514
2515 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
2516 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
2517 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
2518 {
2519 rc = VINF_EM_PENDING_REQUEST;
2520 goto end;
2521 }
2522
2523 /* Check if a pgm pool flush is in progress. */
2524 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
2525 {
2526 rc = VINF_PGM_POOL_FLUSH_PENDING;
2527 goto end;
2528 }
2529
2530 /* Check if DMA work is pending (2nd+ run). */
2531 if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1)
2532 {
2533 rc = VINF_EM_RAW_TO_R3;
2534 goto end;
2535 }
2536 }
2537
2538#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2539 /*
2540 * Exit to ring-3 preemption/work is pending.
2541 *
2542 * Interrupts are disabled before the call to make sure we don't miss any interrupt
2543 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
2544 * further down, but hmR0VmxCheckPendingInterrupt makes that impossible.)
2545 *
2546     * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
2547 * shootdowns rely on this.
2548 */
2549 uOldEFlags = ASMIntDisableFlags();
2550 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
2551 {
2552 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPreemptPending);
2553 rc = VINF_EM_RAW_INTERRUPT;
2554 goto end;
2555 }
2556 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
2557#endif
2558
2559 /* When external interrupts are pending, we should exit the VM when IF is set. */
2560 /* Note! *After* VM_FF_INHIBIT_INTERRUPTS check!!! */
2561 rc = hmR0VmxCheckPendingInterrupt(pVM, pVCpu, pCtx);
2562 if (RT_FAILURE(rc))
2563 goto end;
2564
2565 /** @todo check timers?? */
2566
2567    /* TPR caching using CR8 is only available in 64-bit mode. */
2568    /* Note the 32-bit exception for AMD (X86_CPUID_AMD_FEATURE_ECX_CR8L), but that appears to be missing on Intel CPUs. */
2569 /* Note: we can't do this in LoadGuestState as PDMApicGetTPR can jump back to ring 3 (lock)!!!!! (no longer true) */
2570 /**
2571 * @todo query and update the TPR only when it could have been changed (mmio access & wrmsr (x2apic))
2572 */
2573 if (fSetupTPRCaching)
2574 {
2575 /* TPR caching in CR8 */
2576 bool fPending;
2577
2578 rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
2579 AssertRC(rc2);
2580 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
2581 pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = u8LastTPR;
2582
2583 /* Two options here:
2584 * - external interrupt pending, but masked by the TPR value.
2585         *   -> a CR8 update that lowers the current TPR value should cause an exit
2586         * - no pending interrupts
2587         *   -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
2588 */
2589 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
2590 AssertRC(VBOXSTRICTRC_VAL(rc));
2591
2592 if (pVM->hwaccm.s.fTPRPatchingActive)
2593 {
2594 Assert(!CPUMIsGuestInLongModeEx(pCtx));
2595 /* Our patch code uses LSTAR for TPR caching. */
2596 pCtx->msrLSTAR = u8LastTPR;
2597
2598 if (fPending)
2599 {
2600 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
2601 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
2602 }
2603 else
2604 {
2605                /* No interrupts are pending, so we don't need to be explicitly notified.
2606 * There are enough world switches for detecting pending interrupts.
2607 */
2608 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
2609 }
2610 }
2611 }
2612
2613#if defined(HWACCM_VTX_WITH_EPT) && defined(LOG_ENABLED)
2614 if ( pVM->hwaccm.s.fNestedPaging
2615# ifdef HWACCM_VTX_WITH_VPID
2616 || pVM->hwaccm.s.vmx.fVPID
2617# endif /* HWACCM_VTX_WITH_VPID */
2618 )
2619 {
2620 PHMGLOBLCPUINFO pCpu;
2621
2622 pCpu = HWACCMR0GetCurrentCpu();
2623 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2624 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2625 {
2626 if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu)
2627 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu));
2628 else
2629 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2630 }
2631 if (pCpu->fFlushTLB)
2632 LogFlow(("Force TLB flush: first time cpu %d is used -> flush\n", pCpu->idCpu));
2633 else
2634 if (pVCpu->hwaccm.s.fForceTLBFlush)
2635 LogFlow(("Manual TLB flush\n"));
2636 }
2637#endif
2638#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2639 PGMRZDynMapFlushAutoSet(pVCpu);
2640#endif
2641
2642 /*
2643 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING 3!
2644 * (until the actual world switch)
2645 */
2646#ifdef VBOX_STRICT
2647 idCpuCheck = RTMpCpuId();
2648#endif
2649#ifdef LOG_ENABLED
2650 VMMR0LogFlushDisable(pVCpu);
2651#endif
2652 /* Save the host state first. */
2653 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
2654 {
2655 rc = VMXR0SaveHostState(pVM, pVCpu);
2656 if (RT_UNLIKELY(rc != VINF_SUCCESS))
2657 {
2658 VMMR0LogFlushEnable(pVCpu);
2659 goto end;
2660 }
2661 }
2662
2663 /* Load the guest state */
2664 if (!pVCpu->hwaccm.s.fContextUseFlags)
2665 {
2666 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
2667 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadMinimal);
2668 }
2669 else
2670 {
2671 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
2672 if (RT_UNLIKELY(rc != VINF_SUCCESS))
2673 {
2674 VMMR0LogFlushEnable(pVCpu);
2675 goto end;
2676 }
2677 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadFull);
2678 }
2679
2680#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2681 /* Disable interrupts to make sure a poke will interrupt execution.
2682 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
2683 */
2684 uOldEFlags = ASMIntDisableFlags();
2685 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
2686#endif
2687
2688 /* Non-register state Guest Context */
2689 /** @todo change me according to cpu state */
2690 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
2691 AssertRC(rc2);
2692
2693 /* Set TLB flush state as checked until we return from the world switch. */
2694 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, true);
2695 /* Deal with tagged TLB setup and invalidation. */
2696 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu);
2697
2698 /* Manual save and restore:
2699 * - General purpose registers except RIP, RSP
2700 *
2701 * Trashed:
2702 * - CR2 (we don't care)
2703 * - LDTR (reset to 0)
2704 * - DRx (presumably not changed at all)
2705 * - DR7 (reset to 0x400)
2706 * - EFLAGS (reset to RT_BIT(1); not relevant)
2707 *
2708 */
2709
2710 /* All done! Let's start VM execution. */
2711 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatEntry, &pVCpu->hwaccm.s.StatInGC, x);
2712 Assert(idCpuCheck == RTMpCpuId());
2713
2714#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2715 pVCpu->hwaccm.s.vmx.VMCSCache.cResume = cResume;
2716 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
2717#endif
2718
2719 /* Save the current TPR value in the LSTAR msr so our patches can access it. */
2720 if (pVM->hwaccm.s.fTPRPatchingActive)
2721 {
2722 Assert(pVM->hwaccm.s.fTPRPatchingActive);
2723 u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR);
2724 ASMWrMsr(MSR_K8_LSTAR, u8LastTPR);
2725 }
2726
2727 TMNotifyStartOfExecution(pVCpu);
2728#ifdef VBOX_WITH_KERNEL_USING_XMM
2729 rc = hwaccmR0VMXStartVMWrapXMM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hwaccm.s.vmx.pfnStartVM);
2730#else
2731 rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu);
2732#endif
2733 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, false);
2734 ASMAtomicIncU32(&pVCpu->hwaccm.s.cWorldSwitchExits);
2735 /* Possibly the last TSC value seen by the guest (too high) (only when we're in tsc offset mode). */
2736 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
2737 TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() + pVCpu->hwaccm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
2738
2739 TMNotifyEndOfExecution(pVCpu);
2740 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
2741 Assert(!(ASMGetFlags() & X86_EFL_IF));
2742
2743 /* Restore the host LSTAR msr if the guest could have changed it. */
2744 if (pVM->hwaccm.s.fTPRPatchingActive)
2745 {
2746 Assert(pVM->hwaccm.s.fTPRPatchingActive);
2747 pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
2748 ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR);
2749 }
2750
2751 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatInGC, &pVCpu->hwaccm.s.StatExit1, x);
2752 ASMSetFlags(uOldEFlags);
2753#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2754 uOldEFlags = ~(RTCCUINTREG)0;
2755#endif
2756
2757 AssertMsg(!pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries=%d\n", pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries));
2758
2759 /* In case we execute a goto ResumeExecution later on. */
2760 pVCpu->hwaccm.s.fResumeVM = true;
2761 pVCpu->hwaccm.s.fForceTLBFlush = false;
2762
2763 /*
2764 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2765 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
2766 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2767 */
2768
2769 if (RT_UNLIKELY(rc != VINF_SUCCESS))
2770 {
2771 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
2772 VMMR0LogFlushEnable(pVCpu);
2773 goto end;
2774 }
2775
2776 /* Success. Query the guest state and figure out what has happened. */
2777
2778 /* Investigate why there was a VM-exit. */
2779 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
2780 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
2781
2782    exitReason &= 0xffff;   /* Bits 0-15 contain the exit code. */
2783 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
2784 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
2785 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
2786 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
2787 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode);
2788 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
2789 rc2 |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
2790 AssertRC(rc2);
2791
2792 /* Sync back the guest state */
2793 rc2 = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
2794 AssertRC(rc2);
2795
2796 /* Note! NOW IT'S SAFE FOR LOGGING! */
2797 VMMR0LogFlushEnable(pVCpu);
2798 Log2(("Raw exit reason %08x\n", exitReason));
2799
2800 /* Check if an injected event was interrupted prematurely. */
2801 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val);
2802 AssertRC(rc2);
2803 pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
2804 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2805 /* Ignore 'int xx' as they'll be restarted anyway. */
2806 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
2807 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
2808 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
2809 {
2810 Assert(!pVCpu->hwaccm.s.Event.fPending);
2811 pVCpu->hwaccm.s.Event.fPending = true;
2812 /* Error code present? */
2813 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo))
2814 {
2815 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val);
2816 AssertRC(rc2);
2817 pVCpu->hwaccm.s.Event.errCode = val;
2818 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
2819 }
2820 else
2821 {
2822 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2823 pVCpu->hwaccm.s.Event.errCode = 0;
2824 }
2825 }
2826#ifdef VBOX_STRICT
2827 else
2828 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2829        /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
2830 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
2831 {
2832 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2833 }
2834
2835 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
2836 HWACCMDumpRegs(pVM, pVCpu, pCtx);
2837#endif
2838
2839 Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs, (RTGCPTR)pCtx->rip));
2840 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
2841 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
2842 Log2(("Interruption error code %d\n", (uint32_t)errCode));
2843 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
2844
2845 /* Sync back the TPR if it was changed. */
2846 if ( fSetupTPRCaching
2847 && u8LastTPR != pVCpu->hwaccm.s.vmx.pbVAPIC[0x80])
2848 {
2849 rc2 = PDMApicSetTPR(pVCpu, pVCpu->hwaccm.s.vmx.pbVAPIC[0x80]);
2850 AssertRC(rc2);
2851 }
2852
2853 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit1, &pVCpu->hwaccm.s.StatExit2, x);
2854
2855 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
2856 Assert(rc == VINF_SUCCESS); /* might consider VERR_IPE_UNINITIALIZED_STATUS here later... */
2857 switch (exitReason)
2858 {
2859 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
2860 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
2861 {
2862 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
2863
2864 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
2865 {
2866 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
2867#if 0 //def VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2868 if ( RTThreadPreemptIsPendingTrusty()
2869 && !RTThreadPreemptIsPending(NIL_RTTHREAD))
2870 goto ResumeExecution;
2871#endif
2872 /* External interrupt; leave to allow it to be dispatched again. */
2873 rc = VINF_EM_RAW_INTERRUPT;
2874 break;
2875 }
2876 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2877 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
2878 {
2879 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
2880 /* External interrupt; leave to allow it to be dispatched again. */
2881 rc = VINF_EM_RAW_INTERRUPT;
2882 break;
2883
2884 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
2885 AssertFailed(); /* can't come here; fails the first check. */
2886 break;
2887
2888 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */
2889 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
2890 Assert(vector == 1 || vector == 3 || vector == 4);
2891 /* no break */
2892 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
2893 Log2(("Hardware/software interrupt %d\n", vector));
2894 switch (vector)
2895 {
2896 case X86_XCPT_NM:
2897 {
2898 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
2899
2900 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
2901 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
2902 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
2903 if (rc == VINF_SUCCESS)
2904 {
2905 Assert(CPUMIsGuestFPUStateActive(pVCpu));
2906
2907 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM);
2908
2909 /* Continue execution. */
2910 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2911
2912 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2913 goto ResumeExecution;
2914 }
2915
2916 Log(("Forward #NM fault to the guest\n"));
2917 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM);
2918 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, 0);
2919 AssertRC(rc2);
2920 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2921 goto ResumeExecution;
2922 }
2923
2924 case X86_XCPT_PF: /* Page fault */
2925 {
2926#ifdef DEBUG
2927 if (pVM->hwaccm.s.fNestedPaging)
2928                    { /* A genuine page fault.
2929 * Forward the trap to the guest by injecting the exception and resuming execution.
2930 */
2931 Log(("Guest page fault at %RGv cr2=%RGv error code %RGv rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification, errCode, (RTGCPTR)pCtx->rsp));
2932
2933 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
2934
2935 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
2936
2937 /* Now we must update CR2. */
2938 pCtx->cr2 = exitQualification;
2939 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2940 AssertRC(rc2);
2941
2942 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2943 goto ResumeExecution;
2944 }
2945#endif
2946 Assert(!pVM->hwaccm.s.fNestedPaging);
2947
2948#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING
2949                    /* Shortcut for APIC TPR reads and writes; 32-bit guests only. */
2950 if ( pVM->hwaccm.s.fTRPPatchingAllowed
2951 && pVM->hwaccm.s.pGuestPatchMem
2952 && (exitQualification & 0xfff) == 0x080
2953 && !(errCode & X86_TRAP_PF_P) /* not present */
2954 && CPUMGetGuestCPL(pVCpu, CPUMCTX2CORE(pCtx)) == 0
2955 && !CPUMIsGuestInLongModeEx(pCtx)
2956 && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches))
2957 {
2958 RTGCPHYS GCPhysApicBase, GCPhys;
2959 PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */
2960 GCPhysApicBase &= PAGE_BASE_GC_MASK;
2961
2962 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
2963 if ( rc == VINF_SUCCESS
2964 && GCPhys == GCPhysApicBase)
2965 {
2966 /* Only attempt to patch the instruction once. */
2967 PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
2968 if (!pPatch)
2969 {
2970 rc = VINF_EM_HWACCM_PATCH_TPR_INSTR;
2971 break;
2972 }
2973 }
2974 }
2975#endif
2976
2977 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
2978 /* Exit qualification contains the linear address of the page fault. */
2979 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
2980 TRPMSetErrorCode(pVCpu, errCode);
2981 TRPMSetFaultAddress(pVCpu, exitQualification);
2982
2983 /* Shortcut for APIC TPR reads and writes. */
2984 if ( (exitQualification & 0xfff) == 0x080
2985 && !(errCode & X86_TRAP_PF_P) /* not present */
2986 && fSetupTPRCaching
2987 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
2988 {
2989 RTGCPHYS GCPhysApicBase, GCPhys;
2990 PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */
2991 GCPhysApicBase &= PAGE_BASE_GC_MASK;
2992
2993 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
2994 if ( rc == VINF_SUCCESS
2995 && GCPhys == GCPhysApicBase)
2996 {
2997 Log(("Enable VT-x virtual APIC access filtering\n"));
2998 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
2999 AssertRC(rc2);
3000 }
3001 }
3002
3003 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
3004 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
3005 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3006
3007 if (rc == VINF_SUCCESS)
3008 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3009                Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
3010 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
3011
3012 TRPMResetTrap(pVCpu);
3013 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3014 goto ResumeExecution;
3015 }
3016 else
3017 if (rc == VINF_EM_RAW_GUEST_TRAP)
3018 { /* A genuine pagefault.
3019 * Forward the trap to the guest by injecting the exception and resuming execution.
3020 */
3021 Log2(("Forward page fault to the guest\n"));
3022
3023 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
3024 /* The error code might have been changed. */
3025 errCode = TRPMGetErrorCode(pVCpu);
3026
3027 TRPMResetTrap(pVCpu);
3028
3029 /* Now we must update CR2. */
3030 pCtx->cr2 = exitQualification;
3031 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3032 AssertRC(rc2);
3033
3034 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3035 goto ResumeExecution;
3036 }
3037#ifdef VBOX_STRICT
3038 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
3039 Log2(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc)));
3040#endif
3041 /* Need to go back to the recompiler to emulate the instruction. */
3042 TRPMResetTrap(pVCpu);
3043 break;
3044 }
3045
3046 case X86_XCPT_MF: /* Floating point exception. */
3047 {
3048 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF);
3049 if (!(pCtx->cr0 & X86_CR0_NE))
3050 {
3051 /* old style FPU error reporting needs some extra work. */
3052 /** @todo don't fall back to the recompiler, but do it manually. */
3053 rc = VINF_EM_RAW_EMULATE_INSTR;
3054 break;
3055 }
3056 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
3057 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3058 AssertRC(rc2);
3059
3060 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3061 goto ResumeExecution;
3062 }
3063
3064 case X86_XCPT_DB: /* Debug exception. */
3065 {
3066 uint64_t uDR6;
3067
3068 /* DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
3069 *
3070 * Exit qualification bits:
3071 * 3:0 B0-B3 which breakpoint condition was met
3072 * 12:4 Reserved (0)
3073 * 13 BD - debug register access detected
3074 * 14 BS - single step execution or branch taken
3075 * 63:15 Reserved (0)
3076 */
3077 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB);
3078
3079 /* Note that we don't support guest and host-initiated debugging at the same time. */
3080
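             /*
              * Construct the DR6 value the guest should see: start from the architectural init
              * value and merge in the B0-B3, BD and BS bits reported in the exit qualification.
              */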
3081 uDR6 = X86_DR6_INIT_VAL;
3082 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
3083 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6);
3084 if (rc == VINF_EM_RAW_GUEST_TRAP)
3085 {
3086 /* Update DR6 here. */
3087 pCtx->dr[6] = uDR6;
3088
3089 /* Resync DR6 if the debug state is active. */
3090 if (CPUMIsGuestDebugStateActive(pVCpu))
3091 ASMSetDR6(pCtx->dr[6]);
3092
3093 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
3094 pCtx->dr[7] &= ~X86_DR7_GD;
3095
3096 /* Paranoia. */
3097 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3098 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3099 pCtx->dr[7] |= 0x400; /* must be one */
3100
3101 /* Resync DR7 */
3102 rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
3103 AssertRC(rc2);
3104
3105 Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip, exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7]));
3106 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3107 AssertRC(rc2);
3108
3109 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3110 goto ResumeExecution;
3111 }
3112 /* Return to ring 3 to deal with the debug exit code. */
3113 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3114 break;
3115 }
3116
3117 case X86_XCPT_BP: /* Breakpoint. */
3118 {
3119 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3120 if (rc == VINF_EM_RAW_GUEST_TRAP)
3121 {
3122 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs, pCtx->rip));
3123 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3124 AssertRC(rc2);
3125 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3126 goto ResumeExecution;
3127 }
3128 if (rc == VINF_SUCCESS)
3129 {
3130 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3131 goto ResumeExecution;
3132 }
3133 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3134 break;
3135 }
3136
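             /*
              * #GP in real mode: without unrestricted guest execution the guest's real-mode code is
              * run in V86 mode on top of the fake TSS, so privileged and IOPL-sensitive instructions
              * (CLI, STI, HLT, PUSHF/POPF, INT n, INTO, INT3, IRET) fault with #GP and are emulated below.
              */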
3137 case X86_XCPT_GP: /* General protection failure exception.*/
3138 {
3139 uint32_t cbOp;
3140 uint32_t cbSize;
3141 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
3142
3143 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP);
3144#ifdef VBOX_STRICT
3145 if ( !CPUMIsGuestInRealModeEx(pCtx)
3146 || !pVM->hwaccm.s.vmx.pRealModeTSS)
3147 {
3148 Log(("Trap %x at %04X:%RGv errorCode=%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip, errCode));
3149 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3150 AssertRC(rc2);
3151 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3152 goto ResumeExecution;
3153 }
3154#endif
3155 Assert(CPUMIsGuestInRealModeEx(pCtx));
3156
3157 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs, (RTGCPTR)pCtx->rip));
3158
3159 rc2 = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, &cbOp);
3160 if (RT_SUCCESS(rc2))
3161 {
3162 bool fUpdateRIP = true;
3163
3164 rc = VINF_SUCCESS;
3165 Assert(cbOp == pDis->opsize);
3166 switch (pDis->pCurInstr->opcode)
3167 {
3168 case OP_CLI:
3169 pCtx->eflags.Bits.u1IF = 0;
3170 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCli);
3171 break;
3172
3173 case OP_STI:
3174 pCtx->eflags.Bits.u1IF = 1;
3175 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->opsize);
3176 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
3177 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
3178 AssertRC(rc2);
3179 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitSti);
3180 break;
3181
3182 case OP_HLT:
3183 fUpdateRIP = false;
3184 rc = VINF_EM_HALT;
3185 pCtx->rip += pDis->opsize;
3186 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
3187 break;
3188
3189 case OP_POPF:
3190 {
3191 RTGCPTR GCPtrStack;
3192 uint32_t cbParm;
3193 uint32_t uMask;
3194 X86EFLAGS eflags;
3195
3196 if (pDis->prefix & PREFIX_OPSIZE)
3197 {
3198 cbParm = 4;
3199 uMask = 0xffffffff;
3200 }
3201 else
3202 {
3203 cbParm = 2;
3204 uMask = 0xffff;
3205 }
3206
3207 rc2 = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3208 if (RT_FAILURE(rc2))
3209 {
3210 rc = VERR_EM_INTERPRETER;
3211 break;
3212 }
3213 eflags.u = 0;
3214 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3215 if (RT_FAILURE(rc2))
3216 {
3217 rc = VERR_EM_INTERPRETER;
3218 break;
3219 }
3220 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
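                             /* Take only the POPF-modifiable flag bits (within the operand-size mask)
                                from the popped value; all other EFLAGS bits keep their current value. */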
3221 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) | (eflags.u & X86_EFL_POPF_BITS & uMask);
3222 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
3223 pCtx->eflags.Bits.u1RF = 0;
3224 pCtx->esp += cbParm;
3225 pCtx->esp &= uMask;
3226
3227 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPopf);
3228 break;
3229 }
3230
3231 case OP_PUSHF:
3232 {
3233 RTGCPTR GCPtrStack;
3234 uint32_t cbParm;
3235 uint32_t uMask;
3236 X86EFLAGS eflags;
3237
3238 if (pDis->prefix & PREFIX_OPSIZE)
3239 {
3240 cbParm = 4;
3241 uMask = 0xffffffff;
3242 }
3243 else
3244 {
3245 cbParm = 2;
3246 uMask = 0xffff;
3247 }
3248
3249 rc2 = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0, &GCPtrStack);
3250 if (RT_FAILURE(rc2))
3251 {
3252 rc = VERR_EM_INTERPRETER;
3253 break;
3254 }
3255 eflags = pCtx->eflags;
3256 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
3257 eflags.Bits.u1RF = 0;
3258 eflags.Bits.u1VM = 0;
3259
3260 rc2 = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3261 if (RT_FAILURE(rc2))
3262 {
3263 rc = VERR_EM_INTERPRETER;
3264 break;
3265 }
3266 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
3267 pCtx->esp -= cbParm;
3268 pCtx->esp &= uMask;
3269 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPushf);
3270 break;
3271 }
3272
3273 case OP_IRET:
3274 {
3275 RTGCPTR GCPtrStack;
3276 uint32_t uMask = 0xffff;
3277 uint16_t aIretFrame[3];
3278
3279 if (pDis->prefix & (PREFIX_OPSIZE | PREFIX_ADDRSIZE))
3280 {
3281 rc = VERR_EM_INTERPRETER;
3282 break;
3283 }
3284
3285 rc2 = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3286 if (RT_FAILURE(rc2))
3287 {
3288 rc = VERR_EM_INTERPRETER;
3289 break;
3290 }
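                             /* A real-mode IRET frame consists of three 16-bit words: IP, CS and FLAGS. */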
3291 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
3292 if (RT_FAILURE(rc2))
3293 {
3294 rc = VERR_EM_INTERPRETER;
3295 break;
3296 }
3297 pCtx->ip = aIretFrame[0];
3298 pCtx->cs = aIretFrame[1];
3299 pCtx->csHid.u64Base = pCtx->cs << 4;
3300 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
3301 pCtx->sp += sizeof(aIretFrame);
3302
3303 LogFlow(("iret to %04x:%x\n", pCtx->cs, pCtx->ip));
3304 fUpdateRIP = false;
3305 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIret);
3306 break;
3307 }
3308
3309 case OP_INT:
3310 {
3311 uint32_t intInfo2;
3312
3313 LogFlow(("Realmode: INT %x\n", pDis->param1.parval & 0xff));
3314 intInfo2 = pDis->param1.parval & 0xff;
3315 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3316 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3317
3318 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3319 AssertRC(VBOXSTRICTRC_VAL(rc));
3320 fUpdateRIP = false;
3321 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3322 break;
3323 }
3324
3325 case OP_INTO:
3326 {
3327 if (pCtx->eflags.Bits.u1OF)
3328 {
3329 uint32_t intInfo2;
3330
3331 LogFlow(("Realmode: INTO\n"));
3332 intInfo2 = X86_XCPT_OF;
3333 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3334 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3335
3336 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3337 AssertRC(VBOXSTRICTRC_VAL(rc));
3338 fUpdateRIP = false;
3339 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3340 }
3341 break;
3342 }
3343
3344 case OP_INT3:
3345 {
3346 uint32_t intInfo2;
3347
3348 LogFlow(("Realmode: INT 3\n"));
3349 intInfo2 = 3;
3350 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3351 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3352
3353 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3354 AssertRC(VBOXSTRICTRC_VAL(rc));
3355 fUpdateRIP = false;
3356 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3357 break;
3358 }
3359
3360 default:
3361 rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, EMCODETYPE_SUPERVISOR, &cbSize);
3362 break;
3363 }
3364
3365 if (rc == VINF_SUCCESS)
3366 {
3367 if (fUpdateRIP)
3368 pCtx->rip += cbOp; /* Move on to the next instruction. */
3369
3370 /* lidt, lgdt can end up here. In the future crx changes as well. Just reload the whole context to be done with it. */
3371 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
3372
3373 /* Only resume if successful. */
3374 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3375 goto ResumeExecution;
3376 }
3377 }
3378 else
3379 rc = VERR_EM_INTERPRETER;
3380
3381 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Rrc\n", VBOXSTRICTRC_VAL(rc)));
3382 break;
3383 }
3384
3385#ifdef VBOX_STRICT
3386 case X86_XCPT_XF: /* SIMD exception. */
3387 case X86_XCPT_DE: /* Divide error. */
3388        case X86_XCPT_UD:       /* Invalid opcode exception. */
3389 case X86_XCPT_SS: /* Stack segment exception. */
3390 case X86_XCPT_NP: /* Segment not present exception. */
3391 {
3392 switch(vector)
3393 {
3394 case X86_XCPT_DE:
3395 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE);
3396 break;
3397 case X86_XCPT_UD:
3398 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD);
3399 break;
3400 case X86_XCPT_SS:
3401 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS);
3402 break;
3403 case X86_XCPT_NP:
3404 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP);
3405 break;
3406 }
3407
3408 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
3409 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3410 AssertRC(rc2);
3411
3412 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3413 goto ResumeExecution;
3414 }
3415#endif
3416 default:
3417 if ( CPUMIsGuestInRealModeEx(pCtx)
3418 && pVM->hwaccm.s.vmx.pRealModeTSS)
3419 {
3420 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs, pCtx->eip, errCode));
3421 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3422 AssertRC(VBOXSTRICTRC_VAL(rc)); /* Strict RC check below. */
3423
3424 /* Go back to ring 3 in case of a triple fault. */
3425 if ( vector == X86_XCPT_DF
3426 && rc == VINF_EM_RESET)
3427 break;
3428
3429 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3430 goto ResumeExecution;
3431 }
3432 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
3433 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
3434 break;
3435 } /* switch (vector) */
3436
3437 break;
3438
3439 default:
3440 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
3441 AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
3442 break;
3443 }
3444
3445 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3446 break;
3447 }
3448
3449 case VMX_EXIT_EPT_VIOLATION: /* 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed by the configuration of the EPT paging structures. */
3450 {
3451 RTGCPHYS GCPhys;
3452
3453 Assert(pVM->hwaccm.s.fNestedPaging);
3454
3455 rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3456 AssertRC(rc2);
3457 Assert(((exitQualification >> 7) & 3) != 2);
3458
3459 /* Determine the kind of violation. */
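         /*
          * The exit qualification bits are translated into a #PF-style error code so the regular
          * PGM page-fault path can be reused: instruction fetch -> ID, data write -> RW, and a
          * present EPT entry -> P (i.e. a page-level protection violation).
          */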
3460 errCode = 0;
3461 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
3462 errCode |= X86_TRAP_PF_ID;
3463
3464 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
3465 errCode |= X86_TRAP_PF_RW;
3466
3467 /* If the page is present, then it's a page level protection fault. */
3468 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
3469 {
3470 errCode |= X86_TRAP_PF_P;
3471 }
3472 else
3473 {
3474 /* Shortcut for APIC TPR reads and writes. */
3475 if ( (GCPhys & 0xfff) == 0x080
3476 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
3477 && fSetupTPRCaching
3478 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3479 {
3480 RTGCPHYS GCPhysApicBase;
3481 PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */
3482 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3483 if (GCPhys == GCPhysApicBase + 0x80)
3484 {
3485 Log(("Enable VT-x virtual APIC access filtering\n"));
3486 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3487 AssertRC(rc2);
3488 }
3489 }
3490 }
3491 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
3492
3493 /* GCPhys contains the guest physical address of the page fault. */
3494 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3495 TRPMSetErrorCode(pVCpu, errCode);
3496 TRPMSetFaultAddress(pVCpu, GCPhys);
3497
3498 /* Handle the pagefault trap for the nested shadow table. */
3499 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
3500 Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3501 if (rc == VINF_SUCCESS)
3502 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3503            Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
3504 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF);
3505
3506 TRPMResetTrap(pVCpu);
3507 goto ResumeExecution;
3508 }
3509
3510#ifdef VBOX_STRICT
3511 if (rc != VINF_EM_RAW_EMULATE_INSTR)
3512 LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", VBOXSTRICTRC_VAL(rc)));
3513#endif
3514 /* Need to go back to the recompiler to emulate the instruction. */
3515 TRPMResetTrap(pVCpu);
3516 break;
3517 }
3518
3519 case VMX_EXIT_EPT_MISCONFIG:
3520 {
3521 RTGCPHYS GCPhys;
3522
3523 Assert(pVM->hwaccm.s.fNestedPaging);
3524
3525 rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3526 AssertRC(rc2);
3527 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
3528
3529 /* Shortcut for APIC TPR reads and writes. */
3530 if ( (GCPhys & 0xfff) == 0x080
3531 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
3532 && fSetupTPRCaching
3533 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3534 {
3535 RTGCPHYS GCPhysApicBase;
3536 PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */
3537 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3538 if (GCPhys == GCPhysApicBase + 0x80)
3539 {
3540 Log(("Enable VT-x virtual APIC access filtering\n"));
3541 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3542 AssertRC(rc2);
3543 }
3544 }
3545
3546 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
3547 if (rc == VINF_SUCCESS)
3548 {
3549 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhys, (RTGCPTR)pCtx->rip));
3550 goto ResumeExecution;
3551 }
3552
3553 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhys, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3554 break;
3555 }
3556
3557 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
3558 /* Clear VM-exit on IF=1 change. */
3559 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip, VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
3560 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
3561 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3562 AssertRC(rc2);
3563 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow);
3564 goto ResumeExecution; /* we check for pending guest interrupts there */
3565
3566 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
3567 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
3568 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd);
3569 /* Skip instruction and continue directly. */
3570 pCtx->rip += cbInstr;
3571        /* Continue execution. */
3572 goto ResumeExecution;
3573
3574 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
3575 {
3576 Log2(("VMX: Cpuid %x\n", pCtx->eax));
3577 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid);
3578 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3579 if (rc == VINF_SUCCESS)
3580 {
3581 /* Update EIP and continue execution. */
3582 Assert(cbInstr == 2);
3583 pCtx->rip += cbInstr;
3584 goto ResumeExecution;
3585 }
3586 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
3587 rc = VINF_EM_RAW_EMULATE_INSTR;
3588 break;
3589 }
3590
3591 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
3592 {
3593 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
3594 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdpmc);
3595 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3596 if (rc == VINF_SUCCESS)
3597 {
3598 /* Update EIP and continue execution. */
3599 Assert(cbInstr == 2);
3600 pCtx->rip += cbInstr;
3601 goto ResumeExecution;
3602 }
3603 rc = VINF_EM_RAW_EMULATE_INSTR;
3604 break;
3605 }
3606
3607 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
3608 {
3609 Log2(("VMX: Rdtsc\n"));
3610 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
3611 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3612 if (rc == VINF_SUCCESS)
3613 {
3614 /* Update EIP and continue execution. */
3615 Assert(cbInstr == 2);
3616 pCtx->rip += cbInstr;
3617 goto ResumeExecution;
3618 }
3619 rc = VINF_EM_RAW_EMULATE_INSTR;
3620 break;
3621 }
3622
3623        case VMX_EXIT_INVPG:                /* 14 Guest software attempted to execute INVLPG. */
3624 {
3625 Log2(("VMX: invlpg\n"));
3626 Assert(!pVM->hwaccm.s.fNestedPaging);
3627
3628 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvpg);
3629 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
3630 if (rc == VINF_SUCCESS)
3631 {
3632 /* Update EIP and continue execution. */
3633 pCtx->rip += cbInstr;
3634 goto ResumeExecution;
3635 }
3636 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, VBOXSTRICTRC_VAL(rc)));
3637 break;
3638 }
3639
3640 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
3641 {
3642 Log2(("VMX: monitor\n"));
3643
3644 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMonitor);
3645 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3646 if (rc == VINF_SUCCESS)
3647 {
3648 /* Update EIP and continue execution. */
3649 pCtx->rip += cbInstr;
3650 goto ResumeExecution;
3651 }
3652 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
3653 break;
3654 }
3655
3656 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
3657 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
3658 if ( pVM->hwaccm.s.fTPRPatchingActive
3659 && pCtx->ecx == MSR_K8_LSTAR)
3660 {
3661 Assert(!CPUMIsGuestInLongModeEx(pCtx));
3662 if ((pCtx->eax & 0xff) != u8LastTPR)
3663 {
3664 Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
3665
3666 /* Our patch code uses LSTAR for TPR caching. */
3667 rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
3668 AssertRC(rc2);
3669 }
3670
3671 /* Skip the instruction and continue. */
3672 pCtx->rip += cbInstr; /* wrmsr = [0F 30] */
3673
3674 /* Only resume if successful. */
3675 goto ResumeExecution;
3676 }
3677 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_MSR;
3678 /* no break */
3679 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
3680 {
3681 uint32_t cbSize;
3682
3683 STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hwaccm.s.StatExitRdmsr : &pVCpu->hwaccm.s.StatExitWrmsr);
3684
3685            /* Note: The Intel manual claims there's a REX version of RDMSR that's slightly different, so we play safe by completely disassembling the instruction. */
3686 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
3687 rc = EMInterpretInstruction(pVM, pVCpu, CPUMCTX2CORE(pCtx), 0, &cbSize);
3688 if (rc == VINF_SUCCESS)
3689 {
3690 /* EIP has been updated already. */
3691
3692 /* Only resume if successful. */
3693 goto ResumeExecution;
3694 }
3695 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc)));
3696 break;
3697 }
3698
3699 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
3700 {
3701 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3702
3703 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
3704 {
3705 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
3706 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
3707 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
3708 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3709 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
3710 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
3711
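                     /*
                      * Mark the affected register(s) dirty so their VMCS copies are rebuilt before
                      * the next VM-entry; a CR0 write also forces a CR3 resync, and CR8 (the TPR)
                      * only causes exits when no TPR shadow is available.
                      */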
3712 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
3713 {
3714 case 0:
3715 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3;
3716 break;
3717 case 2:
3718 break;
3719 case 3:
3720 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
3721 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
3722 break;
3723 case 4:
3724 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
3725 break;
3726 case 8:
3727 /* CR8 contains the APIC TPR */
3728 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
3729 break;
3730
3731 default:
3732 AssertFailed();
3733 break;
3734 }
3735 break;
3736
3737 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
3738 Log2(("VMX: mov x, crx\n"));
3739 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
3740
3741 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx) || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != USE_REG_CR3);
3742
3743 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
3744 Assert(VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
3745
3746 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3747 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
3748 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
3749 break;
3750
3751 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
3752 Log2(("VMX: clts\n"));
3753 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCLTS);
3754 rc = EMInterpretCLTS(pVM, pVCpu);
3755 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3756 break;
3757
3758 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
3759 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
3760 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitLMSW);
3761 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
3762 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3763 break;
3764 }
3765
3766 /* Update EIP if no error occurred. */
3767 if (RT_SUCCESS(rc))
3768 pCtx->rip += cbInstr;
3769
3770 if (rc == VINF_SUCCESS)
3771 {
3772 /* Only resume if successful. */
3773 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3774 goto ResumeExecution;
3775 }
3776 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
3777 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3778 break;
3779 }
3780
3781 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
3782 {
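             /*
              * Lazy debug-register switching: on the first guest DRx access (when neither the
              * debugger is single-stepping nor the hypervisor debug state is active) we drop the
              * MOV DRx intercepts, load the guest debug state and resume, so subsequent accesses
              * run without exiting.
              */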
3783 if ( !DBGFIsStepping(pVCpu)
3784 && !CPUMIsHyperDebugStateActive(pVCpu))
3785 {
3786 /* Disable drx move intercepts. */
3787 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
3788 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3789 AssertRC(rc2);
3790
3791 /* Save the host and load the guest debug state. */
3792 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
3793 AssertRC(rc2);
3794
3795#ifdef LOG_ENABLED
3796 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
3797 Log(("VMX_EXIT_DRX_MOVE: write DR%d genreg %d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
3798 else
3799 Log(("VMX_EXIT_DRX_MOVE: read DR%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification)));
3800#endif
3801
3802#ifdef VBOX_WITH_STATISTICS
3803 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
3804 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
3805 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
3806 else
3807 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
3808#endif
3809
3810 goto ResumeExecution;
3811 }
3812
3813 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first time and restore drx registers afterwards */
3814 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
3815 {
3816 Log2(("VMX: mov drx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
3817 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
3818 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3819 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
3820 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
3821 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
3822 Log2(("DR7=%08x\n", pCtx->dr[7]));
3823 }
3824 else
3825 {
3826 Log2(("VMX: mov x, drx\n"));
3827 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
3828 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3829 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
3830 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
3831 }
3832 /* Update EIP if no error occurred. */
3833 if (RT_SUCCESS(rc))
3834 pCtx->rip += cbInstr;
3835
3836 if (rc == VINF_SUCCESS)
3837 {
3838 /* Only resume if successful. */
3839 goto ResumeExecution;
3840 }
3841 Assert(rc == VERR_EM_INTERPRETER);
3842 break;
3843 }
3844
3845 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
3846 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
3847 {
3848 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3849 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
3850 uint32_t uPort;
3851 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
3852
3853 /** @todo necessary to make the distinction? */
3854 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
3855 {
3856 uPort = pCtx->edx & 0xffff;
3857 }
3858 else
3859 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
3860
3861 /* paranoia */
3862 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4))
3863 {
3864 rc = fIOWrite ? VINF_IOM_HC_IOPORT_WRITE : VINF_IOM_HC_IOPORT_READ;
3865 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3866 break;
3867 }
3868
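             /* The I/O width field encodes the access size (0 = byte, 1 = word, 3 = dword; 2 is
                filtered out by the paranoia check above). The lookup tables translate it into the
                byte count and the mask used when merging an IN result into EAX. */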
3869 uint32_t cbSize = g_aIOSize[uIOWidth];
3870
3871 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
3872 {
3873 /* ins/outs */
3874 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
3875
3876 /* Disassemble manually to deal with segment prefixes. */
3877 /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */
3878 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
3879 rc2 = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, NULL);
3880                if (RT_SUCCESS(rc2))
3881 {
3882 if (fIOWrite)
3883 {
3884 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
3885 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite);
3886 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, pDis->addrmode, cbSize);
3887 }
3888 else
3889 {
3890 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
3891 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead);
3892 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, pDis->addrmode, cbSize);
3893 }
3894 }
3895 else
3896 rc = VINF_EM_RAW_EMULATE_INSTR;
3897 }
3898 else
3899 {
3900 /* normal in/out */
3901 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
3902
3903 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
3904
3905 if (fIOWrite)
3906 {
3907 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite);
3908 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
3909 if (rc == VINF_IOM_HC_IOPORT_WRITE)
3910 HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
3911 }
3912 else
3913 {
3914 uint32_t u32Val = 0;
3915
3916 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead);
3917 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
3918 if (IOM_SUCCESS(rc))
3919 {
3920 /* Write back to the EAX register. */
3921 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
3922 }
3923 else
3924 if (rc == VINF_IOM_HC_IOPORT_READ)
3925 HWACCMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
3926 }
3927 }
3928 /*
3929             * Handle the I/O return codes.
3930 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
3931 */
3932 if (IOM_SUCCESS(rc))
3933 {
3934 /* Update EIP and continue execution. */
3935 pCtx->rip += cbInstr;
3936 if (RT_LIKELY(rc == VINF_SUCCESS))
3937 {
3938 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
3939 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
3940 {
3941 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck);
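                     /*
                      * Walk DR0-DR3 and look for an enabled I/O breakpoint (DR7 R/W field set to I/O)
                      * whose port range, given its configured length, covers the port just accessed.
                      */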
3942                    for (unsigned i = 0; i < 4; i++)
3943 {
3944 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
3945
3946 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
3947 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
3948 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
3949 {
3950 uint64_t uDR6;
3951
3952 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3953
3954 uDR6 = ASMGetDR6();
3955
3956 /* Clear all breakpoint status flags and set the one we just hit. */
3957 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
3958 uDR6 |= (uint64_t)RT_BIT(i);
3959
3960 /* Note: AMD64 Architecture Programmer's Manual 13.1:
3961                             * Bits 15:13 of the DR6 register are never cleared by the processor and must be cleared by software after
3962 * the contents have been read.
3963 */
3964 ASMSetDR6(uDR6);
3965
3966 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
3967 pCtx->dr[7] &= ~X86_DR7_GD;
3968
3969 /* Paranoia. */
3970 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3971 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3972 pCtx->dr[7] |= 0x400; /* must be one */
3973
3974 /* Resync DR7 */
3975 rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
3976 AssertRC(rc2);
3977
3978 /* Construct inject info. */
3979 intInfo = X86_XCPT_DB;
3980 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3981 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3982
3983 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
3984 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), 0, 0);
3985 AssertRC(rc2);
3986
3987 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3988 goto ResumeExecution;
3989 }
3990 }
3991 }
3992 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3993 goto ResumeExecution;
3994 }
3995 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3996 break;
3997 }
3998
3999#ifdef VBOX_STRICT
4000 if (rc == VINF_IOM_HC_IOPORT_READ)
4001 Assert(!fIOWrite);
4002 else if (rc == VINF_IOM_HC_IOPORT_WRITE)
4003 Assert(fIOWrite);
4004 else
4005 AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc)));
4006#endif
4007 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4008 break;
4009 }
4010
4011 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4012 LogFlow(("VMX_EXIT_TPR\n"));
4013 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
4014 goto ResumeExecution;
4015
4016 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address on the APIC-access page. */
4017 {
4018 LogFlow(("VMX_EXIT_APIC_ACCESS\n"));
4019 unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification);
4020
4021 switch(uAccessType)
4022 {
4023 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
4024 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
4025 {
4026 RTGCPHYS GCPhys;
4027 PDMApicGetBase(pVM, &GCPhys);
4028 GCPhys &= PAGE_BASE_GC_MASK;
4029 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification);
4030
4031 LogFlow(("Apic access at %RGp\n", GCPhys));
4032 rc = IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW, CPUMCTX2CORE(pCtx), GCPhys);
4033 if (rc == VINF_SUCCESS)
4034 goto ResumeExecution; /* rip already updated */
4035 break;
4036 }
4037
4038 default:
4039 rc = VINF_EM_RAW_EMULATE_INSTR;
4040 break;
4041 }
4042 break;
4043 }
4044
4045 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4046 if (!TMTimerPollBool(pVM, pVCpu))
4047 goto ResumeExecution;
4048 rc = VINF_EM_RAW_TIMER_PENDING;
4049 break;
4050
4051 default:
4052 /* The rest is handled after syncing the entire CPU state. */
4053 break;
4054 }
4055
4056 /* Note: the guest state isn't entirely synced back at this stage. */
4057
4058 /* Investigate why there was a VM-exit. (part 2) */
4059 switch (exitReason)
4060 {
4061 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
4062 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
4063 case VMX_EXIT_EPT_VIOLATION:
4064 case VMX_EXIT_EPT_MISCONFIG: /* 49 EPT misconfig is used by the PGM/MMIO optimizations. */
4065 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4066 /* Already handled above. */
4067 break;
4068
4069 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
4070 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
4071 break;
4072
4073 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
4074 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
4075 rc = VINF_EM_RAW_INTERRUPT;
4076 AssertFailed(); /* Can't happen. Yet. */
4077 break;
4078
4079 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
4080 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
4081 rc = VINF_EM_RAW_INTERRUPT;
4082 AssertFailed(); /* Can't happen afaik. */
4083 break;
4084
4085 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */
4086 Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification));
4087 if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
4088 && pVCpu->hwaccm.s.Event.fPending)
4089 {
4090 /* Caused by an injected interrupt. */
4091 pVCpu->hwaccm.s.Event.fPending = false;
4092
4093 Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo)));
4094 Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo));
4095 rc2 = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo), TRPM_HARDWARE_INT);
4096 AssertRC(rc2);
4097 }
4098 /* else Exceptions and software interrupts can just be restarted. */
4099 rc = VERR_EM_INTERPRETER;
4100 break;
4101
4102 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
4103        /* Check if external interrupts are pending; if so, don't switch back. */
4104 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
4105 pCtx->rip++; /* skip hlt */
4106 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
4107 goto ResumeExecution;
4108
4109 rc = VINF_EM_HALT;
4110 break;
4111
4112 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
4113 Log2(("VMX: mwait\n"));
4114 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMwait);
4115 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4116 if ( rc == VINF_EM_HALT
4117 || rc == VINF_SUCCESS)
4118 {
4119 /* Update EIP and continue execution. */
4120 pCtx->rip += cbInstr;
4121
4122            /* Check if external interrupts are pending; if so, don't switch back. */
4123 if ( rc == VINF_SUCCESS
4124 || ( rc == VINF_EM_HALT
4125 && EMShouldContinueAfterHalt(pVCpu, pCtx))
4126 )
4127 goto ResumeExecution;
4128 }
4129 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4130 break;
4131
4132 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
4133 AssertFailed(); /* can't happen. */
4134 rc = VERR_EM_INTERPRETER;
4135 break;
4136
4137 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
4138 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
4139 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
4140 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
4141 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
4142 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
4143 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
4144 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
4145 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
4146 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
4147 /** @todo inject #UD immediately */
4148 rc = VERR_EM_INTERPRETER;
4149 break;
4150
4151 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4152 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4153 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVPG. */
4154 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4155 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4156 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4157 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4158 /* already handled above */
4159 AssertMsg( rc == VINF_PGM_CHANGE_MODE
4160 || rc == VINF_EM_RAW_INTERRUPT
4161 || rc == VERR_EM_INTERPRETER
4162 || rc == VINF_EM_RAW_EMULATE_INSTR
4163 || rc == VINF_PGM_SYNC_CR3
4164 || rc == VINF_IOM_HC_IOPORT_READ
4165 || rc == VINF_IOM_HC_IOPORT_WRITE
4166 || rc == VINF_EM_RAW_GUEST_TRAP
4167 || rc == VINF_TRPM_XCPT_DISPATCHED
4168 || rc == VINF_EM_RESCHEDULE_REM,
4169 ("rc = %d\n", VBOXSTRICTRC_VAL(rc)));
4170 break;
4171
4172 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4173 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address on the APIC-access page. */
4174 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4175 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4176 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
4177 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4178 /* Note: If we decide to emulate them here, then we must sync the MSRs that could have been changed (sysenter, fs/gs base)!!! */
4179 rc = VERR_EM_INTERPRETER;
4180 break;
4181
4182 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4183 Assert(rc == VINF_EM_RAW_INTERRUPT);
4184 break;
4185
4186 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
4187 {
4188#ifdef VBOX_STRICT
4189 RTCCUINTREG val2 = 0;
4190
4191 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
4192
4193 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val2);
4194 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2));
4195
4196 VMXReadVMCS(VMX_VMCS64_GUEST_CR0, &val2);
4197 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2));
4198
4199 VMXReadVMCS(VMX_VMCS64_GUEST_CR3, &val2);
4200 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2));
4201
4202 VMXReadVMCS(VMX_VMCS64_GUEST_CR4, &val2);
4203 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2));
4204
4205 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val2);
4206 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2));
4207
4208 VMX_LOG_SELREG(CS, "CS", val2);
4209 VMX_LOG_SELREG(DS, "DS", val2);
4210 VMX_LOG_SELREG(ES, "ES", val2);
4211 VMX_LOG_SELREG(FS, "FS", val2);
4212 VMX_LOG_SELREG(GS, "GS", val2);
4213 VMX_LOG_SELREG(SS, "SS", val2);
4214 VMX_LOG_SELREG(TR, "TR", val2);
4215 VMX_LOG_SELREG(LDTR, "LDTR", val2);
4216
4217 VMXReadVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val2);
4218 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2));
4219 VMXReadVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val2);
4220 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2));
4221#endif /* VBOX_STRICT */
4222 rc = VERR_VMX_INVALID_GUEST_STATE;
4223 break;
4224 }
4225
4226 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
4227 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
4228 default:
4229 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
4230 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
4231 break;
4232
4233 }
4234end:
4235
4236    /* We are now going back to ring-3, so clear the action flag. */
4237 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4238
4239 /* Signal changes for the recompiler. */
4240 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS);
4241
4242 /* If we executed vmlaunch/vmresume and an external irq was pending, then we don't have to do a full sync the next time. */
4243 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
4244 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
4245 {
4246 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq);
4247 /* On the next entry we'll only sync the host context. */
4248 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
4249 }
4250 else
4251 {
4252 /* On the next entry we'll sync everything. */
4253 /** @todo we can do better than this */
4254 /* Not in the VINF_PGM_CHANGE_MODE though! */
4255 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
4256 }
4257
4258 /* translate into a less severe return code */
4259 if (rc == VERR_EM_INTERPRETER)
4260 rc = VINF_EM_RAW_EMULATE_INSTR;
4261 else
4262 /* Try to extract more information about what might have gone wrong here. */
4263 if (rc == VERR_VMX_INVALID_VMCS_PTR)
4264 {
4265 VMXGetActivateVMCS(&pVCpu->hwaccm.s.vmx.lasterror.u64VMCSPhys);
4266 pVCpu->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS;
4267 pVCpu->hwaccm.s.vmx.lasterror.idEnteredCpu = pVCpu->hwaccm.s.idEnteredCpu;
4268 pVCpu->hwaccm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
4269 }
4270
4271 /* Just set the correct state here instead of trying to catch every goto above. */
4272 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
4273
4274#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
4275    /* Restore interrupts if we exited after disabling them. */
4276 if (uOldEFlags != ~(RTCCUINTREG)0)
4277 ASMSetFlags(uOldEFlags);
4278#endif
4279
4280 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, x);
4281 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x);
4282 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
4283 Log2(("X"));
4284 return VBOXSTRICTRC_TODO(rc);
4285}
4286
4287
4288/**
4289 * Enters the VT-x session
4290 *
4291 * @returns VBox status code.
4292 * @param pVM The VM to operate on.
4293 * @param pVCpu The VMCPU to operate on.
4294 * @param pCpu CPU info struct
4295 */
4296VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
4297{
4298 Assert(pVM->hwaccm.s.vmx.fSupported);
4299
4300 unsigned cr4 = ASMGetCR4();
4301 if (!(cr4 & X86_CR4_VMXE))
4302 {
4303 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
4304 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4305 }
4306
4307 /* Activate the VM Control Structure. */
4308 int rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4309 if (RT_FAILURE(rc))
4310 return rc;
4311
4312 pVCpu->hwaccm.s.fResumeVM = false;
4313 return VINF_SUCCESS;
4314}
4315
4316
4317/**
4318 * Leaves the VT-x session
4319 *
4320 * @returns VBox status code.
4321 * @param pVM The VM to operate on.
4322 * @param pVCpu The VMCPU to operate on.
4323 * @param pCtx CPU context
4324 */
4325VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4326{
4327 Assert(pVM->hwaccm.s.vmx.fSupported);
4328
4329#ifdef DEBUG
4330 if (CPUMIsHyperDebugStateActive(pVCpu))
4331 {
4332 CPUMR0LoadHostDebugState(pVM, pVCpu);
4333 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4334 }
4335 else
4336#endif
4337 /* Save the guest debug state if necessary. */
4338 if (CPUMIsGuestDebugStateActive(pVCpu))
4339 {
4340 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
4341
4342 /* Enable drx move intercepts again. */
4343 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4344 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4345 AssertRC(rc);
4346
4347 /* Resync the debug registers the next time. */
4348 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4349 }
4350 else
4351 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4352
4353 /* Clear VM Control Structure. Marking it inactive, clearing implementation specific data and writing back VMCS data to memory. */
4354 int rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4355 AssertRC(rc);
4356
4357 return VINF_SUCCESS;
4358}
4359
4360/**
4361 * Flush the TLB (EPT)
4362 *
4363 * @returns VBox status code.
4364 * @param pVM The VM to operate on.
4365 * @param pVCpu The VM CPU to operate on.
4366 * @param enmFlush Type of flush
4367 * @param GCPhys Physical address of the page to flush
4368 */
4369static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys)
4370{
4371 uint64_t descriptor[2];
4372
4373    LogFlow(("hmR0VmxFlushEPT %d %RGp\n", enmFlush, GCPhys));
4374 Assert(pVM->hwaccm.s.fNestedPaging);
4375 descriptor[0] = pVCpu->hwaccm.s.vmx.GCPhysEPTP;
4376 descriptor[1] = GCPhys;
4377 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
4378 AssertRC(rc);
4379}
4380
4381#ifdef HWACCM_VTX_WITH_VPID
4382/**
4383 * Flush the TLB (VPID)
4384 *
4385 * @returns VBox status code.
4386 * @param pVM The VM to operate on.
4387 * @param pVCpu The VM CPU to operate on.
4388 * @param enmFlush Type of flush
4389 * @param GCPtr Virtual address of the page to flush
4390 */
4391static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr)
4392{
4393#if HC_ARCH_BITS == 32
4394    /* If we get a flush in 64-bit guest mode, then force a full TLB flush. INVVPID probably takes only 32-bit addresses. (@todo) */
4395 if ( CPUMIsGuestInLongMode(pVCpu)
4396 && !VMX_IS_64BIT_HOST_MODE())
4397 {
4398 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4399 }
4400 else
4401#endif
4402 {
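         /*
          * The INVVPID descriptor is two 64-bit words: the VPID in bits 15:0 of the first word
          * (remaining bits reserved) and the linear address to flush in the second word.
          */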
4403 uint64_t descriptor[2];
4404
4405 Assert(pVM->hwaccm.s.vmx.fVPID);
4406 descriptor[0] = pVCpu->hwaccm.s.uCurrentASID;
4407 descriptor[1] = GCPtr;
4408 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]);
4409 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu->hwaccm.s.uCurrentASID, GCPtr, rc));
4410 }
4411}
4412#endif /* HWACCM_VTX_WITH_VPID */
4413
4414/**
4415 * Invalidates a guest page
4416 *
4417 * @returns VBox status code.
4418 * @param pVM The VM to operate on.
4419 * @param pVCpu The VM CPU to operate on.
4420 * @param GCVirt Page to invalidate
4421 */
4422VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
4423{
4424 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
4425
4426 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
4427
4428 /* Only relevant if we want to use VPID.
4429 * In the nested paging case we still see such calls, but
4430 * can safely ignore them. (e.g. after cr3 updates)
4431 */
4432#ifdef HWACCM_VTX_WITH_VPID
4433 /* Skip it if a TLB flush is already pending. */
4434 if ( !fFlushPending
4435 && pVM->hwaccm.s.vmx.fVPID)
4436 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCVirt);
4437#endif /* HWACCM_VTX_WITH_VPID */
4438
4439 return VINF_SUCCESS;
4440}
4441
4442/**
4443 * Invalidates a guest page by physical address
4444 *
4445 * NOTE: Assumes the current instruction references this physical page through a virtual address!!
4446 *
4447 * @returns VBox status code.
4448 * @param pVM The VM to operate on.
4449 * @param pVCpu The VM CPU to operate on.
4450 * @param GCPhys Page to invalidate
4451 */
4452VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
4453{
4454 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
4455
4456 Assert(pVM->hwaccm.s.fNestedPaging);
4457
4458 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
4459
4460 /* Skip it if a TLB flush is already pending. */
4461 if (!fFlushPending)
4462 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCPhys);
4463
4464 return VINF_SUCCESS;
4465}
4466
4467/**
4468 * Report world switch error and dump some useful debug info
4469 *
4470 * @param pVM The VM to operate on.
4471 * @param pVCpu The VMCPU to operate on.
4472 * @param rc Return code
4473 * @param pCtx Current CPU context (not updated)
4474 */
4475static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx)
4476{
4477 switch (VBOXSTRICTRC_VAL(rc))
4478 {
4479 case VERR_VMX_INVALID_VMXON_PTR:
4480 AssertFailed();
4481 break;
4482
4483 case VERR_VMX_UNABLE_TO_START_VM:
4484 case VERR_VMX_UNABLE_TO_RESUME_VM:
4485 {
4486 int rc2;
4487 RTCCUINTREG exitReason, instrError;
4488
4489 rc2 = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
4490 rc2 |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
4491 AssertRC(rc2);
4492 if (rc2 == VINF_SUCCESS)
4493 {
4494 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason, (uint32_t)instrError));
4495 Log(("Current stack %08x\n", &rc2));
4496
4497 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
4498 pVCpu->hwaccm.s.vmx.lasterror.ulExitReason = exitReason;
4499
4500#ifdef VBOX_STRICT
4501 RTGDTR gdtr;
4502 PCX86DESCHC pDesc;
4503 RTCCUINTREG val;
4504
4505 ASMGetGDTR(&gdtr);
4506
4507 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val);
4508 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
4509 VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
4510 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
4511 VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
4512 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
4513 VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
4514 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
4515 VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
4516 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
4517
4518 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
4519 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
4520
4521 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
4522 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
4523
4524 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
4525 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
4526
4527 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val);
4528 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
4529
4530 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
4531 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
4532
4533 if (val < gdtr.cbGdt)
4534 {
4535 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4536 HWACCMR0DumpDescriptor(pDesc, val, "CS: ");
4537 }
4538
4539 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val);
4540 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
4541 if (val < gdtr.cbGdt)
4542 {
4543 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4544 HWACCMR0DumpDescriptor(pDesc, val, "DS: ");
4545 }
4546
4547 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val);
4548 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
4549 if (val < gdtr.cbGdt)
4550 {
4551 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4552 HWACCMR0DumpDescriptor(pDesc, val, "ES: ");
4553 }
4554
4555 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val);
4556 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
4557 if (val < gdtr.cbGdt)
4558 {
4559 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4560 HWACCMR0DumpDescriptor(pDesc, val, "FS: ");
4561 }
4562
4563 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val);
4564 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
4565 if (val < gdtr.cbGdt)
4566 {
4567 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4568 HWACCMR0DumpDescriptor(pDesc, val, "GS: ");
4569 }
4570
4571 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val);
4572 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
4573 if (val < gdtr.cbGdt)
4574 {
4575 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4576 HWACCMR0DumpDescriptor(pDesc, val, "SS: ");
4577 }
4578
4579 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val);
4580 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
4581 if (val < gdtr.cbGdt)
4582 {
4583 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4584 HWACCMR0DumpDescriptor(pDesc, val, "TR: ");
4585 }
4586
4587 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
4588 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
4589
4590 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
4591 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
4592 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
4593 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
4594
4595 VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val);
4596 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
4597
4598 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
4599 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
4600
4601 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
4602 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
4603
4604 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
4605 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
4606 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
4607 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
4608
4609# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4610 if (VMX_IS_64BIT_HOST_MODE())
4611 {
4612 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
4613 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
4614 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4615 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4616 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4617 }
4618# endif
4619#endif /* VBOX_STRICT */
4620 }
4621 break;
4622 }
4623
4624 default:
4625 /* impossible */
4626 AssertMsgFailed(("%Rrc (%#x)\n", VBOXSTRICTRC_VAL(rc), VBOXSTRICTRC_VAL(rc)));
4627 break;
4628 }
4629}
4630
4631#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4632/**
4633 * Prepares for and executes VMLAUNCH/VMRESUME (64-bit guest mode).
4634 *
4635 * @returns VBox status code
4636 * @param fResume    Whether to use VMRESUME (true) or VMLAUNCH (false).
4637 * @param pCtx Guest context
4638 * @param pCache VMCS cache
4639 * @param pVM The VM to operate on.
4640 * @param pVCpu The VMCPU to operate on.
4641 */
4642DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
4643{
4644 uint32_t aParam[6];
4645 PHMGLOBLCPUINFO pCpu;
4646 RTHCPHYS HCPhysCpuPage;
4647 int rc;
4648
4649 pCpu = HWACCMR0GetCurrentCpu();
4650 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
4651
4652#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4653 pCache->uPos = 1;
4654 pCache->interPD = PGMGetInterPaeCR3(pVM);
4655 pCache->pSwitcher = (uint64_t)pVM->hwaccm.s.pfnHost32ToGuest64R0;
4656#endif
4657
4658#ifdef DEBUG
4659 pCache->TestIn.HCPhysCpuPage= 0;
4660 pCache->TestIn.HCPhysVMCS = 0;
4661 pCache->TestIn.pCache = 0;
4662 pCache->TestOut.HCPhysVMCS = 0;
4663 pCache->TestOut.pCache = 0;
4664 pCache->TestOut.pCtx = 0;
4665 pCache->TestOut.eflags = 0;
4666#endif
4667
4668 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
4669 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
4670 aParam[2] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS); /* Param 2: VMCS physical address - Lo. */
4671 aParam[3] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS >> 32); /* Param 2: VMCS physical address - Hi. */
4672 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache);
4673 aParam[5] = 0;
4674
4675#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4676 pCtx->dr[4] = pVM->hwaccm.s.vmx.pScratchPhys + 16 + 8;
4677 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 1;
4678#endif
4679 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
4680
4681#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4682 Assert(*(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) == 5);
4683 Assert(pCtx->dr[4] == 10);
4684 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 0xff;
4685#endif
4686
4687#ifdef DEBUG
4688 AssertMsg(pCache->TestIn.HCPhysCpuPage== HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
4689 AssertMsg(pCache->TestIn.HCPhysVMCS == pVCpu->hwaccm.s.vmx.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS, pVCpu->hwaccm.s.vmx.HCPhysVMCS));
4690 AssertMsg(pCache->TestIn.HCPhysVMCS == pCache->TestOut.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS, pCache->TestOut.HCPhysVMCS));
4691 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache, pCache->TestOut.pCache));
4692 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache), ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache)));
4693 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx, pCache->TestOut.pCtx));
4694 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
4695#endif
4696 return rc;
4697}
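
/*
 * Illustrative sketch (not part of the original file): the low/high 32-bit halves
 * marshalled into aParam[] above would be recombined like this on the 64-bit side.
 * The helper name is hypothetical.
 */
#if 0
static RTHCPHYS hmR0VmxExampleCombineU32Pair(uint32_t uLo, uint32_t uHi)
{
    /* aParam[0]/aParam[1] carry the VMXON page, aParam[2]/aParam[3] the VMCS. */
    return (RTHCPHYS)uLo | ((RTHCPHYS)uHi << 32);
}
#endif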
4698
4699/**
4700 * Executes the specified handler in 64-bit mode.
4701 *
4702 * @returns VBox status code.
4703 * @param pVM The VM to operate on.
4704 * @param pVCpu The VMCPU to operate on.
4705 * @param pCtx Guest context
4706 * @param pfnHandler RC handler
4707 * @param cbParam Number of parameters
4708 * @param paParam Array of 32-bit parameters.
4709 */
4710VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, uint32_t *paParam)
4711{
4712 int rc, rc2;
4713 PHMGLOBLCPUINFO pCpu;
4714 RTHCPHYS HCPhysCpuPage;
4715 RTHCUINTREG uOldEFlags;
4716
4717 AssertReturn(pVM->hwaccm.s.pfnHost32ToGuest64R0, VERR_INTERNAL_ERROR);
4718 Assert(pfnHandler);
4719 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField));
4720 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField));
4721
4722#ifdef VBOX_STRICT
4723 for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries;i++)
4724 Assert(hmR0VmxIsValidWriteField(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField[i]));
4725
4726 for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries;i++)
4727 Assert(hmR0VmxIsValidReadField(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField[i]));
4728#endif
4729
4730 /* Disable interrupts. */
4731 uOldEFlags = ASMIntDisableFlags();
4732
4733 pCpu = HWACCMR0GetCurrentCpu();
4734 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
4735
4736 /* Clear VM Control Structure. Marking it inactive, clearing implementation specific data and writing back VMCS data to memory. */
4737 VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4738
4739 /* Leave VMX Root Mode. */
4740 VMXDisable();
4741
4742 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
4743
4744 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
4745 CPUMSetHyperEIP(pVCpu, pfnHandler);
4746 for (int i=(int)cbParam-1;i>=0;i--)
4747 CPUMPushHyper(pVCpu, paParam[i]);
4748
4749 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
4750 /* Call switcher. */
4751 rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
4752 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
4753
4754 /* Make sure the VMX instructions don't cause #UD faults. */
4755 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
4756
4757 /* Enter VMX Root Mode */
4758 rc2 = VMXEnable(HCPhysCpuPage);
4759 if (RT_FAILURE(rc2))
4760 {
4761 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
4762 ASMSetFlags(uOldEFlags);
4763 return VERR_VMX_VMXON_FAILED;
4764 }
4765
4766 rc2 = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4767 AssertRC(rc2);
4768 Assert(!(ASMGetFlags() & X86_EFL_IF));
4769 ASMSetFlags(uOldEFlags);
4770 return rc;
4771}
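
/*
 * Illustrative sketch (not part of the original file): invoking the 64-bit handler
 * with a small parameter array.  The handler pointer and physical address below are
 * hypothetical; real callers pass entry points resolved by HWACCM, e.g.
 * pfnVMXGCStartVM64 as done in VMXR0SwitcherStartVM64 above.
 */
#if 0
static int hmR0VmxExampleCall64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
{
    RTHCPHYS const HCPhysSomething  = 0;                 /* hypothetical physical address */
    RTRCPTR  const pfnSomeHandlerRC = 0;                 /* hypothetical RC entry point   */
    uint32_t       aExample[2];
    aExample[0] = (uint32_t)HCPhysSomething;             /* Param 1: low 32 bits.  */
    aExample[1] = (uint32_t)(HCPhysSomething >> 32);     /* Param 1: high 32 bits. */
    return VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pfnSomeHandlerRC, 2, &aExample[0]);
}
#endif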
4772
4773#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
4774
4775
4776#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4777/**
4778 * Executes VMWRITE
4779 *
4780 * @returns VBox status code
4781 * @param pVCpu The VMCPU to operate on.
4782 * @param idxField VMCS index
4783 * @param u64Val 16-, 32- or 64-bit value.
4784 */
4785VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
4786{
4787 int rc;
4788
4789 switch (idxField)
4790 {
4791 case VMX_VMCS_CTRL_TSC_OFFSET_FULL:
4792 case VMX_VMCS_CTRL_IO_BITMAP_A_FULL:
4793 case VMX_VMCS_CTRL_IO_BITMAP_B_FULL:
4794 case VMX_VMCS_CTRL_MSR_BITMAP_FULL:
4795 case VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL:
4796 case VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL:
4797 case VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL:
4798 case VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL:
4799 case VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL:
4800 case VMX_VMCS_GUEST_LINK_PTR_FULL:
4801 case VMX_VMCS_GUEST_PDPTR0_FULL:
4802 case VMX_VMCS_GUEST_PDPTR1_FULL:
4803 case VMX_VMCS_GUEST_PDPTR2_FULL:
4804 case VMX_VMCS_GUEST_PDPTR3_FULL:
4805 case VMX_VMCS_GUEST_DEBUGCTL_FULL:
4806 case VMX_VMCS_GUEST_EFER_FULL:
4807 case VMX_VMCS_CTRL_EPTP_FULL:
4808        /* These fields consist of two parts (low and high dwords), both of which are writable in 32-bit mode. */
4809 rc = VMXWriteVMCS32(idxField, u64Val);
4810 rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
4811 AssertRC(rc);
4812 return rc;
4813
4814 case VMX_VMCS64_GUEST_LDTR_BASE:
4815 case VMX_VMCS64_GUEST_TR_BASE:
4816 case VMX_VMCS64_GUEST_GDTR_BASE:
4817 case VMX_VMCS64_GUEST_IDTR_BASE:
4818 case VMX_VMCS64_GUEST_SYSENTER_EIP:
4819 case VMX_VMCS64_GUEST_SYSENTER_ESP:
4820 case VMX_VMCS64_GUEST_CR0:
4821 case VMX_VMCS64_GUEST_CR4:
4822 case VMX_VMCS64_GUEST_CR3:
4823 case VMX_VMCS64_GUEST_DR7:
4824 case VMX_VMCS64_GUEST_RIP:
4825 case VMX_VMCS64_GUEST_RSP:
4826 case VMX_VMCS64_GUEST_CS_BASE:
4827 case VMX_VMCS64_GUEST_DS_BASE:
4828 case VMX_VMCS64_GUEST_ES_BASE:
4829 case VMX_VMCS64_GUEST_FS_BASE:
4830 case VMX_VMCS64_GUEST_GS_BASE:
4831 case VMX_VMCS64_GUEST_SS_BASE:
4832        /* Queue the 64-bit value as we can't set it directly in 32-bit host mode. */
4833 if (u64Val >> 32ULL)
4834 rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val);
4835 else
4836 rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val);
4837
4838 return rc;
4839
4840 default:
4841 AssertMsgFailed(("Unexpected field %x\n", idxField));
4842 return VERR_INVALID_PARAMETER;
4843 }
4844}
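
/*
 * Illustrative sketch (not part of the original file): how a 64-bit "_FULL" field is
 * split into its low and high dwords on a 32-bit host, as done in the first case
 * block above.  The EPT pointer value and helper name are hypothetical.
 */
#if 0
static int hmR0VmxExampleWriteEptp(void)
{
    uint64_t const u64Eptp = UINT64_C(0x0000000123456000) | 0x1e;                       /* hypothetical EPT pointer */
    int rc = VMXWriteVMCS32(VMX_VMCS_CTRL_EPTP_FULL,     (uint32_t)u64Eptp);            /* low dword  */
    rc    |= VMXWriteVMCS32(VMX_VMCS_CTRL_EPTP_FULL + 1, (uint32_t)(u64Eptp >> 32));    /* high dword */
    AssertRC(rc);
    return rc;
}
#endif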
4845
4846/**
4847 * Caches VMCS writes for performance reasons (Darwin) and for running 64-bit guests on 32-bit hosts.
4848 *
4849 * @param pVCpu The VMCPU to operate on.
4850 * @param idxField VMCS field
4851 * @param u64Val Value
4852 */
4853VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
4854{
4855 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
4856
4857 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1, ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
4858
4859 /* Make sure there are no duplicates. */
4860 for (unsigned i=0;i<pCache->Write.cValidEntries;i++)
4861 {
4862 if (pCache->Write.aField[i] == idxField)
4863 {
4864 pCache->Write.aFieldVal[i] = u64Val;
4865 return VINF_SUCCESS;
4866 }
4867 }
4868
4869 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
4870 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
4871 pCache->Write.cValidEntries++;
4872 return VINF_SUCCESS;
4873}
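
/*
 * Illustrative sketch (not part of the original file): queueing the same field twice
 * leaves a single cache entry holding the later value; the queued writes are applied
 * to the VMCS later, in 64-bit mode.  The FS base values and helper name are
 * hypothetical.
 */
#if 0
static void hmR0VmxExampleCachedWrite(PVMCPU pVCpu)
{
    VMXWriteCachedVMCSEx(pVCpu, VMX_VMCS64_GUEST_FS_BASE, UINT64_C(0xffff880000100000));
    VMXWriteCachedVMCSEx(pVCpu, VMX_VMCS64_GUEST_FS_BASE, UINT64_C(0xffff880000200000));
    /* The second call only updates the existing entry; Write.cValidEntries grows by one, not two. */
}
#endif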
4874
4875#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
4876
4877#ifdef VBOX_STRICT
4878static bool hmR0VmxIsValidReadField(uint32_t idxField)
4879{
4880 switch(idxField)
4881 {
4882 case VMX_VMCS64_GUEST_RIP:
4883 case VMX_VMCS64_GUEST_RSP:
4884 case VMX_VMCS_GUEST_RFLAGS:
4885 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
4886 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
4887 case VMX_VMCS64_GUEST_CR0:
4888 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
4889 case VMX_VMCS64_GUEST_CR4:
4890 case VMX_VMCS64_GUEST_DR7:
4891 case VMX_VMCS32_GUEST_SYSENTER_CS:
4892 case VMX_VMCS64_GUEST_SYSENTER_EIP:
4893 case VMX_VMCS64_GUEST_SYSENTER_ESP:
4894 case VMX_VMCS32_GUEST_GDTR_LIMIT:
4895 case VMX_VMCS64_GUEST_GDTR_BASE:
4896 case VMX_VMCS32_GUEST_IDTR_LIMIT:
4897 case VMX_VMCS64_GUEST_IDTR_BASE:
4898 case VMX_VMCS16_GUEST_FIELD_CS:
4899 case VMX_VMCS32_GUEST_CS_LIMIT:
4900 case VMX_VMCS64_GUEST_CS_BASE:
4901 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
4902 case VMX_VMCS16_GUEST_FIELD_DS:
4903 case VMX_VMCS32_GUEST_DS_LIMIT:
4904 case VMX_VMCS64_GUEST_DS_BASE:
4905 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
4906 case VMX_VMCS16_GUEST_FIELD_ES:
4907 case VMX_VMCS32_GUEST_ES_LIMIT:
4908 case VMX_VMCS64_GUEST_ES_BASE:
4909 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
4910 case VMX_VMCS16_GUEST_FIELD_FS:
4911 case VMX_VMCS32_GUEST_FS_LIMIT:
4912 case VMX_VMCS64_GUEST_FS_BASE:
4913 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
4914 case VMX_VMCS16_GUEST_FIELD_GS:
4915 case VMX_VMCS32_GUEST_GS_LIMIT:
4916 case VMX_VMCS64_GUEST_GS_BASE:
4917 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
4918 case VMX_VMCS16_GUEST_FIELD_SS:
4919 case VMX_VMCS32_GUEST_SS_LIMIT:
4920 case VMX_VMCS64_GUEST_SS_BASE:
4921 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
4922 case VMX_VMCS16_GUEST_FIELD_LDTR:
4923 case VMX_VMCS32_GUEST_LDTR_LIMIT:
4924 case VMX_VMCS64_GUEST_LDTR_BASE:
4925 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
4926 case VMX_VMCS16_GUEST_FIELD_TR:
4927 case VMX_VMCS32_GUEST_TR_LIMIT:
4928 case VMX_VMCS64_GUEST_TR_BASE:
4929 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
4930 case VMX_VMCS32_RO_EXIT_REASON:
4931 case VMX_VMCS32_RO_VM_INSTR_ERROR:
4932 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
4933 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE:
4934 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
4935 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
4936 case VMX_VMCS_RO_EXIT_QUALIFICATION:
4937 case VMX_VMCS32_RO_IDT_INFO:
4938 case VMX_VMCS32_RO_IDT_ERRCODE:
4939 case VMX_VMCS64_GUEST_CR3:
4940 case VMX_VMCS_EXIT_PHYS_ADDR_FULL:
4941 return true;
4942 }
4943 return false;
4944}
4945
4946static bool hmR0VmxIsValidWriteField(uint32_t idxField)
4947{
4948 switch(idxField)
4949 {
4950 case VMX_VMCS64_GUEST_LDTR_BASE:
4951 case VMX_VMCS64_GUEST_TR_BASE:
4952 case VMX_VMCS64_GUEST_GDTR_BASE:
4953 case VMX_VMCS64_GUEST_IDTR_BASE:
4954 case VMX_VMCS64_GUEST_SYSENTER_EIP:
4955 case VMX_VMCS64_GUEST_SYSENTER_ESP:
4956 case VMX_VMCS64_GUEST_CR0:
4957 case VMX_VMCS64_GUEST_CR4:
4958 case VMX_VMCS64_GUEST_CR3:
4959 case VMX_VMCS64_GUEST_DR7:
4960 case VMX_VMCS64_GUEST_RIP:
4961 case VMX_VMCS64_GUEST_RSP:
4962 case VMX_VMCS64_GUEST_CS_BASE:
4963 case VMX_VMCS64_GUEST_DS_BASE:
4964 case VMX_VMCS64_GUEST_ES_BASE:
4965 case VMX_VMCS64_GUEST_FS_BASE:
4966 case VMX_VMCS64_GUEST_GS_BASE:
4967 case VMX_VMCS64_GUEST_SS_BASE:
4968 return true;
4969 }
4970 return false;
4971}
4972
4973#endif
4974