VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0.cpp@ 84071

Last change on this file since 84071 was 82968, checked in by vboxsync, 5 years ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 29.4 KB
Line 
1/* $Id: PGMR0.cpp 82968 2020-02-04 10:35:17Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, Ring-0.
4 */
5
6/*
7 * Copyright (C) 2007-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM
23#include <VBox/rawpci.h>
24#include <VBox/vmm/pgm.h>
25#include <VBox/vmm/gmm.h>
26#include "PGMInternal.h"
27#include <VBox/vmm/pdmdev.h>
28#include <VBox/vmm/vmcc.h>
29#include <VBox/vmm/gvm.h>
30#include "PGMInline.h"
31#include <VBox/log.h>
32#include <VBox/err.h>
33#include <iprt/assert.h>
34#include <iprt/mem.h>
35#include <iprt/memobj.h>
36
37
38/*
39 * Instantiate the ring-0 header/code templates.
40 */
41/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
42#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
43#include "PGMR0Bth.h"
44#undef PGM_BTH_NAME
45
46#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
47#include "PGMR0Bth.h"
48#undef PGM_BTH_NAME
49
50#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
51#include "PGMR0Bth.h"
52#undef PGM_BTH_NAME
53
54#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
55#include "PGMR0Bth.h"
56#undef PGM_BTH_NAME
57
58
59/**
60 * Initializes the per-VM data for the PGM.
61 *
62 * This is called from under the GVMM lock, so it should only initialize the
63 * data so PGMR0CleanupVM and others will work smoothly.
64 *
65 * @returns VBox status code.
66 * @param pGVM Pointer to the global VM structure.
67 */
68VMMR0_INT_DECL(int) PGMR0InitPerVMData(PGVM pGVM)
69{
70 AssertCompile(sizeof(pGVM->pgm.s) <= sizeof(pGVM->pgm.padding));
71 AssertCompile(sizeof(pGVM->pgmr0.s) <= sizeof(pGVM->pgmr0.padding));
72
73 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMapObjs));
74 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
75 {
76 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
77 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
78 }
79 return RTCritSectInit(&pGVM->pgmr0.s.PoolGrowCritSect);
80}
81
82
83/**
84 * Initalize the per-VM PGM for ring-0.
85 *
86 * @returns VBox status code.
87 * @param pGVM Pointer to the global VM structure.
88 */
89VMMR0_INT_DECL(int) PGMR0InitVM(PGVM pGVM)
90{
91 int rc = VINF_SUCCESS;
92#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
93 rc = PGMR0DynMapInitVM(pGVM);
94#endif
95 RT_NOREF(pGVM);
96 return rc;
97}
98
99
100/**
101 * Cleans up any loose ends before the GVM structure is destroyed.
102 */
103VMMR0_INT_DECL(void) PGMR0CleanupVM(PGVM pGVM)
104{
105 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
106 {
107 if (pGVM->pgmr0.s.ahPoolMapObjs[i] != NIL_RTR0MEMOBJ)
108 {
109 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMapObjs[i], true /*fFreeMappings*/);
110 AssertRC(rc);
111 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
112 }
113
114 if (pGVM->pgmr0.s.ahPoolMemObjs[i] != NIL_RTR0MEMOBJ)
115 {
116 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMemObjs[i], true /*fFreeMappings*/);
117 AssertRC(rc);
118 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
119 }
120 }
121
122 if (RTCritSectIsInitialized(&pGVM->pgmr0.s.PoolGrowCritSect))
123 RTCritSectDelete(&pGVM->pgmr0.s.PoolGrowCritSect);
124}
125
126
127/**
128 * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
129 *
130 * @returns The following VBox status codes.
131 * @retval VINF_SUCCESS on success. FF cleared.
132 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
133 *
134 * @param pGVM The global (ring-0) VM structure.
135 * @param idCpu The ID of the calling EMT.
136 *
137 * @thread EMT(idCpu)
138 *
139 * @remarks Must be called from within the PGM critical section. The caller
140 * must clear the new pages.
141 */
142VMMR0_INT_DECL(int) PGMR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu)
143{
144 /*
145 * Validate inputs.
146 */
147 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
148 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
149 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
150
151 /*
152 * Check for error injection.
153 */
154 if (RT_UNLIKELY(pGVM->pgm.s.fErrInjHandyPages))
155 return VERR_NO_MEMORY;
156
157 /*
158 * Try allocate a full set of handy pages.
159 */
160 uint32_t iFirst = pGVM->pgm.s.cHandyPages;
161 AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
162 uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
163 if (!cPages)
164 return VINF_SUCCESS;
165 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
166 if (RT_SUCCESS(rc))
167 {
168#ifdef VBOX_STRICT
169 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
170 {
171 Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
172 Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
173 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
174 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
175 Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
176 }
177#endif
178
179 pGVM->pgm.s.cHandyPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages);
180 }
181 else if (rc != VERR_GMM_SEED_ME)
182 {
183 if ( ( rc == VERR_GMM_HIT_GLOBAL_LIMIT
184 || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
185 && iFirst < PGM_HANDY_PAGES_MIN)
186 {
187
188#ifdef VBOX_STRICT
189 /* We're ASSUMING that GMM has updated all the entires before failing us. */
190 uint32_t i;
191 for (i = iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
192 {
193 Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
194 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
195 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
196 }
197#endif
198
199 /*
200 * Reduce the number of pages until we hit the minimum limit.
201 */
202 do
203 {
204 cPages >>= 1;
205 if (cPages + iFirst < PGM_HANDY_PAGES_MIN)
206 cPages = PGM_HANDY_PAGES_MIN - iFirst;
207 rc = GMMR0AllocateHandyPages(pGVM, idCpu, 0, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
208 } while ( ( rc == VERR_GMM_HIT_GLOBAL_LIMIT
209 || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
210 && cPages + iFirst > PGM_HANDY_PAGES_MIN);
211 if (RT_SUCCESS(rc))
212 {
213#ifdef VBOX_STRICT
214 i = iFirst + cPages;
215 while (i-- > 0)
216 {
217 Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
218 Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
219 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
220 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
221 Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
222 }
223
224 for (i = cPages + iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
225 {
226 Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
227 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
228 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
229 }
230#endif
231
232 pGVM->pgm.s.cHandyPages = iFirst + cPages;
233 }
234 }
235
236 if (RT_FAILURE(rc) && rc != VERR_GMM_SEED_ME)
237 {
238 LogRel(("PGMR0PhysAllocateHandyPages: rc=%Rrc iFirst=%d cPages=%d\n", rc, iFirst, cPages));
239 VM_FF_SET(pGVM, VM_FF_PGM_NO_MEMORY);
240 }
241 }
242
243
244 LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
245 return rc;
246}
247
248
249/**
250 * Flushes any changes pending in the handy page array.
251 *
252 * It is very important that this gets done when page sharing is enabled.
253 *
254 * @returns The following VBox status codes.
255 * @retval VINF_SUCCESS on success. FF cleared.
256 *
257 * @param pGVM The global (ring-0) VM structure.
258 * @param idCpu The ID of the calling EMT.
259 *
260 * @thread EMT(idCpu)
261 *
262 * @remarks Must be called from within the PGM critical section.
263 */
264VMMR0_INT_DECL(int) PGMR0PhysFlushHandyPages(PGVM pGVM, VMCPUID idCpu)
265{
266 /*
267 * Validate inputs.
268 */
269 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
270 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
271 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
272
273 /*
274 * Try allocate a full set of handy pages.
275 */
276 uint32_t iFirst = pGVM->pgm.s.cHandyPages;
277 AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
278 uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
279 if (!cPages)
280 return VINF_SUCCESS;
281 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, 0, &pGVM->pgm.s.aHandyPages[iFirst]);
282
283 LogFlow(("PGMR0PhysFlushHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
284 return rc;
285}
286
287
288/**
289 * Worker function for PGMR3PhysAllocateLargeHandyPage
290 *
291 * @returns The following VBox status codes.
292 * @retval VINF_SUCCESS on success.
293 * @retval VINF_EM_NO_MEMORY if we're out of memory.
294 *
295 * @param pGVM The global (ring-0) VM structure.
296 * @param idCpu The ID of the calling EMT.
297 *
298 * @thread EMT(idCpu)
299 *
300 * @remarks Must be called from within the PGM critical section. The caller
301 * must clear the new pages.
302 */
303VMMR0_INT_DECL(int) PGMR0PhysAllocateLargeHandyPage(PGVM pGVM, VMCPUID idCpu)
304{
305 /*
306 * Validate inputs.
307 */
308 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
309 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
310 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
311 Assert(!pGVM->pgm.s.cLargeHandyPages);
312
313 /*
314 * Do the job.
315 */
316 int rc = GMMR0AllocateLargePage(pGVM, idCpu, _2M,
317 &pGVM->pgm.s.aLargeHandyPage[0].idPage,
318 &pGVM->pgm.s.aLargeHandyPage[0].HCPhysGCPhys);
319 if (RT_SUCCESS(rc))
320 pGVM->pgm.s.cLargeHandyPages = 1;
321
322 return rc;
323}
324
325
326/**
327 * Locate a MMIO2 range.
328 *
329 * @returns Pointer to the MMIO2 range.
330 * @param pGVM The global (ring-0) VM structure.
331 * @param pDevIns The device instance owning the region.
332 * @param hMmio2 Handle to look up.
333 */
334DECLINLINE(PPGMREGMMIO2RANGE) pgmR0PhysMMIOExFind(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
335{
336 /*
337 * We use the lookup table here as list walking is tedious in ring-0 when using
338 * ring-3 pointers and this probably will require some kind of refactoring anyway.
339 */
340 if (hMmio2 <= RT_ELEMENTS(pGVM->pgm.s.apMmio2RangesR0) && hMmio2 != 0)
341 {
342 PPGMREGMMIO2RANGE pCur = pGVM->pgm.s.apMmio2RangesR0[hMmio2 - 1];
343 if (pCur && pCur->pDevInsR3 == pDevIns->pDevInsForR3)
344 {
345 Assert(pCur->idMmio2 == hMmio2);
346 AssertReturn(pCur->fFlags & PGMREGMMIO2RANGE_F_MMIO2, NULL);
347 return pCur;
348 }
349 Assert(!pCur);
350 }
351 return NULL;
352}
353
354
355/**
356 * Worker for PDMDEVHLPR0::pfnMmio2SetUpContext.
357 *
358 * @returns VBox status code.
359 * @param pGVM The global (ring-0) VM structure.
360 * @param pDevIns The device instance.
361 * @param hMmio2 The MMIO2 region to map into ring-0 address space.
362 * @param offSub The offset into the region.
363 * @param cbSub The size of the mapping, zero meaning all the rest.
364 * @param ppvMapping Where to return the ring-0 mapping address.
365 */
366VMMR0_INT_DECL(int) PGMR0PhysMMIO2MapKernel(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
367 size_t offSub, size_t cbSub, void **ppvMapping)
368{
369 AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
370 AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
371
372 /*
373 * Translate hRegion into a range pointer.
374 */
375 PPGMREGMMIO2RANGE pFirstRegMmio = pgmR0PhysMMIOExFind(pGVM, pDevIns, hMmio2);
376 AssertReturn(pFirstRegMmio, VERR_NOT_FOUND);
377#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)
378 uint8_t * const pvR0 = (uint8_t *)pFirstRegMmio->pvR0;
379#else
380 RTR3PTR const pvR3 = pFirstRegMmio->pvR3;
381#endif
382 RTGCPHYS const cbReal = pFirstRegMmio->cbReal;
383 pFirstRegMmio = NULL;
384 ASMCompilerBarrier();
385
386 AssertReturn(offSub < cbReal, VERR_OUT_OF_RANGE);
387 if (cbSub == 0)
388 cbSub = cbReal - offSub;
389 else
390 AssertReturn(cbSub < cbReal && cbSub + offSub <= cbReal, VERR_OUT_OF_RANGE);
391
392 /*
393 * Do the mapping.
394 */
395#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)
396 AssertPtr(pvR0);
397 *ppvMapping = pvR0 + offSub;
398 return VINF_SUCCESS;
399#else
400 return SUPR0PageMapKernel(pGVM->pSession, pvR3, (uint32_t)offSub, (uint32_t)cbSub, 0 /*fFlags*/, ppvMapping);
401#endif
402}
403
404
405#ifdef VBOX_WITH_PCI_PASSTHROUGH
406/* Interface sketch. The interface belongs to a global PCI pass-through
407 manager. It shall use the global VM handle, not the user VM handle to
408 store the per-VM info (domain) since that is all ring-0 stuff, thus
409 passing pGVM here. I've tentatively prefixed the functions 'GPciRawR0',
410 we can discuss the PciRaw code reorganization when I'm back from
411 vacation.
412
413 I've implemented the initial IOMMU set up below. For things to work
414 reliably, we will probably need add a whole bunch of checks and
415 GPciRawR0GuestPageUpdate call to the PGM code. For the present,
416 assuming nested paging (enforced) and prealloc (enforced), no
417 ballooning (check missing), page sharing (check missing) or live
418 migration (check missing), it might work fine. At least if some
419 VM power-off hook is present and can tear down the IOMMU page tables. */
420
421/**
422 * Tells the global PCI pass-through manager that we are about to set up the
423 * guest page to host page mappings for the specfied VM.
424 *
425 * @returns VBox status code.
426 *
427 * @param pGVM The ring-0 VM structure.
428 */
429VMMR0_INT_DECL(int) GPciRawR0GuestPageBeginAssignments(PGVM pGVM)
430{
431 NOREF(pGVM);
432 return VINF_SUCCESS;
433}
434
435
436/**
437 * Assigns a host page mapping for a guest page.
438 *
439 * This is only used when setting up the mappings, i.e. between
440 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
441 *
442 * @returns VBox status code.
443 * @param pGVM The ring-0 VM structure.
444 * @param GCPhys The address of the guest page (page aligned).
445 * @param HCPhys The address of the host page (page aligned).
446 */
447VMMR0_INT_DECL(int) GPciRawR0GuestPageAssign(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
448{
449 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
450 AssertReturn(!(HCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
451
452 if (pGVM->rawpci.s.pfnContigMemInfo)
453 /** @todo what do we do on failure? */
454 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, HCPhys, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_MAP);
455
456 return VINF_SUCCESS;
457}
458
459
460/**
461 * Indicates that the specified guest page doesn't exists but doesn't have host
462 * page mapping we trust PCI pass-through with.
463 *
464 * This is only used when setting up the mappings, i.e. between
465 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
466 *
467 * @returns VBox status code.
468 * @param pGVM The ring-0 VM structure.
469 * @param GCPhys The address of the guest page (page aligned).
470 * @param HCPhys The address of the host page (page aligned).
471 */
472VMMR0_INT_DECL(int) GPciRawR0GuestPageUnassign(PGVM pGVM, RTGCPHYS GCPhys)
473{
474 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
475
476 if (pGVM->rawpci.s.pfnContigMemInfo)
477 /** @todo what do we do on failure? */
478 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, 0, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_UNMAP);
479
480 return VINF_SUCCESS;
481}
482
483
484/**
485 * Tells the global PCI pass-through manager that we have completed setting up
486 * the guest page to host page mappings for the specfied VM.
487 *
488 * This complements GPciRawR0GuestPageBeginAssignments and will be called even
489 * if some page assignment failed.
490 *
491 * @returns VBox status code.
492 *
493 * @param pGVM The ring-0 VM structure.
494 */
495VMMR0_INT_DECL(int) GPciRawR0GuestPageEndAssignments(PGVM pGVM)
496{
497 NOREF(pGVM);
498 return VINF_SUCCESS;
499}
500
501
502/**
503 * Tells the global PCI pass-through manager that a guest page mapping has
504 * changed after the initial setup.
505 *
506 * @returns VBox status code.
507 * @param pGVM The ring-0 VM structure.
508 * @param GCPhys The address of the guest page (page aligned).
509 * @param HCPhys The new host page address or NIL_RTHCPHYS if
510 * now unassigned.
511 */
512VMMR0_INT_DECL(int) GPciRawR0GuestPageUpdate(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
513{
514 AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_4);
515 AssertReturn(!(HCPhys & PAGE_OFFSET_MASK) || HCPhys == NIL_RTHCPHYS, VERR_INTERNAL_ERROR_4);
516 NOREF(pGVM);
517 return VINF_SUCCESS;
518}
519
520#endif /* VBOX_WITH_PCI_PASSTHROUGH */
521
522
523/**
524 * Sets up the IOMMU when raw PCI device is enabled.
525 *
526 * @note This is a hack that will probably be remodelled and refined later!
527 *
528 * @returns VBox status code.
529 *
530 * @param pGVM The global (ring-0) VM structure.
531 */
532VMMR0_INT_DECL(int) PGMR0PhysSetupIoMmu(PGVM pGVM)
533{
534 int rc = GVMMR0ValidateGVM(pGVM);
535 if (RT_FAILURE(rc))
536 return rc;
537
538#ifdef VBOX_WITH_PCI_PASSTHROUGH
539 if (pGVM->pgm.s.fPciPassthrough)
540 {
541 /*
542 * The Simplistic Approach - Enumerate all the pages and call tell the
543 * IOMMU about each of them.
544 */
545 pgmLock(pGVM);
546 rc = GPciRawR0GuestPageBeginAssignments(pGVM);
547 if (RT_SUCCESS(rc))
548 {
549 for (PPGMRAMRANGE pRam = pGVM->pgm.s.pRamRangesXR0; RT_SUCCESS(rc) && pRam; pRam = pRam->pNextR0)
550 {
551 PPGMPAGE pPage = &pRam->aPages[0];
552 RTGCPHYS GCPhys = pRam->GCPhys;
553 uint32_t cLeft = pRam->cb >> PAGE_SHIFT;
554 while (cLeft-- > 0)
555 {
556 /* Only expose pages that are 100% safe for now. */
557 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
558 && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
559 && !PGM_PAGE_HAS_ANY_HANDLERS(pPage))
560 rc = GPciRawR0GuestPageAssign(pGVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage));
561 else
562 rc = GPciRawR0GuestPageUnassign(pGVM, GCPhys);
563
564 /* next */
565 pPage++;
566 GCPhys += PAGE_SIZE;
567 }
568 }
569
570 int rc2 = GPciRawR0GuestPageEndAssignments(pGVM);
571 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
572 rc = rc2;
573 }
574 pgmUnlock(pGVM);
575 }
576 else
577#endif
578 rc = VERR_NOT_SUPPORTED;
579 return rc;
580}
581
582
583/**
584 * \#PF Handler for nested paging.
585 *
586 * @returns VBox status code (appropriate for trap handling and GC return).
587 * @param pGVM The global (ring-0) VM structure.
588 * @param pGVCpu The global (ring-0) CPU structure of the calling
589 * EMT.
590 * @param enmShwPagingMode Paging mode for the nested page tables.
591 * @param uErr The trap error code.
592 * @param pRegFrame Trap register frame.
593 * @param GCPhysFault The fault address.
594 */
595VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
596 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault)
597{
598 int rc;
599
600 LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pRegFrame->rip));
601 STAM_PROFILE_START(&pGVCpu->pgm.s.StatRZTrap0e, a);
602 STAM_STATS({ pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = NULL; } );
603
604 /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
605 AssertMsg( enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
606 || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
607 ("enmShwPagingMode=%d\n", enmShwPagingMode));
608
609 /* Reserved shouldn't end up here. */
610 Assert(!(uErr & X86_TRAP_PF_RSVD));
611
612#ifdef VBOX_WITH_STATISTICS
613 /*
614 * Error code stats.
615 */
616 if (uErr & X86_TRAP_PF_US)
617 {
618 if (!(uErr & X86_TRAP_PF_P))
619 {
620 if (uErr & X86_TRAP_PF_RW)
621 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNotPresentWrite);
622 else
623 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNotPresentRead);
624 }
625 else if (uErr & X86_TRAP_PF_RW)
626 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSWrite);
627 else if (uErr & X86_TRAP_PF_RSVD)
628 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSReserved);
629 else if (uErr & X86_TRAP_PF_ID)
630 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNXE);
631 else
632 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSRead);
633 }
634 else
635 { /* Supervisor */
636 if (!(uErr & X86_TRAP_PF_P))
637 {
638 if (uErr & X86_TRAP_PF_RW)
639 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVNotPresentWrite);
640 else
641 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVNotPresentRead);
642 }
643 else if (uErr & X86_TRAP_PF_RW)
644 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVWrite);
645 else if (uErr & X86_TRAP_PF_ID)
646 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSNXE);
647 else if (uErr & X86_TRAP_PF_RSVD)
648 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVReserved);
649 }
650#endif
651
652 /*
653 * Call the worker.
654 *
655 * Note! We pretend the guest is in protected mode without paging, so we
656 * can use existing code to build the nested page tables.
657 */
658/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
659 bool fLockTaken = false;
660 switch (enmShwPagingMode)
661 {
662 case PGMMODE_32_BIT:
663 rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
664 break;
665 case PGMMODE_PAE:
666 case PGMMODE_PAE_NX:
667 rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
668 break;
669 case PGMMODE_AMD64:
670 case PGMMODE_AMD64_NX:
671 rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
672 break;
673 case PGMMODE_EPT:
674 rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
675 break;
676 default:
677 AssertFailed();
678 rc = VERR_INVALID_PARAMETER;
679 break;
680 }
681 if (fLockTaken)
682 {
683 PGM_LOCK_ASSERT_OWNER(pGVM);
684 pgmUnlock(pGVM);
685 }
686
687 if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
688 rc = VINF_SUCCESS;
689 /*
690 * Handle the case where we cannot interpret the instruction because we cannot get the guest physical address
691 * via its page tables, see @bugref{6043}.
692 */
693 else if ( rc == VERR_PAGE_NOT_PRESENT /* SMP only ; disassembly might fail. */
694 || rc == VERR_PAGE_TABLE_NOT_PRESENT /* seen with UNI & SMP */
695 || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT /* seen with SMP */
696 || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT) /* precaution */
697 {
698 Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pRegFrame->rip));
699 /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
700 single VCPU VMs though. */
701 rc = VINF_SUCCESS;
702 }
703
704 STAM_STATS({ if (!pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution))
705 pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Misc; });
706 STAM_PROFILE_STOP_EX(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0e, pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution), a);
707 return rc;
708}
709
710
711/**
712 * \#PF Handler for deliberate nested paging misconfiguration (/reserved bit)
713 * employed for MMIO pages.
714 *
715 * @returns VBox status code (appropriate for trap handling and GC return).
716 * @param pGVM The global (ring-0) VM structure.
717 * @param pGVCpu The global (ring-0) CPU structure of the calling
718 * EMT.
719 * @param enmShwPagingMode Paging mode for the nested page tables.
720 * @param pRegFrame Trap register frame.
721 * @param GCPhysFault The fault address.
722 * @param uErr The error code, UINT32_MAX if not available
723 * (VT-x).
724 */
725VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode,
726 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, uint32_t uErr)
727{
728#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
729 STAM_PROFILE_START(&pGVCpu->CTX_SUFF(pStats)->StatR0NpMiscfg, a);
730 VBOXSTRICTRC rc;
731
732 /*
733 * Try lookup the all access physical handler for the address.
734 */
735 pgmLock(pGVM);
736 PPGMPHYSHANDLER pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
737 PPGMPHYSHANDLERTYPEINT pHandlerType = RT_LIKELY(pHandler) ? PGMPHYSHANDLER_GET_TYPE(pGVM, pHandler) : NULL;
738 if (RT_LIKELY(pHandler && pHandlerType->enmKind != PGMPHYSHANDLERKIND_WRITE))
739 {
740 /*
741 * If the handle has aliases page or pages that have been temporarily
742 * disabled, we'll have to take a detour to make sure we resync them
743 * to avoid lots of unnecessary exits.
744 */
745 PPGMPAGE pPage;
746 if ( ( pHandler->cAliasedPages
747 || pHandler->cTmpOffPages)
748 && ( (pPage = pgmPhysGetPage(pGVM, GCPhysFault)) == NULL
749 || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
750 )
751 {
752 Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
753 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfgSyncPage);
754 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
755 pgmUnlock(pGVM);
756 }
757 else
758 {
759 if (pHandlerType->CTX_SUFF(pfnPfHandler))
760 {
761 void *pvUser = pHandler->CTX_SUFF(pvUser);
762 STAM_PROFILE_START(&pHandler->Stat, h);
763 pgmUnlock(pGVM);
764
765 Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pHandlerType->CTX_SUFF(pfnPfHandler), uErr, GCPhysFault, pvUser));
766 rc = pHandlerType->CTX_SUFF(pfnPfHandler)(pGVM, pGVCpu, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pRegFrame,
767 GCPhysFault, GCPhysFault, pvUser);
768
769#ifdef VBOX_WITH_STATISTICS
770 pgmLock(pGVM);
771 pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
772 if (pHandler)
773 STAM_PROFILE_STOP(&pHandler->Stat, h);
774 pgmUnlock(pGVM);
775#endif
776 }
777 else
778 {
779 pgmUnlock(pGVM);
780 Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr));
781 rc = VINF_EM_RAW_EMULATE_INSTR;
782 }
783 }
784 }
785 else
786 {
787 /*
788 * Must be out of sync, so do a SyncPage and restart the instruction.
789 *
790 * ASSUMES that ALL handlers are page aligned and covers whole pages
791 * (assumption asserted in PGMHandlerPhysicalRegisterEx).
792 */
793 Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr));
794 STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfgSyncPage);
795 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
796 pgmUnlock(pGVM);
797 }
798
799 STAM_PROFILE_STOP(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfg, a);
800 return rc;
801
802#else
803 AssertLogRelFailed();
804 return VERR_PGM_NOT_USED_IN_MODE;
805#endif
806}
807
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette