VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp@31126

Last change on this file since 31126 was 31126, checked in by vboxsync on 2010-07-26

PGM: Mac build fixes.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 79.5 KB
1/* $Id: PGMR0DynMap.cpp 31126 2010-07-26 18:21:50Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, ring-0 dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
* Header Files *
20*******************************************************************************/
21#define LOG_GROUP LOG_GROUP_PGM
22#include <VBox/pgm.h>
23#include "../PGMInternal.h"
24#include <VBox/vm.h>
25#include "../PGMInline.h"
26#include <VBox/sup.h>
27#include <VBox/err.h>
28#include <iprt/asm.h>
29#include <iprt/asm-amd64-x86.h>
30#include <iprt/alloc.h>
31#include <iprt/assert.h>
32#include <iprt/cpuset.h>
33#include <iprt/memobj.h>
34#include <iprt/mp.h>
35#include <iprt/semaphore.h>
36#include <iprt/spinlock.h>
37#include <iprt/string.h>
38
39
40/*******************************************************************************
41* Defined Constants And Macros *
42*******************************************************************************/
43/** The max size of the mapping cache (in pages). */
44#define PGMR0DYNMAP_MAX_PAGES ((16*_1M) >> PAGE_SHIFT)
45/** The small segment size that is adopted when an out-of-memory condition is
46 * encountered while allocating a single big segment. */
47#define PGMR0DYNMAP_SMALL_SEG_PAGES 128
48/** The number of pages we reserve per CPU. */
49#define PGMR0DYNMAP_PAGES_PER_CPU 256
50/** The minimum number of pages we reserve per CPU.
51 * This must be equal to or larger than the autoset size. */
52#define PGMR0DYNMAP_PAGES_PER_CPU_MIN 64
53/** The number of guard pages.
54 * @remarks Never do tuning of the hashing or whatnot with a strict build! */
55#if defined(VBOX_STRICT)
56# define PGMR0DYNMAP_GUARD_PAGES 1
57#else
58# define PGMR0DYNMAP_GUARD_PAGES 0
59#endif
60/** The dummy physical address of guard pages. */
61#define PGMR0DYNMAP_GUARD_PAGE_HCPHYS UINT32_C(0x7777feed)
62/** The dummy reference count of guard pages. (Must be non-zero.) */
63#define PGMR0DYNMAP_GUARD_PAGE_REF_COUNT INT32_C(0x7777feed)
64#if 0
65/** Define this to just clear the present bit on guard pages.
66 * The alternative is to replace the entire PTE with a bad not-present
67 * PTE. Either way, XNU will screw us. :-/ */
68#define PGMR0DYNMAP_GUARD_NP
69#endif
70/** The dummy PTE value for a page. */
71#define PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE X86_PTE_PG_MASK
72/** The dummy PTE value for a page. */
73#define PGMR0DYNMAP_GUARD_PAGE_PAE_PTE UINT64_MAX /*X86_PTE_PAE_PG_MASK*/
74/** Calculates the overload threshold. Currently set at 50%. */
75#define PGMR0DYNMAP_CALC_OVERLOAD(cPages) ((cPages) / 2)
76
77#if 0
78/* Assertions cause panics if preemption is disabled; these can be used to work around that. */
79//#define RTSpinlockAcquire(a,b) do {} while (0)
80//#define RTSpinlockRelease(a,b) do {} while (0)
81#endif
82
83
84/*******************************************************************************
85* Structures and Typedefs *
86*******************************************************************************/
87/**
88 * Ring-0 dynamic mapping cache segment.
89 *
90 * The dynamic mapping cache can be extended with additional segments if the
91 * load is found to be too high. This is done the next time a VM is created, under
92 * the protection of the init mutex. The arrays are reallocated and the new
93 * segment is added to the end of them. Nothing is rehashed of course, as the
94 * indexes / addresses must remain unchanged.
95 *
96 * This structure is only modified while owning the init mutex or during module
97 * init / term.
98 */
99typedef struct PGMR0DYNMAPSEG
100{
101 /** Pointer to the next segment. */
102 struct PGMR0DYNMAPSEG *pNext;
103 /** The memory object for the virtual address range that we're abusing. */
104 RTR0MEMOBJ hMemObj;
105 /** The start page in the cache. (I.e. index into the arrays.) */
106 uint16_t iPage;
107 /** The number of pages this segment contributes. */
108 uint16_t cPages;
109 /** The number of page tables. */
110 uint16_t cPTs;
111 /** The memory objects for the page tables. */
112 RTR0MEMOBJ ahMemObjPTs[1];
113} PGMR0DYNMAPSEG;
114/** Pointer to a ring-0 dynamic mapping cache segment. */
115typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
116
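/* A minimal sketch (not part of the original file) of how a segment with room
   for cMaxPTs page-table handles in the trailing ahMemObjPTs array would be
   allocated; it mirrors the RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs])
   pattern pgmR0DynMapAddSeg uses further down. The helper name is made up. */
DECLINLINE(PPGMR0DYNMAPSEG) pgmR0DynMapSegAllocSketch(uint32_t cMaxPTs)
{
    PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
    if (pSeg)
        pSeg->cPTs = 0; /* page-table handles are appended as PTs get mapped */
    return pSeg;
}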
117
118/**
119 * Ring-0 dynamic mapping cache entry.
120 *
121 * This structure tracks a single page in the mapping cache: its current mapping, PTE, reference count, and pending TLB invalidations.
122 */
123typedef struct PGMR0DYNMAPENTRY
124{
125 /** The physical address of the currently mapped page.
126 * This is duplicated for three reasons: cache locality, cache policy of the PT
127 * mappings and sanity checks. */
128 RTHCPHYS HCPhys;
129 /** Pointer to the page. */
130 void *pvPage;
131 /** The number of references. */
132 int32_t volatile cRefs;
133 /** PTE pointer union. */
134 union PGMR0DYNMAPENTRY_PPTE
135 {
136 /** PTE pointer, 32-bit legacy version. */
137 PX86PTE pLegacy;
138 /** PTE pointer, PAE version. */
139 PX86PTEPAE pPae;
140 /** PTE pointer, the void version. */
141 void *pv;
142 } uPte;
143 /** CPUs that haven't invalidated this entry after its last update. */
144 RTCPUSET PendingSet;
145} PGMR0DYNMAPENTRY;
146/** Pointer to a ring-0 dynamic mapping cache entry. */
147typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
148
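/* A minimal sketch (not part of the original file) showing how the uPte union
   is read through the member matching the cache's paging mode; the masks are
   the ones the strict checks in pgmR0DynMapAddSeg and PGMR0DynMapAssertIntegrity
   use. The helper name is made up. */
DECLINLINE(RTHCPHYS) pgmR0DynMapEntryHCPhysPteSketch(PPGMR0DYNMAPENTRY pPage, bool fLegacyMode)
{
    /* Select the 32-bit legacy or the PAE/AMD64 view of the PTE. */
    return fLegacyMode
         ? pPage->uPte.pLegacy->u & X86_PTE_PG_MASK
         : pPage->uPte.pPae->u    & X86_PTE_PAE_PG_MASK;
}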
149
150/**
151 * Ring-0 dynamic mapping cache.
152 *
153 * This is initialized during VMMR0 module init but no segments are allocated at
154 * that time. Segments will be added when the first VM is started and removed
155 * again when the last VM shuts down, thus avoiding consuming memory while
156 * At module termination, the remaining bits will be freed up.
157 */
158typedef struct PGMR0DYNMAP
159{
160 /** The usual magic number / eye catcher (PGMR0DYNMAP_MAGIC). */
161 uint32_t u32Magic;
162 /** Spinlock serializing the normal operation of the cache. */
163 RTSPINLOCK hSpinlock;
164 /** Array for tracking and managing the pages. */
165 PPGMR0DYNMAPENTRY paPages;
166 /** The cache size given as a number of pages. */
167 uint32_t cPages;
168 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
169 bool fLegacyMode;
170 /** The current load.
171 * This does not include guard pages. */
172 uint32_t cLoad;
173 /** The max load ever.
174 * This is maintained to trigger the adding of more mapping space. */
175 uint32_t cMaxLoad;
176 /** Initialization / termination lock. */
177 RTSEMFASTMUTEX hInitLock;
178 /** The number of guard pages. */
179 uint32_t cGuardPages;
180 /** The number of users (protected by hInitLock). */
181 uint32_t cUsers;
182 /** Array containing a copy of the original page tables.
183 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
184 void *pvSavedPTEs;
185 /** List of segments. */
186 PPGMR0DYNMAPSEG pSegHead;
187 /** The paging mode. */
188 SUPPAGINGMODE enmPgMode;
189} PGMR0DYNMAP;
190/** Pointer to the ring-0 dynamic mapping cache */
191typedef PGMR0DYNMAP *PPGMR0DYNMAP;
192
193/** PGMR0DYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
194#define PGMR0DYNMAP_MAGIC 0x19640201
195
196
197/**
198 * Paging level data.
199 */
200typedef struct PGMR0DYNMAPPGLVL
201{
202 uint32_t cLevels; /**< The number of levels. */
203 struct
204 {
205 RTHCPHYS HCPhys; /**< The address of the page for the current level,
206 * i.e. what hMemObj/hMapObj is currently mapping. */
207 RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
208 RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
209 RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
210 uint32_t fPtrShift; /**< The pointer shift count. */
211 uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
212 uint64_t fAndMask; /**< And mask to check entry flags. */
213 uint64_t fResMask; /**< The result from applying fAndMask. */
214 union
215 {
216 void *pv; /**< hMapObj address. */
217 PX86PGUINT paLegacy; /**< Legacy table view. */
218 PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
219 } u;
220 } a[4];
221} PGMR0DYNMAPPGLVL;
222/** Pointer to paging level data. */
223typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
224
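/* A minimal sketch (not part of the original file) of how one paging level
   turns a virtual address into a table index and entry, exactly as
   pgmR0DynMapPagingArrayMapPte does; an entry is considered usable when
   (uEntry & fAndMask) == fResMask. The helper name is made up. */
DECLINLINE(uint32_t) pgmR0DynMapPgLvlIndexSketch(PPGMR0DYNMAPPGLVL pPgLvl, uint32_t iLevel, void *pvPage)
{
    /* Shift the address down to the level's table and mask out the index. */
    return (uint32_t)(((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[iLevel].fPtrShift) & pPgLvl->a[iLevel].fPtrMask);
}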
225
226/*******************************************************************************
227* Global Variables *
228*******************************************************************************/
229/** Pointer to the ring-0 dynamic mapping cache. */
230static PPGMR0DYNMAP g_pPGMR0DynMap;
231/** For overflow testing. */
232static bool g_fPGMR0DynMapTestRunning = false;
233
234
235/*******************************************************************************
236* Internal Functions *
237*******************************************************************************/
238static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs);
239static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis);
240static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis);
241static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis);
242#if 0 /*def DEBUG*/
243static int pgmR0DynMapTest(PVM pVM);
244#endif
245
246
247/**
248 * Initializes the ring-0 dynamic mapping cache.
249 *
250 * @returns VBox status code.
251 */
252VMMR0DECL(int) PGMR0DynMapInit(void)
253{
254 Assert(!g_pPGMR0DynMap);
255
256 /*
257 * Create and initialize the cache instance.
258 */
259 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)RTMemAllocZ(sizeof(*pThis));
260 AssertLogRelReturn(pThis, VERR_NO_MEMORY);
261 int rc = VINF_SUCCESS;
262 pThis->enmPgMode = SUPR0GetPagingMode();
263 switch (pThis->enmPgMode)
264 {
265 case SUPPAGINGMODE_32_BIT:
266 case SUPPAGINGMODE_32_BIT_GLOBAL:
267 pThis->fLegacyMode = false;
268 break;
269 case SUPPAGINGMODE_PAE:
270 case SUPPAGINGMODE_PAE_GLOBAL:
271 case SUPPAGINGMODE_PAE_NX:
272 case SUPPAGINGMODE_PAE_GLOBAL_NX:
273 case SUPPAGINGMODE_AMD64:
274 case SUPPAGINGMODE_AMD64_GLOBAL:
275 case SUPPAGINGMODE_AMD64_NX:
276 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
277 pThis->fLegacyMode = false;
278 break;
279 default:
280 rc = VERR_INTERNAL_ERROR;
281 break;
282 }
283 if (RT_SUCCESS(rc))
284 {
285 rc = RTSemFastMutexCreate(&pThis->hInitLock);
286 if (RT_SUCCESS(rc))
287 {
288 rc = RTSpinlockCreate(&pThis->hSpinlock);
289 if (RT_SUCCESS(rc))
290 {
291 pThis->u32Magic = PGMR0DYNMAP_MAGIC;
292 g_pPGMR0DynMap = pThis;
293 return VINF_SUCCESS;
294 }
295 RTSemFastMutexDestroy(pThis->hInitLock);
296 }
297 }
298 RTMemFree(pThis);
299 return rc;
300}
301
302
303/**
304 * Terminates the ring-0 dynamic mapping cache.
305 */
306VMMR0DECL(void) PGMR0DynMapTerm(void)
307{
308 /*
309 * Destroy the cache.
310 *
311 * There are not supposed to be any races here; the loader should
312 * make sure of that. So, don't bother locking anything.
313 *
314 * The VM objects should all be destroyed by now, so there are no
315 * dangling users or anything like that to clean up. This routine
316 * is just a mirror image of PGMR0DynMapInit.
317 */
318 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
319 if (pThis)
320 {
321 AssertPtr(pThis);
322 g_pPGMR0DynMap = NULL;
323
324 /* This should *never* happen, but in case it does, try not to leak memory. */
325 AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->pvSavedPTEs && !pThis->cPages,
326 ("cUsers=%d paPages=%p pvSavedPTEs=%p cPages=%#x\n",
327 pThis->cUsers, pThis->paPages, pThis->pvSavedPTEs, pThis->cPages));
328 if (pThis->paPages)
329 pgmR0DynMapTearDown(pThis);
330
331 /* Free the associated resources. */
332 RTSemFastMutexDestroy(pThis->hInitLock);
333 pThis->hInitLock = NIL_RTSEMFASTMUTEX;
334 RTSpinlockDestroy(pThis->hSpinlock);
335 pThis->hSpinlock = NIL_RTSPINLOCK;
336 pThis->u32Magic = UINT32_MAX;
337 RTMemFree(pThis);
338 }
339}
340
341
342/**
343 * Initializes the dynamic mapping cache for a new VM.
344 *
345 * @returns VBox status code.
346 * @param pVM Pointer to the shared VM structure.
347 */
348VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
349{
350 AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
351
352 /*
353 * Initialize the auto sets.
354 */
355 VMCPUID idCpu = pVM->cCpus;
356 AssertReturn(idCpu > 0 && idCpu <= VMM_MAX_CPU_COUNT, VERR_INTERNAL_ERROR);
357 while (idCpu-- > 0)
358 {
359 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
360 uint32_t j = RT_ELEMENTS(pSet->aEntries);
361 while (j-- > 0)
362 {
363 pSet->aEntries[j].iPage = UINT16_MAX;
364 pSet->aEntries[j].cRefs = 0;
365 pSet->aEntries[j].pvPage = NULL;
366 pSet->aEntries[j].HCPhys = NIL_RTHCPHYS;
367 }
368 pSet->cEntries = PGMMAPSET_CLOSED;
369 pSet->iSubset = UINT32_MAX;
370 pSet->iCpu = -1;
371 memset(&pSet->aiHashTable[0], 0xff, sizeof(pSet->aiHashTable));
372 }
373
374 /*
375 * Do we need the cache? Skip the last bit if we don't.
376 */
377 if (!VMMIsHwVirtExtForced(pVM))
378 return VINF_SUCCESS;
379
380 /*
381 * Reference and if necessary setup or expand the cache.
382 */
383 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
384 AssertPtrReturn(pThis, VERR_INTERNAL_ERROR);
385 int rc = RTSemFastMutexRequest(pThis->hInitLock);
386 AssertLogRelRCReturn(rc, rc);
387
388 pThis->cUsers++;
389 if (pThis->cUsers == 1)
390 {
391 rc = pgmR0DynMapSetup(pThis);
392#if 0 /*def DEBUG*/
393 if (RT_SUCCESS(rc))
394 {
395 rc = pgmR0DynMapTest(pVM);
396 if (RT_FAILURE(rc))
397 pgmR0DynMapTearDown(pThis);
398 }
399#endif
400 }
401 else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages - pThis->cGuardPages))
402 rc = pgmR0DynMapExpand(pThis);
403 if (RT_SUCCESS(rc))
404 pVM->pgm.s.pvR0DynMapUsed = pThis;
405 else
406 pThis->cUsers--;
407
408 RTSemFastMutexRelease(pThis->hInitLock);
409 return rc;
410}
411
412
413/**
414 * Terminates the dynamic mapping cache usage for a VM.
415 *
416 * @param pVM Pointer to the shared VM structure.
417 */
418VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
419{
420 /*
421 * Return immediately if we're not using the cache.
422 */
423 if (!pVM->pgm.s.pvR0DynMapUsed)
424 return;
425
426 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
427 AssertPtrReturnVoid(pThis);
428
429 int rc = RTSemFastMutexRequest(pThis->hInitLock);
430 AssertLogRelRCReturnVoid(rc);
431
432 if (pVM->pgm.s.pvR0DynMapUsed == pThis)
433 {
434 pVM->pgm.s.pvR0DynMapUsed = NULL;
435
436#ifdef VBOX_STRICT
437 PGMR0DynMapAssertIntegrity();
438#endif
439
440 /*
441 * Clean up and check the auto sets.
442 */
443 VMCPUID idCpu = pVM->cCpus;
444 while (idCpu-- > 0)
445 {
446 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
447 uint32_t j = pSet->cEntries;
448 if (j <= RT_ELEMENTS(pSet->aEntries))
449 {
450 /*
451 * The set is open, close it.
452 */
453 while (j-- > 0)
454 {
455 int32_t cRefs = pSet->aEntries[j].cRefs;
456 uint32_t iPage = pSet->aEntries[j].iPage;
457 LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
458 if (iPage < pThis->cPages && cRefs > 0)
459 pgmR0DynMapReleasePage(pThis, iPage, cRefs);
460 else
461 AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
462
463 pSet->aEntries[j].iPage = UINT16_MAX;
464 pSet->aEntries[j].cRefs = 0;
465 pSet->aEntries[j].pvPage = NULL;
466 pSet->aEntries[j].HCPhys = NIL_RTHCPHYS;
467 }
468 pSet->cEntries = PGMMAPSET_CLOSED;
469 pSet->iSubset = UINT32_MAX;
470 pSet->iCpu = -1;
471 }
472 else
473 AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
474
475 j = RT_ELEMENTS(pSet->aEntries);
476 while (j-- > 0)
477 {
478 Assert(pSet->aEntries[j].iPage == UINT16_MAX);
479 Assert(!pSet->aEntries[j].cRefs);
480 }
481 }
482
483 /*
484 * Release our reference to the mapping cache.
485 */
486 Assert(pThis->cUsers > 0);
487 pThis->cUsers--;
488 if (!pThis->cUsers)
489 pgmR0DynMapTearDown(pThis);
490 }
491 else
492 AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
493
494 RTSemFastMutexRelease(pThis->hInitLock);
495}
496
497
498/**
499 * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
500 *
501 * @param idCpu The current CPU.
502 * @param pvUser1 The dynamic mapping cache instance.
503 * @param pvUser2 Unused, NULL.
504 */
505static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
506{
507 Assert(!pvUser2);
508 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)pvUser1;
509 Assert(pThis == g_pPGMR0DynMap);
510 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
511 uint32_t iPage = pThis->cPages;
512 while (iPage-- > 0)
513 ASMInvalidatePage(paPages[iPage].pvPage);
514}
515
516
517/**
518 * Shoot down the TLBs for every single cache entry on all CPUs.
519 *
520 * @returns IPRT status code (RTMpOnAll).
521 * @param pThis The dynamic mapping cache instance.
522 */
523static int pgmR0DynMapTlbShootDown(PPGMR0DYNMAP pThis)
524{
525 int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
526 AssertRC(rc);
527 if (RT_FAILURE(rc))
528 {
529 uint32_t iPage = pThis->cPages;
530 while (iPage-- > 0)
531 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
532 }
533 return rc;
534}
535
536
537/**
538 * Calculate the new cache size based on cMaxLoad statistics.
539 *
540 * @returns Number of pages.
541 * @param pThis The dynamic mapping cache instance.
542 * @param pcMinPages The minimal size in pages.
543 */
544static uint32_t pgmR0DynMapCalcNewSize(PPGMR0DYNMAP pThis, uint32_t *pcMinPages)
545{
546 Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
547
548 /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU(_MIN). */
549 RTCPUID cCpus = RTMpGetCount();
550 AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0);
551 uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
552 uint32_t cMinPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU_MIN;
553
554 /* adjust against cMaxLoad. */
555 AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
556 if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
557 pThis->cMaxLoad = 0;
558
559 while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
560 cPages += PGMR0DYNMAP_PAGES_PER_CPU;
561
562 if (pThis->cMaxLoad > cMinPages)
563 cMinPages = pThis->cMaxLoad;
564
565 /* adjust against max and current size. */
566 if (cPages < pThis->cPages)
567 cPages = pThis->cPages;
568 cPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
569 if (cPages > PGMR0DYNMAP_MAX_PAGES)
570 cPages = PGMR0DYNMAP_MAX_PAGES;
571
572 if (cMinPages < pThis->cPages)
573 cMinPages = pThis->cPages;
574 cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
575 if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
576 cMinPages = PGMR0DYNMAP_MAX_PAGES;
577
578 Assert(cMinPages);
579 *pcMinPages = cMinPages;
580 return cPages;
581}
582
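/* A worked example of the sizing arithmetic above, assuming a 4-CPU host, a
   strict build (PGMR0DYNMAP_GUARD_PAGES == 1), cMaxLoad == 0 and no existing
   pages (these assumptions are illustrative only):
       cPages    = 4 * PGMR0DYNMAP_PAGES_PER_CPU     = 1024;  * (1 + 1) = 2048
       cMinPages = 4 * PGMR0DYNMAP_PAGES_PER_CPU_MIN =  256;  * (1 + 1) =  512
   Both stay below the PGMR0DYNMAP_MAX_PAGES cap of 4096 (16 MB), so the first
   segment will try to provide 2048 pages of mapping space. */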
583
584/**
585 * Initializes the paging level data.
586 *
587 * @param pThis The dynamic mapping cache instance.
588 * @param pPgLvl The paging level data.
589 */
590void pgmR0DynMapPagingArrayInit(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
591{
592 RTCCUINTREG cr4 = ASMGetCR4();
593 switch (pThis->enmPgMode)
594 {
595 case SUPPAGINGMODE_32_BIT:
596 case SUPPAGINGMODE_32_BIT_GLOBAL:
597 pPgLvl->cLevels = 2;
598 pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
599 pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
600 pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
601 pPgLvl->a[0].fPtrMask = X86_PD_MASK;
602 pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
603
604 pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
605 pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
606 pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
607 pPgLvl->a[1].fPtrMask = X86_PT_MASK;
608 pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
609 break;
610
611 case SUPPAGINGMODE_PAE:
612 case SUPPAGINGMODE_PAE_GLOBAL:
613 case SUPPAGINGMODE_PAE_NX:
614 case SUPPAGINGMODE_PAE_GLOBAL_NX:
615 pPgLvl->cLevels = 3;
616 pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
617 pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
618 pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
619 pPgLvl->a[0].fAndMask = X86_PDPE_P;
620 pPgLvl->a[0].fResMask = X86_PDPE_P;
621
622 pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
623 pPgLvl->a[1].fPtrMask = X86_PD_PAE_MASK;
624 pPgLvl->a[1].fPtrShift = X86_PD_PAE_SHIFT;
625 pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
626 pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
627
628 pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
629 pPgLvl->a[2].fPtrMask = X86_PT_PAE_MASK;
630 pPgLvl->a[2].fPtrShift = X86_PT_PAE_SHIFT;
631 pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
632 pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
633 break;
634
635 case SUPPAGINGMODE_AMD64:
636 case SUPPAGINGMODE_AMD64_GLOBAL:
637 case SUPPAGINGMODE_AMD64_NX:
638 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
639 pPgLvl->cLevels = 4;
640 pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
641 pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
642 pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
643 pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
644 pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
645
646 pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
647 pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
648 pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
649 pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
650 pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
651
652 pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
653 pPgLvl->a[2].fPtrShift = X86_PD_PAE_SHIFT;
654 pPgLvl->a[2].fPtrMask = X86_PD_PAE_MASK;
655 pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
656 pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
657
658 pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
659 pPgLvl->a[3].fPtrShift = X86_PT_PAE_SHIFT;
660 pPgLvl->a[3].fPtrMask = X86_PT_PAE_MASK;
661 pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
662 pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
663 break;
664
665 default:
666 AssertFailed();
667 pPgLvl->cLevels = 0;
668 break;
669 }
670
671 for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
672 {
673 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
674 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
675 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
676 pPgLvl->a[i].u.pv = NULL;
677 }
678}
679
680
681/**
682 * Maps a PTE.
683 *
684 * This will update the segment structure when new PTs are mapped.
685 *
686 * It also assumes that we (for paranoid reasons) wish to establish a mapping
687 * chain from CR3 to the PT, all of which corresponds to the processor we're
688 * currently running on, and go about this by running with interrupts disabled
689 * and restarting from CR3 for every change.
690 *
691 * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
692 * to re-enable interrupts.
693 * @param pThis The dynamic mapping cache instance.
694 * @param pPgLvl The paging level structure.
695 * @param pvPage The page.
696 * @param pSeg The segment.
697 * @param cMaxPTs The max number of PTs expected in the segment.
698 * @param ppvPTE Where to store the PTE address.
699 */
700static int pgmR0DynMapPagingArrayMapPte(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
701 PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
702{
703 Assert(!(ASMGetFlags() & X86_EFL_IF));
704 void *pvEntry = NULL;
705 X86PGPAEUINT uEntry = ASMGetCR3();
706 for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
707 {
708 RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
709 if (pPgLvl->a[i].HCPhys != HCPhys)
710 {
711 /*
712 * Need to remap this level.
713 * The final level, the PT, will not be freed since that is what it's all about.
714 */
715 ASMIntEnable();
716 if (i + 1 == pPgLvl->cLevels)
717 AssertReturn(pSeg->cPTs < cMaxPTs, VERR_INTERNAL_ERROR);
718 else
719 {
720 int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
721 pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
722 }
723
724 int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE, RTMEM_CACHE_POLICY_DONT_CARE);
725 if (RT_SUCCESS(rc))
726 {
727 rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj,
728 (void *)-1 /* pvFixed */, 0 /* cbAlignment */,
729 RTMEM_PROT_WRITE | RTMEM_PROT_READ);
730 if (RT_SUCCESS(rc))
731 {
732 pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj);
733 AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv));
734 pPgLvl->a[i].HCPhys = HCPhys;
735 if (i + 1 == pPgLvl->cLevels)
736 pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
737 ASMIntDisable();
738 return VINF_TRY_AGAIN;
739 }
740
741 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
742 }
743 else
744 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
745 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
746 return rc;
747 }
748
749 /*
750 * The next level.
751 */
752 uint32_t iEntry = ((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
753 if (pThis->fLegacyMode)
754 {
755 pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
756 uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
757 }
758 else
759 {
760 pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
761 uEntry = pPgLvl->a[i].u.paPae[iEntry];
762 }
763
764 if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
765 {
766 LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n"
767 "PGMR0DynMap: pv=%p pvPage=%p iEntry=%#x fLegacyMode=%RTbool\n",
768 i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask,
769 pPgLvl->a[i].u.pv, pvPage, iEntry, pThis->fLegacyMode));
770 return VERR_INTERNAL_ERROR;
771 }
772 /*Log(("#%d: iEntry=%4d uEntry=%#llx pvEntry=%p HCPhys=%RHp \n", i, iEntry, uEntry, pvEntry, pPgLvl->a[i].HCPhys));*/
773 }
774
775 /* made it thru without needing to remap anything. */
776 *ppvPTE = pvEntry;
777 return VINF_SUCCESS;
778}
779
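/* Calling pattern, sketched from what pgmR0DynMapAddSeg does for each page
   (interrupts are disabled around the loop):
       do
           rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
                                             &pThis->paPages[iPage].uPte.pv);
       while (rc == VINF_TRY_AGAIN);
   VINF_TRY_AGAIN means a level was remapped (and interrupts were briefly
   re-enabled), so the walk must be restarted from CR3. */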
780
781/**
782 * Sets up a guard page.
783 *
784 * @param pThis The dynamic mapping cache instance.
785 * @param pPage The page.
786 */
787DECLINLINE(void) pgmR0DynMapSetupGuardPage(PPGMR0DYNMAP pThis, PPGMR0DYNMAPENTRY pPage)
788{
789 memset(pPage->pvPage, 0xfd, PAGE_SIZE);
790 pPage->cRefs = PGMR0DYNMAP_GUARD_PAGE_REF_COUNT;
791 pPage->HCPhys = PGMR0DYNMAP_GUARD_PAGE_HCPHYS;
792#ifdef PGMR0DYNMAP_GUARD_NP
793 ASMAtomicBitClear(pPage->uPte.pv, X86_PTE_BIT_P);
794#else
795 if (pThis->fLegacyMode)
796 ASMAtomicWriteU32(&pPage->uPte.pLegacy->u, PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE);
797 else
798 ASMAtomicWriteU64(&pPage->uPte.pPae->u, PGMR0DYNMAP_GUARD_PAGE_PAE_PTE);
799#endif
800 pThis->cGuardPages++;
801}
802
803
804/**
805 * Adds a new segment of the specified size.
806 *
807 * @returns VBox status code.
808 * @param pThis The dynamic mapping cache instance.
809 * @param cPages The size of the new segment, given as a page count.
810 */
811static int pgmR0DynMapAddSeg(PPGMR0DYNMAP pThis, uint32_t cPages)
812{
813 int rc2;
814 AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
815
816 /*
817 * Do the array reallocations first.
818 * (The pages array has to be replaced behind the spinlock of course.)
819 */
820 void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
821 if (!pvSavedPTEs)
822 return VERR_NO_MEMORY;
823 pThis->pvSavedPTEs = pvSavedPTEs;
824
825 void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
826 if (!pvPages)
827 {
828 pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
829 if (pvSavedPTEs)
830 pThis->pvSavedPTEs = pvSavedPTEs;
831 return VERR_NO_MEMORY;
832 }
833
834 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
835 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
836
837 memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
838 void *pvToFree = pThis->paPages;
839 pThis->paPages = (PPGMR0DYNMAPENTRY)pvPages;
840
841 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
842 RTMemFree(pvToFree);
843
844 /*
845 * Allocate the segment structure and pages of memory, then touch all the pages (paranoia).
846 */
847 uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
848 PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
849 if (!pSeg)
850 return VERR_NO_MEMORY;
851 pSeg->pNext = NULL;
852 pSeg->cPages = cPages;
853 pSeg->iPage = pThis->cPages;
854 pSeg->cPTs = 0;
855 int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
856 if (RT_SUCCESS(rc))
857 {
858 uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
859 AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
860 memset(pbPage, 0xfe, cPages << PAGE_SHIFT);
861
862 /*
863 * Walk thru the pages and set them up with a mapping of their PTE and everything.
864 */
865 ASMIntDisable();
866 PGMR0DYNMAPPGLVL PgLvl;
867 pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
868 uint32_t const iEndPage = pSeg->iPage + cPages;
869 for (uint32_t iPage = pSeg->iPage;
870 iPage < iEndPage;
871 iPage++, pbPage += PAGE_SIZE)
872 {
873 /* Initialize the page data. */
874 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
875 pThis->paPages[iPage].pvPage = pbPage;
876 pThis->paPages[iPage].cRefs = 0;
877 pThis->paPages[iPage].uPte.pPae = 0;
878 RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
879
880 /* Map its page table, retry until we've got a clean run (paranoia). */
881 do
882 rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
883 &pThis->paPages[iPage].uPte.pv);
884 while (rc == VINF_TRY_AGAIN);
885 if (RT_FAILURE(rc))
886 break;
887
888 /* Save the PTE. */
889 if (pThis->fLegacyMode)
890 ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
891 else
892 ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
893
894#ifdef VBOX_STRICT
895 /* Check that we've got the right entry. */
896 RTHCPHYS HCPhysPage = RTR0MemObjGetPagePhysAddr(pSeg->hMemObj, iPage - pSeg->iPage);
897 RTHCPHYS HCPhysPte = pThis->fLegacyMode
898 ? pThis->paPages[iPage].uPte.pLegacy->u & X86_PTE_PG_MASK
899 : pThis->paPages[iPage].uPte.pPae->u & X86_PTE_PAE_PG_MASK;
900 if (HCPhysPage != HCPhysPte)
901 {
902 LogRel(("pgmR0DynMapAddSeg: internal error - page #%u HCPhysPage=%RHp HCPhysPte=%RHp pbPage=%p pvPte=%p\n",
903 iPage - pSeg->iPage, HCPhysPage, HCPhysPte, pbPage, pThis->paPages[iPage].uPte.pv));
904 rc = VERR_INTERNAL_ERROR;
905 break;
906 }
907#endif
908 } /* for each page */
909 ASMIntEnable();
910
911 /* cleanup non-PT mappings */
912 for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
913 RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
914
915 if (RT_SUCCESS(rc))
916 {
917#if PGMR0DYNMAP_GUARD_PAGES > 0
918 /*
919 * Setup guard pages.
920 * (Note: TLBs will be shot down later on.)
921 */
922 uint32_t iPage = pSeg->iPage;
923 while (iPage < iEndPage)
924 {
925 for (uint32_t iGPg = 0; iGPg < PGMR0DYNMAP_GUARD_PAGES && iPage < iEndPage; iGPg++, iPage++)
926 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
927 iPage++; /* the guarded page */
928 }
929
930 /* Make sure the very last page is a guard page too. */
931 iPage = iEndPage - 1;
932 if (pThis->paPages[iPage].cRefs != PGMR0DYNMAP_GUARD_PAGE_REF_COUNT)
933 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
934#endif /* PGMR0DYNMAP_GUARD_PAGES > 0 */
935
936 /*
937 * Commit it by adding the segment to the list and updating the page count.
938 */
939 pSeg->pNext = pThis->pSegHead;
940 pThis->pSegHead = pSeg;
941 pThis->cPages += cPages;
942 return VINF_SUCCESS;
943 }
944
945 /*
946 * Bail out.
947 */
948 while (pSeg->cPTs-- > 0)
949 {
950 rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
951 AssertRC(rc2);
952 pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
953 }
954
955 rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
956 AssertRC(rc2);
957 pSeg->hMemObj = NIL_RTR0MEMOBJ;
958 }
959 RTMemFree(pSeg);
960
961 /* Don't bother resizing the arrays, but free them if we're the only user. */
962 if (!pThis->cPages)
963 {
964 RTMemFree(pThis->paPages);
965 pThis->paPages = NULL;
966 RTMemFree(pThis->pvSavedPTEs);
967 pThis->pvSavedPTEs = NULL;
968 }
969 return rc;
970}
971
972
973/**
974 * Called by PGMR0DynMapInitVM under the init lock.
975 *
976 * @returns VBox status code.
977 * @param pThis The dynamic mapping cache instance.
978 */
979static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis)
980{
981 /*
982 * Calc the size and add a segment of that size.
983 */
984 uint32_t cMinPages;
985 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
986 AssertReturn(cPages, VERR_INTERNAL_ERROR);
987 int rc = pgmR0DynMapAddSeg(pThis, cPages);
988 if (rc == VERR_NO_MEMORY)
989 {
990 /*
991 * Try adding smaller segments.
992 */
993 do
994 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
995 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
996 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
997 rc = VINF_SUCCESS;
998 if (rc == VERR_NO_MEMORY)
999 {
1000 if (pThis->cPages)
1001 pgmR0DynMapTearDown(pThis);
1002 rc = VERR_PGM_DYNMAP_SETUP_ERROR;
1003 }
1004 }
1005 Assert(ASMGetFlags() & X86_EFL_IF);
1006
1007#if PGMR0DYNMAP_GUARD_PAGES > 0
1008 /* paranoia */
1009 if (RT_SUCCESS(rc))
1010 pgmR0DynMapTlbShootDown(pThis);
1011#endif
1012 return rc;
1013}
1014
1015
1016/**
1017 * Called by PGMR0DynMapInitVM under the init lock.
1018 *
1019 * @returns VBox status code.
1020 * @param pThis The dynamic mapping cache instance.
1021 */
1022static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis)
1023{
1024 /*
1025 * Calc the new target size and add a segment of the appropriate size.
1026 */
1027 uint32_t cMinPages;
1028 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
1029 AssertReturn(cPages, VERR_INTERNAL_ERROR);
1030 if (pThis->cPages >= cPages)
1031 return VINF_SUCCESS;
1032
1033 uint32_t cAdd = cPages - pThis->cPages;
1034 int rc = pgmR0DynMapAddSeg(pThis, cAdd);
1035 if (rc == VERR_NO_MEMORY)
1036 {
1037 /*
1038 * Try adding smaller segments.
1039 */
1040 do
1041 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
1042 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
1043 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
1044 rc = VINF_SUCCESS;
1045 if (rc == VERR_NO_MEMORY)
1046 rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
1047 }
1048 Assert(ASMGetFlags() & X86_EFL_IF);
1049
1050#if PGMR0DYNMAP_GUARD_PAGES > 0
1051 /* paranoia */
1052 if (RT_SUCCESS(rc))
1053 pgmR0DynMapTlbShootDown(pThis);
1054#endif
1055 return rc;
1056}
1057
1058
1059/**
1060 * Called by PGMR0DynMapTermVM under the init lock.
1061 *
1062 * @returns VBox status code.
1063 * @param pThis The dynamic mapping cache instance.
1064 */
1065static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis)
1066{
1067 /*
1068 * Restore the original page table entries
1069 */
1070 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1071 uint32_t iPage = pThis->cPages;
1072 if (pThis->fLegacyMode)
1073 {
1074 X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
1075 while (iPage-- > 0)
1076 {
1077 X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
1078 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1079 X86PGUINT uNew = paSavedPTEs[iPage];
1080 while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
1081 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1082 Assert(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage]);
1083 }
1084 }
1085 else
1086 {
1087 X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
1088 while (iPage-- > 0)
1089 {
1090 X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
1091 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1092 X86PGPAEUINT uNew = paSavedPTEs[iPage];
1093 while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
1094 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1095 Assert(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage]);
1096 }
1097 }
1098
1099 /*
1100 * Shoot down the TLBs on all CPUs before freeing them.
1101 */
1102 pgmR0DynMapTlbShootDown(pThis);
1103
1104 /*
1105 * Free the segments.
1106 */
1107 while (pThis->pSegHead)
1108 {
1109 int rc;
1110 PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
1111 pThis->pSegHead = pSeg->pNext;
1112
1113 uint32_t iPT = pSeg->cPTs;
1114 while (iPT-- > 0)
1115 {
1116 rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
1117 pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
1118 }
1119 rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
1120 pSeg->hMemObj = NIL_RTR0MEMOBJ;
1121 pSeg->pNext = NULL;
1122 pSeg->iPage = UINT16_MAX;
1123 pSeg->cPages = 0;
1124 pSeg->cPTs = 0;
1125 RTMemFree(pSeg);
1126 }
1127
1128 /*
1129 * Free the arrays and restore the initial state.
1130 * The cMaxLoad value is left behind for the next setup.
1131 */
1132 RTMemFree(pThis->paPages);
1133 pThis->paPages = NULL;
1134 RTMemFree(pThis->pvSavedPTEs);
1135 pThis->pvSavedPTEs = NULL;
1136 pThis->cPages = 0;
1137 pThis->cLoad = 0;
1138 pThis->cGuardPages = 0;
1139}
1140
1141
1142/**
1143 * Release references to a page, caller owns the spin lock.
1144 *
1145 * @param pThis The dynamic mapping cache instance.
1146 * @param iPage The page.
1147 * @param cRefs The number of references to release.
1148 */
1149DECLINLINE(void) pgmR0DynMapReleasePageLocked(PPGMR0DYNMAP pThis, uint32_t iPage, int32_t cRefs)
1150{
1151 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs) - cRefs;
1152 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
1153 if (!cRefs)
1154 pThis->cLoad--;
1155}
1156
1157
1158/**
1159 * Release references to a page, caller does not own the spin lock.
1160 *
1161 * @param pThis The dynamic mapping cache instance.
1162 * @param iPage The page.
1163 * @param cRefs The number of references to release.
1164 */
1165static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs)
1166{
1167 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1168 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1169 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1170 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1171}
1172
1173
1174/**
1175 * pgmR0DynMapPage worker that deals with the tedious bits.
1176 *
1177 * @returns The page index on success, UINT32_MAX on failure.
1178 * @param pThis The dynamic mapping cache instance.
1179 * @param HCPhys The address of the page to be mapped.
1180 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
1181 * @param pVM The shared VM structure, for statistics only.
1182 */
1183static uint32_t pgmR0DynMapPageSlow(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage, PVM pVM)
1184{
1185#ifdef VBOX_WITH_STATISTICS
1186 PVMCPU pVCpu = VMMGetCpu(pVM);
1187#endif
1188 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapPageSlow);
1189
1190 /*
1191 * Check if any of the first 3 pages are unreferenced since the caller
1192 * already has made sure they aren't matching.
1193 */
1194#ifdef VBOX_WITH_STATISTICS
1195 bool fLooped = false;
1196#endif
1197 uint32_t const cPages = pThis->cPages;
1198 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1199 uint32_t iFreePage;
1200 if (!paPages[iPage].cRefs)
1201 iFreePage = iPage;
1202 else if (!paPages[(iPage + 1) % cPages].cRefs)
1203 iFreePage = (iPage + 1) % cPages;
1204 else if (!paPages[(iPage + 2) % cPages].cRefs)
1205 iFreePage = (iPage + 2) % cPages;
1206 else
1207 {
1208 /*
1209 * Search for an unused or matching entry.
1210 */
1211 iFreePage = (iPage + 3) % cPages;
1212 for (;;)
1213 {
1214 if (paPages[iFreePage].HCPhys == HCPhys)
1215 {
1216 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapPageSlowLoopHits);
1217 return iFreePage;
1218 }
1219 if (!paPages[iFreePage].cRefs)
1220 break;
1221
1222 /* advance */
1223 iFreePage = (iFreePage + 1) % cPages;
1224 if (RT_UNLIKELY(iFreePage == iPage))
1225 return UINT32_MAX;
1226 }
1227 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapPageSlowLoopMisses);
1228#ifdef VBOX_WITH_STATISTICS
1229 fLooped = true;
1230#endif
1231 }
1232 Assert(iFreePage < cPages);
1233
1234#if 0 //def VBOX_WITH_STATISTICS
1235 /* Check for lost hits. */
1236 if (!fLooped)
1237 for (uint32_t iPage2 = (iPage + 3) % cPages; iPage2 != iPage; iPage2 = (iPage2 + 1) % cPages)
1238 if (paPages[iPage2].HCPhys == HCPhys)
1239 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlowLostHits);
1240#endif
1241
1242 /*
1243 * Setup the new entry.
1244 */
1245 /*Log6(("pgmR0DynMapPageSlow: old - %RHp %#x %#llx\n", paPages[iFreePage].HCPhys, paPages[iFreePage].cRefs, paPages[iFreePage].uPte.pPae->u));*/
1246 paPages[iFreePage].HCPhys = HCPhys;
1247 RTCpuSetFill(&paPages[iFreePage].PendingSet);
1248 if (pThis->fLegacyMode)
1249 {
1250 X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
1251 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1252 X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1253 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1254 | (HCPhys & X86_PTE_PG_MASK);
1255 while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
1256 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1257 Assert(paPages[iFreePage].uPte.pLegacy->u == uNew);
1258 }
1259 else
1260 {
1261 X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
1262 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1263 X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1264 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1265 | (HCPhys & X86_PTE_PAE_PG_MASK);
1266 while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
1267 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1268 Assert(paPages[iFreePage].uPte.pPae->u == uNew);
1269 /*Log6(("pgmR0DynMapPageSlow: #%x - %RHp %p %#llx\n", iFreePage, HCPhys, paPages[iFreePage].pvPage, uNew));*/
1270 }
1271 return iFreePage;
1272}
1273
1274
1275/**
1276 * Maps a page into the pool.
1277 *
1278 * @returns Page index on success, UINT32_MAX on failure.
1279 * @param pThis The dynamic mapping cache instance.
1280 * @param HCPhys The address of the page to be mapped.
1281 * @param iRealCpu The real cpu set index. (optimization)
1282 * @param pVM The shared VM structure, for statistics only.
1283 * @param ppvPage Where to store the page address.
1284 */
1285DECLINLINE(uint32_t) pgmR0DynMapPage(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, int32_t iRealCpu, PVM pVM, void **ppvPage)
1286{
1287#ifdef VBOX_WITH_STATISTICS
1288 PVMCPU pVCpu = VMMGetCpu(pVM);
1289#endif
1290 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1291 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1292 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1293 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapPage);
1294
1295 /*
1296 * Find an entry, if possible a matching one. The HCPhys address is hashed
1297 * down to a page index, collisions are handled by linear searching.
1298 * Optimized for a hit in the first 3 pages.
1299 *
1300 * Field easy hits here and defer the tedious searching and inserting
1301 * to pgmR0DynMapPageSlow().
1302 */
1303 uint32_t const cPages = pThis->cPages;
1304 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
1305 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1306 if (RT_LIKELY(paPages[iPage].HCPhys == HCPhys))
1307 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapPageHits0);
1308 else
1309 {
1310 uint32_t iPage2 = (iPage + 1) % cPages;
1311 if (RT_LIKELY(paPages[iPage2].HCPhys == HCPhys))
1312 {
1313 iPage = iPage2;
1314 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapPageHits1);
1315 }
1316 else
1317 {
1318 iPage2 = (iPage + 2) % cPages;
1319 if (paPages[iPage2].HCPhys == HCPhys)
1320 {
1321 iPage = iPage2;
1322 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapPageHits2);
1323 }
1324 else
1325 {
1326 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage, pVM);
1327 if (RT_UNLIKELY(iPage == UINT32_MAX))
1328 {
1329 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1330 *ppvPage = NULL;
1331 return iPage;
1332 }
1333 }
1334 }
1335 }
1336
1337 /*
1338 * Reference it, update statistics and get the return address.
1339 */
1340 int32_t cRefs = ASMAtomicIncS32(&paPages[iPage].cRefs);
1341 if (cRefs == 1)
1342 {
1343 pThis->cLoad++;
1344 if (pThis->cLoad > pThis->cMaxLoad)
1345 pThis->cMaxLoad = pThis->cLoad;
1346 AssertMsg(pThis->cLoad <= pThis->cPages - pThis->cGuardPages, ("%d/%d\n", pThis->cLoad, pThis->cPages - pThis->cGuardPages));
1347 }
1348 else if (RT_UNLIKELY(cRefs <= 0))
1349 {
1350 ASMAtomicDecS32(&paPages[iPage].cRefs);
1351 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1352 *ppvPage = NULL;
1353 AssertLogRelMsgFailedReturn(("cRefs=%d iPage=%p HCPhys=%RHp\n", cRefs, iPage, HCPhys), UINT32_MAX);
1354 }
1355 void *pvPage = paPages[iPage].pvPage;
1356
1357 /*
1358 * Invalidate the entry?
1359 */
1360 bool fInvalidateIt = RTCpuSetIsMemberByIndex(&paPages[iPage].PendingSet, iRealCpu);
1361 if (RT_UNLIKELY(fInvalidateIt))
1362 RTCpuSetDelByIndex(&paPages[iPage].PendingSet, iRealCpu);
1363
1364 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1365
1366 /*
1367 * Do the actual invalidation outside the spinlock.
1368 */
1369 if (RT_UNLIKELY(fInvalidateIt))
1370 {
1371 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapPageInvlPg);
1372 ASMInvalidatePage(pvPage);
1373 }
1374
1375 *ppvPage = pvPage;
1376 return iPage;
1377}
1378
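/* A minimal sketch (not part of the original file) of the hashing scheme used
   above: the page frame number modulo the cache size picks the primary slot,
   collisions probe slots +1 and +2, and pgmR0DynMapPageSlow() continues the
   linear search from slot +3. The helper name is made up. */
DECLINLINE(uint32_t) pgmR0DynMapHashHCPhysSketch(RTHCPHYS HCPhys, uint32_t cPages)
{
    /* Primary slot; callers then try (iPage + 1) % cPages and (iPage + 2) % cPages. */
    return (uint32_t)((HCPhys >> PAGE_SHIFT) % cPages);
}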
1379
1380/**
1381 * Asserts the integrity of the pool.
1382 *
1383 * @returns VBox status code.
1384 */
1385VMMR0DECL(int) PGMR0DynMapAssertIntegrity(void)
1386{
1387 /*
1388 * Basic pool stuff that doesn't require any lock, just assumes we're a user.
1389 */
1390 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1391 if (!pThis)
1392 return VINF_SUCCESS;
1393 AssertPtrReturn(pThis, VERR_INVALID_POINTER);
1394 AssertReturn(pThis->u32Magic == PGMR0DYNMAP_MAGIC, VERR_INVALID_MAGIC);
1395 if (!pThis->cUsers)
1396 return VERR_INVALID_PARAMETER;
1397
1398
1399 int rc = VINF_SUCCESS;
1400 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1401 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1402
1403#define CHECK_RET(expr, a) \
1404 do { \
1405 if (RT_UNLIKELY(!(expr))) \
1406 { \
1407 RTSpinlockRelease(pThis->hSpinlock, &Tmp); \
1408 RTAssertMsg1Weak(#expr, __LINE__, __FILE__, __PRETTY_FUNCTION__); \
1409 RTAssertMsg2Weak a; \
1410 return VERR_INTERNAL_ERROR; \
1411 } \
1412 } while (0)
1413
1414 /*
1415 * Check that the PTEs are correct.
1416 */
1417 uint32_t cGuard = 0;
1418 uint32_t cLoad = 0;
1419 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1420 uint32_t iPage = pThis->cPages;
1421 if (pThis->fLegacyMode)
1422 {
1423 PCX86PGUINT paSavedPTEs = (PCX86PGUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1424 while (iPage-- > 0)
1425 {
1426 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1427 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1428 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1429 {
1430#ifdef PGMR0DYNMAP_GUARD_NP
1431 CHECK_RET(paPages[iPage].uPte.pLegacy->u == (paSavedPTEs[iPage] & ~(X86PGUINT)X86_PTE_P),
1432 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1433#else
1434 CHECK_RET(paPages[iPage].uPte.pLegacy->u == PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE,
1435 ("#%u: %#x", iPage, paPages[iPage].uPte.pLegacy->u));
1436#endif
1437 cGuard++;
1438 }
1439 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1440 {
1441 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1442 X86PGUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1443 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1444 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1445 CHECK_RET(paPages[iPage].uPte.pLegacy->u == uPte,
1446 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1447 if (paPages[iPage].cRefs)
1448 cLoad++;
1449 }
1450 else
1451 CHECK_RET(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage],
1452 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1453 }
1454 }
1455 else
1456 {
1457 PCX86PGPAEUINT paSavedPTEs = (PCX86PGPAEUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1458 while (iPage-- > 0)
1459 {
1460 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1461 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1462 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1463 {
1464#ifdef PGMR0DYNMAP_GUARD_NP
1465 CHECK_RET(paPages[iPage].uPte.pPae->u == (paSavedPTEs[iPage] & ~(X86PGPAEUINT)X86_PTE_P),
1466 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1467#else
1468 CHECK_RET(paPages[iPage].uPte.pPae->u == PGMR0DYNMAP_GUARD_PAGE_PAE_PTE,
1469 ("#%u: %#llx", iPage, paPages[iPage].uPte.pPae->u));
1470#endif
1471 cGuard++;
1472 }
1473 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1474 {
1475 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1476 X86PGPAEUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1477 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1478 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1479 CHECK_RET(paPages[iPage].uPte.pPae->u == uPte,
1480 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1481 if (paPages[iPage].cRefs)
1482 cLoad++;
1483 }
1484 else
1485 CHECK_RET(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage],
1486 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1487 }
1488 }
1489
1490 CHECK_RET(cLoad == pThis->cLoad, ("%u %u\n", cLoad, pThis->cLoad));
1491 CHECK_RET(cGuard == pThis->cGuardPages, ("%u %u\n", cGuard, pThis->cGuardPages));
1492
1493#undef CHECK_RET
1494 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1495 return VINF_SUCCESS;
1496}
1497
1498
1499/**
1500 * Signals the start of a new set of mappings.
1501 *
1502 * Mostly for strictness. PGMDynMapHCPage won't work unless this
1503 * API is called.
1504 *
1505 * @param pVCpu The shared data for the current virtual CPU.
1506 */
1507VMMDECL(void) PGMDynMapStartAutoSet(PVMCPU pVCpu)
1508{
1509 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
1510 Assert(pVCpu->pgm.s.AutoSet.iSubset == UINT32_MAX);
1511 pVCpu->pgm.s.AutoSet.cEntries = 0;
1512 pVCpu->pgm.s.AutoSet.iCpu = RTMpCpuIdToSetIndex(RTMpCpuId());
1513}
1514
1515
1516/**
1517 * Starts or migrates the autoset of a virtual CPU.
1518 *
1519 * This is used by HWACCMR0Enter. When we've longjumped out of the HWACCM
1520 * execution loop with the set open, we'll migrate it when re-entering. Under
1521 * normal circumstances, we'll start it so VMXR0LoadGuestState can access
1522 * guest memory.
1523 *
1524 * @returns @c true if started, @c false if migrated.
1525 * @param pVCpu The shared data for the current virtual CPU.
1526 * @thread EMT
1527 */
1528VMMDECL(bool) PGMDynMapStartOrMigrateAutoSet(PVMCPU pVCpu)
1529{
1530 bool fStartIt = pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED;
1531 if (fStartIt)
1532 PGMDynMapStartAutoSet(pVCpu);
1533 else
1534 PGMDynMapMigrateAutoSet(pVCpu);
1535 return fStartIt;
1536}
1537
1538
1539/**
1540 * Worker that performs the actual flushing of the set.
1541 *
1542 * @param pSet The set to flush.
1543 * @param cEntries The number of entries.
1544 */
1545DECLINLINE(void) pgmDynMapFlushAutoSetWorker(PPGMMAPSET pSet, uint32_t cEntries)
1546{
1547 /*
1548 * Release any pages it's referencing.
1549 */
1550 if ( cEntries != 0
1551 && RT_LIKELY(cEntries <= RT_ELEMENTS(pSet->aEntries)))
1552 {
1553 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1554 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1555 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1556
1557 uint32_t i = cEntries;
1558 while (i-- > 0)
1559 {
1560 uint32_t iPage = pSet->aEntries[i].iPage;
1561 Assert(iPage < pThis->cPages);
1562 int32_t cRefs = pSet->aEntries[i].cRefs;
1563 Assert(cRefs > 0);
1564 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1565
1566 pSet->aEntries[i].iPage = UINT16_MAX;
1567 pSet->aEntries[i].cRefs = 0;
1568 }
1569
1570 Assert(pThis->cLoad <= pThis->cPages - pThis->cGuardPages);
1571 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1572 }
1573}
1574
1575
1576/**
1577 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associates
1578 * since the PGMDynMapStartAutoSet call.
1579 *
1580 * @param pVCpu The shared data for the current virtual CPU.
1581 */
1582VMMDECL(void) PGMDynMapReleaseAutoSet(PVMCPU pVCpu)
1583{
1584 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1585
1586 /*
1587 * Close and flush the set.
1588 */
1589 uint32_t cEntries = pSet->cEntries;
1590 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1591 pSet->cEntries = PGMMAPSET_CLOSED;
1592 pSet->iSubset = UINT32_MAX;
1593 pSet->iCpu = -1;
1594
1595 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatR0DynMapSetSize[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1596 AssertMsg(cEntries < PGMMAPSET_MAX_FILL, ("%u\n", cEntries));
1597 if (cEntries > RT_ELEMENTS(pSet->aEntries) * 50 / 100)
1598 Log(("PGMDynMapReleaseAutoSet: cEntries=%d\n", pSet->cEntries));
1599
1600 pgmDynMapFlushAutoSetWorker(pSet, cEntries);
1601}
1602
1603
1604/**
1605 * Flushes the set if it's above a certain threshold.
1606 *
1607 * @param pVCpu The shared data for the current virtual CPU.
1608 */
1609VMMDECL(void) PGMDynMapFlushAutoSet(PVMCPU pVCpu)
1610{
1611 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1612 AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags()));
1613
1614 /*
1615 * Only flush it if it's at least 45% full.
1616 */
1617 uint32_t cEntries = pSet->cEntries;
1618 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1619 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatR0DynMapSetSize[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1620 if (cEntries >= RT_ELEMENTS(pSet->aEntries) * 45 / 100)
1621 {
1622 pSet->cEntries = 0;
1623
1624 AssertMsg(cEntries < PGMMAPSET_MAX_FILL, ("%u\n", cEntries));
1625 Log(("PGMDynMapFlushAutoSet: cEntries=%d\n", pSet->cEntries));
1626
1627 pgmDynMapFlushAutoSetWorker(pSet, cEntries);
1628 AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags()));
1629 }
1630}
1631
1632
1633/**
1634 * Migrates the automatic mapping set of the current vCPU if it's active and
1635 * necessary.
1636 *
1637 * This is called when re-entering the hardware assisted execution mode after a
1638 * nip down to ring-3. We run the risk that the CPU might have changed, and we
1639 * will therefore make sure all the cache entries currently in the auto set will
1640 * be valid on the new CPU. If the CPU didn't change, nothing will happen as all
1641 * the entries will have been flagged as invalidated.
1642 *
1643 * @param pVCpu The shared data for the current virtual CPU.
1644 * @thread EMT
1645 */
1646VMMDECL(void) PGMDynMapMigrateAutoSet(PVMCPU pVCpu)
1647{
1648 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1649 int32_t iRealCpu = RTMpCpuIdToSetIndex(RTMpCpuId());
1650 if (pSet->iCpu != iRealCpu)
1651 {
1652 uint32_t i = pSet->cEntries;
1653 if (i != PGMMAPSET_CLOSED)
1654 {
1655 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
1656 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
1657 {
1658 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1659 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1660 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1661
1662 while (i-- > 0)
1663 {
1664 Assert(pSet->aEntries[i].cRefs > 0);
1665 uint32_t iPage = pSet->aEntries[i].iPage;
1666 Assert(iPage < pThis->cPages);
1667 if (RTCpuSetIsMemberByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu))
1668 {
1669 RTCpuSetDelByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu);
1670 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1671
1672 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
1673 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapMigrateInvlPg);
1674
1675 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1676 }
1677 }
1678
1679 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1680 }
1681 }
1682 pSet->iCpu = iRealCpu;
1683 }
1684}
1685
1686
1687/**
1688 * Worker function that flushes the current subset.
1689 *
1690 * This is called when the set is popped or when the set
1691 * has too high a load. As also pointed out elsewhere, the
1692 * whole subset thing is a hack for working around code that
1693 * accesses too many pages. Like PGMPool.
1694 *
1695 * @param pSet The set which subset to flush.
1696 */
1697static void pgmDynMapFlushSubset(PPGMMAPSET pSet)
1698{
1699 uint32_t iSubset = pSet->iSubset;
1700 uint32_t i = pSet->cEntries;
1701 Assert(i <= RT_ELEMENTS(pSet->aEntries));
1702 if ( i > iSubset
1703 && i <= RT_ELEMENTS(pSet->aEntries))
1704 {
1705 Log(("pgmDynMapFlushSubset: cEntries=%d iSubset=%d\n", pSet->cEntries, iSubset));
1706 pSet->cEntries = iSubset;
1707
1708 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1709 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1710 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1711
1712 while (i-- > iSubset)
1713 {
1714 uint32_t iPage = pSet->aEntries[i].iPage;
1715 Assert(iPage < pThis->cPages);
1716 int32_t cRefs = pSet->aEntries[i].cRefs;
1717 Assert(cRefs > 0);
1718 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1719
1720 pSet->aEntries[i].iPage = UINT16_MAX;
1721 pSet->aEntries[i].cRefs = 0;
1722 }
1723
1724 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1725 }
1726}
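/*
 * Worked example, not part of the original file: with iSubset=3 and cEntries=7,
 * pgmDynMapFlushSubset() releases entries 6, 5, 4 and 3 (one
 * pgmR0DynMapReleasePageLocked call each), marks those slots unused
 * (iPage=UINT16_MAX, cRefs=0) and sets cEntries back to 3.  Entries 0..2, which
 * belong to the enclosing (sub)set, are left untouched.
 */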
1727
1728
1729/**
1730 * Creates a subset.
1731 *
1732 * A subset is a hack to avoid having to rewrite code that touches a lot of
1733 * pages. It prevents the mapping set from being overflowed by automatically
1734 * flushing previous mappings when a certain threshold is reached.
1735 *
1736 * Pages mapped after calling this function are only valid until the next page
1737 * is mapped.
1738 *
1739 * @returns The index of the previous subset. Pass this to
1740 *          PGMDynMapPopAutoSubset when popping it.
1741 * @param pVCpu Pointer to the virtual cpu data.
1742 */
1743VMMDECL(uint32_t) PGMDynMapPushAutoSubset(PVMCPU pVCpu)
1744{
1745 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1746 AssertReturn(pSet->cEntries != PGMMAPSET_CLOSED, UINT32_MAX);
1747 uint32_t iPrevSubset = pSet->iSubset;
1748 LogFlow(("PGMDynMapPushAutoSubset: pVCpu=%p iPrevSubset=%u\n", pVCpu, iPrevSubset));
1749
1750 pSet->iSubset = pSet->cEntries;
1751 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapSubsets);
1752 return iPrevSubset;
1753}
1754
1755
1756/**
1757 * Pops a subset created by a previous call to PGMDynMapPushAutoSubset.
1758 *
1759 * @param pVCpu Pointer to the virtual cpu data.
1760 * @param iPrevSubset What PGMDynMapPushAutoSubset returned.
1761 */
1762VMMDECL(void) PGMDynMapPopAutoSubset(PVMCPU pVCpu, uint32_t iPrevSubset)
1763{
1764 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1765 uint32_t cEntries = pSet->cEntries;
1766 LogFlow(("PGMDynMapPopAutoSubset: pVCpu=%p iPrevSubset=%u iSubset=%u cEntries=%u\n", pVCpu, iPrevSubset, pSet->iSubset, cEntries));
1767 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1768 AssertReturnVoid(pSet->iSubset >= iPrevSubset || iPrevSubset == UINT32_MAX);
1769 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatR0DynMapSetSize[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1770 if ( cEntries >= RT_ELEMENTS(pSet->aEntries) * 40 / 100
1771 && cEntries != pSet->iSubset)
1772 {
1773 AssertMsg(cEntries < PGMMAPSET_MAX_FILL, ("%u\n", cEntries));
1774 pgmDynMapFlushSubset(pSet);
1775 }
1776 pSet->iSubset = iPrevSubset;
1777}
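/*
 * Illustrative usage sketch, not part of the original file: bracketing code
 * that touches a lot of pages so it cannot overflow the auto set.  The
 * pgmR0WalkManyPages() helper is a made-up placeholder.
 *
 *     uint32_t const iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
 *     rc = pgmR0WalkManyPages(pVM, pVCpu);          // hypothetical; may map hundreds of pages
 *     PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
 *
 * Mappings made inside the subset must not be used after the pop, since popping
 * may flush them.
 */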
1778
1779
1780/**
1781 * As a final resort for a full auto set, try merge duplicate entries.
1782 *
1783 * @param pSet The set.
1784 */
1785static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
1786{
1787 for (uint32_t i = 0 ; i < pSet->cEntries; i++)
1788 {
1789 uint16_t const iPage = pSet->aEntries[i].iPage;
1790 uint32_t j = i + 1;
1791 while (j < pSet->cEntries)
1792 {
1793 if (pSet->aEntries[j].iPage != iPage)
1794 j++;
1795 else if ((uint32_t)pSet->aEntries[i].cRefs + (uint32_t)pSet->aEntries[j].cRefs < UINT16_MAX)
1796 {
1797 /* merge j into i removing j. */
1798 pSet->aEntries[i].cRefs += pSet->aEntries[j].cRefs;
1799 pSet->cEntries--;
1800 if (j < pSet->cEntries)
1801 {
1802 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
1803 pSet->aEntries[pSet->cEntries].iPage = UINT16_MAX;
1804 pSet->aEntries[pSet->cEntries].cRefs = 0;
1805 }
1806 else
1807 {
1808 pSet->aEntries[j].iPage = UINT16_MAX;
1809 pSet->aEntries[j].cRefs = 0;
1810 }
1811 }
1812 else
1813 {
1814 /* migrate the max number of refs from j into i and quit the inner loop. */
1815 uint32_t cMigrate = UINT16_MAX - 1 - pSet->aEntries[i].cRefs;
1816 Assert(pSet->aEntries[j].cRefs > cMigrate);
1817 pSet->aEntries[j].cRefs -= cMigrate;
1818 pSet->aEntries[i].cRefs = UINT16_MAX - 1;
1819 break;
1820 }
1821 }
1822 }
1823}
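/*
 * Worked example, not part of the original file: merging duplicates in a full set.
 *
 *     Before:  [0] iPage=7 cRefs=3   [1] iPage=9 cRefs=1   [2] iPage=7 cRefs=2   (cEntries=3)
 *     After:   [0] iPage=7 cRefs=5   [1] iPage=9 cRefs=1                         (cEntries=2)
 *
 * Entry [2] is folded into [0]; as [2] was the last entry nothing has to be
 * moved into the freed slot, it is simply marked unused (iPage=UINT16_MAX,
 * cRefs=0).  Only if both reference counts together would reach UINT16_MAX does
 * the code instead top up entry [i] to UINT16_MAX - 1 and keep entry [j].
 */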
1824
1825
1826/**
1827 * Common worker code for PGMDynMapHCPhys, pgmR0DynMapHCPageInlined and
1828 * pgmR0DynMapGCPageInlined.
1829 *
1830 * @returns VINF_SUCCESS, bails out to ring-3 on failure.
1831 * @param pVM The shared VM structure (for statistics).
1832 * @param pSet The set.
1833 * @param HCPhys The physical address of the page.
1834 * @param ppv Where to store the address of the mapping on success.
1835 *
1836 * @remarks This is a very hot path.
1837 */
1838int pgmR0DynMapHCPageCommon(PVM pVM, PPGMMAPSET pSet, RTHCPHYS HCPhys, void **ppv)
1839{
1840 LogFlow(("pgmR0DynMapHCPageCommon: pVM=%p pSet=%p HCPhys=%RHp ppv=%p\n",
1841 pVM, pSet, HCPhys, ppv));
1842#ifdef VBOX_WITH_STATISTICS
1843 PVMCPU pVCpu = VMMGetCpu(pVM);
1844#endif
1845 AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags()));
1846
1847 /*
1848 * Map it.
1849 */
1850 void *pvPage;
1851 uint32_t const iPage = pgmR0DynMapPage(g_pPGMR0DynMap, HCPhys, pSet->iCpu, pVM, &pvPage);
1852 if (RT_UNLIKELY(iPage == UINT32_MAX))
1853 {
1854 RTAssertMsg2Weak("PGMDynMapHCPage: cLoad=%u/%u cPages=%u cGuardPages=%u\n",
1855 g_pPGMR0DynMap->cLoad, g_pPGMR0DynMap->cMaxLoad, g_pPGMR0DynMap->cPages, g_pPGMR0DynMap->cGuardPages);
1856 if (!g_fPGMR0DynMapTestRunning)
1857 VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_VM_R0_ASSERTION, 0);
1858 *ppv = NULL;
1859 return VERR_PGM_DYNMAP_FAILED;
1860 }
1861
1862 /*
1863 * Add the page to the auto reference set.
1864 *
1865 * The typical usage pattern means that the same pages will be mapped
1866 * several times in the same set. We can catch most of these
1867 * remappings by looking a few pages back into the set. (The searching
1868 * and set optimizing path will hardly ever be used when doing this.)
1869 */
1870 AssertCompile(RT_ELEMENTS(pSet->aEntries) >= 8);
1871 int32_t i = pSet->cEntries;
1872 if (i-- < 5)
1873 {
1874 unsigned iEntry = pSet->cEntries++;
1875 pSet->aEntries[iEntry].cRefs = 1;
1876 pSet->aEntries[iEntry].iPage = iPage;
1877 pSet->aEntries[iEntry].pvPage = pvPage;
1878 pSet->aEntries[iEntry].HCPhys = HCPhys;
1879 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
1880 }
1881 /* Any of the last 5 pages? */
1882 else if ( pSet->aEntries[i - 0].iPage == iPage
1883 && pSet->aEntries[i - 0].cRefs < UINT16_MAX - 1)
1884 pSet->aEntries[i - 0].cRefs++;
1885 else if ( pSet->aEntries[i - 1].iPage == iPage
1886 && pSet->aEntries[i - 1].cRefs < UINT16_MAX - 1)
1887 pSet->aEntries[i - 1].cRefs++;
1888 else if ( pSet->aEntries[i - 2].iPage == iPage
1889 && pSet->aEntries[i - 2].cRefs < UINT16_MAX - 1)
1890 pSet->aEntries[i - 2].cRefs++;
1891 else if ( pSet->aEntries[i - 3].iPage == iPage
1892 && pSet->aEntries[i - 3].cRefs < UINT16_MAX - 1)
1893 pSet->aEntries[i - 3].cRefs++;
1894 else if ( pSet->aEntries[i - 4].iPage == iPage
1895 && pSet->aEntries[i - 4].cRefs < UINT16_MAX - 1)
1896 pSet->aEntries[i - 4].cRefs++;
1897 /* Don't bother searching unless we're above a 60% load. */
1898 else if (RT_LIKELY(i <= (int32_t)RT_ELEMENTS(pSet->aEntries) * 60 / 100))
1899 {
1900 unsigned iEntry = pSet->cEntries++;
1901 pSet->aEntries[iEntry].cRefs = 1;
1902 pSet->aEntries[iEntry].iPage = iPage;
1903 pSet->aEntries[iEntry].pvPage = pvPage;
1904 pSet->aEntries[iEntry].HCPhys = HCPhys;
1905 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
1906 }
1907 else
1908 {
1909 /* Search the rest of the set. */
1910 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
1911 i -= 4;
1912 while (i-- > 0)
1913 if ( pSet->aEntries[i].iPage == iPage
1914 && pSet->aEntries[i].cRefs < UINT16_MAX - 1)
1915 {
1916 pSet->aEntries[i].cRefs++;
1917 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapSetSearchHits);
1918 break;
1919 }
1920 if (i < 0)
1921 {
1922 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapSetSearchMisses);
1923 if (pSet->iSubset < pSet->cEntries)
1924 {
1925 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapSetSearchFlushes);
1926 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatR0DynMapSetSize[(pSet->cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1927 AssertMsg(pSet->cEntries < PGMMAPSET_MAX_FILL, ("%u\n", pSet->cEntries));
1928 pgmDynMapFlushSubset(pSet);
1929 }
1930
1931 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
1932 {
1933 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0DynMapSetOptimize);
1934 pgmDynMapOptimizeAutoSet(pSet);
1935 }
1936
1937 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
1938 {
1939 unsigned iEntry = pSet->cEntries++;
1940 pSet->aEntries[iEntry].cRefs = 1;
1941 pSet->aEntries[iEntry].iPage = iPage;
1942 pSet->aEntries[iEntry].pvPage = pvPage;
1943 pSet->aEntries[iEntry].HCPhys = HCPhys;
1944 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
1945 }
1946 else
1947 {
1948 /* We're screwed. */
1949 pgmR0DynMapReleasePage(g_pPGMR0DynMap, iPage, 1);
1950
1951 RTAssertMsg2Weak("PGMDynMapHCPage: set is full!\n");
1952 if (!g_fPGMR0DynMapTestRunning)
1953 VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_VM_R0_ASSERTION, 0);
1954 *ppv = NULL;
1955 return VERR_PGM_DYNMAP_FULL_SET;
1956 }
1957 }
1958 }
1959
1960 *ppv = pvPage;
1961 return VINF_SUCCESS;
1962}
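/*
 * Illustrative caller sketch, not part of the original file: the returned
 * mapping needs no explicit unmap, it stays valid until the auto set (or the
 * current subset) is flushed.  The 64-bit read is just an example use.
 *
 *     void *pv;
 *     int rc = pgmR0DynMapHCPageCommon(pVM, &pVCpu->pgm.s.AutoSet, HCPhys, &pv);
 *     if (RT_SUCCESS(rc))
 *     {
 *         uint64_t u64 = *(uint64_t const *)pv;    // e.g. peek at the start of the page
 *         ...
 *     }
 */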
1963
1964
1965#if 0 /* Not used in R0, should internalize the other PGMDynMapHC/GCPage too. */
1966/* documented elsewhere - a bit of a mess. */
1967VMMDECL(int) PGMDynMapHCPage(PVM pVM, RTHCPHYS HCPhys, void **ppv)
1968{
1970    PVMCPU          pVCpu = VMMGetCpu(pVM); /* needed below as well, not only for statistics */
1972 /*
1973 * Validate state.
1974 */
1975 STAM_PROFILE_START(&pVCpu->pgm.s.StatR0DynMapHCPage, a);
1976 AssertPtr(ppv);
1977 AssertMsg(pVM->pgm.s.pvR0DynMapUsed == g_pPGMR0DynMap,
1978 ("%p != %p\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap));
1979 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1981    AssertPtr(pVCpu);
1982 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1983 AssertMsg(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries),
1984 ("%#x (%u)\n", pSet->cEntries, pSet->cEntries));
1985
1986 /*
1987 * Call common code.
1988 */
1989 int rc = pgmR0DynMapHCPageCommon(pVM, pSet, HCPhys, ppv);
1990
1991 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatR0DynMapHCPage, a);
1992 return rc;
1993}
1994#endif
1995
1996
1997#if 0 /*def DEBUG*/
1998/** For pgmR0DynMapTest3PerCpu. */
1999typedef struct PGMR0DYNMAPTEST
2000{
2001 uint32_t u32Expect;
2002 uint32_t *pu32;
2003 uint32_t volatile cFailures;
2004} PGMR0DYNMAPTEST;
2005typedef PGMR0DYNMAPTEST *PPGMR0DYNMAPTEST;
2006
2007/**
2008 * Checks that the content of the page is the same on all CPUs, i.e. that there
2009 * are no CPU specific PTs or similar nasty stuff involved.
2010 *
2011 * @param idCpu The current CPU.
2012 * @param   pvUser1     Pointer to a PGMR0DYNMAPTEST structure.
2013 * @param pvUser2 Unused, ignored.
2014 */
2015static DECLCALLBACK(void) pgmR0DynMapTest3PerCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
2016{
2017 PPGMR0DYNMAPTEST pTest = (PPGMR0DYNMAPTEST)pvUser1;
2018 ASMInvalidatePage(pTest->pu32);
2019 if (*pTest->pu32 != pTest->u32Expect)
2020 ASMAtomicIncU32(&pTest->cFailures);
2021 NOREF(pvUser2); NOREF(idCpu);
2022}
2023
2024
2025/**
2026 * Performs some basic tests in debug builds.
2027 */
2028static int pgmR0DynMapTest(PVM pVM)
2029{
2030 LogRel(("pgmR0DynMapTest: ****** START ******\n"));
2031 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
2032 PPGMMAPSET pSet = &pVM->aCpus[0].pgm.s.AutoSet;
2033 uint32_t i;
2034
2035 /*
2036 * Assert internal integrity first.
2037 */
2038 LogRel(("Test #0\n"));
2039 int rc = PGMR0DynMapAssertIntegrity();
2040 if (RT_FAILURE(rc))
2041 return rc;
2042
2043 void *pvR0DynMapUsedSaved = pVM->pgm.s.pvR0DynMapUsed;
2044 pVM->pgm.s.pvR0DynMapUsed = pThis;
2045 g_fPGMR0DynMapTestRunning = true;
2046
2047 /*
2048 * Simple test, map CR3 twice and check that we're getting the
2049 * same mapping address back.
2050 */
2051 LogRel(("Test #1\n"));
2052 ASMIntDisable();
2053 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
2054
2055 uint64_t cr3 = ASMGetCR3() & ~(uint64_t)PAGE_OFFSET_MASK;
2056 void *pv = (void *)(intptr_t)-1;
2057 void *pv2 = (void *)(intptr_t)-2;
2058 rc = PGMDynMapHCPage(pVM, cr3, &pv);
2059 int rc2 = PGMDynMapHCPage(pVM, cr3, &pv2);
2060 ASMIntEnable();
2061 if ( RT_SUCCESS(rc2)
2062 && RT_SUCCESS(rc)
2063 && pv == pv2)
2064 {
2065 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2066 rc = PGMR0DynMapAssertIntegrity();
2067
2068 /*
2069 * Check that the simple set overflow code works by filling it
2070 * with more CR3 mappings.
2071 */
2072 LogRel(("Test #2\n"));
2073 ASMIntDisable();
2074 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2075 for (i = 0 ; i < UINT16_MAX*2 - 1 && RT_SUCCESS(rc) && pv2 == pv; i++)
2076 {
2077 pv2 = (void *)(intptr_t)-4;
2078 rc = PGMDynMapHCPage(pVM, cr3, &pv2);
2079 }
2080 ASMIntEnable();
2081 if (RT_FAILURE(rc) || pv != pv2)
2082 {
2083            LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
2084 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
2085 }
2086 else if (pSet->cEntries != 5)
2087 {
2088            LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, 5));
2089 rc = VERR_INTERNAL_ERROR;
2090 }
2091 else if ( pSet->aEntries[4].cRefs != UINT16_MAX - 1
2092 || pSet->aEntries[3].cRefs != UINT16_MAX - 1
2093 || pSet->aEntries[2].cRefs != 1
2094 || pSet->aEntries[1].cRefs != 1
2095 || pSet->aEntries[0].cRefs != 1)
2096 {
2097 LogRel(("failed(%d): bad set dist: ", __LINE__));
2098 for (i = 0; i < pSet->cEntries; i++)
2099 LogRel(("[%d]=%d, ", i, pSet->aEntries[i].cRefs));
2100 LogRel(("\n"));
2101 rc = VERR_INTERNAL_ERROR;
2102 }
2103 if (RT_SUCCESS(rc))
2104 rc = PGMR0DynMapAssertIntegrity();
2105 if (RT_SUCCESS(rc))
2106 {
2107 /*
2108 * Trigger an set optimization run (exactly).
2109 */
2110 LogRel(("Test #3\n"));
2111 ASMIntDisable();
2112 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2113 pv2 = NULL;
2114 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) - 5 && RT_SUCCESS(rc) && pv2 != pv; i++)
2115 {
2116 pv2 = (void *)(intptr_t)(-5 - i);
2117 rc = PGMDynMapHCPage(pVM, cr3 + PAGE_SIZE * (i + 5), &pv2);
2118 }
2119 ASMIntEnable();
2120 if (RT_FAILURE(rc) || pv == pv2)
2121 {
2122 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
2123 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
2124 }
2125 else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries))
2126 {
2127 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2128 rc = VERR_INTERNAL_ERROR;
2129 }
2130 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2131 if (RT_SUCCESS(rc))
2132 rc = PGMR0DynMapAssertIntegrity();
2133 if (RT_SUCCESS(rc))
2134 {
2135 /*
2136 * Trigger an overflow error.
2137 */
2138 LogRel(("Test #4\n"));
2139 ASMIntDisable();
2140 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2141 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) + 2; i++)
2142 {
2143 rc = PGMDynMapHCPage(pVM, cr3 - PAGE_SIZE * (i + 5), &pv2);
2144 if (RT_SUCCESS(rc))
2145 rc = PGMR0DynMapAssertIntegrity();
2146 if (RT_FAILURE(rc))
2147 break;
2148 }
2149 ASMIntEnable();
2150 if (rc == VERR_PGM_DYNMAP_FULL_SET)
2151 {
2152 /* flush the set. */
2153 LogRel(("Test #5\n"));
2154 ASMIntDisable();
2155 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2156 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
2157 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
2158 ASMIntEnable();
2159
2160 rc = PGMR0DynMapAssertIntegrity();
2161 }
2162 else
2163 {
2164 LogRel(("failed(%d): rc=%Rrc, wanted %d ; pv2=%p Set=%u/%u; i=%d\n", __LINE__,
2165 rc, VERR_PGM_DYNMAP_FULL_SET, pv2, pSet->cEntries, RT_ELEMENTS(pSet->aEntries), i));
2166 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
2167 }
2168 }
2169 }
2170 }
2171 else
2172 {
2173 LogRel(("failed(%d): rc=%Rrc rc2=%Rrc; pv=%p pv2=%p\n", __LINE__, rc, rc2, pv, pv2));
2174 if (RT_SUCCESS(rc))
2175 rc = rc2;
2176 }
2177
2178 /*
2179 * Check that everyone sees the same stuff.
2180 */
2181 if (RT_SUCCESS(rc))
2182 {
2183        LogRel(("Test #6\n"));
2184 ASMIntDisable();
2185 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2186 RTHCPHYS HCPhysPT = RTR0MemObjGetPagePhysAddr(pThis->pSegHead->ahMemObjPTs[0], 0);
2187 rc = PGMDynMapHCPage(pVM, HCPhysPT, &pv);
2188 if (RT_SUCCESS(rc))
2189 {
2190 PGMR0DYNMAPTEST Test;
2191 uint32_t *pu32Real = &pThis->paPages[pThis->pSegHead->iPage].uPte.pLegacy->u;
2192 Test.pu32 = (uint32_t *)((uintptr_t)pv | ((uintptr_t)pu32Real & PAGE_OFFSET_MASK));
2193 Test.u32Expect = *pu32Real;
2194 ASMAtomicWriteU32(&Test.cFailures, 0);
2195 ASMIntEnable();
2196
2197 rc = RTMpOnAll(pgmR0DynMapTest3PerCpu, &Test, NULL);
2198 if (RT_FAILURE(rc))
2199 LogRel(("failed(%d): RTMpOnAll rc=%Rrc\n", __LINE__, rc));
2200 else if (Test.cFailures)
2201 {
2202 LogRel(("failed(%d): cFailures=%d pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", __LINE__,
2203 Test.cFailures, pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
2204 rc = VERR_INTERNAL_ERROR;
2205 }
2206 else
2207 LogRel(("pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n",
2208 pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
2209 }
2210 else
2211 {
2212 ASMIntEnable();
2213            LogRel(("failed(%d): rc=%Rrc\n", __LINE__, rc));
2214 }
2215 }
2216
2217 /*
2218 * Clean up.
2219 */
2220 LogRel(("Cleanup.\n"));
2221 ASMIntDisable();
2222 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2223 PGMDynMapFlushAutoSet(&pVM->aCpus[0]);
2224 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
2225 ASMIntEnable();
2226
2227 if (RT_SUCCESS(rc))
2228 rc = PGMR0DynMapAssertIntegrity();
2229 else
2230 PGMR0DynMapAssertIntegrity();
2231
2232 g_fPGMR0DynMapTestRunning = false;
2233 LogRel(("Result: rc=%Rrc Load=%u/%u/%u Set=%#x/%u\n", rc,
2234 pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2235 pVM->pgm.s.pvR0DynMapUsed = pvR0DynMapUsedSaved;
2236 LogRel(("pgmR0DynMapTest: ****** END ******\n"));
2237 return rc;
2238}
2239#endif /* DEBUG */
2240