VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp@ 19420

Last change on this file since 19420 was 19403, checked in by vboxsync, 15 years ago

VBox/parma.h,VMM: VMCPU_MAX_CPU_COUNT & VMM_MAX_CPUS => VMM_MAX_CPU_COUNT, added VMM_MIN_CPU_COUNT for schema future replacement dropping a hint about these constants Main.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 78.2 KB
Line 
1/* $Id: PGMR0DynMap.cpp 19403 2009-05-05 22:23:42Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, ring-0 dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PGM
26#include <VBox/pgm.h>
27#include "../PGMInternal.h"
28#include <VBox/vm.h>
29#include <VBox/sup.h>
30#include <VBox/err.h>
31#include <iprt/asm.h>
32#include <iprt/alloc.h>
33#include <iprt/assert.h>
34#include <iprt/cpuset.h>
35#include <iprt/memobj.h>
36#include <iprt/mp.h>
37#include <iprt/semaphore.h>
38#include <iprt/spinlock.h>
39#include <iprt/string.h>
40
41
42/*******************************************************************************
43* Defined Constants And Macros *
44*******************************************************************************/
/** Upper limit for the size of the mapping cache, given in pages. */
#define PGMR0DYNMAP_MAX_PAGES               ((16*_1M) >> PAGE_SHIFT)
/** Size (in pages) of the small segments that are used when allocating a
 *  single big segment fails with an out-of-memory condition. */
#define PGMR0DYNMAP_SMALL_SEG_PAGES         128
/** Number of pages reserved for each CPU. */
#define PGMR0DYNMAP_PAGES_PER_CPU           256
/** Minimum number of pages reserved for each CPU.
 *  Must be equal to or larger than the autoset size. */
#define PGMR0DYNMAP_PAGES_PER_CPU_MIN       64
/** Number of guard pages (one in strict builds, none otherwise).
 * @remarks Never do tuning of the hashing or whatnot with a strict build! */
#ifdef VBOX_STRICT
# define PGMR0DYNMAP_GUARD_PAGES            1
#else
# define PGMR0DYNMAP_GUARD_PAGES            0
#endif
/** Dummy physical address stored in the cache entry of a guard page. */
#define PGMR0DYNMAP_GUARD_PAGE_HCPHYS       UINT32_C(0x7777feed)
/** Dummy reference count stored in the cache entry of a guard page.
 *  (Must be non-zero.) */
#define PGMR0DYNMAP_GUARD_PAGE_REF_COUNT    INT32_C(0x7777feed)
#if 0
/** Define this to only clear the present bit on guard pages.
 * The alternative is to replace the entire PTE with a bad not-present
 * PTE. Either way, XNU will screw us. :-/ */
#define PGMR0DYNMAP_GUARD_NP
#endif
/** Dummy PTE value written for a guard page, 32-bit legacy paging. */
#define PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE   X86_PTE_PG_MASK
/** Dummy PTE value written for a guard page, PAE/AMD64 paging. */
#define PGMR0DYNMAP_GUARD_PAGE_PAE_PTE      UINT64_MAX /*X86_PTE_PAE_PG_MASK*/
/** Calculates the overload threshold for a cache of @a cPages pages.
 *  Currently set at 50%. */
#define PGMR0DYNMAP_CALC_OVERLOAD(cPages)   ((cPages) / 2)

#if 0
/* Assertions cause panics if preemption is disabled; this can be used to work around that. */
//#define RTSpinlockAcquire(a,b) do {} while (0)
//#define RTSpinlockRelease(a,b) do {} while (0)
#endif
84
85
86/*******************************************************************************
87* Structures and Typedefs *
88*******************************************************************************/
89/**
90 * Ring-0 dynamic mapping cache segment.
91 *
92 * The dynamic mapping cache can be extended with additional segments if the
93 * load is found to be too high. This done the next time a VM is created, under
94 * the protection of the init mutex. The arrays is reallocated and the new
95 * segment is added to the end of these. Nothing is rehashed of course, as the
96 * indexes / addresses must remain unchanged.
97 *
98 * This structure is only modified while owning the init mutex or during module
99 * init / term.
100 */
101typedef struct PGMR0DYNMAPSEG
102{
103 /** Pointer to the next segment. */
104 struct PGMR0DYNMAPSEG *pNext;
105 /** The memory object for the virtual address range that we're abusing. */
106 RTR0MEMOBJ hMemObj;
107 /** The start page in the cache. (I.e. index into the arrays.) */
108 uint16_t iPage;
109 /** The number of pages this segment contributes. */
110 uint16_t cPages;
111 /** The number of page tables. */
112 uint16_t cPTs;
113 /** The memory objects for the page tables. */
114 RTR0MEMOBJ ahMemObjPTs[1];
115} PGMR0DYNMAPSEG;
116/** Pointer to a ring-0 dynamic mapping cache segment. */
117typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
118
119
120/**
121 * Ring-0 dynamic mapping cache entry.
122 *
123 * This structure tracks
124 */
125typedef struct PGMR0DYNMAPENTRY
126{
127 /** The physical address of the currently mapped page.
128 * This is duplicate for three reasons: cache locality, cache policy of the PT
129 * mappings and sanity checks. */
130 RTHCPHYS HCPhys;
131 /** Pointer to the page. */
132 void *pvPage;
133 /** The number of references. */
134 int32_t volatile cRefs;
135 /** PTE pointer union. */
136 union PGMR0DYNMAPENTRY_PPTE
137 {
138 /** PTE pointer, 32-bit legacy version. */
139 PX86PTE pLegacy;
140 /** PTE pointer, PAE version. */
141 PX86PTEPAE pPae;
142 /** PTE pointer, the void version. */
143 void *pv;
144 } uPte;
145 /** CPUs that haven't invalidated this entry after it's last update. */
146 RTCPUSET PendingSet;
147} PGMR0DYNMAPENTRY;
148/** Pointer to a ring-0 dynamic mapping cache entry. */
149typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
150
151
152/**
153 * Ring-0 dynamic mapping cache.
154 *
155 * This is initialized during VMMR0 module init but no segments are allocated at
156 * that time. Segments will be added when the first VM is started and removed
157 * again when the last VM shuts down, thus avoid consuming memory while dormant.
158 * At module termination, the remaining bits will be freed up.
159 */
160typedef struct PGMR0DYNMAP
161{
162 /** The usual magic number / eye catcher (PGMR0DYNMAP_MAGIC). */
163 uint32_t u32Magic;
164 /** Spinlock serializing the normal operation of the cache. */
165 RTSPINLOCK hSpinlock;
166 /** Array for tracking and managing the pages. */
167 PPGMR0DYNMAPENTRY paPages;
168 /** The cache size given as a number of pages. */
169 uint32_t cPages;
170 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
171 bool fLegacyMode;
172 /** The current load.
173 * This does not include guard pages. */
174 uint32_t cLoad;
175 /** The max load ever.
176 * This is maintained to get trigger adding of more mapping space. */
177 uint32_t cMaxLoad;
178 /** Initialization / termination lock. */
179 RTSEMFASTMUTEX hInitLock;
180 /** The number of guard pages. */
181 uint32_t cGuardPages;
182 /** The number of users (protected by hInitLock). */
183 uint32_t cUsers;
184 /** Array containing a copy of the original page tables.
185 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
186 void *pvSavedPTEs;
187 /** List of segments. */
188 PPGMR0DYNMAPSEG pSegHead;
189 /** The paging mode. */
190 SUPPAGINGMODE enmPgMode;
191} PGMR0DYNMAP;
192/** Pointer to the ring-0 dynamic mapping cache */
193typedef PGMR0DYNMAP *PPGMR0DYNMAP;
194
195/** PGMR0DYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
196#define PGMR0DYNMAP_MAGIC 0x19640201
197
198
199/**
200 * Paging level data.
201 */
202typedef struct PGMR0DYNMAPPGLVL
203{
204 uint32_t cLevels; /**< The number of levels. */
205 struct
206 {
207 RTHCPHYS HCPhys; /**< The address of the page for the current level,
208 * i.e. what hMemObj/hMapObj is currently mapping. */
209 RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
210 RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
211 RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
212 uint32_t fPtrShift; /**< The pointer shift count. */
213 uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
214 uint64_t fAndMask; /**< And mask to check entry flags. */
215 uint64_t fResMask; /**< The result from applying fAndMask. */
216 union
217 {
218 void *pv; /**< hMapObj address. */
219 PX86PGUINT paLegacy; /**< Legacy table view. */
220 PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
221 } u;
222 } a[4];
223} PGMR0DYNMAPPGLVL;
224/** Pointer to paging level data. */
225typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
226
227
228/*******************************************************************************
229* Global Variables *
230*******************************************************************************/
231/** Pointer to the ring-0 dynamic mapping cache. */
232static PPGMR0DYNMAP g_pPGMR0DynMap;
233/** For overflow testing. */
234static bool g_fPGMR0DynMapTestRunning = false;
235
236
237/*******************************************************************************
238* Internal Functions *
239*******************************************************************************/
240static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs);
241static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis);
242static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis);
243static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis);
244#if 0 /*def DEBUG*/
245static int pgmR0DynMapTest(PVM pVM);
246#endif
247
248
249/**
250 * Initializes the ring-0 dynamic mapping cache.
251 *
252 * @returns VBox status code.
253 */
254VMMR0DECL(int) PGMR0DynMapInit(void)
255{
256 Assert(!g_pPGMR0DynMap);
257
258 /*
259 * Create and initialize the cache instance.
260 */
261 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)RTMemAllocZ(sizeof(*pThis));
262 AssertLogRelReturn(pThis, VERR_NO_MEMORY);
263 int rc = VINF_SUCCESS;
264 pThis->enmPgMode = SUPR0GetPagingMode();
265 switch (pThis->enmPgMode)
266 {
267 case SUPPAGINGMODE_32_BIT:
268 case SUPPAGINGMODE_32_BIT_GLOBAL:
269 pThis->fLegacyMode = false;
270 break;
271 case SUPPAGINGMODE_PAE:
272 case SUPPAGINGMODE_PAE_GLOBAL:
273 case SUPPAGINGMODE_PAE_NX:
274 case SUPPAGINGMODE_PAE_GLOBAL_NX:
275 case SUPPAGINGMODE_AMD64:
276 case SUPPAGINGMODE_AMD64_GLOBAL:
277 case SUPPAGINGMODE_AMD64_NX:
278 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
279 pThis->fLegacyMode = false;
280 break;
281 default:
282 rc = VERR_INTERNAL_ERROR;
283 break;
284 }
285 if (RT_SUCCESS(rc))
286 {
287 rc = RTSemFastMutexCreate(&pThis->hInitLock);
288 if (RT_SUCCESS(rc))
289 {
290 rc = RTSpinlockCreate(&pThis->hSpinlock);
291 if (RT_SUCCESS(rc))
292 {
293 pThis->u32Magic = PGMR0DYNMAP_MAGIC;
294 g_pPGMR0DynMap = pThis;
295 return VINF_SUCCESS;
296 }
297 RTSemFastMutexDestroy(pThis->hInitLock);
298 }
299 }
300 RTMemFree(pThis);
301 return rc;
302}
303
304
305/**
306 * Terminates the ring-0 dynamic mapping cache.
307 */
308VMMR0DECL(void) PGMR0DynMapTerm(void)
309{
310 /*
311 * Destroy the cache.
312 *
313 * There is not supposed to be any races here, the loader should
314 * make sure about that. So, don't bother locking anything.
315 *
316 * The VM objects should all be destroyed by now, so there is no
317 * dangling users or anything like that to clean up. This routine
318 * is just a mirror image of PGMR0DynMapInit.
319 */
320 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
321 if (pThis)
322 {
323 AssertPtr(pThis);
324 g_pPGMR0DynMap = NULL;
325
326 /* This should *never* happen, but in case it does try not to leak memory. */
327 AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->pvSavedPTEs && !pThis->cPages,
328 ("cUsers=%d paPages=%p pvSavedPTEs=%p cPages=%#x\n",
329 pThis->cUsers, pThis->paPages, pThis->pvSavedPTEs, pThis->cPages));
330 if (pThis->paPages)
331 pgmR0DynMapTearDown(pThis);
332
333 /* Free the associated resources. */
334 RTSemFastMutexDestroy(pThis->hInitLock);
335 pThis->hInitLock = NIL_RTSEMFASTMUTEX;
336 RTSpinlockDestroy(pThis->hSpinlock);
337 pThis->hSpinlock = NIL_RTSPINLOCK;
338 pThis->u32Magic = UINT32_MAX;
339 RTMemFree(pThis);
340 }
341}
342
343
344/**
345 * Initializes the dynamic mapping cache for a new VM.
346 *
347 * @returns VBox status code.
348 * @param pVM Pointer to the shared VM structure.
349 */
350VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
351{
352 AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
353
354 /*
355 * Initialize the auto sets.
356 */
357 VMCPUID idCpu = pVM->cCPUs;
358 AssertReturn(idCpu > 0 && idCpu <= VMM_MAX_CPU_COUNT, VERR_INTERNAL_ERROR);
359 while (idCpu-- > 0)
360 {
361 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
362 uint32_t j = RT_ELEMENTS(pSet->aEntries);
363 while (j-- > 0)
364 {
365 pSet->aEntries[j].iPage = UINT16_MAX;
366 pSet->aEntries[j].cRefs = 0;
367 pSet->aEntries[j].pvPage = NULL;
368 pSet->aEntries[j].HCPhys = NIL_RTHCPHYS;
369 }
370 pSet->cEntries = PGMMAPSET_CLOSED;
371 pSet->iSubset = UINT32_MAX;
372 pSet->iCpu = -1;
373 memset(&pSet->aiHashTable[0], 0xff, sizeof(pSet->aiHashTable));
374 }
375
376 /*
377 * Do we need the cache? Skip the last bit if we don't.
378 */
379 if (!VMMIsHwVirtExtForced(pVM))
380 return VINF_SUCCESS;
381
382 /*
383 * Reference and if necessary setup or expand the cache.
384 */
385 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
386 AssertPtrReturn(pThis, VERR_INTERNAL_ERROR);
387 int rc = RTSemFastMutexRequest(pThis->hInitLock);
388 AssertLogRelRCReturn(rc, rc);
389
390 pThis->cUsers++;
391 if (pThis->cUsers == 1)
392 {
393 rc = pgmR0DynMapSetup(pThis);
394#if 0 /*def DEBUG*/
395 if (RT_SUCCESS(rc))
396 {
397 rc = pgmR0DynMapTest(pVM);
398 if (RT_FAILURE(rc))
399 pgmR0DynMapTearDown(pThis);
400 }
401#endif
402 }
403 else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages - pThis->cGuardPages))
404 rc = pgmR0DynMapExpand(pThis);
405 if (RT_SUCCESS(rc))
406 pVM->pgm.s.pvR0DynMapUsed = pThis;
407 else
408 pThis->cUsers--;
409
410 RTSemFastMutexRelease(pThis->hInitLock);
411 return rc;
412}
413
414
415/**
416 * Terminates the dynamic mapping cache usage for a VM.
417 *
418 * @param pVM Pointer to the shared VM structure.
419 */
420VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
421{
422 /*
423 * Return immediately if we're not using the cache.
424 */
425 if (!pVM->pgm.s.pvR0DynMapUsed)
426 return;
427
428 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
429 AssertPtrReturnVoid(pThis);
430
431 int rc = RTSemFastMutexRequest(pThis->hInitLock);
432 AssertLogRelRCReturnVoid(rc);
433
434 if (pVM->pgm.s.pvR0DynMapUsed == pThis)
435 {
436 pVM->pgm.s.pvR0DynMapUsed = NULL;
437
438#ifdef VBOX_STRICT
439 PGMR0DynMapAssertIntegrity();
440#endif
441
442 /*
443 * Clean up and check the auto sets.
444 */
445 VMCPUID idCpu = pVM->cCPUs;
446 while (idCpu-- > 0)
447 {
448 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
449 uint32_t j = pSet->cEntries;
450 if (j <= RT_ELEMENTS(pSet->aEntries))
451 {
452 /*
453 * The set is open, close it.
454 */
455 while (j-- > 0)
456 {
457 int32_t cRefs = pSet->aEntries[j].cRefs;
458 uint32_t iPage = pSet->aEntries[j].iPage;
459 LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
460 if (iPage < pThis->cPages && cRefs > 0)
461 pgmR0DynMapReleasePage(pThis, iPage, cRefs);
462 else
463 AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
464
465 pSet->aEntries[j].iPage = UINT16_MAX;
466 pSet->aEntries[j].cRefs = 0;
467 pSet->aEntries[j].pvPage = NULL;
468 pSet->aEntries[j].HCPhys = NIL_RTHCPHYS;
469 }
470 pSet->cEntries = PGMMAPSET_CLOSED;
471 pSet->iSubset = UINT32_MAX;
472 pSet->iCpu = -1;
473 }
474 else
475 AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
476
477 j = RT_ELEMENTS(pSet->aEntries);
478 while (j-- > 0)
479 {
480 Assert(pSet->aEntries[j].iPage == UINT16_MAX);
481 Assert(!pSet->aEntries[j].cRefs);
482 }
483 }
484
485 /*
486 * Release our reference to the mapping cache.
487 */
488 Assert(pThis->cUsers > 0);
489 pThis->cUsers--;
490 if (!pThis->cUsers)
491 pgmR0DynMapTearDown(pThis);
492 }
493 else
494 AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
495
496 RTSemFastMutexRelease(pThis->hInitLock);
497}
498
499
500/**
501 * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
502 *
503 * @param idCpu The current CPU.
504 * @param pvUser1 The dynamic mapping cache instance.
505 * @param pvUser2 Unused, NULL.
506 */
507static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
508{
509 Assert(!pvUser2);
510 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)pvUser1;
511 Assert(pThis == g_pPGMR0DynMap);
512 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
513 uint32_t iPage = pThis->cPages;
514 while (iPage-- > 0)
515 ASMInvalidatePage(paPages[iPage].pvPage);
516}
517
518
519/**
520 * Shoot down the TLBs for every single cache entry on all CPUs.
521 *
522 * @returns IPRT status code (RTMpOnAll).
523 * @param pThis The dynamic mapping cache instance.
524 */
525static int pgmR0DynMapTlbShootDown(PPGMR0DYNMAP pThis)
526{
527 int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
528 AssertRC(rc);
529 if (RT_FAILURE(rc))
530 {
531 uint32_t iPage = pThis->cPages;
532 while (iPage-- > 0)
533 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
534 }
535 return rc;
536}
537
538
539/**
540 * Calculate the new cache size based on cMaxLoad statistics.
541 *
542 * @returns Number of pages.
543 * @param pThis The dynamic mapping cache instance.
544 * @param pcMinPages The minimal size in pages.
545 */
546static uint32_t pgmR0DynMapCalcNewSize(PPGMR0DYNMAP pThis, uint32_t *pcMinPages)
547{
548 Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
549
550 /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU(_MIN). */
551 RTCPUID cCpus = RTMpGetCount();
552 AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0);
553 uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
554 uint32_t cMinPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU_MIN;
555
556 /* adjust against cMaxLoad. */
557 AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
558 if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
559 pThis->cMaxLoad = 0;
560
561 while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
562 cPages += PGMR0DYNMAP_PAGES_PER_CPU;
563
564 if (pThis->cMaxLoad > cMinPages)
565 cMinPages = pThis->cMaxLoad;
566
567 /* adjust against max and current size. */
568 if (cPages < pThis->cPages)
569 cPages = pThis->cPages;
570 cPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
571 if (cPages > PGMR0DYNMAP_MAX_PAGES)
572 cPages = PGMR0DYNMAP_MAX_PAGES;
573
574 if (cMinPages < pThis->cPages)
575 cMinPages = pThis->cPages;
576 cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
577 if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
578 cMinPages = PGMR0DYNMAP_MAX_PAGES;
579
580 Assert(cMinPages);
581 *pcMinPages = cMinPages;
582 return cPages;
583}
584
585
586/**
587 * Initializes the paging level data.
588 *
589 * @param pThis The dynamic mapping cache instance.
590 * @param pPgLvl The paging level data.
591 */
592void pgmR0DynMapPagingArrayInit(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
593{
594 RTCCUINTREG cr4 = ASMGetCR4();
595 switch (pThis->enmPgMode)
596 {
597 case SUPPAGINGMODE_32_BIT:
598 case SUPPAGINGMODE_32_BIT_GLOBAL:
599 pPgLvl->cLevels = 2;
600 pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
601 pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
602 pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
603 pPgLvl->a[0].fPtrMask = X86_PD_MASK;
604 pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
605
606 pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
607 pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
608 pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
609 pPgLvl->a[1].fPtrMask = X86_PT_MASK;
610 pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
611 break;
612
613 case SUPPAGINGMODE_PAE:
614 case SUPPAGINGMODE_PAE_GLOBAL:
615 case SUPPAGINGMODE_PAE_NX:
616 case SUPPAGINGMODE_PAE_GLOBAL_NX:
617 pPgLvl->cLevels = 3;
618 pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
619 pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
620 pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
621 pPgLvl->a[0].fAndMask = X86_PDPE_P;
622 pPgLvl->a[0].fResMask = X86_PDPE_P;
623
624 pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
625 pPgLvl->a[1].fPtrMask = X86_PD_PAE_MASK;
626 pPgLvl->a[1].fPtrShift = X86_PD_PAE_SHIFT;
627 pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
628 pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
629
630 pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
631 pPgLvl->a[2].fPtrMask = X86_PT_PAE_MASK;
632 pPgLvl->a[2].fPtrShift = X86_PT_PAE_SHIFT;
633 pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
634 pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
635 break;
636
637 case SUPPAGINGMODE_AMD64:
638 case SUPPAGINGMODE_AMD64_GLOBAL:
639 case SUPPAGINGMODE_AMD64_NX:
640 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
641 pPgLvl->cLevels = 4;
642 pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
643 pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
644 pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
645 pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
646 pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
647
648 pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
649 pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
650 pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
651 pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
652 pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
653
654 pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
655 pPgLvl->a[2].fPtrShift = X86_PD_PAE_SHIFT;
656 pPgLvl->a[2].fPtrMask = X86_PD_PAE_MASK;
657 pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
658 pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
659
660 pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
661 pPgLvl->a[3].fPtrShift = X86_PT_PAE_SHIFT;
662 pPgLvl->a[3].fPtrMask = X86_PT_PAE_MASK;
663 pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
664 pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
665 break;
666
667 default:
668 AssertFailed();
669 pPgLvl->cLevels = 0;
670 break;
671 }
672
673 for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
674 {
675 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
676 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
677 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
678 pPgLvl->a[i].u.pv = NULL;
679 }
680}
681
682
683/**
684 * Maps a PTE.
685 *
686 * This will update the segment structure when new PTs are mapped.
687 *
688 * It also assumes that we (for paranoid reasons) wish to establish a mapping
689 * chain from CR3 to the PT that all corresponds to the processor we're
690 * currently running on, and go about this by running with interrupts disabled
691 * and restarting from CR3 for every change.
692 *
693 * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
694 * to re-enable interrupts.
695 * @param pThis The dynamic mapping cache instance.
696 * @param pPgLvl The paging level structure.
697 * @param pvPage The page.
698 * @param pSeg The segment.
699 * @param cMaxPTs The max number of PTs expected in the segment.
700 * @param ppvPTE Where to store the PTE address.
701 */
702static int pgmR0DynMapPagingArrayMapPte(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
703 PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
704{
705 Assert(!(ASMGetFlags() & X86_EFL_IF));
706 void *pvEntry = NULL;
707 X86PGPAEUINT uEntry = ASMGetCR3();
708 for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
709 {
710 RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
711 if (pPgLvl->a[i].HCPhys != HCPhys)
712 {
713 /*
714 * Need to remap this level.
715 * The final level, the PT, will not be freed since that is what it's all about.
716 */
717 ASMIntEnable();
718 if (i + 1 == pPgLvl->cLevels)
719 AssertReturn(pSeg->cPTs < cMaxPTs, VERR_INTERNAL_ERROR);
720 else
721 {
722 int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
723 pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
724 }
725
726 int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE);
727 if (RT_SUCCESS(rc))
728 {
729 rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj,
730 (void *)-1 /* pvFixed */, 0 /* cbAlignment */,
731 RTMEM_PROT_WRITE | RTMEM_PROT_READ);
732 if (RT_SUCCESS(rc))
733 {
734 pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj);
735 AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv));
736 pPgLvl->a[i].HCPhys = HCPhys;
737 if (i + 1 == pPgLvl->cLevels)
738 pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
739 ASMIntDisable();
740 return VINF_TRY_AGAIN;
741 }
742
743 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
744 }
745 else
746 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
747 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
748 return rc;
749 }
750
751 /*
752 * The next level.
753 */
754 uint32_t iEntry = ((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
755 if (pThis->fLegacyMode)
756 {
757 pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
758 uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
759 }
760 else
761 {
762 pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
763 uEntry = pPgLvl->a[i].u.paPae[iEntry];
764 }
765
766 if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
767 {
768 LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n"
769 "PGMR0DynMap: pv=%p pvPage=%p iEntry=%#x fLegacyMode=%RTbool\n",
770 i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask,
771 pPgLvl->a[i].u.pv, pvPage, iEntry, pThis->fLegacyMode));
772 return VERR_INTERNAL_ERROR;
773 }
774 /*Log(("#%d: iEntry=%4d uEntry=%#llx pvEntry=%p HCPhys=%RHp \n", i, iEntry, uEntry, pvEntry, pPgLvl->a[i].HCPhys));*/
775 }
776
777 /* made it thru without needing to remap anything. */
778 *ppvPTE = pvEntry;
779 return VINF_SUCCESS;
780}
781
782
783/**
784 * Sets up a guard page.
785 *
786 * @param pThis The dynamic mapping cache instance.
787 * @param pPage The page.
788 */
789DECLINLINE(void) pgmR0DynMapSetupGuardPage(PPGMR0DYNMAP pThis, PPGMR0DYNMAPENTRY pPage)
790{
791 memset(pPage->pvPage, 0xfd, PAGE_SIZE);
792 pPage->cRefs = PGMR0DYNMAP_GUARD_PAGE_REF_COUNT;
793 pPage->HCPhys = PGMR0DYNMAP_GUARD_PAGE_HCPHYS;
794#ifdef PGMR0DYNMAP_GUARD_NP
795 ASMAtomicBitClear(pPage->uPte.pv, X86_PTE_BIT_P);
796#else
797 if (pThis->fLegacyMode)
798 ASMAtomicWriteU32(&pPage->uPte.pLegacy->u, PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE);
799 else
800 ASMAtomicWriteU64(&pPage->uPte.pPae->u, PGMR0DYNMAP_GUARD_PAGE_PAE_PTE);
801#endif
802 pThis->cGuardPages++;
803}
804
805
806/**
807 * Adds a new segment of the specified size.
808 *
809 * @returns VBox status code.
810 * @param pThis The dynamic mapping cache instance.
811 * @param cPages The size of the new segment, give as a page count.
812 */
813static int pgmR0DynMapAddSeg(PPGMR0DYNMAP pThis, uint32_t cPages)
814{
815 int rc2;
816 AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
817
818 /*
819 * Do the array reallocations first.
820 * (The pages array has to be replaced behind the spinlock of course.)
821 */
822 void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
823 if (!pvSavedPTEs)
824 return VERR_NO_MEMORY;
825 pThis->pvSavedPTEs = pvSavedPTEs;
826
827 void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
828 if (!pvPages)
829 {
830 pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
831 if (pvSavedPTEs)
832 pThis->pvSavedPTEs = pvSavedPTEs;
833 return VERR_NO_MEMORY;
834 }
835
836 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
837 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
838
839 memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
840 void *pvToFree = pThis->paPages;
841 pThis->paPages = (PPGMR0DYNMAPENTRY)pvPages;
842
843 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
844 RTMemFree(pvToFree);
845
846 /*
847 * Allocate the segment structure and pages of memory, then touch all the pages (paranoia).
848 */
849 uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
850 PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
851 if (!pSeg)
852 return VERR_NO_MEMORY;
853 pSeg->pNext = NULL;
854 pSeg->cPages = cPages;
855 pSeg->iPage = pThis->cPages;
856 pSeg->cPTs = 0;
857 int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
858 if (RT_SUCCESS(rc))
859 {
860 uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
861 AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
862 memset(pbPage, 0xfe, cPages << PAGE_SHIFT);
863
864 /*
865 * Walk thru the pages and set them up with a mapping of their PTE and everything.
866 */
867 ASMIntDisable();
868 PGMR0DYNMAPPGLVL PgLvl;
869 pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
870 uint32_t const iEndPage = pSeg->iPage + cPages;
871 for (uint32_t iPage = pSeg->iPage;
872 iPage < iEndPage;
873 iPage++, pbPage += PAGE_SIZE)
874 {
875 /* Initialize the page data. */
876 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
877 pThis->paPages[iPage].pvPage = pbPage;
878 pThis->paPages[iPage].cRefs = 0;
879 pThis->paPages[iPage].uPte.pPae = 0;
880 RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
881
882 /* Map its page table, retry until we've got a clean run (paranoia). */
883 do
884 rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
885 &pThis->paPages[iPage].uPte.pv);
886 while (rc == VINF_TRY_AGAIN);
887 if (RT_FAILURE(rc))
888 break;
889
890 /* Save the PTE. */
891 if (pThis->fLegacyMode)
892 ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
893 else
894 ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
895
896#ifdef VBOX_STRICT
897 /* Check that we've got the right entry. */
898 RTHCPHYS HCPhysPage = RTR0MemObjGetPagePhysAddr(pSeg->hMemObj, iPage - pSeg->iPage);
899 RTHCPHYS HCPhysPte = pThis->fLegacyMode
900 ? pThis->paPages[iPage].uPte.pLegacy->u & X86_PTE_PG_MASK
901 : pThis->paPages[iPage].uPte.pPae->u & X86_PTE_PAE_PG_MASK;
902 if (HCPhysPage != HCPhysPte)
903 {
904 LogRel(("pgmR0DynMapAddSeg: internal error - page #%u HCPhysPage=%RHp HCPhysPte=%RHp pbPage=%p pvPte=%p\n",
905 iPage - pSeg->iPage, HCPhysPage, HCPhysPte, pbPage, pThis->paPages[iPage].uPte.pv));
906 rc = VERR_INTERNAL_ERROR;
907 break;
908 }
909#endif
910 } /* for each page */
911 ASMIntEnable();
912
913 /* cleanup non-PT mappings */
914 for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
915 RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
916
917 if (RT_SUCCESS(rc))
918 {
919#if PGMR0DYNMAP_GUARD_PAGES > 0
920 /*
921 * Setup guard pages.
922 * (Note: TLBs will be shot down later on.)
923 */
924 uint32_t iPage = pSeg->iPage;
925 while (iPage < iEndPage)
926 {
927 for (uint32_t iGPg = 0; iGPg < PGMR0DYNMAP_GUARD_PAGES && iPage < iEndPage; iGPg++, iPage++)
928 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
929 iPage++; /* the guarded page */
930 }
931
932 /* Make sure the very last page is a guard page too. */
933 iPage = iEndPage - 1;
934 if (pThis->paPages[iPage].cRefs != PGMR0DYNMAP_GUARD_PAGE_REF_COUNT)
935 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
936#endif /* PGMR0DYNMAP_GUARD_PAGES > 0 */
937
938 /*
939 * Commit it by adding the segment to the list and updating the page count.
940 */
941 pSeg->pNext = pThis->pSegHead;
942 pThis->pSegHead = pSeg;
943 pThis->cPages += cPages;
944 return VINF_SUCCESS;
945 }
946
947 /*
948 * Bail out.
949 */
950 while (pSeg->cPTs-- > 0)
951 {
952 rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
953 AssertRC(rc2);
954 pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
955 }
956
957 rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
958 AssertRC(rc2);
959 pSeg->hMemObj = NIL_RTR0MEMOBJ;
960 }
961 RTMemFree(pSeg);
962
963 /* Don't bother resizing the arrays, but free them if we're the only user. */
964 if (!pThis->cPages)
965 {
966 RTMemFree(pThis->paPages);
967 pThis->paPages = NULL;
968 RTMemFree(pThis->pvSavedPTEs);
969 pThis->pvSavedPTEs = NULL;
970 }
971 return rc;
972}
973
974
975/**
976 * Called by PGMR0DynMapInitVM under the init lock.
977 *
978 * @returns VBox status code.
979 * @param pThis The dynamic mapping cache instance.
980 */
981static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis)
982{
983 /*
984 * Calc the size and add a segment of that size.
985 */
986 uint32_t cMinPages;
987 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
988 AssertReturn(cPages, VERR_INTERNAL_ERROR);
989 int rc = pgmR0DynMapAddSeg(pThis, cPages);
990 if (rc == VERR_NO_MEMORY)
991 {
992 /*
993 * Try adding smaller segments.
994 */
995 do
996 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
997 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
998 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
999 rc = VINF_SUCCESS;
1000 if (rc == VERR_NO_MEMORY)
1001 {
1002 if (pThis->cPages)
1003 pgmR0DynMapTearDown(pThis);
1004 rc = VERR_PGM_DYNMAP_SETUP_ERROR;
1005 }
1006 }
1007 Assert(ASMGetFlags() & X86_EFL_IF);
1008
1009#if PGMR0DYNMAP_GUARD_PAGES > 0
1010 /* paranoia */
1011 if (RT_SUCCESS(rc))
1012 pgmR0DynMapTlbShootDown(pThis);
1013#endif
1014 return rc;
1015}
1016
1017
1018/**
1019 * Called by PGMR0DynMapInitVM under the init lock.
1020 *
1021 * @returns VBox status code.
1022 * @param pThis The dynamic mapping cache instance.
1023 */
1024static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis)
1025{
1026 /*
1027 * Calc the new target size and add a segment of the appropriate size.
1028 */
1029 uint32_t cMinPages;
1030 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
1031 AssertReturn(cPages, VERR_INTERNAL_ERROR);
1032 if (pThis->cPages >= cPages)
1033 return VINF_SUCCESS;
1034
1035 uint32_t cAdd = cPages - pThis->cPages;
1036 int rc = pgmR0DynMapAddSeg(pThis, cAdd);
1037 if (rc == VERR_NO_MEMORY)
1038 {
1039 /*
1040 * Try adding smaller segments.
1041 */
1042 do
1043 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
1044 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
1045 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
1046 rc = VINF_SUCCESS;
1047 if (rc == VERR_NO_MEMORY)
1048 rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
1049 }
1050 Assert(ASMGetFlags() & X86_EFL_IF);
1051
1052#if PGMR0DYNMAP_GUARD_PAGES > 0
1053 /* paranoia */
1054 if (RT_SUCCESS(rc))
1055 pgmR0DynMapTlbShootDown(pThis);
1056#endif
1057 return rc;
1058}
1059
1060
1061/**
1062 * Called by PGMR0DynMapTermVM under the init lock.
1063 *
1064 * @returns VBox status code.
1065 * @param pThis The dynamic mapping cache instance.
1066 */
1067static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis)
1068{
1069 /*
1070 * Restore the original page table entries
1071 */
1072 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1073 uint32_t iPage = pThis->cPages;
1074 if (pThis->fLegacyMode)
1075 {
1076 X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
1077 while (iPage-- > 0)
1078 {
1079 X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
1080 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1081 X86PGUINT uNew = paSavedPTEs[iPage];
1082 while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
1083 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1084 Assert(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage]);
1085 }
1086 }
1087 else
1088 {
1089 X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
1090 while (iPage-- > 0)
1091 {
1092 X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
1093 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1094 X86PGPAEUINT uNew = paSavedPTEs[iPage];
1095 while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
1096 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1097 Assert(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage]);
1098 }
1099 }
1100
1101 /*
1102 * Shoot down the TLBs on all CPUs before freeing them.
1103 */
1104 pgmR0DynMapTlbShootDown(pThis);
1105
1106 /*
1107 * Free the segments.
1108 */
1109 while (pThis->pSegHead)
1110 {
1111 int rc;
1112 PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
1113 pThis->pSegHead = pSeg->pNext;
1114
1115 uint32_t iPT = pSeg->cPTs;
1116 while (iPT-- > 0)
1117 {
1118 rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
1119 pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
1120 }
1121 rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
1122 pSeg->hMemObj = NIL_RTR0MEMOBJ;
1123 pSeg->pNext = NULL;
1124 pSeg->iPage = UINT16_MAX;
1125 pSeg->cPages = 0;
1126 pSeg->cPTs = 0;
1127 RTMemFree(pSeg);
1128 }
1129
1130 /*
1131 * Free the arrays and restore the initial state.
1132 * The cLoadMax value is left behind for the next setup.
1133 */
1134 RTMemFree(pThis->paPages);
1135 pThis->paPages = NULL;
1136 RTMemFree(pThis->pvSavedPTEs);
1137 pThis->pvSavedPTEs = NULL;
1138 pThis->cPages = 0;
1139 pThis->cLoad = 0;
1140 pThis->cGuardPages = 0;
1141}
1142
1143
1144/**
1145 * Release references to a page, caller owns the spin lock.
1146 *
1147 * @param pThis The dynamic mapping cache instance.
1148 * @param iPage The page.
1149 * @param cRefs The number of references to release.
1150 */
1151DECLINLINE(void) pgmR0DynMapReleasePageLocked(PPGMR0DYNMAP pThis, uint32_t iPage, int32_t cRefs)
1152{
1153 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs) - cRefs;
1154 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
1155 if (!cRefs)
1156 pThis->cLoad--;
1157}
1158
1159
1160/**
1161 * Release references to a page, caller does not own the spin lock.
1162 *
1163 * @param pThis The dynamic mapping cache instance.
1164 * @param iPage The page.
1165 * @param cRefs The number of references to release.
1166 */
1167static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs)
1168{
1169 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1170 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1171 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1172 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1173}
1174
1175
1176/**
1177 * pgmR0DynMapPage worker that deals with the tedious bits.
1178 *
1179 * @returns The page index on success, UINT32_MAX on failure.
1180 * @param pThis The dynamic mapping cache instance.
1181 * @param HCPhys The address of the page to be mapped.
1182 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
1183 * @param pVM The shared VM structure, for statistics only.
1184 */
1185static uint32_t pgmR0DynMapPageSlow(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage, PVM pVM)
1186{
1187#ifdef VBOX_WITH_STATISTICS
1188 PVMCPU pVCpu = VMMGetCpu(pVM);
1189#endif
1190 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlow);
1191
1192 /*
1193 * Check if any of the first 3 pages are unreferenced since the caller
1194 * already has made sure they aren't matching.
1195 */
1196#ifdef VBOX_WITH_STATISTICS
1197 bool fLooped = false;
1198#endif
1199 uint32_t const cPages = pThis->cPages;
1200 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1201 uint32_t iFreePage;
1202 if (!paPages[iPage].cRefs)
1203 iFreePage = iPage;
1204 else if (!paPages[(iPage + 1) % cPages].cRefs)
1205 iFreePage = (iPage + 1) % cPages;
1206 else if (!paPages[(iPage + 2) % cPages].cRefs)
1207 iFreePage = (iPage + 2) % cPages;
1208 else
1209 {
1210 /*
1211 * Search for an unused or matching entry.
1212 */
1213 iFreePage = (iPage + 3) % cPages;
1214 for (;;)
1215 {
1216 if (paPages[iFreePage].HCPhys == HCPhys)
1217 {
1218 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlowLoopHits);
1219 return iFreePage;
1220 }
1221 if (!paPages[iFreePage].cRefs)
1222 break;
1223
1224 /* advance */
1225 iFreePage = (iFreePage + 1) % cPages;
1226 if (RT_UNLIKELY(iFreePage == iPage))
1227 return UINT32_MAX;
1228 }
1229 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlowLoopMisses);
1230#ifdef VBOX_WITH_STATISTICS
1231 fLooped = true;
1232#endif
1233 }
1234 Assert(iFreePage < cPages);
1235
1236#if 0 //def VBOX_WITH_STATISTICS
1237 /* Check for lost hits. */
1238 if (!fLooped)
1239 for (uint32_t iPage2 = (iPage + 3) % cPages; iPage2 != iPage; iPage2 = (iPage2 + 1) % cPages)
1240 if (paPages[iPage2].HCPhys == HCPhys)
1241 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlowLostHits);
1242#endif
1243
1244 /*
1245 * Setup the new entry.
1246 */
1247 /*Log6(("pgmR0DynMapPageSlow: old - %RHp %#x %#llx\n", paPages[iFreePage].HCPhys, paPages[iFreePage].cRefs, paPages[iFreePage].uPte.pPae->u));*/
1248 paPages[iFreePage].HCPhys = HCPhys;
1249 RTCpuSetFill(&paPages[iFreePage].PendingSet);
1250 if (pThis->fLegacyMode)
1251 {
1252 X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
1253 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1254 X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1255 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1256 | (HCPhys & X86_PTE_PG_MASK);
1257 while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
1258 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1259 Assert(paPages[iFreePage].uPte.pLegacy->u == uNew);
1260 }
1261 else
1262 {
1263 X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
1264 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1265 X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1266 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1267 | (HCPhys & X86_PTE_PAE_PG_MASK);
1268 while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
1269 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1270 Assert(paPages[iFreePage].uPte.pPae->u == uNew);
1271 /*Log6(("pgmR0DynMapPageSlow: #%x - %RHp %p %#llx\n", iFreePage, HCPhys, paPages[iFreePage].pvPage, uNew));*/
1272 }
1273 return iFreePage;
1274}
1275
1276
1277/**
1278 * Maps a page into the pool.
1279 *
1280 * @returns Page index on success, UINT32_MAX on failure.
1281 * @param pThis The dynamic mapping cache instance.
1282 * @param HCPhys The address of the page to be mapped.
1283 * @param iRealCpu The real cpu set index. (optimization)
1284 * @param pVM The shared VM structure, for statistics only.
1285 * @param ppvPage Where to the page address.
1286 */
1287DECLINLINE(uint32_t) pgmR0DynMapPage(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, int32_t iRealCpu, PVM pVM, void **ppvPage)
1288{
1289#ifdef VBOX_WITH_STATISTICS
1290 PVMCPU pVCpu = VMMGetCpu(pVM);
1291#endif
1292 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1293 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1294 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1295 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPage);
1296
1297 /*
1298 * Find an entry, if possible a matching one. The HCPhys address is hashed
1299 * down to a page index, collisions are handled by linear searching.
1300 * Optimized for a hit in the first 3 pages.
1301 *
1302 * To the cheap hits here and defer the tedious searching and inserting
1303 * to a helper function.
1304 */
1305 uint32_t const cPages = pThis->cPages;
1306 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
1307 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1308 if (RT_LIKELY(paPages[iPage].HCPhys == HCPhys))
1309 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageHits0);
1310 else
1311 {
1312 uint32_t iPage2 = (iPage + 1) % cPages;
1313 if (RT_LIKELY(paPages[iPage2].HCPhys == HCPhys))
1314 {
1315 iPage = iPage2;
1316 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageHits1);
1317 }
1318 else
1319 {
1320 iPage2 = (iPage + 2) % cPages;
1321 if (paPages[iPage2].HCPhys == HCPhys)
1322 {
1323 iPage = iPage2;
1324 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageHits2);
1325 }
1326 else
1327 {
1328 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage, pVM);
1329 if (RT_UNLIKELY(iPage == UINT32_MAX))
1330 {
1331 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1332 *ppvPage = NULL;
1333 return iPage;
1334 }
1335 }
1336 }
1337 }
1338
1339 /*
1340 * Reference it, update statistics and get the return address.
1341 */
1342 int32_t cRefs = ASMAtomicIncS32(&paPages[iPage].cRefs);
1343 if (cRefs == 1)
1344 {
1345 pThis->cLoad++;
1346 if (pThis->cLoad > pThis->cMaxLoad)
1347 pThis->cMaxLoad = pThis->cLoad;
1348 AssertMsg(pThis->cLoad <= pThis->cPages - pThis->cGuardPages, ("%d/%d\n", pThis->cLoad, pThis->cPages - pThis->cGuardPages));
1349 }
1350 else if (RT_UNLIKELY(cRefs <= 0))
1351 {
1352 ASMAtomicDecS32(&paPages[iPage].cRefs);
1353 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1354 *ppvPage = NULL;
1355 AssertLogRelMsgFailedReturn(("cRefs=%d iPage=%p HCPhys=%RHp\n", cRefs, iPage, HCPhys), UINT32_MAX);
1356 }
1357 void *pvPage = paPages[iPage].pvPage;
1358
1359 /*
1360 * Invalidate the entry?
1361 */
1362 bool fInvalidateIt = RTCpuSetIsMemberByIndex(&paPages[iPage].PendingSet, iRealCpu);
1363 if (RT_UNLIKELY(fInvalidateIt))
1364 RTCpuSetDelByIndex(&paPages[iPage].PendingSet, iRealCpu);
1365
1366 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1367
1368 /*
1369 * Do the actual invalidation outside the spinlock.
1370 */
1371 if (RT_UNLIKELY(fInvalidateIt))
1372 {
1373 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageInvlPg);
1374 ASMInvalidatePage(pvPage);
1375 }
1376
1377 *ppvPage = pvPage;
1378 return iPage;
1379}
1380
1381
1382/**
1383 * Assert the the integrity of the pool.
1384 *
1385 * @returns VBox status code.
1386 */
1387VMMR0DECL(int) PGMR0DynMapAssertIntegrity(void)
1388{
1389 /*
1390 * Basic pool stuff that doesn't require any lock, just assumes we're a user.
1391 */
1392 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1393 if (!pThis)
1394 return VINF_SUCCESS;
1395 AssertPtrReturn(pThis, VERR_INVALID_POINTER);
1396 AssertReturn(pThis->u32Magic == PGMR0DYNMAP_MAGIC, VERR_INVALID_MAGIC);
1397 if (!pThis->cUsers)
1398 return VERR_INVALID_PARAMETER;
1399
1400
1401 int rc = VINF_SUCCESS;
1402 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1403 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1404
1405#define CHECK_RET(expr, a) \
1406 do { \
1407 if (RT_UNLIKELY(!(expr))) \
1408 { \
1409 RTSpinlockRelease(pThis->hSpinlock, &Tmp); \
1410 AssertMsg1(#expr, __LINE__, __FILE__, __PRETTY_FUNCTION__); \
1411 AssertMsg2 a; \
1412 return VERR_INTERNAL_ERROR; \
1413 } \
1414 } while (0)
1415
1416 /*
1417 * Check that the PTEs are correct.
1418 */
1419 uint32_t cGuard = 0;
1420 uint32_t cLoad = 0;
1421 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1422 uint32_t iPage = pThis->cPages;
1423 if (pThis->fLegacyMode)
1424 {
1425 PCX86PGUINT paSavedPTEs = (PCX86PGUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1426 while (iPage-- > 0)
1427 {
1428 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1429 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1430 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1431 {
1432#ifdef PGMR0DYNMAP_GUARD_NP
1433 CHECK_RET(paPages[iPage].uPte.pLegacy->u == (paSavedPTEs[iPage] & ~(X86PGUINT)X86_PTE_P),
1434 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1435#else
1436 CHECK_RET(paPages[iPage].uPte.pLegacy->u == PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE,
1437 ("#%u: %#x", iPage, paPages[iPage].uPte.pLegacy->u));
1438#endif
1439 cGuard++;
1440 }
1441 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1442 {
1443 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1444 X86PGUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1445 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1446 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1447 CHECK_RET(paPages[iPage].uPte.pLegacy->u == uPte,
1448 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1449 if (paPages[iPage].cRefs)
1450 cLoad++;
1451 }
1452 else
1453 CHECK_RET(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage],
1454 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1455 }
1456 }
1457 else
1458 {
1459 PCX86PGPAEUINT paSavedPTEs = (PCX86PGPAEUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1460 while (iPage-- > 0)
1461 {
1462 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1463 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1464 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1465 {
1466#ifdef PGMR0DYNMAP_GUARD_NP
1467 CHECK_RET(paPages[iPage].uPte.pPae->u == (paSavedPTEs[iPage] & ~(X86PGPAEUINT)X86_PTE_P),
1468 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1469#else
1470 CHECK_RET(paPages[iPage].uPte.pPae->u == PGMR0DYNMAP_GUARD_PAGE_PAE_PTE,
1471 ("#%u: %#llx", iPage, paPages[iPage].uPte.pPae->u));
1472#endif
1473 cGuard++;
1474 }
1475 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1476 {
1477 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1478 X86PGPAEUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1479 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1480 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1481 CHECK_RET(paPages[iPage].uPte.pPae->u == uPte,
1482 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1483 if (paPages[iPage].cRefs)
1484 cLoad++;
1485 }
1486 else
1487 CHECK_RET(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage],
1488 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1489 }
1490 }
1491
1492 CHECK_RET(cLoad == pThis->cLoad, ("%u %u\n", cLoad, pThis->cLoad));
1493 CHECK_RET(cGuard == pThis->cGuardPages, ("%u %u\n", cGuard, pThis->cGuardPages));
1494
1495#undef CHECK_RET
1496 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1497 return VINF_SUCCESS;
1498}
1499
1500
1501/**
1502 * Signals the start of a new set of mappings.
1503 *
1504 * Mostly for strictness. PGMDynMapHCPage won't work unless this
1505 * API is called.
1506 *
1507 * @param pVCpu The shared data for the current virtual CPU.
1508 */
1509VMMDECL(void) PGMDynMapStartAutoSet(PVMCPU pVCpu)
1510{
1511 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
1512 Assert(pVCpu->pgm.s.AutoSet.iSubset == UINT32_MAX);
1513 pVCpu->pgm.s.AutoSet.cEntries = 0;
1514 pVCpu->pgm.s.AutoSet.iCpu = RTMpCpuIdToSetIndex(RTMpCpuId());
1515}
1516
1517
1518/**
1519 * Worker that performs the actual flushing of the set.
1520 *
1521 * @param pSet The set to flush.
1522 * @param cEntries The number of entries.
1523 */
1524DECLINLINE(void) pgmDynMapFlushAutoSetWorker(PPGMMAPSET pSet, uint32_t cEntries)
1525{
1526 /*
1527 * Release any pages it's referencing.
1528 */
1529 if ( cEntries != 0
1530 && RT_LIKELY(cEntries <= RT_ELEMENTS(pSet->aEntries)))
1531 {
1532 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1533 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1534 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1535
1536 uint32_t i = cEntries;
1537 while (i-- > 0)
1538 {
1539 uint32_t iPage = pSet->aEntries[i].iPage;
1540 Assert(iPage < pThis->cPages);
1541 int32_t cRefs = pSet->aEntries[i].cRefs;
1542 Assert(cRefs > 0);
1543 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1544
1545 pSet->aEntries[i].iPage = UINT16_MAX;
1546 pSet->aEntries[i].cRefs = 0;
1547 }
1548
1549 Assert(pThis->cLoad <= pThis->cPages - pThis->cGuardPages);
1550 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1551 }
1552}
1553
1554
1555/**
1556 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associates
1557 * since the PGMDynMapStartAutoSet call.
1558 *
1559 * @param pVCpu The shared data for the current virtual CPU.
1560 */
1561VMMDECL(void) PGMDynMapReleaseAutoSet(PVMCPU pVCpu)
1562{
1563 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1564
1565 /*
1566 * Close and flush the set.
1567 */
1568 uint32_t cEntries = pSet->cEntries;
1569 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1570 pSet->cEntries = PGMMAPSET_CLOSED;
1571 pSet->iSubset = UINT32_MAX;
1572 pSet->iCpu = -1;
1573
1574 STAM_COUNTER_INC(&pVCpu->pgm.s.aStatR0DynMapSetSize[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1575 AssertMsg(cEntries < PGMMAPSET_MAX_FILL, ("%u\n", cEntries));
1576 if (cEntries > RT_ELEMENTS(pSet->aEntries) * 50 / 100)
1577 Log(("PGMDynMapReleaseAutoSet: cEntries=%d\n", pSet->cEntries));
1578
1579 pgmDynMapFlushAutoSetWorker(pSet, cEntries);
1580}
1581
1582
1583/**
1584 * Flushes the set if it's above a certain threshold.
1585 *
1586 * @param pVCpu The shared data for the current virtual CPU.
1587 */
1588VMMDECL(void) PGMDynMapFlushAutoSet(PVMCPU pVCpu)
1589{
1590 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1591 AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags()));
1592
1593 /*
1594 * Only flush it if it's 45% full.
1595 */
1596 uint32_t cEntries = pSet->cEntries;
1597 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1598 STAM_COUNTER_INC(&pVCpu->pgm.s.aStatR0DynMapSetSize[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1599 if (cEntries >= RT_ELEMENTS(pSet->aEntries) * 45 / 100)
1600 {
1601 pSet->cEntries = 0;
1602
1603 AssertMsg(cEntries < PGMMAPSET_MAX_FILL, ("%u\n", cEntries));
1604 Log(("PGMDynMapFlushAutoSet: cEntries=%d\n", pSet->cEntries));
1605
1606 pgmDynMapFlushAutoSetWorker(pSet, cEntries);
1607 AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags()));
1608 }
1609}
1610
1611
1612/**
1613 * Migrates the automatic mapping set of the current vCPU if it's active and
1614 * necessary.
1615 *
1616 * This is called when re-entering the hardware assisted execution mode after a
1617 * nip down to ring-3. We run the risk that the CPU might have change and we
1618 * will therefore make sure all the cache entries currently in the auto set will
1619 * be valid on the new CPU. If the cpu didn't change nothing will happen as all
1620 * the entries will have been flagged as invalidated.
1621 *
1622 * @param pVCpu The shared data for the current virtual CPU.
1623 * @thread EMT
1624 */
1625VMMDECL(void) PGMDynMapMigrateAutoSet(PVMCPU pVCpu)
1626{
1627 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1628 int32_t iRealCpu = RTMpCpuIdToSetIndex(RTMpCpuId());
1629 if (pSet->iCpu != iRealCpu)
1630 {
1631 uint32_t i = pSet->cEntries;
1632 if (i != PGMMAPSET_CLOSED)
1633 {
1634 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
1635 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
1636 {
1637 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1638 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1639 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1640
1641 while (i-- > 0)
1642 {
1643 Assert(pSet->aEntries[i].cRefs > 0);
1644 uint32_t iPage = pSet->aEntries[i].iPage;
1645 Assert(iPage < pThis->cPages);
1646 if (RTCpuSetIsMemberByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu))
1647 {
1648 RTCpuSetDelByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu);
1649 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1650
1651 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
1652 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapMigrateInvlPg);
1653
1654 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1655 }
1656 }
1657
1658 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1659 }
1660 }
1661 pSet->iCpu = iRealCpu;
1662 }
1663}
1664
1665
1666/**
1667 * Worker function that flushes the current subset.
1668 *
1669 * This is called when the set is popped or when the set
1670 * hash a too high load. As also pointed out elsewhere, the
1671 * whole subset thing is a hack for working around code that
1672 * accesses too many pages. Like PGMPool.
1673 *
1674 * @param pSet The set which subset to flush.
1675 */
1676static void pgmDynMapFlushSubset(PPGMMAPSET pSet)
1677{
1678 uint32_t iSubset = pSet->iSubset;
1679 uint32_t i = pSet->cEntries;
1680 Assert(i <= RT_ELEMENTS(pSet->aEntries));
1681 if ( i > iSubset
1682 && i <= RT_ELEMENTS(pSet->aEntries))
1683 {
1684 Log(("pgmDynMapFlushSubset: cEntries=%d iSubset=%d\n", pSet->cEntries, iSubset));
1685 pSet->cEntries = iSubset;
1686
1687 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1688 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1689 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1690
1691 while (i-- > iSubset)
1692 {
1693 uint32_t iPage = pSet->aEntries[i].iPage;
1694 Assert(iPage < pThis->cPages);
1695 int32_t cRefs = pSet->aEntries[i].cRefs;
1696 Assert(cRefs > 0);
1697 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1698
1699 pSet->aEntries[i].iPage = UINT16_MAX;
1700 pSet->aEntries[i].cRefs = 0;
1701 }
1702
1703 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1704 }
1705}
1706
1707
1708/**
1709 * Creates a subset.
1710 *
1711 * A subset is a hack to avoid having to rewrite code that touches a lot of
1712 * pages. It prevents the mapping set from being overflowed by automatically
1713 * flushing previous mappings when a certain threshold is reached.
1714 *
1715 * Pages mapped after calling this function are only valid until the next page
1716 * is mapped.
1717 *
1718 * @returns The index of the previous subset. Pass this to
1719 * PGMDynMapPopAutoSubset when poping it.
1720 * @param pVCpu Pointer to the virtual cpu data.
1721 */
1722VMMDECL(uint32_t) PGMDynMapPushAutoSubset(PVMCPU pVCpu)
1723{
1724 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1725 AssertReturn(pSet->cEntries != PGMMAPSET_CLOSED, UINT32_MAX);
1726 uint32_t iPrevSubset = pSet->iSubset;
1727Assert(iPrevSubset == UINT32_MAX);
1728 pSet->iSubset = pSet->cEntries;
1729 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSubsets);
1730 return iPrevSubset;
1731}
1732
1733
1734/**
1735 * Pops a subset created by a previous call to PGMDynMapPushAutoSubset.
1736 *
1737 * @param pVCpu Pointer to the virtual cpu data.
1738 * @param iPrevSubset What PGMDynMapPushAutoSubset returned.
1739 */
1740VMMDECL(void) PGMDynMapPopAutoSubset(PVMCPU pVCpu, uint32_t iPrevSubset)
1741{
1742 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1743 uint32_t cEntries = pSet->cEntries;
1744 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1745 AssertReturnVoid(pSet->iSubset <= iPrevSubset || iPrevSubset == UINT32_MAX);
1746Assert(iPrevSubset == UINT32_MAX);
1747 STAM_COUNTER_INC(&pVCpu->pgm.s.aStatR0DynMapSetSize[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1748 if ( cEntries >= RT_ELEMENTS(pSet->aEntries) * 40 / 100
1749 && cEntries != pSet->iSubset)
1750 {
1751 AssertMsg(cEntries < PGMMAPSET_MAX_FILL, ("%u\n", cEntries));
1752 pgmDynMapFlushSubset(pSet);
1753 }
1754 pSet->iSubset = iPrevSubset;
1755}
1756
1757
1758/**
1759 * As a final resort for a full auto set, try merge duplicate entries.
1760 *
1761 * @param pSet The set.
1762 */
1763static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
1764{
1765 for (uint32_t i = 0 ; i < pSet->cEntries; i++)
1766 {
1767 uint16_t const iPage = pSet->aEntries[i].iPage;
1768 uint32_t j = i + 1;
1769 while (j < pSet->cEntries)
1770 {
1771 if (pSet->aEntries[j].iPage != iPage)
1772 j++;
1773 else if ((uint32_t)pSet->aEntries[i].cRefs + (uint32_t)pSet->aEntries[j].cRefs < UINT16_MAX)
1774 {
1775 /* merge j into i removing j. */
1776 pSet->aEntries[i].cRefs += pSet->aEntries[j].cRefs;
1777 pSet->cEntries--;
1778 if (j < pSet->cEntries)
1779 {
1780 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
1781 pSet->aEntries[pSet->cEntries].iPage = UINT16_MAX;
1782 pSet->aEntries[pSet->cEntries].cRefs = 0;
1783 }
1784 else
1785 {
1786 pSet->aEntries[j].iPage = UINT16_MAX;
1787 pSet->aEntries[j].cRefs = 0;
1788 }
1789 }
1790 else
1791 {
1792 /* migrate the max number of refs from j into i and quit the inner loop. */
1793 uint32_t cMigrate = UINT16_MAX - 1 - pSet->aEntries[i].cRefs;
1794 Assert(pSet->aEntries[j].cRefs > cMigrate);
1795 pSet->aEntries[j].cRefs -= cMigrate;
1796 pSet->aEntries[i].cRefs = UINT16_MAX - 1;
1797 break;
1798 }
1799 }
1800 }
1801}
1802
1803
1804/**
1805 * Common worker code for PGMDynMapHCPhys, pgmR0DynMapHCPageInlined and
1806 * pgmR0DynMapGCPageInlined.
1807 *
1808 * @returns VINF_SUCCESS, bails out to ring-3 on failure.
1809 * @param pVM The shared VM structure (for statistics).
1810 * @param pSet The set.
1811 * @param HCPhys The physical address of the page.
1812 * @param ppv Where to store the address of the mapping on success.
1813 *
1814 * @remarks This is a very hot path.
1815 */
1816int pgmR0DynMapHCPageCommon(PVM pVM, PPGMMAPSET pSet, RTHCPHYS HCPhys, void **ppv)
1817{
1818#ifdef VBOX_WITH_STATISTICS
1819 PVMCPU pVCpu = VMMGetCpu(pVM);
1820#endif
1821 AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags()));
1822
1823 /*
1824 * Map it.
1825 */
1826 void *pvPage;
1827 uint32_t const iPage = pgmR0DynMapPage(g_pPGMR0DynMap, HCPhys, pSet->iCpu, pVM, &pvPage);
1828 if (RT_UNLIKELY(iPage == UINT32_MAX))
1829 {
1830 AssertMsg2("PGMDynMapHCPage: cLoad=%u/%u cPages=%u cGuardPages=%u\n",
1831 g_pPGMR0DynMap->cLoad, g_pPGMR0DynMap->cMaxLoad, g_pPGMR0DynMap->cPages, g_pPGMR0DynMap->cGuardPages);
1832 if (!g_fPGMR0DynMapTestRunning)
1833 VMMR0CallHost(pVM, VMMCALLHOST_VM_R0_ASSERTION, 0);
1834 *ppv = NULL;
1835 return VERR_PGM_DYNMAP_FAILED;
1836 }
1837
1838 /*
1839 * Add the page to the auto reference set.
1840 *
1841 * The typical usage pattern means that the same pages will be mapped
1842 * several times in the same set. We can catch most of these
1843 * remappings by looking a few pages back into the set. (The searching
1844 * and set optimizing path will hardly ever be used when doing this.)
1845 */
1846 AssertCompile(RT_ELEMENTS(pSet->aEntries) >= 8);
1847 int32_t i = pSet->cEntries;
1848 if (i-- < 5)
1849 {
1850 unsigned iEntry = pSet->cEntries++;
1851 pSet->aEntries[iEntry].cRefs = 1;
1852 pSet->aEntries[iEntry].iPage = iPage;
1853 pSet->aEntries[iEntry].pvPage = pvPage;
1854 pSet->aEntries[iEntry].HCPhys = HCPhys;
1855 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
1856 }
1857 /* Any of the last 5 pages? */
1858 else if ( pSet->aEntries[i - 0].iPage == iPage
1859 && pSet->aEntries[i - 0].cRefs < UINT16_MAX - 1)
1860 pSet->aEntries[i - 0].cRefs++;
1861 else if ( pSet->aEntries[i - 1].iPage == iPage
1862 && pSet->aEntries[i - 1].cRefs < UINT16_MAX - 1)
1863 pSet->aEntries[i - 1].cRefs++;
1864 else if ( pSet->aEntries[i - 2].iPage == iPage
1865 && pSet->aEntries[i - 2].cRefs < UINT16_MAX - 1)
1866 pSet->aEntries[i - 2].cRefs++;
1867 else if ( pSet->aEntries[i - 3].iPage == iPage
1868 && pSet->aEntries[i - 3].cRefs < UINT16_MAX - 1)
1869 pSet->aEntries[i - 3].cRefs++;
1870 else if ( pSet->aEntries[i - 4].iPage == iPage
1871 && pSet->aEntries[i - 4].cRefs < UINT16_MAX - 1)
1872 pSet->aEntries[i - 4].cRefs++;
1873 /* Don't bother searching unless we're above a 60% load. */
1874 else if (RT_LIKELY(i <= (int32_t)RT_ELEMENTS(pSet->aEntries) * 60 / 100))
1875 {
1876 unsigned iEntry = pSet->cEntries++;
1877 pSet->aEntries[iEntry].cRefs = 1;
1878 pSet->aEntries[iEntry].iPage = iPage;
1879 pSet->aEntries[iEntry].pvPage = pvPage;
1880 pSet->aEntries[iEntry].HCPhys = HCPhys;
1881 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
1882 }
1883 else
1884 {
1885 /* Search the rest of the set. */
1886 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
1887 i -= 4;
1888 while (i-- > 0)
1889 if ( pSet->aEntries[i].iPage == iPage
1890 && pSet->aEntries[i].cRefs < UINT16_MAX - 1)
1891 {
1892 pSet->aEntries[i].cRefs++;
1893 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSetSearchHits);
1894 break;
1895 }
1896 if (i < 0)
1897 {
1898 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSetSearchMisses);
1899 if (pSet->iSubset < pSet->cEntries)
1900 {
1901 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSetSearchFlushes);
1902 STAM_COUNTER_INC(&pVCpu->pgm.s.aStatR0DynMapSetSize[(pSet->cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1903 AssertMsg(pSet->cEntries < PGMMAPSET_MAX_FILL, ("%u\n", pSet->cEntries));
1904 pgmDynMapFlushSubset(pSet);
1905 }
1906
1907 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
1908 {
1909 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSetOptimize);
1910 pgmDynMapOptimizeAutoSet(pSet);
1911 }
1912
1913 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
1914 {
1915 unsigned iEntry = pSet->cEntries++;
1916 pSet->aEntries[iEntry].cRefs = 1;
1917 pSet->aEntries[iEntry].iPage = iPage;
1918 pSet->aEntries[iEntry].pvPage = pvPage;
1919 pSet->aEntries[iEntry].HCPhys = HCPhys;
1920 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
1921 }
1922 else
1923 {
1924 /* We're screwed. */
1925 pgmR0DynMapReleasePage(g_pPGMR0DynMap, iPage, 1);
1926
1927 AssertMsg2("PGMDynMapHCPage: set is full!\n");
1928 if (!g_fPGMR0DynMapTestRunning)
1929 VMMR0CallHost(pVM, VMMCALLHOST_VM_R0_ASSERTION, 0);
1930 *ppv = NULL;
1931 return VERR_PGM_DYNMAP_FULL_SET;
1932 }
1933 }
1934 }
1935
1936 *ppv = pvPage;
1937 return VINF_SUCCESS;
1938}
1939
1940
1941#if 0 /* Not used in R0, should internalized the other PGMDynMapHC/GCPage too. */
1942/* documented elsewhere - a bit of a mess. */
1943VMMDECL(int) PGMDynMapHCPage(PVM pVM, RTHCPHYS HCPhys, void **ppv)
1944{
1945#ifdef VBOX_WITH_STATISTICS
1946 PVMCPU pVCpu = VMMGetCpu(pVM);
1947#endif
1948 /*
1949 * Validate state.
1950 */
1951 STAM_PROFILE_START(&pVCpu->pgm.s.StatR0DynMapHCPage, a);
1952 AssertPtr(ppv);
1953 AssertMsg(pVM->pgm.s.pvR0DynMapUsed == g_pPGMR0DynMap,
1954 ("%p != %p\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap));
1955 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1956 PVMCPU pVCpu = VMMGetCpu(pVM);
1957 AssertPtr(pVCpu);
1958 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1959 AssertMsg(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries),
1960 ("%#x (%u)\n", pSet->cEntries, pSet->cEntries));
1961
1962 /*
1963 * Call common code.
1964 */
1965 int rc = pgmR0DynMapHCPageCommon(pVM, pSet, HCPhys, ppv);
1966
1967 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatR0DynMapHCPage, a);
1968 return rc;
1969}
1970#endif
1971
1972
1973#if 0 /*def DEBUG*/
/**
 * Argument package handed to pgmR0DynMapTest3PerCpu.
 */
struct PGMR0DYNMAPTEST
{
    /** The value every CPU is expected to read through pu32. */
    uint32_t            u32Expect;
    /** The address each CPU reads (and invalidates the TLB entry for). */
    uint32_t           *pu32;
    /** Number of CPUs that read something other than u32Expect. */
    uint32_t volatile   cFailures;
};
/** The test argument package type. */
typedef struct PGMR0DYNMAPTEST PGMR0DYNMAPTEST;
/** Pointer to a test argument package. */
typedef PGMR0DYNMAPTEST *PPGMR0DYNMAPTEST;
1982
1983/**
1984 * Checks that the content of the page is the same on all CPUs, i.e. that there
1985 * are no CPU specfic PTs or similar nasty stuff involved.
1986 *
1987 * @param idCpu The current CPU.
1988 * @param pvUser1 Pointer a PGMR0DYNMAPTEST structure.
1989 * @param pvUser2 Unused, ignored.
1990 */
1991static DECLCALLBACK(void) pgmR0DynMapTest3PerCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1992{
1993 PPGMR0DYNMAPTEST pTest = (PPGMR0DYNMAPTEST)pvUser1;
1994 ASMInvalidatePage(pTest->pu32);
1995 if (*pTest->pu32 != pTest->u32Expect)
1996 ASMAtomicIncU32(&pTest->cFailures);
1997 NOREF(pvUser2); NOREF(idCpu);
1998}
1999
2000
2001/**
2002 * Performs some basic tests in debug builds.
2003 */
2004static int pgmR0DynMapTest(PVM pVM)
2005{
2006 LogRel(("pgmR0DynMapTest: ****** START ******\n"));
2007 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
2008 PPGMMAPSET pSet = &pVM->aCpus[0].pgm.s.AutoSet;
2009 uint32_t i;
2010
2011 /*
2012 * Assert internal integrity first.
2013 */
2014 LogRel(("Test #0\n"));
2015 int rc = PGMR0DynMapAssertIntegrity();
2016 if (RT_FAILURE(rc))
2017 return rc;
2018
2019 void *pvR0DynMapUsedSaved = pVM->pgm.s.pvR0DynMapUsed;
2020 pVM->pgm.s.pvR0DynMapUsed = pThis;
2021 g_fPGMR0DynMapTestRunning = true;
2022
2023 /*
2024 * Simple test, map CR3 twice and check that we're getting the
2025 * same mapping address back.
2026 */
2027 LogRel(("Test #1\n"));
2028 ASMIntDisable();
2029 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
2030
2031 uint64_t cr3 = ASMGetCR3() & ~(uint64_t)PAGE_OFFSET_MASK;
2032 void *pv = (void *)(intptr_t)-1;
2033 void *pv2 = (void *)(intptr_t)-2;
2034 rc = PGMDynMapHCPage(pVM, cr3, &pv);
2035 int rc2 = PGMDynMapHCPage(pVM, cr3, &pv2);
2036 ASMIntEnable();
2037 if ( RT_SUCCESS(rc2)
2038 && RT_SUCCESS(rc)
2039 && pv == pv2)
2040 {
2041 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2042 rc = PGMR0DynMapAssertIntegrity();
2043
2044 /*
2045 * Check that the simple set overflow code works by filling it
2046 * with more CR3 mappings.
2047 */
2048 LogRel(("Test #2\n"));
2049 ASMIntDisable();
2050 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2051 for (i = 0 ; i < UINT16_MAX*2 - 1 && RT_SUCCESS(rc) && pv2 == pv; i++)
2052 {
2053 pv2 = (void *)(intptr_t)-4;
2054 rc = PGMDynMapHCPage(pVM, cr3, &pv2);
2055 }
2056 ASMIntEnable();
2057 if (RT_FAILURE(rc) || pv != pv2)
2058 {
2059 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%p\n", __LINE__, rc, pv, pv2, i));
2060 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
2061 }
2062 else if (pSet->cEntries != 5)
2063 {
2064 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries) / 2));
2065 rc = VERR_INTERNAL_ERROR;
2066 }
2067 else if ( pSet->aEntries[4].cRefs != UINT16_MAX - 1
2068 || pSet->aEntries[3].cRefs != UINT16_MAX - 1
2069 || pSet->aEntries[2].cRefs != 1
2070 || pSet->aEntries[1].cRefs != 1
2071 || pSet->aEntries[0].cRefs != 1)
2072 {
2073 LogRel(("failed(%d): bad set dist: ", __LINE__));
2074 for (i = 0; i < pSet->cEntries; i++)
2075 LogRel(("[%d]=%d, ", i, pSet->aEntries[i].cRefs));
2076 LogRel(("\n"));
2077 rc = VERR_INTERNAL_ERROR;
2078 }
2079 if (RT_SUCCESS(rc))
2080 rc = PGMR0DynMapAssertIntegrity();
2081 if (RT_SUCCESS(rc))
2082 {
2083 /*
2084 * Trigger an set optimization run (exactly).
2085 */
2086 LogRel(("Test #3\n"));
2087 ASMIntDisable();
2088 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2089 pv2 = NULL;
2090 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) - 5 && RT_SUCCESS(rc) && pv2 != pv; i++)
2091 {
2092 pv2 = (void *)(intptr_t)(-5 - i);
2093 rc = PGMDynMapHCPage(pVM, cr3 + PAGE_SIZE * (i + 5), &pv2);
2094 }
2095 ASMIntEnable();
2096 if (RT_FAILURE(rc) || pv == pv2)
2097 {
2098 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
2099 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
2100 }
2101 else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries))
2102 {
2103 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2104 rc = VERR_INTERNAL_ERROR;
2105 }
2106 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2107 if (RT_SUCCESS(rc))
2108 rc = PGMR0DynMapAssertIntegrity();
2109 if (RT_SUCCESS(rc))
2110 {
2111 /*
2112 * Trigger an overflow error.
2113 */
2114 LogRel(("Test #4\n"));
2115 ASMIntDisable();
2116 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2117 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) + 2; i++)
2118 {
2119 rc = PGMDynMapHCPage(pVM, cr3 - PAGE_SIZE * (i + 5), &pv2);
2120 if (RT_SUCCESS(rc))
2121 rc = PGMR0DynMapAssertIntegrity();
2122 if (RT_FAILURE(rc))
2123 break;
2124 }
2125 ASMIntEnable();
2126 if (rc == VERR_PGM_DYNMAP_FULL_SET)
2127 {
2128 /* flush the set. */
2129 LogRel(("Test #5\n"));
2130 ASMIntDisable();
2131 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2132 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
2133 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
2134 ASMIntEnable();
2135
2136 rc = PGMR0DynMapAssertIntegrity();
2137 }
2138 else
2139 {
2140 LogRel(("failed(%d): rc=%Rrc, wanted %d ; pv2=%p Set=%u/%u; i=%d\n", __LINE__,
2141 rc, VERR_PGM_DYNMAP_FULL_SET, pv2, pSet->cEntries, RT_ELEMENTS(pSet->aEntries), i));
2142 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
2143 }
2144 }
2145 }
2146 }
2147 else
2148 {
2149 LogRel(("failed(%d): rc=%Rrc rc2=%Rrc; pv=%p pv2=%p\n", __LINE__, rc, rc2, pv, pv2));
2150 if (RT_SUCCESS(rc))
2151 rc = rc2;
2152 }
2153
2154 /*
2155 * Check that everyone sees the same stuff.
2156 */
2157 if (RT_SUCCESS(rc))
2158 {
2159 LogRel(("Test #5\n"));
2160 ASMIntDisable();
2161 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2162 RTHCPHYS HCPhysPT = RTR0MemObjGetPagePhysAddr(pThis->pSegHead->ahMemObjPTs[0], 0);
2163 rc = PGMDynMapHCPage(pVM, HCPhysPT, &pv);
2164 if (RT_SUCCESS(rc))
2165 {
2166 PGMR0DYNMAPTEST Test;
2167 uint32_t *pu32Real = &pThis->paPages[pThis->pSegHead->iPage].uPte.pLegacy->u;
2168 Test.pu32 = (uint32_t *)((uintptr_t)pv | ((uintptr_t)pu32Real & PAGE_OFFSET_MASK));
2169 Test.u32Expect = *pu32Real;
2170 ASMAtomicWriteU32(&Test.cFailures, 0);
2171 ASMIntEnable();
2172
2173 rc = RTMpOnAll(pgmR0DynMapTest3PerCpu, &Test, NULL);
2174 if (RT_FAILURE(rc))
2175 LogRel(("failed(%d): RTMpOnAll rc=%Rrc\n", __LINE__, rc));
2176 else if (Test.cFailures)
2177 {
2178 LogRel(("failed(%d): cFailures=%d pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", __LINE__,
2179 Test.cFailures, pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
2180 rc = VERR_INTERNAL_ERROR;
2181 }
2182 else
2183 LogRel(("pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n",
2184 pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
2185 }
2186 else
2187 {
2188 ASMIntEnable();
2189 LogRel(("failed(%d): rc=%Rrc\n", rc));
2190 }
2191 }
2192
2193 /*
2194 * Clean up.
2195 */
2196 LogRel(("Cleanup.\n"));
2197 ASMIntDisable();
2198 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2199 PGMDynMapFlushAutoSet(&pVM->aCpus[0]);
2200 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
2201 ASMIntEnable();
2202
2203 if (RT_SUCCESS(rc))
2204 rc = PGMR0DynMapAssertIntegrity();
2205 else
2206 PGMR0DynMapAssertIntegrity();
2207
2208 g_fPGMR0DynMapTestRunning = false;
2209 LogRel(("Result: rc=%Rrc Load=%u/%u/%u Set=%#x/%u\n", rc,
2210 pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2211 pVM->pgm.s.pvR0DynMapUsed = pvR0DynMapUsedSaved;
2212 LogRel(("pgmR0DynMapTest: ****** END ******\n"));
2213 return rc;
2214}
2215#endif /* DEBUG */
2216
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette