1/* $Id: PGMR0DynMap.cpp 28800 2010-04-27 08:22:32Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, ring-0 dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21#define LOG_GROUP LOG_GROUP_PGM
22#include <VBox/pgm.h>
23#include "../PGMInternal.h"
24#include <VBox/vm.h>
25#include "../PGMInline.h"
26#include <VBox/sup.h>
27#include <VBox/err.h>
28#include <iprt/asm.h>
29#include <iprt/alloc.h>
30#include <iprt/assert.h>
31#include <iprt/cpuset.h>
32#include <iprt/memobj.h>
33#include <iprt/mp.h>
34#include <iprt/semaphore.h>
35#include <iprt/spinlock.h>
36#include <iprt/string.h>
37
38
39/*******************************************************************************
40* Defined Constants And Macros *
41*******************************************************************************/
42/** The max size of the mapping cache (in pages). */
43#define PGMR0DYNMAP_MAX_PAGES ((16*_1M) >> PAGE_SHIFT)
44/** The small segment size that is adopted on out-of-memory conditions, i.e.
45 * when a single big segment cannot be allocated. */
46#define PGMR0DYNMAP_SMALL_SEG_PAGES 128
47/** The number of pages we reserve per CPU. */
48#define PGMR0DYNMAP_PAGES_PER_CPU 256
49/** The minimum number of pages we reserve per CPU.
50 * This must be equal to or larger than the autoset size. */
51#define PGMR0DYNMAP_PAGES_PER_CPU_MIN 64
52/** The number of guard pages.
53 * @remarks Never do tuning of the hashing or whatnot with a strict build! */
54#if defined(VBOX_STRICT)
55# define PGMR0DYNMAP_GUARD_PAGES 1
56#else
57# define PGMR0DYNMAP_GUARD_PAGES 0
58#endif
59/** The dummy physical address of guard pages. */
60#define PGMR0DYNMAP_GUARD_PAGE_HCPHYS UINT32_C(0x7777feed)
61/** The dummy reference count of guard pages. (Must be non-zero.) */
62#define PGMR0DYNMAP_GUARD_PAGE_REF_COUNT INT32_C(0x7777feed)
63#if 0
64/** Define this to just clear the present bit on guard pages.
65 * The alternative is to replace the entire PTE with a bad not-present
66 * PTE. Either way, XNU will screw us. :-/ */
67#define PGMR0DYNMAP_GUARD_NP
68#endif
69/** The dummy PTE value for a page. */
70#define PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE X86_PTE_PG_MASK
71/** The dummy PTE value for a page. */
72#define PGMR0DYNMAP_GUARD_PAGE_PAE_PTE UINT64_MAX /*X86_PTE_PAE_PG_MASK*/
73/** Calcs the overload threshold. Currently set at 50%. */
74#define PGMR0DYNMAP_CALC_OVERLOAD(cPages) ((cPages) / 2)
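/* Illustrative note (not part of the original source): the overload check is a
 * simple 50% rule. With a hypothetical cache of 1024 pages (guard pages not
 * counted), PGMR0DYNMAP_CALC_OVERLOAD(1024) = 512, so once cMaxLoad exceeds 512
 * PGMR0DynMapInitVM will try to expand the cache the next time a VM starts. */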
75
76#if 0
77/* Assertions cause panics if preemption is disabled; this can be used to work around that. */
78//#define RTSpinlockAcquire(a,b) do {} while (0)
79//#define RTSpinlockRelease(a,b) do {} while (0)
80#endif
81
82
83/*******************************************************************************
84* Structures and Typedefs *
85*******************************************************************************/
86/**
87 * Ring-0 dynamic mapping cache segment.
88 *
89 * The dynamic mapping cache can be extended with additional segments if the
90 * load is found to be too high. This is done the next time a VM is created,
91 * under the protection of the init mutex. The arrays are reallocated and the
92 * new segment is added to the end of them. Nothing is rehashed of course, as the
93 * indexes / addresses must remain unchanged.
94 *
95 * This structure is only modified while owning the init mutex or during module
96 * init / term.
97 */
98typedef struct PGMR0DYNMAPSEG
99{
100 /** Pointer to the next segment. */
101 struct PGMR0DYNMAPSEG *pNext;
102 /** The memory object for the virtual address range that we're abusing. */
103 RTR0MEMOBJ hMemObj;
104 /** The start page in the cache. (I.e. index into the arrays.) */
105 uint16_t iPage;
106 /** The number of pages this segment contributes. */
107 uint16_t cPages;
108 /** The number of page tables. */
109 uint16_t cPTs;
110 /** The memory objects for the page tables. */
111 RTR0MEMOBJ ahMemObjPTs[1];
112} PGMR0DYNMAPSEG;
113/** Pointer to a ring-0 dynamic mapping cache segment. */
114typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
115
116
117/**
118 * Ring-0 dynamic mapping cache entry.
119 *
120 * This structure tracks a page in the ring-0 dynamic mapping cache.
121 */
122typedef struct PGMR0DYNMAPENTRY
123{
124 /** The physical address of the currently mapped page.
125 * This is duplicated for three reasons: cache locality, cache policy of the PT
126 * mappings and sanity checks. */
127 RTHCPHYS HCPhys;
128 /** Pointer to the page. */
129 void *pvPage;
130 /** The number of references. */
131 int32_t volatile cRefs;
132 /** PTE pointer union. */
133 union PGMR0DYNMAPENTRY_PPTE
134 {
135 /** PTE pointer, 32-bit legacy version. */
136 PX86PTE pLegacy;
137 /** PTE pointer, PAE version. */
138 PX86PTEPAE pPae;
139 /** PTE pointer, the void version. */
140 void *pv;
141 } uPte;
142 /** CPUs that haven't invalidated this entry after its last update. */
143 RTCPUSET PendingSet;
144} PGMR0DYNMAPENTRY;
145/** Pointer to a ring-0 dynamic mapping cache entry. */
146typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
147
148
149/**
150 * Ring-0 dynamic mapping cache.
151 *
152 * This is initialized during VMMR0 module init but no segments are allocated at
153 * that time. Segments will be added when the first VM is started and removed
154 * again when the last VM shuts down, thus avoiding memory consumption while dormant.
155 * At module termination, the remaining bits will be freed up.
156 */
157typedef struct PGMR0DYNMAP
158{
159 /** The usual magic number / eye catcher (PGMR0DYNMAP_MAGIC). */
160 uint32_t u32Magic;
161 /** Spinlock serializing the normal operation of the cache. */
162 RTSPINLOCK hSpinlock;
163 /** Array for tracking and managing the pages. */
164 PPGMR0DYNMAPENTRY paPages;
165 /** The cache size given as a number of pages. */
166 uint32_t cPages;
167 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
168 bool fLegacyMode;
169 /** The current load.
170 * This does not include guard pages. */
171 uint32_t cLoad;
172 /** The max load ever.
173 * This is maintained to trigger the adding of more mapping space. */
174 uint32_t cMaxLoad;
175 /** Initialization / termination lock. */
176 RTSEMFASTMUTEX hInitLock;
177 /** The number of guard pages. */
178 uint32_t cGuardPages;
179 /** The number of users (protected by hInitLock). */
180 uint32_t cUsers;
181 /** Array containing a copy of the original page tables.
182 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
183 void *pvSavedPTEs;
184 /** List of segments. */
185 PPGMR0DYNMAPSEG pSegHead;
186 /** The paging mode. */
187 SUPPAGINGMODE enmPgMode;
188} PGMR0DYNMAP;
189/** Pointer to the ring-0 dynamic mapping cache */
190typedef PGMR0DYNMAP *PPGMR0DYNMAP;
191
192/** PGMR0DYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
193#define PGMR0DYNMAP_MAGIC 0x19640201
194
195
196/**
197 * Paging level data.
198 */
199typedef struct PGMR0DYNMAPPGLVL
200{
201 uint32_t cLevels; /**< The number of levels. */
202 struct
203 {
204 RTHCPHYS HCPhys; /**< The address of the page for the current level,
205 * i.e. what hMemObj/hMapObj is currently mapping. */
206 RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
207 RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
208 RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
209 uint32_t fPtrShift; /**< The pointer shift count. */
210 uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
211 uint64_t fAndMask; /**< And mask to check entry flags. */
212 uint64_t fResMask; /**< The result from applying fAndMask. */
213 union
214 {
215 void *pv; /**< hMapObj address. */
216 PX86PGUINT paLegacy; /**< Legacy table view. */
217 PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
218 } u;
219 } a[4];
220} PGMR0DYNMAPPGLVL;
221/** Pointer to paging level data. */
222typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
223
224
225/*******************************************************************************
226* Global Variables *
227*******************************************************************************/
228/** Pointer to the ring-0 dynamic mapping cache. */
229static PPGMR0DYNMAP g_pPGMR0DynMap;
230/** For overflow testing. */
231static bool g_fPGMR0DynMapTestRunning = false;
232
233
234/*******************************************************************************
235* Internal Functions *
236*******************************************************************************/
237static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs);
238static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis);
239static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis);
240static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis);
241#if 0 /*def DEBUG*/
242static int pgmR0DynMapTest(PVM pVM);
243#endif
244
245
246/**
247 * Initializes the ring-0 dynamic mapping cache.
248 *
249 * @returns VBox status code.
250 */
251VMMR0DECL(int) PGMR0DynMapInit(void)
252{
253 Assert(!g_pPGMR0DynMap);
254
255 /*
256 * Create and initialize the cache instance.
257 */
258 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)RTMemAllocZ(sizeof(*pThis));
259 AssertLogRelReturn(pThis, VERR_NO_MEMORY);
260 int rc = VINF_SUCCESS;
261 pThis->enmPgMode = SUPR0GetPagingMode();
262 switch (pThis->enmPgMode)
263 {
264 case SUPPAGINGMODE_32_BIT:
265 case SUPPAGINGMODE_32_BIT_GLOBAL:
266 pThis->fLegacyMode = true;
267 break;
268 case SUPPAGINGMODE_PAE:
269 case SUPPAGINGMODE_PAE_GLOBAL:
270 case SUPPAGINGMODE_PAE_NX:
271 case SUPPAGINGMODE_PAE_GLOBAL_NX:
272 case SUPPAGINGMODE_AMD64:
273 case SUPPAGINGMODE_AMD64_GLOBAL:
274 case SUPPAGINGMODE_AMD64_NX:
275 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
276 pThis->fLegacyMode = false;
277 break;
278 default:
279 rc = VERR_INTERNAL_ERROR;
280 break;
281 }
282 if (RT_SUCCESS(rc))
283 {
284 rc = RTSemFastMutexCreate(&pThis->hInitLock);
285 if (RT_SUCCESS(rc))
286 {
287 rc = RTSpinlockCreate(&pThis->hSpinlock);
288 if (RT_SUCCESS(rc))
289 {
290 pThis->u32Magic = PGMR0DYNMAP_MAGIC;
291 g_pPGMR0DynMap = pThis;
292 return VINF_SUCCESS;
293 }
294 RTSemFastMutexDestroy(pThis->hInitLock);
295 }
296 }
297 RTMemFree(pThis);
298 return rc;
299}
300
301
302/**
303 * Terminates the ring-0 dynamic mapping cache.
304 */
305VMMR0DECL(void) PGMR0DynMapTerm(void)
306{
307 /*
308 * Destroy the cache.
309 *
310 * There are not supposed to be any races here, the loader should
311 * make sure about that. So, don't bother locking anything.
312 *
313 * The VM objects should all be destroyed by now, so there are no
314 * dangling users or anything like that to clean up. This routine
315 * is just a mirror image of PGMR0DynMapInit.
316 */
317 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
318 if (pThis)
319 {
320 AssertPtr(pThis);
321 g_pPGMR0DynMap = NULL;
322
323 /* This should *never* happen, but in case it does try not to leak memory. */
324 AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->pvSavedPTEs && !pThis->cPages,
325 ("cUsers=%d paPages=%p pvSavedPTEs=%p cPages=%#x\n",
326 pThis->cUsers, pThis->paPages, pThis->pvSavedPTEs, pThis->cPages));
327 if (pThis->paPages)
328 pgmR0DynMapTearDown(pThis);
329
330 /* Free the associated resources. */
331 RTSemFastMutexDestroy(pThis->hInitLock);
332 pThis->hInitLock = NIL_RTSEMFASTMUTEX;
333 RTSpinlockDestroy(pThis->hSpinlock);
334 pThis->hSpinlock = NIL_RTSPINLOCK;
335 pThis->u32Magic = UINT32_MAX;
336 RTMemFree(pThis);
337 }
338}
339
340
341/**
342 * Initializes the dynamic mapping cache for a new VM.
343 *
344 * @returns VBox status code.
345 * @param pVM Pointer to the shared VM structure.
346 */
347VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
348{
349 AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
350
351 /*
352 * Initialize the auto sets.
353 */
354 VMCPUID idCpu = pVM->cCpus;
355 AssertReturn(idCpu > 0 && idCpu <= VMM_MAX_CPU_COUNT, VERR_INTERNAL_ERROR);
356 while (idCpu-- > 0)
357 {
358 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
359 uint32_t j = RT_ELEMENTS(pSet->aEntries);
360 while (j-- > 0)
361 {
362 pSet->aEntries[j].iPage = UINT16_MAX;
363 pSet->aEntries[j].cRefs = 0;
364 pSet->aEntries[j].pvPage = NULL;
365 pSet->aEntries[j].HCPhys = NIL_RTHCPHYS;
366 }
367 pSet->cEntries = PGMMAPSET_CLOSED;
368 pSet->iSubset = UINT32_MAX;
369 pSet->iCpu = -1;
370 memset(&pSet->aiHashTable[0], 0xff, sizeof(pSet->aiHashTable));
371 }
372
373 /*
374 * Do we need the cache? Skip the last bit if we don't.
375 */
376 if (!VMMIsHwVirtExtForced(pVM))
377 return VINF_SUCCESS;
378
379 /*
380 * Reference and if necessary setup or expand the cache.
381 */
382 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
383 AssertPtrReturn(pThis, VERR_INTERNAL_ERROR);
384 int rc = RTSemFastMutexRequest(pThis->hInitLock);
385 AssertLogRelRCReturn(rc, rc);
386
387 pThis->cUsers++;
388 if (pThis->cUsers == 1)
389 {
390 rc = pgmR0DynMapSetup(pThis);
391#if 0 /*def DEBUG*/
392 if (RT_SUCCESS(rc))
393 {
394 rc = pgmR0DynMapTest(pVM);
395 if (RT_FAILURE(rc))
396 pgmR0DynMapTearDown(pThis);
397 }
398#endif
399 }
400 else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages - pThis->cGuardPages))
401 rc = pgmR0DynMapExpand(pThis);
402 if (RT_SUCCESS(rc))
403 pVM->pgm.s.pvR0DynMapUsed = pThis;
404 else
405 pThis->cUsers--;
406
407 RTSemFastMutexRelease(pThis->hInitLock);
408 return rc;
409}
410
411
412/**
413 * Terminates the dynamic mapping cache usage for a VM.
414 *
415 * @param pVM Pointer to the shared VM structure.
416 */
417VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
418{
419 /*
420 * Return immediately if we're not using the cache.
421 */
422 if (!pVM->pgm.s.pvR0DynMapUsed)
423 return;
424
425 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
426 AssertPtrReturnVoid(pThis);
427
428 int rc = RTSemFastMutexRequest(pThis->hInitLock);
429 AssertLogRelRCReturnVoid(rc);
430
431 if (pVM->pgm.s.pvR0DynMapUsed == pThis)
432 {
433 pVM->pgm.s.pvR0DynMapUsed = NULL;
434
435#ifdef VBOX_STRICT
436 PGMR0DynMapAssertIntegrity();
437#endif
438
439 /*
440 * Clean up and check the auto sets.
441 */
442 VMCPUID idCpu = pVM->cCpus;
443 while (idCpu-- > 0)
444 {
445 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
446 uint32_t j = pSet->cEntries;
447 if (j <= RT_ELEMENTS(pSet->aEntries))
448 {
449 /*
450 * The set is open, close it.
451 */
452 while (j-- > 0)
453 {
454 int32_t cRefs = pSet->aEntries[j].cRefs;
455 uint32_t iPage = pSet->aEntries[j].iPage;
456 LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
457 if (iPage < pThis->cPages && cRefs > 0)
458 pgmR0DynMapReleasePage(pThis, iPage, cRefs);
459 else
460 AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
461
462 pSet->aEntries[j].iPage = UINT16_MAX;
463 pSet->aEntries[j].cRefs = 0;
464 pSet->aEntries[j].pvPage = NULL;
465 pSet->aEntries[j].HCPhys = NIL_RTHCPHYS;
466 }
467 pSet->cEntries = PGMMAPSET_CLOSED;
468 pSet->iSubset = UINT32_MAX;
469 pSet->iCpu = -1;
470 }
471 else
472 AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
473
474 j = RT_ELEMENTS(pSet->aEntries);
475 while (j-- > 0)
476 {
477 Assert(pSet->aEntries[j].iPage == UINT16_MAX);
478 Assert(!pSet->aEntries[j].cRefs);
479 }
480 }
481
482 /*
483 * Release our reference to the mapping cache.
484 */
485 Assert(pThis->cUsers > 0);
486 pThis->cUsers--;
487 if (!pThis->cUsers)
488 pgmR0DynMapTearDown(pThis);
489 }
490 else
491 AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
492
493 RTSemFastMutexRelease(pThis->hInitLock);
494}
495
496
497/**
498 * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
499 *
500 * @param idCpu The current CPU.
501 * @param pvUser1 The dynamic mapping cache instance.
502 * @param pvUser2 Unused, NULL.
503 */
504static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
505{
506 Assert(!pvUser2);
507 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)pvUser1;
508 Assert(pThis == g_pPGMR0DynMap);
509 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
510 uint32_t iPage = pThis->cPages;
511 while (iPage-- > 0)
512 ASMInvalidatePage(paPages[iPage].pvPage);
513}
514
515
516/**
517 * Shoot down the TLBs for every single cache entry on all CPUs.
518 *
519 * @returns IPRT status code (RTMpOnAll).
520 * @param pThis The dynamic mapping cache instance.
521 */
522static int pgmR0DynMapTlbShootDown(PPGMR0DYNMAP pThis)
523{
524 int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
525 AssertRC(rc);
526 if (RT_FAILURE(rc))
527 {
528 uint32_t iPage = pThis->cPages;
529 while (iPage-- > 0)
530 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
531 }
532 return rc;
533}
534
535
536/**
537 * Calculate the new cache size based on cMaxLoad statistics.
538 *
539 * @returns Number of pages.
540 * @param pThis The dynamic mapping cache instance.
541 * @param pcMinPages The minimal size in pages.
542 */
543static uint32_t pgmR0DynMapCalcNewSize(PPGMR0DYNMAP pThis, uint32_t *pcMinPages)
544{
545 Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
546
547 /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU(_MIN). */
548 RTCPUID cCpus = RTMpGetCount();
549 AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0);
550 uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
551 uint32_t cMinPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU_MIN;
552
553 /* adjust against cMaxLoad. */
554 AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
555 if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
556 pThis->cMaxLoad = 0;
557
558 while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
559 cPages += PGMR0DYNMAP_PAGES_PER_CPU;
560
561 if (pThis->cMaxLoad > cMinPages)
562 cMinPages = pThis->cMaxLoad;
563
564 /* adjust against max and current size. */
565 if (cPages < pThis->cPages)
566 cPages = pThis->cPages;
567 cPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
568 if (cPages > PGMR0DYNMAP_MAX_PAGES)
569 cPages = PGMR0DYNMAP_MAX_PAGES;
570
571 if (cMinPages < pThis->cPages)
572 cMinPages = pThis->cPages;
573 cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
574 if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
575 cMinPages = PGMR0DYNMAP_MAX_PAGES;
576
577 Assert(cMinPages);
578 *pcMinPages = cMinPages;
579 return cPages;
580}
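/* Worked example (illustrative, not part of the original source): on a host
 * where RTMpGetCount() returns 4, the function above starts with
 * cPages = 4 * PGMR0DYNMAP_PAGES_PER_CPU = 1024 and
 * cMinPages = 4 * PGMR0DYNMAP_PAGES_PER_CPU_MIN = 256. If cMaxLoad has ever
 * reached, say, 600, the loop grows cPages in steps of 256 until
 * PGMR0DYNMAP_CALC_OVERLOAD(cPages) exceeds 600, giving cPages = 1280, while
 * cMinPages is raised to 600. Both are then scaled by the guard page factor
 * and clamped to PGMR0DYNMAP_MAX_PAGES (4096 pages, i.e. 16 MB). */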
581
582
583/**
584 * Initializes the paging level data.
585 *
586 * @param pThis The dynamic mapping cache instance.
587 * @param pPgLvl The paging level data.
588 */
589void pgmR0DynMapPagingArrayInit(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
590{
591 RTCCUINTREG cr4 = ASMGetCR4();
592 switch (pThis->enmPgMode)
593 {
594 case SUPPAGINGMODE_32_BIT:
595 case SUPPAGINGMODE_32_BIT_GLOBAL:
596 pPgLvl->cLevels = 2;
597 pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
598 pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
599 pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
600 pPgLvl->a[0].fPtrMask = X86_PD_MASK;
601 pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
602
603 pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
604 pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
605 pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
606 pPgLvl->a[1].fPtrMask = X86_PT_MASK;
607 pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
608 break;
609
610 case SUPPAGINGMODE_PAE:
611 case SUPPAGINGMODE_PAE_GLOBAL:
612 case SUPPAGINGMODE_PAE_NX:
613 case SUPPAGINGMODE_PAE_GLOBAL_NX:
614 pPgLvl->cLevels = 3;
615 pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
616 pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
617 pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
618 pPgLvl->a[0].fAndMask = X86_PDPE_P;
619 pPgLvl->a[0].fResMask = X86_PDPE_P;
620
621 pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
622 pPgLvl->a[1].fPtrMask = X86_PD_PAE_MASK;
623 pPgLvl->a[1].fPtrShift = X86_PD_PAE_SHIFT;
624 pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
625 pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
626
627 pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
628 pPgLvl->a[2].fPtrMask = X86_PT_PAE_MASK;
629 pPgLvl->a[2].fPtrShift = X86_PT_PAE_SHIFT;
630 pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
631 pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
632 break;
633
634 case SUPPAGINGMODE_AMD64:
635 case SUPPAGINGMODE_AMD64_GLOBAL:
636 case SUPPAGINGMODE_AMD64_NX:
637 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
638 pPgLvl->cLevels = 4;
639 pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
640 pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
641 pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
642 pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
643 pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
644
645 pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
646 pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
647 pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
648 pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
649 pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
650
651 pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
652 pPgLvl->a[2].fPtrShift = X86_PD_PAE_SHIFT;
653 pPgLvl->a[2].fPtrMask = X86_PD_PAE_MASK;
654 pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
655 pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
656
657 pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
658 pPgLvl->a[3].fPtrShift = X86_PT_PAE_SHIFT;
659 pPgLvl->a[3].fPtrMask = X86_PT_PAE_MASK;
660 pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
661 pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
662 break;
663
664 default:
665 AssertFailed();
666 pPgLvl->cLevels = 0;
667 break;
668 }
669
670 for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
671 {
672 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
673 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
674 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
675 pPgLvl->a[i].u.pv = NULL;
676 }
677}
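/* Illustrative sketch (assumption, not part of the original source): in AMD64
 * mode the (fPtrShift, fPtrMask) pairs set up above decompose a ring-0 address
 * the same way pgmR0DynMapPagingArrayMapPte does at each level, roughly:
 *
 *   uintptr_t uPtr  = (uintptr_t)pvPage;
 *   unsigned  iPml4 = (uPtr >> X86_PML4_SHIFT)   & X86_PML4_MASK;        // level 0
 *   unsigned  iPdpt = (uPtr >> X86_PDPT_SHIFT)   & X86_PDPT_MASK_AMD64;  // level 1
 *   unsigned  iPd   = (uPtr >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK;      // level 2
 *   unsigned  iPt   = (uPtr >> X86_PT_PAE_SHIFT) & X86_PT_PAE_MASK;      // level 3
 *
 * while fPhysMask/fAndMask/fResMask are used to extract and validate the
 * physical address of the next table from each entry. */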
678
679
680/**
681 * Maps a PTE.
682 *
683 * This will update the segment structure when new PTs are mapped.
684 *
685 * It also assumes that we (for paranoid reasons) wish to establish a mapping
686 * chain from CR3 to the PT that all corresponds to the processor we're
687 * currently running on, and go about this by running with interrupts disabled
688 * and restarting from CR3 for every change.
689 *
690 * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
691 * to re-enable interrupts.
692 * @param pThis The dynamic mapping cache instance.
693 * @param pPgLvl The paging level structure.
694 * @param pvPage The page.
695 * @param pSeg The segment.
696 * @param cMaxPTs The max number of PTs expected in the segment.
697 * @param ppvPTE Where to store the PTE address.
698 */
699static int pgmR0DynMapPagingArrayMapPte(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
700 PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
701{
702 Assert(!(ASMGetFlags() & X86_EFL_IF));
703 void *pvEntry = NULL;
704 X86PGPAEUINT uEntry = ASMGetCR3();
705 for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
706 {
707 RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
708 if (pPgLvl->a[i].HCPhys != HCPhys)
709 {
710 /*
711 * Need to remap this level.
712 * The final level, the PT, will not be freed since that is what it's all about.
713 */
714 ASMIntEnable();
715 if (i + 1 == pPgLvl->cLevels)
716 AssertReturn(pSeg->cPTs < cMaxPTs, VERR_INTERNAL_ERROR);
717 else
718 {
719 int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
720 pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
721 }
722
723 int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE, RTMEM_CACHE_POLICY_DONT_CARE);
724 if (RT_SUCCESS(rc))
725 {
726 rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj,
727 (void *)-1 /* pvFixed */, 0 /* cbAlignment */,
728 RTMEM_PROT_WRITE | RTMEM_PROT_READ);
729 if (RT_SUCCESS(rc))
730 {
731 pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj);
732 AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv));
733 pPgLvl->a[i].HCPhys = HCPhys;
734 if (i + 1 == pPgLvl->cLevels)
735 pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
736 ASMIntDisable();
737 return VINF_TRY_AGAIN;
738 }
739
740 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
741 }
742 else
743 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
744 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
745 return rc;
746 }
747
748 /*
749 * The next level.
750 */
751 uint32_t iEntry = ((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
752 if (pThis->fLegacyMode)
753 {
754 pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
755 uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
756 }
757 else
758 {
759 pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
760 uEntry = pPgLvl->a[i].u.paPae[iEntry];
761 }
762
763 if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
764 {
765 LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n"
766 "PGMR0DynMap: pv=%p pvPage=%p iEntry=%#x fLegacyMode=%RTbool\n",
767 i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask,
768 pPgLvl->a[i].u.pv, pvPage, iEntry, pThis->fLegacyMode));
769 return VERR_INTERNAL_ERROR;
770 }
771 /*Log(("#%d: iEntry=%4d uEntry=%#llx pvEntry=%p HCPhys=%RHp \n", i, iEntry, uEntry, pvEntry, pPgLvl->a[i].HCPhys));*/
772 }
773
774 /* made it thru without needing to remap anything. */
775 *ppvPTE = pvEntry;
776 return VINF_SUCCESS;
777}
778
779
780/**
781 * Sets up a guard page.
782 *
783 * @param pThis The dynamic mapping cache instance.
784 * @param pPage The page.
785 */
786DECLINLINE(void) pgmR0DynMapSetupGuardPage(PPGMR0DYNMAP pThis, PPGMR0DYNMAPENTRY pPage)
787{
788 memset(pPage->pvPage, 0xfd, PAGE_SIZE);
789 pPage->cRefs = PGMR0DYNMAP_GUARD_PAGE_REF_COUNT;
790 pPage->HCPhys = PGMR0DYNMAP_GUARD_PAGE_HCPHYS;
791#ifdef PGMR0DYNMAP_GUARD_NP
792 ASMAtomicBitClear(pPage->uPte.pv, X86_PTE_BIT_P);
793#else
794 if (pThis->fLegacyMode)
795 ASMAtomicWriteU32(&pPage->uPte.pLegacy->u, PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE);
796 else
797 ASMAtomicWriteU64(&pPage->uPte.pPae->u, PGMR0DYNMAP_GUARD_PAGE_PAE_PTE);
798#endif
799 pThis->cGuardPages++;
800}
801
802
803/**
804 * Adds a new segment of the specified size.
805 *
806 * @returns VBox status code.
807 * @param pThis The dynamic mapping cache instance.
808 * @param cPages The size of the new segment, give as a page count.
809 */
810static int pgmR0DynMapAddSeg(PPGMR0DYNMAP pThis, uint32_t cPages)
811{
812 int rc2;
813 AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
814
815 /*
816 * Do the array reallocations first.
817 * (The pages array has to be replaced behind the spinlock of course.)
818 */
819 void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
820 if (!pvSavedPTEs)
821 return VERR_NO_MEMORY;
822 pThis->pvSavedPTEs = pvSavedPTEs;
823
824 void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
825 if (!pvPages)
826 {
827 pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
828 if (pvSavedPTEs)
829 pThis->pvSavedPTEs = pvSavedPTEs;
830 return VERR_NO_MEMORY;
831 }
832
833 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
834 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
835
836 memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
837 void *pvToFree = pThis->paPages;
838 pThis->paPages = (PPGMR0DYNMAPENTRY)pvPages;
839
840 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
841 RTMemFree(pvToFree);
842
843 /*
844 * Allocate the segment structure and pages of memory, then touch all the pages (paranoia).
845 */
846 uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
847 PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
848 if (!pSeg)
849 return VERR_NO_MEMORY;
850 pSeg->pNext = NULL;
851 pSeg->cPages = cPages;
852 pSeg->iPage = pThis->cPages;
853 pSeg->cPTs = 0;
854 int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
855 if (RT_SUCCESS(rc))
856 {
857 uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
858 AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
859 memset(pbPage, 0xfe, cPages << PAGE_SHIFT);
860
861 /*
862 * Walk thru the pages and set them up with a mapping of their PTE and everything.
863 */
864 ASMIntDisable();
865 PGMR0DYNMAPPGLVL PgLvl;
866 pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
867 uint32_t const iEndPage = pSeg->iPage + cPages;
868 for (uint32_t iPage = pSeg->iPage;
869 iPage < iEndPage;
870 iPage++, pbPage += PAGE_SIZE)
871 {
872 /* Initialize the page data. */
873 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
874 pThis->paPages[iPage].pvPage = pbPage;
875 pThis->paPages[iPage].cRefs = 0;
876 pThis->paPages[iPage].uPte.pPae = 0;
877 RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
878
879 /* Map its page table, retry until we've got a clean run (paranoia). */
880 do
881 rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
882 &pThis->paPages[iPage].uPte.pv);
883 while (rc == VINF_TRY_AGAIN);
884 if (RT_FAILURE(rc))
885 break;
886
887 /* Save the PTE. */
888 if (pThis->fLegacyMode)
889 ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
890 else
891 ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
892
893#ifdef VBOX_STRICT
894 /* Check that we've got the right entry. */
895 RTHCPHYS HCPhysPage = RTR0MemObjGetPagePhysAddr(pSeg->hMemObj, iPage - pSeg->iPage);
896 RTHCPHYS HCPhysPte = pThis->fLegacyMode
897 ? pThis->paPages[iPage].uPte.pLegacy->u & X86_PTE_PG_MASK
898 : pThis->paPages[iPage].uPte.pPae->u & X86_PTE_PAE_PG_MASK;
899 if (HCPhysPage != HCPhysPte)
900 {
901 LogRel(("pgmR0DynMapAddSeg: internal error - page #%u HCPhysPage=%RHp HCPhysPte=%RHp pbPage=%p pvPte=%p\n",
902 iPage - pSeg->iPage, HCPhysPage, HCPhysPte, pbPage, pThis->paPages[iPage].uPte.pv));
903 rc = VERR_INTERNAL_ERROR;
904 break;
905 }
906#endif
907 } /* for each page */
908 ASMIntEnable();
909
910 /* cleanup non-PT mappings */
911 for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
912 RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
913
914 if (RT_SUCCESS(rc))
915 {
916#if PGMR0DYNMAP_GUARD_PAGES > 0
917 /*
918 * Setup guard pages.
919 * (Note: TLBs will be shot down later on.)
920 */
921 uint32_t iPage = pSeg->iPage;
922 while (iPage < iEndPage)
923 {
924 for (uint32_t iGPg = 0; iGPg < PGMR0DYNMAP_GUARD_PAGES && iPage < iEndPage; iGPg++, iPage++)
925 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
926 iPage++; /* the guarded page */
927 }
928
929 /* Make sure the very last page is a guard page too. */
930 iPage = iEndPage - 1;
931 if (pThis->paPages[iPage].cRefs != PGMR0DYNMAP_GUARD_PAGE_REF_COUNT)
932 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
933#endif /* PGMR0DYNMAP_GUARD_PAGES > 0 */
934
935 /*
936 * Commit it by adding the segment to the list and updating the page count.
937 */
938 pSeg->pNext = pThis->pSegHead;
939 pThis->pSegHead = pSeg;
940 pThis->cPages += cPages;
941 return VINF_SUCCESS;
942 }
943
944 /*
945 * Bail out.
946 */
947 while (pSeg->cPTs-- > 0)
948 {
949 rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
950 AssertRC(rc2);
951 pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
952 }
953
954 rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
955 AssertRC(rc2);
956 pSeg->hMemObj = NIL_RTR0MEMOBJ;
957 }
958 RTMemFree(pSeg);
959
960 /* Don't bother resizing the arrays, but free them if we're the only user. */
961 if (!pThis->cPages)
962 {
963 RTMemFree(pThis->paPages);
964 pThis->paPages = NULL;
965 RTMemFree(pThis->pvSavedPTEs);
966 pThis->pvSavedPTEs = NULL;
967 }
968 return rc;
969}
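/* Illustrative note (assumption, not part of the original source): the cMaxPTs
 * bound used above is a worst case for how many page tables the new segment can
 * touch. For a PGMR0DYNMAP_SMALL_SEG_PAGES (128 page) segment in PAE mode it is
 * 128 / X86_PG_PAE_ENTRIES + 2 = 0 + 2 = 2, the +2 covering ranges that
 * straddle page table boundaries. */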
970
971
972/**
973 * Called by PGMR0DynMapInitVM under the init lock.
974 *
975 * @returns VBox status code.
976 * @param pThis The dynamic mapping cache instance.
977 */
978static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis)
979{
980 /*
981 * Calc the size and add a segment of that size.
982 */
983 uint32_t cMinPages;
984 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
985 AssertReturn(cPages, VERR_INTERNAL_ERROR);
986 int rc = pgmR0DynMapAddSeg(pThis, cPages);
987 if (rc == VERR_NO_MEMORY)
988 {
989 /*
990 * Try adding smaller segments.
991 */
992 do
993 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
994 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
995 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
996 rc = VINF_SUCCESS;
997 if (rc == VERR_NO_MEMORY)
998 {
999 if (pThis->cPages)
1000 pgmR0DynMapTearDown(pThis);
1001 rc = VERR_PGM_DYNMAP_SETUP_ERROR;
1002 }
1003 }
1004 Assert(ASMGetFlags() & X86_EFL_IF);
1005
1006#if PGMR0DYNMAP_GUARD_PAGES > 0
1007 /* paranoia */
1008 if (RT_SUCCESS(rc))
1009 pgmR0DynMapTlbShootDown(pThis);
1010#endif
1011 return rc;
1012}
1013
1014
1015/**
1016 * Called by PGMR0DynMapInitVM under the init lock.
1017 *
1018 * @returns VBox status code.
1019 * @param pThis The dynamic mapping cache instance.
1020 */
1021static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis)
1022{
1023 /*
1024 * Calc the new target size and add a segment of the appropriate size.
1025 */
1026 uint32_t cMinPages;
1027 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
1028 AssertReturn(cPages, VERR_INTERNAL_ERROR);
1029 if (pThis->cPages >= cPages)
1030 return VINF_SUCCESS;
1031
1032 uint32_t cAdd = cPages - pThis->cPages;
1033 int rc = pgmR0DynMapAddSeg(pThis, cAdd);
1034 if (rc == VERR_NO_MEMORY)
1035 {
1036 /*
1037 * Try adding smaller segments.
1038 */
1039 do
1040 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
1041 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
1042 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
1043 rc = VINF_SUCCESS;
1044 if (rc == VERR_NO_MEMORY)
1045 rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
1046 }
1047 Assert(ASMGetFlags() & X86_EFL_IF);
1048
1049#if PGMR0DYNMAP_GUARD_PAGES > 0
1050 /* paranoia */
1051 if (RT_SUCCESS(rc))
1052 pgmR0DynMapTlbShootDown(pThis);
1053#endif
1054 return rc;
1055}
1056
1057
1058/**
1059 * Called by PGMR0DynMapTermVM under the init lock.
1060 *
1061 * @returns VBox status code.
1062 * @param pThis The dynamic mapping cache instance.
1063 */
1064static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis)
1065{
1066 /*
1067 * Restore the original page table entries
1068 */
1069 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1070 uint32_t iPage = pThis->cPages;
1071 if (pThis->fLegacyMode)
1072 {
1073 X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
1074 while (iPage-- > 0)
1075 {
1076 X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
1077 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1078 X86PGUINT uNew = paSavedPTEs[iPage];
1079 while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
1080 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1081 Assert(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage]);
1082 }
1083 }
1084 else
1085 {
1086 X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
1087 while (iPage-- > 0)
1088 {
1089 X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
1090 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1091 X86PGPAEUINT uNew = paSavedPTEs[iPage];
1092 while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
1093 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1094 Assert(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage]);
1095 }
1096 }
1097
1098 /*
1099 * Shoot down the TLBs on all CPUs before freeing them.
1100 */
1101 pgmR0DynMapTlbShootDown(pThis);
1102
1103 /*
1104 * Free the segments.
1105 */
1106 while (pThis->pSegHead)
1107 {
1108 int rc;
1109 PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
1110 pThis->pSegHead = pSeg->pNext;
1111
1112 uint32_t iPT = pSeg->cPTs;
1113 while (iPT-- > 0)
1114 {
1115 rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
1116 pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
1117 }
1118 rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
1119 pSeg->hMemObj = NIL_RTR0MEMOBJ;
1120 pSeg->pNext = NULL;
1121 pSeg->iPage = UINT16_MAX;
1122 pSeg->cPages = 0;
1123 pSeg->cPTs = 0;
1124 RTMemFree(pSeg);
1125 }
1126
1127 /*
1128 * Free the arrays and restore the initial state.
1129 * The cMaxLoad value is left behind for the next setup.
1130 */
1131 RTMemFree(pThis->paPages);
1132 pThis->paPages = NULL;
1133 RTMemFree(pThis->pvSavedPTEs);
1134 pThis->pvSavedPTEs = NULL;
1135 pThis->cPages = 0;
1136 pThis->cLoad = 0;
1137 pThis->cGuardPages = 0;
1138}
1139
1140
1141/**
1142 * Release references to a page, caller owns the spin lock.
1143 *
1144 * @param pThis The dynamic mapping cache instance.
1145 * @param iPage The page.
1146 * @param cRefs The number of references to release.
1147 */
1148DECLINLINE(void) pgmR0DynMapReleasePageLocked(PPGMR0DYNMAP pThis, uint32_t iPage, int32_t cRefs)
1149{
1150 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs) - cRefs;
1151 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
1152 if (!cRefs)
1153 pThis->cLoad--;
1154}
1155
1156
1157/**
1158 * Release references to a page, caller does not own the spin lock.
1159 *
1160 * @param pThis The dynamic mapping cache instance.
1161 * @param iPage The page.
1162 * @param cRefs The number of references to release.
1163 */
1164static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs)
1165{
1166 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1167 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1168 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1169 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1170}
1171
1172
1173/**
1174 * pgmR0DynMapPage worker that deals with the tedious bits.
1175 *
1176 * @returns The page index on success, UINT32_MAX on failure.
1177 * @param pThis The dynamic mapping cache instance.
1178 * @param HCPhys The address of the page to be mapped.
1179 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
1180 * @param pVM The shared VM structure, for statistics only.
1181 */
1182static uint32_t pgmR0DynMapPageSlow(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage, PVM pVM)
1183{
1184#ifdef VBOX_WITH_STATISTICS
1185 PVMCPU pVCpu = VMMGetCpu(pVM);
1186#endif
1187 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlow);
1188
1189 /*
1190 * Check if any of the first 3 pages are unreferenced since the caller
1191 * already has made sure they aren't matching.
1192 */
1193#ifdef VBOX_WITH_STATISTICS
1194 bool fLooped = false;
1195#endif
1196 uint32_t const cPages = pThis->cPages;
1197 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1198 uint32_t iFreePage;
1199 if (!paPages[iPage].cRefs)
1200 iFreePage = iPage;
1201 else if (!paPages[(iPage + 1) % cPages].cRefs)
1202 iFreePage = (iPage + 1) % cPages;
1203 else if (!paPages[(iPage + 2) % cPages].cRefs)
1204 iFreePage = (iPage + 2) % cPages;
1205 else
1206 {
1207 /*
1208 * Search for an unused or matching entry.
1209 */
1210 iFreePage = (iPage + 3) % cPages;
1211 for (;;)
1212 {
1213 if (paPages[iFreePage].HCPhys == HCPhys)
1214 {
1215 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlowLoopHits);
1216 return iFreePage;
1217 }
1218 if (!paPages[iFreePage].cRefs)
1219 break;
1220
1221 /* advance */
1222 iFreePage = (iFreePage + 1) % cPages;
1223 if (RT_UNLIKELY(iFreePage == iPage))
1224 return UINT32_MAX;
1225 }
1226 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlowLoopMisses);
1227#ifdef VBOX_WITH_STATISTICS
1228 fLooped = true;
1229#endif
1230 }
1231 Assert(iFreePage < cPages);
1232
1233#if 0 //def VBOX_WITH_STATISTICS
1234 /* Check for lost hits. */
1235 if (!fLooped)
1236 for (uint32_t iPage2 = (iPage + 3) % cPages; iPage2 != iPage; iPage2 = (iPage2 + 1) % cPages)
1237 if (paPages[iPage2].HCPhys == HCPhys)
1238 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlowLostHits);
1239#endif
1240
1241 /*
1242 * Setup the new entry.
1243 */
1244 /*Log6(("pgmR0DynMapPageSlow: old - %RHp %#x %#llx\n", paPages[iFreePage].HCPhys, paPages[iFreePage].cRefs, paPages[iFreePage].uPte.pPae->u));*/
1245 paPages[iFreePage].HCPhys = HCPhys;
1246 RTCpuSetFill(&paPages[iFreePage].PendingSet);
1247 if (pThis->fLegacyMode)
1248 {
1249 X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
1250 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1251 X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1252 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1253 | (HCPhys & X86_PTE_PG_MASK);
1254 while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
1255 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1256 Assert(paPages[iFreePage].uPte.pLegacy->u == uNew);
1257 }
1258 else
1259 {
1260 X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
1261 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1262 X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1263 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1264 | (HCPhys & X86_PTE_PAE_PG_MASK);
1265 while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
1266 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1267 Assert(paPages[iFreePage].uPte.pPae->u == uNew);
1268 /*Log6(("pgmR0DynMapPageSlow: #%x - %RHp %p %#llx\n", iFreePage, HCPhys, paPages[iFreePage].pvPage, uNew));*/
1269 }
1270 return iFreePage;
1271}
1272
1273
1274/**
1275 * Maps a page into the pool.
1276 *
1277 * @returns Page index on success, UINT32_MAX on failure.
1278 * @param pThis The dynamic mapping cache instance.
1279 * @param HCPhys The address of the page to be mapped.
1280 * @param iRealCpu The real cpu set index. (optimization)
1281 * @param pVM The shared VM structure, for statistics only.
1282 * @param ppvPage Where to store the page address.
1283 */
1284DECLINLINE(uint32_t) pgmR0DynMapPage(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, int32_t iRealCpu, PVM pVM, void **ppvPage)
1285{
1286#ifdef VBOX_WITH_STATISTICS
1287 PVMCPU pVCpu = VMMGetCpu(pVM);
1288#endif
1289 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1290 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1291 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1292 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPage);
1293
1294 /*
1295 * Find an entry, if possible a matching one. The HCPhys address is hashed
1296 * down to a page index, collisions are handled by linear searching.
1297 * Optimized for a hit in the first 3 pages.
1298 *
1299 * Field easy hits here and defer the tedious searching and inserting
1300 * to pgmR0DynMapPageSlow().
1301 */
1302 uint32_t const cPages = pThis->cPages;
1303 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
1304 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1305 if (RT_LIKELY(paPages[iPage].HCPhys == HCPhys))
1306 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageHits0);
1307 else
1308 {
1309 uint32_t iPage2 = (iPage + 1) % cPages;
1310 if (RT_LIKELY(paPages[iPage2].HCPhys == HCPhys))
1311 {
1312 iPage = iPage2;
1313 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageHits1);
1314 }
1315 else
1316 {
1317 iPage2 = (iPage + 2) % cPages;
1318 if (paPages[iPage2].HCPhys == HCPhys)
1319 {
1320 iPage = iPage2;
1321 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageHits2);
1322 }
1323 else
1324 {
1325 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage, pVM);
1326 if (RT_UNLIKELY(iPage == UINT32_MAX))
1327 {
1328 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1329 *ppvPage = NULL;
1330 return iPage;
1331 }
1332 }
1333 }
1334 }
1335
1336 /*
1337 * Reference it, update statistics and get the return address.
1338 */
1339 int32_t cRefs = ASMAtomicIncS32(&paPages[iPage].cRefs);
1340 if (cRefs == 1)
1341 {
1342 pThis->cLoad++;
1343 if (pThis->cLoad > pThis->cMaxLoad)
1344 pThis->cMaxLoad = pThis->cLoad;
1345 AssertMsg(pThis->cLoad <= pThis->cPages - pThis->cGuardPages, ("%d/%d\n", pThis->cLoad, pThis->cPages - pThis->cGuardPages));
1346 }
1347 else if (RT_UNLIKELY(cRefs <= 0))
1348 {
1349 ASMAtomicDecS32(&paPages[iPage].cRefs);
1350 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1351 *ppvPage = NULL;
1352 AssertLogRelMsgFailedReturn(("cRefs=%d iPage=%p HCPhys=%RHp\n", cRefs, iPage, HCPhys), UINT32_MAX);
1353 }
1354 void *pvPage = paPages[iPage].pvPage;
1355
1356 /*
1357 * Invalidate the entry?
1358 */
1359 bool fInvalidateIt = RTCpuSetIsMemberByIndex(&paPages[iPage].PendingSet, iRealCpu);
1360 if (RT_UNLIKELY(fInvalidateIt))
1361 RTCpuSetDelByIndex(&paPages[iPage].PendingSet, iRealCpu);
1362
1363 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1364
1365 /*
1366 * Do the actual invalidation outside the spinlock.
1367 */
1368 if (RT_UNLIKELY(fInvalidateIt))
1369 {
1370 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageInvlPg);
1371 ASMInvalidatePage(pvPage);
1372 }
1373
1374 *ppvPage = pvPage;
1375 return iPage;
1376}
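/* Illustrative example (not part of the original source): with a hypothetical
 * cache of 1024 pages, HCPhys = 0x12345000 hashes to
 * iPage = (0x12345000 >> PAGE_SHIFT) % 1024 = 0x12345 % 1024 = 0x345, so the
 * code above probes entries 0x345, 0x346 and 0x347 before handing the miss
 * over to the linear search in pgmR0DynMapPageSlow(). */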
1377
1378
1379/**
1380 * Asserts the integrity of the pool.
1381 *
1382 * @returns VBox status code.
1383 */
1384VMMR0DECL(int) PGMR0DynMapAssertIntegrity(void)
1385{
1386 /*
1387 * Basic pool stuff that doesn't require any lock, just assumes we're a user.
1388 */
1389 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1390 if (!pThis)
1391 return VINF_SUCCESS;
1392 AssertPtrReturn(pThis, VERR_INVALID_POINTER);
1393 AssertReturn(pThis->u32Magic == PGMR0DYNMAP_MAGIC, VERR_INVALID_MAGIC);
1394 if (!pThis->cUsers)
1395 return VERR_INVALID_PARAMETER;
1396
1397
1398 int rc = VINF_SUCCESS;
1399 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1400 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1401
1402#define CHECK_RET(expr, a) \
1403 do { \
1404 if (RT_UNLIKELY(!(expr))) \
1405 { \
1406 RTSpinlockRelease(pThis->hSpinlock, &Tmp); \
1407 RTAssertMsg1Weak(#expr, __LINE__, __FILE__, __PRETTY_FUNCTION__); \
1408 RTAssertMsg2Weak a; \
1409 return VERR_INTERNAL_ERROR; \
1410 } \
1411 } while (0)
1412
1413 /*
1414 * Check that the PTEs are correct.
1415 */
1416 uint32_t cGuard = 0;
1417 uint32_t cLoad = 0;
1418 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1419 uint32_t iPage = pThis->cPages;
1420 if (pThis->fLegacyMode)
1421 {
1422 PCX86PGUINT paSavedPTEs = (PCX86PGUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1423 while (iPage-- > 0)
1424 {
1425 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1426 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1427 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1428 {
1429#ifdef PGMR0DYNMAP_GUARD_NP
1430 CHECK_RET(paPages[iPage].uPte.pLegacy->u == (paSavedPTEs[iPage] & ~(X86PGUINT)X86_PTE_P),
1431 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1432#else
1433 CHECK_RET(paPages[iPage].uPte.pLegacy->u == PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE,
1434 ("#%u: %#x", iPage, paPages[iPage].uPte.pLegacy->u));
1435#endif
1436 cGuard++;
1437 }
1438 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1439 {
1440 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1441 X86PGUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1442 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1443 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1444 CHECK_RET(paPages[iPage].uPte.pLegacy->u == uPte,
1445 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1446 if (paPages[iPage].cRefs)
1447 cLoad++;
1448 }
1449 else
1450 CHECK_RET(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage],
1451 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1452 }
1453 }
1454 else
1455 {
1456 PCX86PGPAEUINT paSavedPTEs = (PCX86PGPAEUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1457 while (iPage-- > 0)
1458 {
1459 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1460 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1461 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1462 {
1463#ifdef PGMR0DYNMAP_GUARD_NP
1464 CHECK_RET(paPages[iPage].uPte.pPae->u == (paSavedPTEs[iPage] & ~(X86PGPAEUINT)X86_PTE_P),
1465 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1466#else
1467 CHECK_RET(paPages[iPage].uPte.pPae->u == PGMR0DYNMAP_GUARD_PAGE_PAE_PTE,
1468 ("#%u: %#llx", iPage, paPages[iPage].uPte.pPae->u));
1469#endif
1470 cGuard++;
1471 }
1472 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1473 {
1474 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1475 X86PGPAEUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1476 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1477 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1478 CHECK_RET(paPages[iPage].uPte.pPae->u == uPte,
1479 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, uPte));
1480 if (paPages[iPage].cRefs)
1481 cLoad++;
1482 }
1483 else
1484 CHECK_RET(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage],
1485 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1486 }
1487 }
1488
1489 CHECK_RET(cLoad == pThis->cLoad, ("%u %u\n", cLoad, pThis->cLoad));
1490 CHECK_RET(cGuard == pThis->cGuardPages, ("%u %u\n", cGuard, pThis->cGuardPages));
1491
1492#undef CHECK_RET
1493 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1494 return VINF_SUCCESS;
1495}
1496
1497
1498/**
1499 * Signals the start of a new set of mappings.
1500 *
1501 * Mostly for strictness. PGMDynMapHCPage won't work unless this
1502 * API is called.
1503 *
1504 * @param pVCpu The shared data for the current virtual CPU.
1505 */
1506VMMDECL(void) PGMDynMapStartAutoSet(PVMCPU pVCpu)
1507{
1508 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
1509 Assert(pVCpu->pgm.s.AutoSet.iSubset == UINT32_MAX);
1510 pVCpu->pgm.s.AutoSet.cEntries = 0;
1511 pVCpu->pgm.s.AutoSet.iCpu = RTMpCpuIdToSetIndex(RTMpCpuId());
1512}
1513
1514
1515/**
1516 * Starts or migrates the autoset of a virtual CPU.
1517 *
1518 * This is used by HWACCMR0Enter. When we've longjumped out of the HWACCM
1519 * execution loop with the set open, we'll migrate it when re-entering. While
1520 * under normal circumstances, we'll start it so VMXR0LoadGuestState can access
1521 * guest memory.
1522 *
1523 * @returns @c true if started, @c false if migrated.
1524 * @param pVCpu The shared data for the current virtual CPU.
1525 * @thread EMT
1526 */
1527VMMDECL(bool) PGMDynMapStartOrMigrateAutoSet(PVMCPU pVCpu)
1528{
1529 bool fStartIt = pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED;
1530 if (fStartIt)
1531 PGMDynMapStartAutoSet(pVCpu);
1532 else
1533 PGMDynMapMigrateAutoSet(pVCpu);
1534 return fStartIt;
1535}
1536
1537
1538/**
1539 * Worker that performs the actual flushing of the set.
1540 *
1541 * @param pSet The set to flush.
1542 * @param cEntries The number of entries.
1543 */
1544DECLINLINE(void) pgmDynMapFlushAutoSetWorker(PPGMMAPSET pSet, uint32_t cEntries)
1545{
1546 /*
1547 * Release any pages it's referencing.
1548 */
1549 if ( cEntries != 0
1550 && RT_LIKELY(cEntries <= RT_ELEMENTS(pSet->aEntries)))
1551 {
1552 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1553 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1554 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1555
1556 uint32_t i = cEntries;
1557 while (i-- > 0)
1558 {
1559 uint32_t iPage = pSet->aEntries[i].iPage;
1560 Assert(iPage < pThis->cPages);
1561 int32_t cRefs = pSet->aEntries[i].cRefs;
1562 Assert(cRefs > 0);
1563 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1564
1565 pSet->aEntries[i].iPage = UINT16_MAX;
1566 pSet->aEntries[i].cRefs = 0;
1567 }
1568
1569 Assert(pThis->cLoad <= pThis->cPages - pThis->cGuardPages);
1570 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1571 }
1572}
1573
1574
1575/**
1576 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associates
1577 * since the PGMDynMapStartAutoSet call.
1578 *
1579 * @param pVCpu The shared data for the current virtual CPU.
1580 */
1581VMMDECL(void) PGMDynMapReleaseAutoSet(PVMCPU pVCpu)
1582{
1583 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1584
1585 /*
1586 * Close and flush the set.
1587 */
1588 uint32_t cEntries = pSet->cEntries;
1589 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1590 pSet->cEntries = PGMMAPSET_CLOSED;
1591 pSet->iSubset = UINT32_MAX;
1592 pSet->iCpu = -1;
1593
1594 STAM_COUNTER_INC(&pVCpu->pgm.s.aStatR0DynMapSetSize[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1595 AssertMsg(cEntries < PGMMAPSET_MAX_FILL, ("%u\n", cEntries));
1596 if (cEntries > RT_ELEMENTS(pSet->aEntries) * 50 / 100)
1597 Log(("PGMDynMapReleaseAutoSet: cEntries=%d\n", pSet->cEntries));
1598
1599 pgmDynMapFlushAutoSetWorker(pSet, cEntries);
1600}
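/* Usage sketch (illustrative, not part of the original source): ring-0 code
 * that needs temporary mappings brackets them with the auto set API, e.g.
 *
 *   PGMDynMapStartAutoSet(pVCpu);
 *   rc = PGMDynMapHCPage(pVM, HCPhys, &pv);   // mapping recorded in the set
 *   ...                                       // use pv
 *   PGMDynMapReleaseAutoSet(pVCpu);           // closes the set, drops the refs
 *
 * The exact call sites vary; this only shows the intended pairing. */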
1601
1602
1603/**
1604 * Flushes the set if it's above a certain threshold.
1605 *
1606 * @param pVCpu The shared data for the current virtual CPU.
1607 */
1608VMMDECL(void) PGMDynMapFlushAutoSet(PVMCPU pVCpu)
1609{
1610 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1611 AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags()));
1612
1613 /*
1614 * Only flush it if it's 45% full.
1615 */
1616 uint32_t cEntries = pSet->cEntries;
1617 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1618 STAM_COUNTER_INC(&pVCpu->pgm.s.aStatR0DynMapSetSize[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1619 if (cEntries >= RT_ELEMENTS(pSet->aEntries) * 45 / 100)
1620 {
1621 pSet->cEntries = 0;
1622
1623 AssertMsg(cEntries < PGMMAPSET_MAX_FILL, ("%u\n", cEntries));
1624 Log(("PGMDynMapFlushAutoSet: cEntries=%d\n", pSet->cEntries));
1625
1626 pgmDynMapFlushAutoSetWorker(pSet, cEntries);
1627 AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags()));
1628 }
1629}
1630
1631
1632/**
1633 * Migrates the automatic mapping set of the current vCPU if it's active and
1634 * necessary.
1635 *
1636 * This is called when re-entering the hardware assisted execution mode after a
1637 * nip down to ring-3. We run the risk that the CPU might have changed and we
1638 * will therefore make sure all the cache entries currently in the auto set will
1639 * be valid on the new CPU. If the cpu didn't change nothing will happen as all
1640 * the entries will have been flagged as invalidated.
1641 *
1642 * @param pVCpu The shared data for the current virtual CPU.
1643 * @thread EMT
1644 */
1645VMMDECL(void) PGMDynMapMigrateAutoSet(PVMCPU pVCpu)
1646{
1647 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1648 int32_t iRealCpu = RTMpCpuIdToSetIndex(RTMpCpuId());
1649 if (pSet->iCpu != iRealCpu)
1650 {
1651 uint32_t i = pSet->cEntries;
1652 if (i != PGMMAPSET_CLOSED)
1653 {
1654 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
1655 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
1656 {
1657 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1658 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1659 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1660
1661 while (i-- > 0)
1662 {
1663 Assert(pSet->aEntries[i].cRefs > 0);
1664 uint32_t iPage = pSet->aEntries[i].iPage;
1665 Assert(iPage < pThis->cPages);
1666 if (RTCpuSetIsMemberByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu))
1667 {
1668 RTCpuSetDelByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu);
1669 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1670
1671 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
1672 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapMigrateInvlPg);
1673
1674 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1675 }
1676 }
1677
1678 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1679 }
1680 }
1681 pSet->iCpu = iRealCpu;
1682 }
1683}
1684
1685
1686/**
1687 * Worker function that flushes the current subset.
1688 *
1689 * This is called when the set is popped or when the set
1690 * has too high a load. As also pointed out elsewhere, the
1691 * whole subset thing is a hack for working around code that
1692 * accesses too many pages. Like PGMPool.
1693 *
1694 * @param pSet The set which subset to flush.
1695 */
1696static void pgmDynMapFlushSubset(PPGMMAPSET pSet)
1697{
1698 uint32_t iSubset = pSet->iSubset;
1699 uint32_t i = pSet->cEntries;
1700 Assert(i <= RT_ELEMENTS(pSet->aEntries));
1701 if ( i > iSubset
1702 && i <= RT_ELEMENTS(pSet->aEntries))
1703 {
1704 Log(("pgmDynMapFlushSubset: cEntries=%d iSubset=%d\n", pSet->cEntries, iSubset));
1705 pSet->cEntries = iSubset;
1706
1707 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1708 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1709 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1710
1711 while (i-- > iSubset)
1712 {
1713 uint32_t iPage = pSet->aEntries[i].iPage;
1714 Assert(iPage < pThis->cPages);
1715 int32_t cRefs = pSet->aEntries[i].cRefs;
1716 Assert(cRefs > 0);
1717 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1718
1719 pSet->aEntries[i].iPage = UINT16_MAX;
1720 pSet->aEntries[i].cRefs = 0;
1721 }
1722
1723 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1724 }
1725}
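/*
 * Within this file the subset flush is triggered from two places: from
 * PGMDynMapPopAutoSubset when the set is at least 40% full, and from
 * pgmR0DynMapHCPageCommon when a full-set search misses while a subset is
 * open. Entries below iSubset are left untouched in both cases.
 */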
1726
1727
1728/**
1729 * Creates a subset.
1730 *
1731 * A subset is a hack to avoid having to rewrite code that touches a lot of
1732 * pages. It prevents the mapping set from being overflowed by automatically
1733 * flushing previous mappings when a certain threshold is reached.
1734 *
1735 * Pages mapped after calling this function are only valid until the next page
1736 * is mapped.
1737 *
1738 * @returns The index of the previous subset. Pass this to
1739 * PGMDynMapPopAutoSubset when popping it.
1740 * @param pVCpu Pointer to the virtual cpu data.
1741 */
1742VMMDECL(uint32_t) PGMDynMapPushAutoSubset(PVMCPU pVCpu)
1743{
1744 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1745 AssertReturn(pSet->cEntries != PGMMAPSET_CLOSED, UINT32_MAX);
1746 uint32_t iPrevSubset = pSet->iSubset;
1747 LogFlow(("PGMDynMapPushAutoSubset: pVCpu=%p iPrevSubset=%u\n", pVCpu, iPrevSubset));
1748
1749 pSet->iSubset = pSet->cEntries;
1750 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSubsets);
1751 return iPrevSubset;
1752}
1753
1754
1755/**
1756 * Pops a subset created by a previous call to PGMDynMapPushAutoSubset.
1757 *
1758 * @param pVCpu Pointer to the virtual cpu data.
1759 * @param iPrevSubset What PGMDynMapPushAutoSubset returned.
1760 */
1761VMMDECL(void) PGMDynMapPopAutoSubset(PVMCPU pVCpu, uint32_t iPrevSubset)
1762{
1763 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1764 uint32_t cEntries = pSet->cEntries;
1765 LogFlow(("PGMDynMapPopAutoSubset: pVCpu=%p iPrevSubset=%u iSubset=%u cEntries=%u\n", pVCpu, iPrevSubset, pSet->iSubset, cEntries));
1766 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1767 AssertReturnVoid(pSet->iSubset >= iPrevSubset || iPrevSubset == UINT32_MAX);
1768 STAM_COUNTER_INC(&pVCpu->pgm.s.aStatR0DynMapSetSize[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1769 if ( cEntries >= RT_ELEMENTS(pSet->aEntries) * 40 / 100
1770 && cEntries != pSet->iSubset)
1771 {
1772 AssertMsg(cEntries < PGMMAPSET_MAX_FILL, ("%u\n", cEntries));
1773 pgmDynMapFlushSubset(pSet);
1774 }
1775 pSet->iSubset = iPrevSubset;
1776}
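/*
 * Typical bracketing, sketched from the push/pop contract above (the real
 * callers, e.g. the PGM pool code, are outside this file):
 *
 *     uint32_t const iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
 *     // ... code that maps a large number of pages ...
 *     PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
 *
 * Mappings made between the two calls may be flushed as soon as the subset
 * is popped (or earlier on overflow), so the addresses must not be cached.
 */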
1777
1778
1779/**
1780 * As a final resort for a full auto set, try to merge duplicate entries.
1781 *
1782 * @param pSet The set.
1783 */
1784static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
1785{
1786 for (uint32_t i = 0 ; i < pSet->cEntries; i++)
1787 {
1788 uint16_t const iPage = pSet->aEntries[i].iPage;
1789 uint32_t j = i + 1;
1790 while (j < pSet->cEntries)
1791 {
1792 if (pSet->aEntries[j].iPage != iPage)
1793 j++;
1794 else if ((uint32_t)pSet->aEntries[i].cRefs + (uint32_t)pSet->aEntries[j].cRefs < UINT16_MAX)
1795 {
1796 /* merge j into i removing j. */
1797 pSet->aEntries[i].cRefs += pSet->aEntries[j].cRefs;
1798 pSet->cEntries--;
1799 if (j < pSet->cEntries)
1800 {
1801 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
1802 pSet->aEntries[pSet->cEntries].iPage = UINT16_MAX;
1803 pSet->aEntries[pSet->cEntries].cRefs = 0;
1804 }
1805 else
1806 {
1807 pSet->aEntries[j].iPage = UINT16_MAX;
1808 pSet->aEntries[j].cRefs = 0;
1809 }
1810 }
1811 else
1812 {
1813 /* migrate the max number of refs from j into i and quit the inner loop. */
1814 uint32_t cMigrate = UINT16_MAX - 1 - pSet->aEntries[i].cRefs;
1815 Assert(pSet->aEntries[j].cRefs > cMigrate);
1816 pSet->aEntries[j].cRefs -= cMigrate;
1817 pSet->aEntries[i].cRefs = UINT16_MAX - 1;
1818 break;
1819 }
1820 }
1821 }
1822}
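/*
 * Example of the merge step: with entries { iPage=7, cRefs=3 } at i and
 * { iPage=7, cRefs=2 } at j, the result is a single { iPage=7, cRefs=5 }
 * entry at i, the last entry of the set is moved into slot j (unless j was
 * already the last one), and cEntries drops by one. Only when the combined
 * count would reach UINT16_MAX or more does the code instead top entry i up
 * to UINT16_MAX - 1 and leave the remainder in entry j.
 */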
1823
1824
1825/**
1826 * Common worker code for PGMDynMapHCPhys, pgmR0DynMapHCPageInlined and
1827 * pgmR0DynMapGCPageInlined.
1828 *
1829 * @returns VINF_SUCCESS on success; bails out to ring-3 and returns a VERR_PGM_DYNMAP_* status on failure.
1830 * @param pVM The shared VM structure (for statistics).
1831 * @param pSet The set.
1832 * @param HCPhys The physical address of the page.
1833 * @param ppv Where to store the address of the mapping on success.
1834 *
1835 * @remarks This is a very hot path.
1836 */
1837int pgmR0DynMapHCPageCommon(PVM pVM, PPGMMAPSET pSet, RTHCPHYS HCPhys, void **ppv)
1838{
1839 LogFlow(("pgmR0DynMapHCPageCommon: pVM=%p pSet=%p HCPhys=%RHp ppv=%p\n",
1840 pVM, pSet, HCPhys, ppv));
1841#ifdef VBOX_WITH_STATISTICS
1842 PVMCPU pVCpu = VMMGetCpu(pVM);
1843#endif
1844 AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags()));
1845
1846 /*
1847 * Map it.
1848 */
1849 void *pvPage;
1850 uint32_t const iPage = pgmR0DynMapPage(g_pPGMR0DynMap, HCPhys, pSet->iCpu, pVM, &pvPage);
1851 if (RT_UNLIKELY(iPage == UINT32_MAX))
1852 {
1853 RTAssertMsg2Weak("PGMDynMapHCPage: cLoad=%u/%u cPages=%u cGuardPages=%u\n",
1854 g_pPGMR0DynMap->cLoad, g_pPGMR0DynMap->cMaxLoad, g_pPGMR0DynMap->cPages, g_pPGMR0DynMap->cGuardPages);
1855 if (!g_fPGMR0DynMapTestRunning)
1856 VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_VM_R0_ASSERTION, 0);
1857 *ppv = NULL;
1858 return VERR_PGM_DYNMAP_FAILED;
1859 }
1860
1861 /*
1862 * Add the page to the auto reference set.
1863 *
1864 * The typical usage pattern means that the same pages will be mapped
1865 * several times in the same set. We can catch most of these
1866 * remappings by looking a few pages back into the set. (The searching
1867 * and set optimizing path will hardly ever be used when doing this.)
1868 */
1869 AssertCompile(RT_ELEMENTS(pSet->aEntries) >= 8);
1870 int32_t i = pSet->cEntries;
1871 if (i-- < 5)
1872 {
1873 unsigned iEntry = pSet->cEntries++;
1874 pSet->aEntries[iEntry].cRefs = 1;
1875 pSet->aEntries[iEntry].iPage = iPage;
1876 pSet->aEntries[iEntry].pvPage = pvPage;
1877 pSet->aEntries[iEntry].HCPhys = HCPhys;
1878 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
1879 }
1880 /* Any of the last 5 pages? */
1881 else if ( pSet->aEntries[i - 0].iPage == iPage
1882 && pSet->aEntries[i - 0].cRefs < UINT16_MAX - 1)
1883 pSet->aEntries[i - 0].cRefs++;
1884 else if ( pSet->aEntries[i - 1].iPage == iPage
1885 && pSet->aEntries[i - 1].cRefs < UINT16_MAX - 1)
1886 pSet->aEntries[i - 1].cRefs++;
1887 else if ( pSet->aEntries[i - 2].iPage == iPage
1888 && pSet->aEntries[i - 2].cRefs < UINT16_MAX - 1)
1889 pSet->aEntries[i - 2].cRefs++;
1890 else if ( pSet->aEntries[i - 3].iPage == iPage
1891 && pSet->aEntries[i - 3].cRefs < UINT16_MAX - 1)
1892 pSet->aEntries[i - 3].cRefs++;
1893 else if ( pSet->aEntries[i - 4].iPage == iPage
1894 && pSet->aEntries[i - 4].cRefs < UINT16_MAX - 1)
1895 pSet->aEntries[i - 4].cRefs++;
1896 /* Don't bother searching unless we're above a 60% load. */
1897 else if (RT_LIKELY(i <= (int32_t)RT_ELEMENTS(pSet->aEntries) * 60 / 100))
1898 {
1899 unsigned iEntry = pSet->cEntries++;
1900 pSet->aEntries[iEntry].cRefs = 1;
1901 pSet->aEntries[iEntry].iPage = iPage;
1902 pSet->aEntries[iEntry].pvPage = pvPage;
1903 pSet->aEntries[iEntry].HCPhys = HCPhys;
1904 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
1905 }
1906 else
1907 {
1908 /* Search the rest of the set. */
1909 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
1910 i -= 4;
1911 while (i-- > 0)
1912 if ( pSet->aEntries[i].iPage == iPage
1913 && pSet->aEntries[i].cRefs < UINT16_MAX - 1)
1914 {
1915 pSet->aEntries[i].cRefs++;
1916 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSetSearchHits);
1917 break;
1918 }
1919 if (i < 0)
1920 {
1921 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSetSearchMisses);
1922 if (pSet->iSubset < pSet->cEntries)
1923 {
1924 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSetSearchFlushes);
1925 STAM_COUNTER_INC(&pVCpu->pgm.s.aStatR0DynMapSetSize[(pSet->cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1926 AssertMsg(pSet->cEntries < PGMMAPSET_MAX_FILL, ("%u\n", pSet->cEntries));
1927 pgmDynMapFlushSubset(pSet);
1928 }
1929
1930 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
1931 {
1932 STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSetOptimize);
1933 pgmDynMapOptimizeAutoSet(pSet);
1934 }
1935
1936 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
1937 {
1938 unsigned iEntry = pSet->cEntries++;
1939 pSet->aEntries[iEntry].cRefs = 1;
1940 pSet->aEntries[iEntry].iPage = iPage;
1941 pSet->aEntries[iEntry].pvPage = pvPage;
1942 pSet->aEntries[iEntry].HCPhys = HCPhys;
1943 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
1944 }
1945 else
1946 {
1947 /* We're screwed. */
1948 pgmR0DynMapReleasePage(g_pPGMR0DynMap, iPage, 1);
1949
1950 RTAssertMsg2Weak("PGMDynMapHCPage: set is full!\n");
1951 if (!g_fPGMR0DynMapTestRunning)
1952 VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_VM_R0_ASSERTION, 0);
1953 *ppv = NULL;
1954 return VERR_PGM_DYNMAP_FULL_SET;
1955 }
1956 }
1957 }
1958
1959 *ppv = pvPage;
1960 return VINF_SUCCESS;
1961}
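/*
 * Note on the error paths above: both failure cases raise a ring-0 assertion
 * via VMMRZCallRing3NoCpu (skipped while the self-test is running) before
 * returning VERR_PGM_DYNMAP_FAILED or VERR_PGM_DYNMAP_FULL_SET, so callers
 * normally only see those statuses after that detour to ring-3.
 */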
1962
1963
1964#if 0 /* Not used in R0, should internalize the other PGMDynMapHC/GCPage too. */
1965/* documented elsewhere - a bit of a mess. */
1966VMMDECL(int) PGMDynMapHCPage(PVM pVM, RTHCPHYS HCPhys, void **ppv)
1967{
1968 PVMCPU pVCpu = VMMGetCpu(pVM); /* needed by the STAM macros and the checks below */
1971 /*
1972 * Validate state.
1973 */
1974 STAM_PROFILE_START(&pVCpu->pgm.s.StatR0DynMapHCPage, a);
1975 AssertPtr(ppv);
1976 AssertMsg(pVM->pgm.s.pvR0DynMapUsed == g_pPGMR0DynMap,
1977 ("%p != %p\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap));
1978 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1980 AssertPtr(pVCpu);
1981 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1982 AssertMsg(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries),
1983 ("%#x (%u)\n", pSet->cEntries, pSet->cEntries));
1984
1985 /*
1986 * Call common code.
1987 */
1988 int rc = pgmR0DynMapHCPageCommon(pVM, pSet, HCPhys, ppv);
1989
1990 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatR0DynMapHCPage, a);
1991 return rc;
1992}
1993#endif
1994
1995
1996#if 0 /*def DEBUG*/
1997/** For pgmR0DynMapTest3PerCpu. */
1998typedef struct PGMR0DYNMAPTEST
1999{
2000 uint32_t u32Expect;
2001 uint32_t *pu32;
2002 uint32_t volatile cFailures;
2003} PGMR0DYNMAPTEST;
2004typedef PGMR0DYNMAPTEST *PPGMR0DYNMAPTEST;
2005
2006/**
2007 * Checks that the content of the page is the same on all CPUs, i.e. that there
2008 * are no CPU specific PTs or similar nasty stuff involved.
2009 *
2010 * @param idCpu The current CPU.
2011 * @param pvUser1 Pointer to a PGMR0DYNMAPTEST structure.
2012 * @param pvUser2 Unused, ignored.
2013 */
2014static DECLCALLBACK(void) pgmR0DynMapTest3PerCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
2015{
2016 PPGMR0DYNMAPTEST pTest = (PPGMR0DYNMAPTEST)pvUser1;
2017 ASMInvalidatePage(pTest->pu32);
2018 if (*pTest->pu32 != pTest->u32Expect)
2019 ASMAtomicIncU32(&pTest->cFailures);
2020 NOREF(pvUser2); NOREF(idCpu);
2021}
2022
2023
2024/**
2025 * Performs some basic tests in debug builds.
2026 */
2027static int pgmR0DynMapTest(PVM pVM)
2028{
2029 LogRel(("pgmR0DynMapTest: ****** START ******\n"));
2030 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
2031 PPGMMAPSET pSet = &pVM->aCpus[0].pgm.s.AutoSet;
2032 uint32_t i;
2033
2034 /*
2035 * Assert internal integrity first.
2036 */
2037 LogRel(("Test #0\n"));
2038 int rc = PGMR0DynMapAssertIntegrity();
2039 if (RT_FAILURE(rc))
2040 return rc;
2041
2042 void *pvR0DynMapUsedSaved = pVM->pgm.s.pvR0DynMapUsed;
2043 pVM->pgm.s.pvR0DynMapUsed = pThis;
2044 g_fPGMR0DynMapTestRunning = true;
2045
2046 /*
2047 * Simple test, map CR3 twice and check that we're getting the
2048 * same mapping address back.
2049 */
2050 LogRel(("Test #1\n"));
2051 ASMIntDisable();
2052 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
2053
2054 uint64_t cr3 = ASMGetCR3() & ~(uint64_t)PAGE_OFFSET_MASK;
2055 void *pv = (void *)(intptr_t)-1;
2056 void *pv2 = (void *)(intptr_t)-2;
2057 rc = PGMDynMapHCPage(pVM, cr3, &pv);
2058 int rc2 = PGMDynMapHCPage(pVM, cr3, &pv2);
2059 ASMIntEnable();
2060 if ( RT_SUCCESS(rc2)
2061 && RT_SUCCESS(rc)
2062 && pv == pv2)
2063 {
2064 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2065 rc = PGMR0DynMapAssertIntegrity();
2066
2067 /*
2068 * Check that the simple set overflow code works by filling it
2069 * with more CR3 mappings.
2070 */
2071 LogRel(("Test #2\n"));
2072 ASMIntDisable();
2073 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2074 for (i = 0 ; i < UINT16_MAX*2 - 1 && RT_SUCCESS(rc) && pv2 == pv; i++)
2075 {
2076 pv2 = (void *)(intptr_t)-4;
2077 rc = PGMDynMapHCPage(pVM, cr3, &pv2);
2078 }
2079 ASMIntEnable();
2080 if (RT_FAILURE(rc) || pv != pv2)
2081 {
2082 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
2083 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
2084 }
2085 else if (pSet->cEntries != 5)
2086 {
2087 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, 5));
2088 rc = VERR_INTERNAL_ERROR;
2089 }
2090 else if ( pSet->aEntries[4].cRefs != UINT16_MAX - 1
2091 || pSet->aEntries[3].cRefs != UINT16_MAX - 1
2092 || pSet->aEntries[2].cRefs != 1
2093 || pSet->aEntries[1].cRefs != 1
2094 || pSet->aEntries[0].cRefs != 1)
2095 {
2096 LogRel(("failed(%d): bad set dist: ", __LINE__));
2097 for (i = 0; i < pSet->cEntries; i++)
2098 LogRel(("[%d]=%d, ", i, pSet->aEntries[i].cRefs));
2099 LogRel(("\n"));
2100 rc = VERR_INTERNAL_ERROR;
2101 }
2102 if (RT_SUCCESS(rc))
2103 rc = PGMR0DynMapAssertIntegrity();
2104 if (RT_SUCCESS(rc))
2105 {
2106 /*
2107 * Trigger a set optimization run (exactly).
2108 */
2109 LogRel(("Test #3\n"));
2110 ASMIntDisable();
2111 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2112 pv2 = NULL;
2113 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) - 5 && RT_SUCCESS(rc) && pv2 != pv; i++)
2114 {
2115 pv2 = (void *)(intptr_t)(-5 - i);
2116 rc = PGMDynMapHCPage(pVM, cr3 + PAGE_SIZE * (i + 5), &pv2);
2117 }
2118 ASMIntEnable();
2119 if (RT_FAILURE(rc) || pv == pv2)
2120 {
2121 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
2122 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
2123 }
2124 else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries))
2125 {
2126 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2127 rc = VERR_INTERNAL_ERROR;
2128 }
2129 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2130 if (RT_SUCCESS(rc))
2131 rc = PGMR0DynMapAssertIntegrity();
2132 if (RT_SUCCESS(rc))
2133 {
2134 /*
2135 * Trigger an overflow error.
2136 */
2137 LogRel(("Test #4\n"));
2138 ASMIntDisable();
2139 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2140 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) + 2; i++)
2141 {
2142 rc = PGMDynMapHCPage(pVM, cr3 - PAGE_SIZE * (i + 5), &pv2);
2143 if (RT_SUCCESS(rc))
2144 rc = PGMR0DynMapAssertIntegrity();
2145 if (RT_FAILURE(rc))
2146 break;
2147 }
2148 ASMIntEnable();
2149 if (rc == VERR_PGM_DYNMAP_FULL_SET)
2150 {
2151 /* flush the set. */
2152 LogRel(("Test #5\n"));
2153 ASMIntDisable();
2154 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2155 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
2156 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
2157 ASMIntEnable();
2158
2159 rc = PGMR0DynMapAssertIntegrity();
2160 }
2161 else
2162 {
2163 LogRel(("failed(%d): rc=%Rrc, wanted %d ; pv2=%p Set=%u/%u; i=%d\n", __LINE__,
2164 rc, VERR_PGM_DYNMAP_FULL_SET, pv2, pSet->cEntries, RT_ELEMENTS(pSet->aEntries), i));
2165 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
2166 }
2167 }
2168 }
2169 }
2170 else
2171 {
2172 LogRel(("failed(%d): rc=%Rrc rc2=%Rrc; pv=%p pv2=%p\n", __LINE__, rc, rc2, pv, pv2));
2173 if (RT_SUCCESS(rc))
2174 rc = rc2;
2175 }
2176
2177 /*
2178 * Check that everyone sees the same stuff.
2179 */
2180 if (RT_SUCCESS(rc))
2181 {
2182 LogRel(("Test #5\n"));
2183 ASMIntDisable();
2184 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2185 RTHCPHYS HCPhysPT = RTR0MemObjGetPagePhysAddr(pThis->pSegHead->ahMemObjPTs[0], 0);
2186 rc = PGMDynMapHCPage(pVM, HCPhysPT, &pv);
2187 if (RT_SUCCESS(rc))
2188 {
2189 PGMR0DYNMAPTEST Test;
2190 uint32_t *pu32Real = &pThis->paPages[pThis->pSegHead->iPage].uPte.pLegacy->u;
2191 Test.pu32 = (uint32_t *)((uintptr_t)pv | ((uintptr_t)pu32Real & PAGE_OFFSET_MASK));
2192 Test.u32Expect = *pu32Real;
2193 ASMAtomicWriteU32(&Test.cFailures, 0);
2194 ASMIntEnable();
2195
2196 rc = RTMpOnAll(pgmR0DynMapTest3PerCpu, &Test, NULL);
2197 if (RT_FAILURE(rc))
2198 LogRel(("failed(%d): RTMpOnAll rc=%Rrc\n", __LINE__, rc));
2199 else if (Test.cFailures)
2200 {
2201 LogRel(("failed(%d): cFailures=%d pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", __LINE__,
2202 Test.cFailures, pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
2203 rc = VERR_INTERNAL_ERROR;
2204 }
2205 else
2206 LogRel(("pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n",
2207 pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
2208 }
2209 else
2210 {
2211 ASMIntEnable();
2212 LogRel(("failed(%d): rc=%Rrc\n", __LINE__, rc));
2213 }
2214 }
2215
2216 /*
2217 * Clean up.
2218 */
2219 LogRel(("Cleanup.\n"));
2220 ASMIntDisable();
2221 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2222 PGMDynMapFlushAutoSet(&pVM->aCpus[0]);
2223 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
2224 ASMIntEnable();
2225
2226 if (RT_SUCCESS(rc))
2227 rc = PGMR0DynMapAssertIntegrity();
2228 else
2229 PGMR0DynMapAssertIntegrity();
2230
2231 g_fPGMR0DynMapTestRunning = false;
2232 LogRel(("Result: rc=%Rrc Load=%u/%u/%u Set=%#x/%u\n", rc,
2233 pThis->cLoad, pThis->cMaxLoad, pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2234 pVM->pgm.s.pvR0DynMapUsed = pvR0DynMapUsedSaved;
2235 LogRel(("pgmR0DynMapTest: ****** END ******\n"));
2236 return rc;
2237}
2238#endif /* DEBUG */
2239