VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 25576

Last change on this file since 25576 was 25551, checked in by vboxsync, 15 years ago

Removed the obsolete RC tlb flushing in the pgm pool access handler. Our disassembler can now deal with flushed tlb entries.
(watch for regressions!)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 186.9 KB
1/* $Id: PGMAllPool.cpp 25551 2009-12-21 17:22:45Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
54static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
55#ifndef IN_RING3
56DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
57#endif
58#ifdef LOG_ENABLED
59static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
60#endif
61#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
62static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
63#endif
64
65int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
66PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
67void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
68void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
69
70RT_C_DECLS_END
71
72
73/**
74 * Checks if the specified pool page kind is for a 4MB or 2MB guest page.
75 *
76 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
77 * @param enmKind The page kind.
78 */
79DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
80{
81 switch (enmKind)
82 {
83 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
84 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
85 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
86 return true;
87 default:
88 return false;
89 }
90}
91
92/** @def PGMPOOL_PAGE_2_LOCKED_PTR
93 * Maps a pool page into the current context and locks it (RC only).
94 *
95 * @returns VBox status code.
96 * @param pVM The VM handle.
97 * @param pPage The pool page.
98 *
99 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
100 * small page window employed by that function. Be careful.
101 * @remark There is no need to assert on the result.
102 */
103#if defined(IN_RC)
104DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
105{
106 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
107
108 /* Make sure the dynamic mapping will not be reused. */
109 if (pv)
110 PGMDynLockHCPage(pVM, (uint8_t *)pv);
111
112 return pv;
113}
114#else
115# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
116#endif
117
118/** @def PGMPOOL_UNLOCK_PTR
119 * Unlocks a previously locked dynamic cache entry (RC only).
120 *
121 * @returns VBox status code.
122 * @param pVM The VM handle.
123 * @param pPage The pool page.
124 *
125 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
126 * small page window employed by that function. Be careful.
127 * @remark There is no need to assert on the result.
128 */
129#if defined(IN_RC)
130DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
131{
132 if (pvPage)
133 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
134}
135#else
136# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
137#endif
138
139
140/**
141 * Flushes a chain of pages sharing the same access monitor.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pPool The pool.
145 * @param pPage A page in the chain.
146 */
147int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
148{
149 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
150
151 /*
152 * Find the list head.
153 */
154 uint16_t idx = pPage->idx;
155 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
156 {
157 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
158 {
159 idx = pPage->iMonitoredPrev;
160 Assert(idx != pPage->idx);
161 pPage = &pPool->aPages[idx];
162 }
163 }
164
165 /*
166 * Iterate the list flushing each shadow page.
167 */
168 int rc = VINF_SUCCESS;
169 for (;;)
170 {
171 idx = pPage->iMonitoredNext;
172 Assert(idx != pPage->idx);
173 if (pPage->idx >= PGMPOOL_IDX_FIRST)
174 {
175 int rc2 = pgmPoolFlushPage(pPool, pPage);
176 AssertRC(rc2);
177 }
178 /* next */
179 if (idx == NIL_PGMPOOL_IDX)
180 break;
181 pPage = &pPool->aPages[idx];
182 }
183 return rc;
184}
185
186
187/**
188 * Wrapper for getting the current context pointer to the entry being modified.
189 *
190 * @returns VBox status code suitable for scheduling.
191 * @param pVM VM Handle.
192 * @param pvDst Destination address
193 * @param pvSrc Source guest virtual address.
194 * @param GCPhysSrc The source guest physical address.
195 * @param cb Size of data to read
196 */
197DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
198{
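    /* Note: both paths below align the source down to the entry size (cb is a power of
       two), so a write that does not start on an entry boundary still reads the whole,
       properly aligned guest entry. */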
199#if defined(IN_RING3)
200 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
201 return VINF_SUCCESS;
202#else
203 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
204 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
205#endif
206}
207
208/**
209 * Process shadow entries before they are changed by the guest.
210 *
211 * For PT entries we will clear them. For PD entries, we'll simply check
212 * for mapping conflicts and set the SyncCR3 FF if found.
213 *
214 * @param pVCpu VMCPU handle
215 * @param pPool The pool.
216 * @param pPage The head page.
217 * @param GCPhysFault The guest physical fault address.
218 * @param uAddress In R0 and GC this is the guest context fault address (flat).
219 * In R3 this is the host context 'fault' address.
220 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries
221 */
222void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
223{
224 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
225 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
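    /* off is the byte offset of the write within the 4 KB page; the shadow entry indices
       below are derived from it (the PAE PDPT case recomputes its own offset because that
       structure is only 32 bytes and not page aligned). */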
226 PVM pVM = pPool->CTX_SUFF(pVM);
227
228 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, cbWrite));
229
230 for (;;)
231 {
232 union
233 {
234 void *pv;
235 PX86PT pPT;
236 PX86PTPAE pPTPae;
237 PX86PD pPD;
238 PX86PDPAE pPDPae;
239 PX86PDPT pPDPT;
240 PX86PML4 pPML4;
241 } uShw;
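        /* The union above lets the mapped shadow page be viewed as whichever paging
           structure matches pPage->enmKind in the switch below. */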
242
243 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
244
245 uShw.pv = NULL;
246 switch (pPage->enmKind)
247 {
248 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
249 {
250 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
251 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
252 const unsigned iShw = off / sizeof(X86PTE);
253 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
254 if (uShw.pPT->a[iShw].n.u1Present)
255 {
256 X86PTE GstPte;
257
258 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
259 AssertRC(rc);
260 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
261 pgmPoolTracDerefGCPhysHint(pPool, pPage,
262 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
263 GstPte.u & X86_PTE_PG_MASK);
264 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
265 }
266 break;
267 }
268
269 /* page/2 sized */
270 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
271 {
272 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
273 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
274 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
275 {
276 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
277 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
278 if (uShw.pPTPae->a[iShw].n.u1Present)
279 {
280 X86PTE GstPte;
281 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
282 AssertRC(rc);
283
284 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
285 pgmPoolTracDerefGCPhysHint(pPool, pPage,
286 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
287 GstPte.u & X86_PTE_PG_MASK);
288 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
289 }
290 }
291 break;
292 }
293
294 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
295 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
296 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
297 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
298 {
299 unsigned iGst = off / sizeof(X86PDE);
300 unsigned iShwPdpt = iGst / 256;
301 unsigned iShw = (iGst % 256) * 2;
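            /* A 32-bit guest PD has 1024 entries covering 4 GB, but a shadow PAE PD covers
               only 1 GB (256 of those guest entries). iShwPdpt selects which of the four
               shadow PDs is affected, and iShw is doubled because each 4 MB guest PDE is
               shadowed by two 2 MB PAE PDEs. */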
302 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
303
304 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
305 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
306 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
307 {
308 for (unsigned i = 0; i < 2; i++)
309 {
310# ifndef IN_RING0
311 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
312 {
313 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
314 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
315 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
316 break;
317 }
318 else
319# endif /* !IN_RING0 */
320 if (uShw.pPDPae->a[iShw+i].n.u1Present)
321 {
322 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
323 pgmPoolFree(pVM,
324 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
325 pPage->idx,
326 iShw + i);
327 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
328 }
329
330 /* paranoia / a bit assumptive. */
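                    /* I.e. the write starts unaligned and is long enough to spill into the
                       next 32-bit guest PDE, so the neighbouring shadow entries (two PAE
                       entries further on) may need to be dealt with as well. */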
331 if ( (off & 3)
332 && (off & 3) + cbWrite > 4)
333 {
334 const unsigned iShw2 = iShw + 2 + i;
335 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
336 {
337# ifndef IN_RING0
338 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
339 {
340 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
341 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
342 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
343 break;
344 }
345 else
346# endif /* !IN_RING0 */
347 if (uShw.pPDPae->a[iShw2].n.u1Present)
348 {
349 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
350 pgmPoolFree(pVM,
351 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
352 pPage->idx,
353 iShw2);
354 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
355 }
356 }
357 }
358 }
359 }
360 break;
361 }
362
363 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
364 {
365 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
366 const unsigned iShw = off / sizeof(X86PTEPAE);
367 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
368 if (uShw.pPTPae->a[iShw].n.u1Present)
369 {
370 X86PTEPAE GstPte;
371 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
372 AssertRC(rc);
373
374 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
375 pgmPoolTracDerefGCPhysHint(pPool, pPage,
376 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
377 GstPte.u & X86_PTE_PAE_PG_MASK);
378 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
379 }
380
381 /* paranoia / a bit assumptive. */
382 if ( (off & 7)
383 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
384 {
385 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
386 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
387
388 if (uShw.pPTPae->a[iShw2].n.u1Present)
389 {
390 X86PTEPAE GstPte;
391# ifdef IN_RING3
392 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
393# else
394 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
395# endif
396 AssertRC(rc);
397 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
398 pgmPoolTracDerefGCPhysHint(pPool, pPage,
399 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
400 GstPte.u & X86_PTE_PAE_PG_MASK);
401 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u ,0);
402 }
403 }
404 break;
405 }
406
407 case PGMPOOLKIND_32BIT_PD:
408 {
409 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
410 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
411
412 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
413 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
414# ifndef IN_RING0
415 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
416 {
417 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
418 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
419 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
420 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
421 break;
422 }
423# endif /* !IN_RING0 */
424# ifndef IN_RING0
425 else
426# endif /* !IN_RING0 */
427 {
428 if (uShw.pPD->a[iShw].n.u1Present)
429 {
430 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
431 pgmPoolFree(pVM,
432 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
433 pPage->idx,
434 iShw);
435 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
436 }
437 }
438 /* paranoia / a bit assumptive. */
439 if ( (off & 3)
440 && (off & 3) + cbWrite > sizeof(X86PTE))
441 {
442 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
443 if ( iShw2 != iShw
444 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
445 {
446# ifndef IN_RING0
447 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
448 {
449 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
450 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
451 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
452 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
453 break;
454 }
455# endif /* !IN_RING0 */
456# ifndef IN_RING0
457 else
458# endif /* !IN_RING0 */
459 {
460 if (uShw.pPD->a[iShw2].n.u1Present)
461 {
462 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
463 pgmPoolFree(pVM,
464 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
465 pPage->idx,
466 iShw2);
467 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
468 }
469 }
470 }
471 }
472#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
473 if ( uShw.pPD->a[iShw].n.u1Present
474 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
475 {
476 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
477# ifdef IN_RC /* TLB load - we're pushing things a bit... */
478 ASMProbeReadByte(pvAddress);
479# endif
480 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
481 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
482 }
483#endif
484 break;
485 }
486
487 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
488 {
489 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
490 const unsigned iShw = off / sizeof(X86PDEPAE);
491 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
492#ifndef IN_RING0
493 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
494 {
495 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
496 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
497 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
498 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
499 break;
500 }
501#endif /* !IN_RING0 */
502 /*
503 * Causes trouble when the guest uses a PDE to refer to the whole page table level
504 * structure. (Invalidate here; faults later on when it tries to change the page
505 * table entries -> recheck; probably only applies to the RC case.)
506 */
507# ifndef IN_RING0
508 else
509# endif /* !IN_RING0 */
510 {
511 if (uShw.pPDPae->a[iShw].n.u1Present)
512 {
513 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
514 pgmPoolFree(pVM,
515 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
516 pPage->idx,
517 iShw);
518 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
519 }
520 }
521 /* paranoia / a bit assumptive. */
522 if ( (off & 7)
523 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
524 {
525 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
526 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
527
528#ifndef IN_RING0
529 if ( iShw2 != iShw
530 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
531 {
532 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
533 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
534 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
535 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
536 break;
537 }
538#endif /* !IN_RING0 */
539# ifndef IN_RING0
540 else
541# endif /* !IN_RING0 */
542 if (uShw.pPDPae->a[iShw2].n.u1Present)
543 {
544 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
545 pgmPoolFree(pVM,
546 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
547 pPage->idx,
548 iShw2);
549 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
550 }
551 }
552 break;
553 }
554
555 case PGMPOOLKIND_PAE_PDPT:
556 {
557 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
558 /*
559 * Hopefully this doesn't happen very often:
560 * - touching unused parts of the page
561 * - messing with the bits of pd pointers without changing the physical address
562 */
563 /* PDPT roots are not page aligned; 32 byte only! */
564 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
565
566 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
567 const unsigned iShw = offPdpt / sizeof(X86PDPE);
568 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
569 {
570# ifndef IN_RING0
571 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
572 {
573 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
574 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
575 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
576 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
577 break;
578 }
579# endif /* !IN_RING0 */
580# ifndef IN_RING0
581 else
582# endif /* !IN_RING0 */
583 if (uShw.pPDPT->a[iShw].n.u1Present)
584 {
585 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
586 pgmPoolFree(pVM,
587 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
588 pPage->idx,
589 iShw);
590 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
591 }
592
593 /* paranoia / a bit assumptive. */
594 if ( (offPdpt & 7)
595 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
596 {
597 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
598 if ( iShw2 != iShw
599 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
600 {
601# ifndef IN_RING0
602 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
603 {
604 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
605 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
606 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
607 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
608 break;
609 }
610# endif /* !IN_RING0 */
611# ifndef IN_RING0
612 else
613# endif /* !IN_RING0 */
614 if (uShw.pPDPT->a[iShw2].n.u1Present)
615 {
616 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
617 pgmPoolFree(pVM,
618 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
619 pPage->idx,
620 iShw2);
621 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
622 }
623 }
624 }
625 }
626 break;
627 }
628
629#ifndef IN_RC
630 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
631 {
632 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
633 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
634 const unsigned iShw = off / sizeof(X86PDEPAE);
635 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
636 if (uShw.pPDPae->a[iShw].n.u1Present)
637 {
638 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
639 pgmPoolFree(pVM,
640 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
641 pPage->idx,
642 iShw);
643 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
644 }
645 /* paranoia / a bit assumptive. */
646 if ( (off & 7)
647 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
648 {
649 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
650 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
651
652 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
653 if (uShw.pPDPae->a[iShw2].n.u1Present)
654 {
655 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
656 pgmPoolFree(pVM,
657 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
658 pPage->idx,
659 iShw2);
660 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
661 }
662 }
663 break;
664 }
665
666 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
667 {
668 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
669 /*
670 * Hopefully this doesn't happen very often:
671 * - messing with the bits of pd pointers without changing the physical address
672 */
673 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
674 const unsigned iShw = off / sizeof(X86PDPE);
675 if (uShw.pPDPT->a[iShw].n.u1Present)
676 {
677 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
678 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
679 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
680 }
681 /* paranoia / a bit assumptive. */
682 if ( (off & 7)
683 && (off & 7) + cbWrite > sizeof(X86PDPE))
684 {
685 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
686 if (uShw.pPDPT->a[iShw2].n.u1Present)
687 {
688 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
689 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
690 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PML4:
697 {
698 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
699 /*
700 * Hopefully this doesn't happen very often:
701 * - messing with the bits of pd pointers without changing the physical address
702 */
703 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
704 const unsigned iShw = off / sizeof(X86PDPE);
705 if (uShw.pPML4->a[iShw].n.u1Present)
706 {
707 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
708 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
709 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
710 }
711 /* paranoia / a bit assumptive. */
712 if ( (off & 7)
713 && (off & 7) + cbWrite > sizeof(X86PDPE))
714 {
715 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
716 if (uShw.pPML4->a[iShw2].n.u1Present)
717 {
718 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
719 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
720 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
721 }
722 }
723 break;
724 }
725#endif /* !IN_RC */
726
727 default:
728 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
729 }
730 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
731
732 /* next */
733 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
734 return;
735 pPage = &pPool->aPages[pPage->iMonitoredNext];
736 }
737}
738
739# ifndef IN_RING3
740/**
741 * Checks if an access could be a fork operation in progress.
742 *
743 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
744 *
745 * @returns true if it's likely that we're forking, otherwise false.
746 * @param pPool The pool.
747 * @param pDis The disassembled instruction.
748 * @param offFault The access offset.
749 */
750DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
751{
752 /*
753 * i386 Linux uses btr to clear X86_PTE_RW.
754 * The functions involved are (2.6.16 source inspection):
755 * clear_bit
756 * ptep_set_wrprotect
757 * copy_one_pte
758 * copy_pte_range
759 * copy_pmd_range
760 * copy_pud_range
761 * copy_page_range
762 * dup_mmap
763 * dup_mm
764 * copy_mm
765 * copy_process
766 * do_fork
767 */
768 if ( pDis->pCurInstr->opcode == OP_BTR
769 && !(offFault & 4)
770 /** @todo Validate that the bit index is X86_PTE_RW. */
771 )
772 {
773 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
774 return true;
775 }
776 return false;
777}
778
779
780/**
781 * Determine whether the page is likely to have been reused.
782 *
783 * @returns true if we consider the page as being reused for a different purpose.
784 * @returns false if we consider it to still be a paging page.
785 * @param pVM VM Handle.
786 * @param pVCpu VMCPU Handle.
787 * @param pRegFrame Trap register frame.
788 * @param pDis The disassembly info for the faulting instruction.
789 * @param pvFault The fault address.
790 *
791 * @remark The REP prefix check is left to the caller because of STOSD/W.
792 */
793DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
794{
795#ifndef IN_RC
796 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
797 if ( HWACCMHasPendingIrq(pVM)
798 && (pRegFrame->rsp - pvFault) < 32)
799 {
800 /* Fault caused by stack writes while trying to inject an interrupt event. */
801 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
802 return true;
803 }
804#else
805 NOREF(pVM); NOREF(pvFault);
806#endif
807
808 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
809
810 /* Non-supervisor mode write means it's used for something else. */
811 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
812 return true;
813
814 switch (pDis->pCurInstr->opcode)
815 {
816 /* call implies the actual push of the return address faulted */
817 case OP_CALL:
818 Log4(("pgmPoolMonitorIsReused: CALL\n"));
819 return true;
820 case OP_PUSH:
821 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
822 return true;
823 case OP_PUSHF:
824 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
825 return true;
826 case OP_PUSHA:
827 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
828 return true;
829 case OP_FXSAVE:
830 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
831 return true;
832 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
833 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
834 return true;
835 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
836 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
837 return true;
838 case OP_MOVSWD:
839 case OP_STOSWD:
840 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
841 && pRegFrame->rcx >= 0x40
842 )
843 {
844 Assert(pDis->mode == CPUMODE_64BIT);
845
846 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
847 return true;
848 }
849 return false;
850 }
851 if ( ( (pDis->param1.flags & USE_REG_GEN32)
852 || (pDis->param1.flags & USE_REG_GEN64))
853 && (pDis->param1.base.reg_gen == USE_REG_ESP))
854 {
855 Log4(("pgmPoolMonitorIsReused: ESP\n"));
856 return true;
857 }
858
859 return false;
860}
861
862/**
863 * Flushes the page being accessed.
864 *
865 * @returns VBox status code suitable for scheduling.
866 * @param pVM The VM handle.
867 * @param pVCpu The VMCPU handle.
868 * @param pPool The pool.
869 * @param pPage The pool page (head).
870 * @param pDis The disassembly of the write instruction.
871 * @param pRegFrame The trap register frame.
872 * @param GCPhysFault The fault address as guest physical address.
873 * @param pvFault The fault address.
874 */
875static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
876 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
877{
878 /*
879 * First, do the flushing.
880 */
881 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
882
883 /*
884 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection). Must do this in raw mode (!); XP boot will fail otherwise.
885 */
886 uint32_t cbWritten;
887 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
888 if (RT_SUCCESS(rc2))
889 pRegFrame->rip += pDis->opsize;
890 else if (rc2 == VERR_EM_INTERPRETER)
891 {
892#ifdef IN_RC
893 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
894 {
895 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
896 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
897 rc = VINF_SUCCESS;
898 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
899 }
900 else
901#endif
902 {
903 rc = VINF_EM_RAW_EMULATE_INSTR;
904 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
905 }
906 }
907 else
908 rc = rc2;
909
910 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
911 return rc;
912}
913
914/**
915 * Handles the STOSD write accesses.
916 *
917 * @returns VBox status code suitable for scheduling.
918 * @param pVM The VM handle.
919 * @param pPool The pool.
920 * @param pPage The pool page (head).
921 * @param pDis The disassembly of the write instruction.
922 * @param pRegFrame The trap register frame.
923 * @param GCPhysFault The fault address as guest physical address.
924 * @param pvFault The fault address.
925 */
926DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
927 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
928{
929 unsigned uIncrement = pDis->param1.size;
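    /* param1 of STOSD/STOSQ is the es:[e/rdi] memory operand, so its size gives the
       per-iteration store width: 4 bytes in 32-bit operand mode, 8 bytes with REX.W. */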
930
931 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
932 Assert(pRegFrame->rcx <= 0x20);
933
934#ifdef VBOX_STRICT
935 if (pDis->opmode == CPUMODE_32BIT)
936 Assert(uIncrement == 4);
937 else
938 Assert(uIncrement == 8);
939#endif
940
941 Log3(("pgmPoolAccessHandlerSTOSD\n"));
942
943 /*
944 * Increment the modification counter and insert it into the list
945 * of modified pages the first time.
946 */
947 if (!pPage->cModifications++)
948 pgmPoolMonitorModifiedInsert(pPool, pPage);
949
950 /*
951 * Execute REP STOSD.
952 *
953 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
954 * write situation, meaning that it's safe to write here.
955 */
956 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
957 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
958 while (pRegFrame->rcx)
959 {
960#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
961 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
962 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
963 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
964#else
965 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
966#endif
967#ifdef IN_RC
968 *(uint32_t *)pu32 = pRegFrame->eax;
969#else
970 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
971#endif
972 pu32 += uIncrement;
973 GCPhysFault += uIncrement;
974 pRegFrame->rdi += uIncrement;
975 pRegFrame->rcx--;
976 }
977 pRegFrame->rip += pDis->opsize;
978
979 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
980 return VINF_SUCCESS;
981}
982
983
984/**
985 * Handles the simple write accesses.
986 *
987 * @returns VBox status code suitable for scheduling.
988 * @param pVM The VM handle.
989 * @param pVCpu The VMCPU handle.
990 * @param pPool The pool.
991 * @param pPage The pool page (head).
992 * @param pDis The disassembly of the write instruction.
993 * @param pRegFrame The trap register frame.
994 * @param GCPhysFault The fault address as guest physical address.
995 * @param pvFault The fault address.
996 * @param pfReused Reused state (out)
997 */
998DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
999 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1000{
1001 Log3(("pgmPoolAccessHandlerSimple\n"));
1002 /*
1003 * Increment the modification counter and insert it into the list
1004 * of modified pages the first time.
1005 */
1006 if (!pPage->cModifications++)
1007 pgmPoolMonitorModifiedInsert(pPool, pPage);
1008
1009 /*
1010 * Clear the affected shadow page table entries. ASSUMES that pvFault is readable.
1011 */
1012#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1013 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1014 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1015 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1016#else
1017 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1018#endif
1019
1020 /*
1021 * Interpret the instruction.
1022 */
1023 uint32_t cb;
1024 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1025 if (RT_SUCCESS(rc))
1026 pRegFrame->rip += pDis->opsize;
1027 else if (rc == VERR_EM_INTERPRETER)
1028 {
1029 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1030 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1031 rc = VINF_EM_RAW_EMULATE_INSTR;
1032 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1033 }
1034
1035#if 0 /* experimental code */
1036 if (rc == VINF_SUCCESS)
1037 {
1038 switch (pPage->enmKind)
1039 {
1040 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1041 {
1042 X86PTEPAE GstPte;
1043 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1044 AssertRC(rc);
1045
1046 /* Check the new value written by the guest. If present and with a bogus physical address, then
1047 * it's fairly safe to assume the guest is reusing the PT.
1048 */
1049 if (GstPte.n.u1Present)
1050 {
1051 RTHCPHYS HCPhys = -1;
1052 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1053 if (rc != VINF_SUCCESS)
1054 {
1055 *pfReused = true;
1056 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1057 }
1058 }
1059 break;
1060 }
1061 }
1062 }
1063#endif
1064
1065 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1066 return rc;
1067}
1068
1069/**
1070 * \#PF Handler callback for PT write accesses.
1071 *
1072 * @returns VBox status code (appropriate for GC return).
1073 * @param pVM VM Handle.
1074 * @param uErrorCode CPU Error code.
1075 * @param pRegFrame Trap register frame.
1076 * NULL on DMA and other non CPU access.
1077 * @param pvFault The fault address (cr2).
1078 * @param GCPhysFault The GC physical address corresponding to pvFault.
1079 * @param pvUser User argument.
1080 */
1081DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1082{
1083 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1084 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1085 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1086 PVMCPU pVCpu = VMMGetCpu(pVM);
1087 unsigned cMaxModifications;
1088 bool fForcedFlush = false;
1089
1090 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1091
1092 pgmLock(pVM);
1093 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1094 {
1095 /* Pool page changed while we were waiting for the lock; ignore. */
1096 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1097 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1098 pgmUnlock(pVM);
1099 return VINF_SUCCESS;
1100 }
1101#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1102 if (pPage->fDirty)
1103 {
1104 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1105 pgmUnlock(pVM);
1106 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1107 }
1108#endif
1109
1110#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1111 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1112 {
1113 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1114 void *pvGst;
1115 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1116 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1117 }
1118#endif
1119
1120 /*
1121 * Disassemble the faulting instruction.
1122 */
1123 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1124 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1125 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1126 {
1127 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1128 pgmUnlock(pVM);
1129 return rc;
1130 }
1131
1132 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1133
1134 /*
1135 * We should ALWAYS have the list head as user parameter. This
1136 * is because we use that page to record the changes.
1137 */
1138 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1139
1140#ifdef IN_RING0
1141 /* Maximum nr of modifications depends on the page type. */
1142 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1143 cMaxModifications = 4;
1144 else
1145 cMaxModifications = 24;
1146#else
1147 cMaxModifications = 48;
1148#endif
1149
1150 /*
1151 * Incremental page table updates should weigh more than random ones.
1152 * (Only applies when started from offset 0)
1153 */
1154 pVCpu->pgm.s.cPoolAccessHandler++;
1155 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1156 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1157 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1158 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1159 {
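        /* Heuristic: the write hits the entry right after the previously handled one, comes
           from (nearly) the same instruction and no other access was handled in between, so
           this looks like a sequential rewrite of the whole table. */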
1160 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1161 pPage->cModifications = pPage->cModifications * 2;
1162 pPage->pvLastAccessHandlerFault = pvFault;
1163 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1164 if (pPage->cModifications >= cMaxModifications)
1165 {
1166 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1167 fForcedFlush = true;
1168 }
1169 }
1170
1171 if (pPage->cModifications >= cMaxModifications)
1172 Log(("Mod overflow %VGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1173
1174 /*
1175 * Check if it's worth dealing with.
1176 */
1177 bool fReused = false;
1178 bool fNotReusedNotForking = false;
1179 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1180 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1181 )
1182 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1183 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1184 {
1185 /*
1186 * Simple instructions, no REP prefix.
1187 */
1188 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1189 {
1190 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1191 if (fReused)
1192 goto flushPage;
1193
1194 /* A mov instruction to change the first page table entry will be remembered so we can detect
1195 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1196 */
1197 if ( rc == VINF_SUCCESS
1198 && pDis->pCurInstr->opcode == OP_MOV
1199 && (pvFault & PAGE_OFFSET_MASK) == 0)
1200 {
1201 pPage->pvLastAccessHandlerFault = pvFault;
1202 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1203 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1204 /* Make sure we don't kick out a page too quickly. */
1205 if (pPage->cModifications > 8)
1206 pPage->cModifications = 2;
1207 }
1208 else
1209 if (pPage->pvLastAccessHandlerFault == pvFault)
1210 {
1211 /* ignore the 2nd write to this page table entry. */
1212 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1213 }
1214 else
1215 {
1216 pPage->pvLastAccessHandlerFault = 0;
1217 pPage->pvLastAccessHandlerRip = 0;
1218 }
1219
1220 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1221 pgmUnlock(pVM);
1222 return rc;
1223 }
1224
1225 /*
1226 * Windows frequently performs small memset() operations (netio test 4k+).
1227 * We have to deal with these or we'll kill the cache and performance.
1228 */
1229 if ( pDis->pCurInstr->opcode == OP_STOSWD
1230 && !pRegFrame->eflags.Bits.u1DF
1231 && pDis->opmode == pDis->mode
1232 && pDis->addrmode == pDis->mode)
1233 {
1234 bool fValidStosd = false;
1235
1236 if ( pDis->mode == CPUMODE_32BIT
1237 && pDis->prefix == PREFIX_REP
1238 && pRegFrame->ecx <= 0x20
1239 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1240 && !((uintptr_t)pvFault & 3)
1241 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1242 )
1243 {
1244 fValidStosd = true;
1245 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1246 }
1247 else
1248 if ( pDis->mode == CPUMODE_64BIT
1249 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1250 && pRegFrame->rcx <= 0x20
1251 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1252 && !((uintptr_t)pvFault & 7)
1253 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1254 )
1255 {
1256 fValidStosd = true;
1257 }
1258
1259 if (fValidStosd)
1260 {
1261 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1262 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1263 pgmUnlock(pVM);
1264 return rc;
1265 }
1266 }
1267
1268 /* REP prefix, don't bother. */
1269 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1270 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1271 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1272 fNotReusedNotForking = true;
1273 }
1274
1275#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1276 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1277 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1278 */
1279 if ( pPage->cModifications >= cMaxModifications
1280 && !fForcedFlush
1281 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1282 && ( fNotReusedNotForking
1283 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1284 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1285 )
1286 )
1287 {
1288 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1289 Assert(pPage->fDirty == false);
1290
1291 /* Flush any monitored duplicates as we will disable write protection. */
1292 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1293 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1294 {
1295 PPGMPOOLPAGE pPageHead = pPage;
1296
1297 /* Find the monitor head. */
1298 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1299 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1300
1301 while (pPageHead)
1302 {
1303 unsigned idxNext = pPageHead->iMonitoredNext;
1304
1305 if (pPageHead != pPage)
1306 {
1307 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1308 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1309 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1310 AssertRC(rc2);
1311 }
1312
1313 if (idxNext == NIL_PGMPOOL_IDX)
1314 break;
1315
1316 pPageHead = &pPool->aPages[idxNext];
1317 }
1318 }
1319
1320 /* The flushing above might fail for locked pages, so double check. */
1321 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1322 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1323 {
1324 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1325
1326 /* Temporarily allow write access to the page table again. */
1327 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1328 if (rc == VINF_SUCCESS)
1329 {
1330 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1331 AssertMsg(rc == VINF_SUCCESS
1332 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1333 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1334 || rc == VERR_PAGE_NOT_PRESENT,
1335 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1336
1337 pPage->pvDirtyFault = pvFault;
1338
1339 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1340 pgmUnlock(pVM);
1341 return rc;
1342 }
1343 }
1344 }
1345#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1346
1347 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1348flushPage:
1349 /*
1350 * Not worth it, so flush it.
1351 *
1352 * If we considered it to be reused, don't go back to ring-3
1353 * to emulate failed instructions since we usually cannot
1354 * interpret them. This may be a bit risky, in which case
1355 * the reuse detection must be fixed.
1356 */
1357 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1358 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1359 && fReused)
1360 {
1361 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1362 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1363 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1364 }
1365 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1366 pgmUnlock(pVM);
1367 return rc;
1368}
1369
1370# endif /* !IN_RING3 */
1371
1372# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1373
1374# ifdef VBOX_STRICT
1375/**
1376 * Check references to guest physical memory in a PAE / PAE page table.
1377 *
1378 * @param pPool The pool.
1379 * @param pPage The page.
1380 * @param pShwPT The shadow page table (mapping of the page).
1381 * @param pGstPT The guest page table.
1382 */
1383static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1384{
1385 unsigned cErrors = 0;
1386 int LastRc = -1; /* initialized to shut up gcc */
1387 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1388 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1389
1390#ifdef VBOX_STRICT
1391 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1392 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1393#endif
1394 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1395 {
1396 if (pShwPT->a[i].n.u1Present)
1397 {
1398 RTHCPHYS HCPhys = -1;
1399 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1400 if ( rc != VINF_SUCCESS
1401 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1402 {
1403 RTHCPHYS HCPhysPT = -1;
1404 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1405 LastPTE = i;
1406 LastRc = rc;
1407 LastHCPhys = HCPhys;
1408 cErrors++;
1409
1410 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1411 AssertRC(rc);
1412
1413 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1414 {
1415 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1416
1417 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1418 {
1419 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1420
1421 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1422 {
1423 if ( pShwPT2->a[j].n.u1Present
1424 && pShwPT2->a[j].n.u1Write
1425 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1426 {
1427 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1428 }
1429 }
1430 }
1431 }
1432 }
1433 }
1434 }
1435 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1436}
1437# endif /* VBOX_STRICT */
1438
1439/**
1440 * Clear references to guest physical memory in a PAE / PAE page table.
1441 *
1442 * @returns nr of changed PTEs
1443 * @param pPool The pool.
1444 * @param pPage The page.
1445 * @param pShwPT The shadow page table (mapping of the page).
1446 * @param pGstPT The guest page table.
1447 * @param pOldGstPT The old cached guest page table.
1448 * @param fAllowRemoval Allow removal of a reused page table; bail out as soon as we encounter an invalid PTE
1449 * @param pfFlush Flush reused page table (out)
1450 */
1451DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1452{
1453 unsigned cChanged = 0;
1454
1455#ifdef VBOX_STRICT
1456 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1457 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1458#endif
1459 *pfFlush = false;
1460
1461 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1462 {
1463 /* Check the new value written by the guest. If present and with a bogus physical address, then
1464 * it's fairly safe to assume the guest is reusing the PT.
1465 */
1466 if ( fAllowRemoval
1467 && pGstPT->a[i].n.u1Present)
1468 {
1469 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1470 {
1471 *pfFlush = true;
1472 return ++cChanged;
1473 }
1474 }
1475 if (pShwPT->a[i].n.u1Present)
1476 {
1477 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1478 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1479 {
1480#ifdef VBOX_STRICT
1481 RTHCPHYS HCPhys = -1;
1482 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1483 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1484#endif
1485 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1486 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1487 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1488 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
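                /* A write-protected shadow PTE for a writable guest PTE (fHostRW < fGuestRW)
                   is legitimate, e.g. while emulating dirty bit tracking, and therefore does
                   not count as a change. */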
1489
1490 if ( uHostAttr == uGuestAttr
1491 && fHostRW <= fGuestRW)
1492 continue;
1493 }
1494 cChanged++;
1495 /* Something was changed, so flush it. */
1496 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1497 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1498 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1499 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1500 }
1501 }
1502 return cChanged;
1503}
1504
1505
1506/**
1507 * Flushes a dirty page.
1508 *
1509 * @param pVM VM Handle.
1510 * @param pPool The pool.
1511 * @param idxSlot Dirty array slot index
1512 * @param fAllowRemoval Allow a reused page table to be removed
1513 */
1514static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1515{
1516 PPGMPOOLPAGE pPage;
1517 unsigned idxPage;
1518
1519 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1520 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1521 return;
1522
1523 idxPage = pPool->aIdxDirtyPages[idxSlot];
1524 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1525 pPage = &pPool->aPages[idxPage];
1526 Assert(pPage->idx == idxPage);
1527 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1528
1529 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1530 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1531
1532 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1533 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1534 Assert(rc == VINF_SUCCESS);
1535 pPage->fDirty = false;
1536
1537#ifdef VBOX_STRICT
1538 uint64_t fFlags = 0;
1539 RTHCPHYS HCPhys;
1540 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1541 AssertMsg( ( rc == VINF_SUCCESS
1542 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1543 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1544 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1545 || rc == VERR_PAGE_NOT_PRESENT,
1546 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1547#endif
1548
1549 /* Flush those PTEs that have changed. */
1550 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1551 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1552 void *pvGst;
1553 bool fFlush;
1554 rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1555 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1556 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1557 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1558
1559 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1560 Assert(pPage->cModifications);
1561 if (cChanges < 4)
1562 pPage->cModifications = 1; /* must use > 0 here */
1563 else
1564 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1565
1566 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1567 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1568 pPool->idxFreeDirtyPage = idxSlot;
1569
1570 pPool->cDirtyPages--;
1571 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1572 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1573 if (fFlush)
1574 {
1575 Assert(fAllowRemoval);
1576 Log(("Flush reused page table!\n"));
1577 pgmPoolFlushPage(pPool, pPage);
1578 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1579 }
1580 else
1581 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1582}
1583
1584# ifndef IN_RING3
1585/**
1586 * Add a new dirty page
1587 *
1588 * @param pVM VM Handle.
1589 * @param pPool The pool.
1590 * @param pPage The page.
1591 */
1592void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1593{
1594 unsigned idxFree;
1595
1596 Assert(PGMIsLocked(pVM));
1597 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1598 Assert(!pPage->fDirty);
1599
1600 idxFree = pPool->idxFreeDirtyPage;
1601 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1602 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1603
1604 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1605 {
1606 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1607 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1608 }
1609 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1610 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1611
1612 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1613
1614 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1615 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1616 */
1617 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1618 void *pvGst;
1619 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1620 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1621#ifdef VBOX_STRICT
1622 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1623#endif
1624
1625 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1626 pPage->fDirty = true;
1627 pPage->idxDirty = idxFree;
1628 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1629 pPool->cDirtyPages++;
1630
1631 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
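 /* The dirty slot array is treated as a small ring buffer; if the next slot is still occupied, search the rest of the array for a free one. */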
1632 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1633 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1634 {
1635 unsigned i;
1636 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1637 {
1638 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1639 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1640 {
1641 pPool->idxFreeDirtyPage = idxFree;
1642 break;
1643 }
1644 }
1645 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1646 }
1647
1648 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1649 return;
1650}
1651# endif /* !IN_RING3 */
1652
1653/**
1654 * Check if the specified page is dirty (not write monitored)
1655 *
1656 * @returns true if the page is marked dirty, false if not.
1657 * @param pVM VM Handle.
1658 * @param GCPhys Guest physical address
1659 */
1660bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1661{
1662 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1663 Assert(PGMIsLocked(pVM));
1664 if (!pPool->cDirtyPages)
1665 return false;
1666
1667 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1668
1669 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1670 {
1671 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1672 {
1673 PPGMPOOLPAGE pPage;
1674 unsigned idxPage = pPool->aIdxDirtyPages[i];
1675
1676 pPage = &pPool->aPages[idxPage];
1677 if (pPage->GCPhys == GCPhys)
1678 return true;
1679 }
1680 }
1681 return false;
1682}
1683
1684/**
1685 * Reset all dirty pages by reinstating page monitoring.
1686 *
1687 * @param pVM VM Handle.
1688 */
1689void pgmPoolResetDirtyPages(PVM pVM)
1690{
1691 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1692 Assert(PGMIsLocked(pVM));
1693 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1694
1695 if (!pPool->cDirtyPages)
1696 return;
1697
1698 Log(("pgmPoolResetDirtyPages\n"));
1699 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1700 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1701
1702 pPool->idxFreeDirtyPage = 0;
1703 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1704 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1705 {
1706 unsigned i;
1707 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1708 {
1709 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1710 {
1711 pPool->idxFreeDirtyPage = i;
1712 break;
1713 }
1714 }
1715 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1716 }
1717
1718 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1719 return;
1720}
1721
1722/**
1723 * Invalidate the dirty state of the specified page table by flushing it and reinstating page monitoring.
1724 *
1725 * @param pVM VM Handle.
1726 * @param GCPhysPT Physical address of the page table
1727 */
1728void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1729{
1730 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1731 Assert(PGMIsLocked(pVM));
1732 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1733 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aIdxDirtyPages);
1734
1735 if (!pPool->cDirtyPages)
1736 return;
1737
1738 GCPhysPT = GCPhysPT & ~(RTGCPHYS)(PAGE_SIZE - 1);
1739
1740 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1741 {
1742 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1743 {
1744 unsigned idxPage = pPool->aIdxDirtyPages[i];
1745
1746 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1747 if (pPage->GCPhys == GCPhysPT)
1748 {
1749 idxDirtyPage = i;
1750 break;
1751 }
1752 }
1753 }
1754
1755 if (idxDirtyPage != RT_ELEMENTS(pPool->aIdxDirtyPages))
1756 {
1757 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1758 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1759 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1760 {
1761 unsigned i;
1762 for (i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1763 {
1764 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1765 {
1766 pPool->idxFreeDirtyPage = i;
1767 break;
1768 }
1769 }
1770 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1771 }
1772 }
1773}
1774
1775# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1776
1777/**
1778 * Inserts a page into the GCPhys hash table.
1779 *
1780 * @param pPool The pool.
1781 * @param pPage The page.
1782 */
1783DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1784{
1785 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1786 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1787 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1788 pPage->iNext = pPool->aiHash[iHash];
1789 pPool->aiHash[iHash] = pPage->idx;
1790}
1791
1792
1793/**
1794 * Removes a page from the GCPhys hash table.
1795 *
1796 * @param pPool The pool.
1797 * @param pPage The page.
1798 */
1799DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1800{
1801 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1802 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1803 if (pPool->aiHash[iHash] == pPage->idx)
1804 pPool->aiHash[iHash] = pPage->iNext;
1805 else
1806 {
1807 uint16_t iPrev = pPool->aiHash[iHash];
1808 for (;;)
1809 {
1810 const int16_t i = pPool->aPages[iPrev].iNext;
1811 if (i == pPage->idx)
1812 {
1813 pPool->aPages[iPrev].iNext = pPage->iNext;
1814 break;
1815 }
1816 if (i == NIL_PGMPOOL_IDX)
1817 {
1818 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1819 break;
1820 }
1821 iPrev = i;
1822 }
1823 }
1824 pPage->iNext = NIL_PGMPOOL_IDX;
1825}
1826
1827
1828/**
1829 * Frees up one cache page.
1830 *
1831 * @returns VBox status code.
1832 * @retval VINF_SUCCESS on success.
1833 * @param pPool The pool.
1834 * @param iUser The user index.
1835 */
1836static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1837{
1838#ifndef IN_RC
1839 const PVM pVM = pPool->CTX_SUFF(pVM);
1840#endif
1841 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1842 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1843
1844 /*
1845 * Select one page from the tail of the age list.
1846 */
1847 PPGMPOOLPAGE pPage;
1848 for (unsigned iLoop = 0; ; iLoop++)
1849 {
1850 uint16_t iToFree = pPool->iAgeTail;
1851 if (iToFree == iUser)
1852 iToFree = pPool->aPages[iToFree].iAgePrev;
1853/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1854 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1855 {
1856 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1857 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1858 {
1859 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1860 continue;
1861 iToFree = i;
1862 break;
1863 }
1864 }
1865*/
1866 Assert(iToFree != iUser);
1867 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1868 pPage = &pPool->aPages[iToFree];
1869
1870 /*
1871 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1872 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1873 */
1874 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1875 break;
1876 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1877 pgmPoolCacheUsed(pPool, pPage);
1878 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1879 }
1880
1881 /*
1882 * Found a usable page, flush it and return.
1883 */
1884 int rc = pgmPoolFlushPage(pPool, pPage);
1885 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1886 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
1887 if (rc == VINF_SUCCESS)
1888 PGM_INVL_ALL_VCPU_TLBS(pVM);
1889 return rc;
1890}
1891
1892
1893/**
1894 * Checks if a kind mismatch is really a page being reused
1895 * or if it's just normal remappings.
1896 *
1897 * @returns true if reused and the cached page (enmKind1) should be flushed
1898 * @returns false if not reused.
1899 * @param enmKind1 The kind of the cached page.
1900 * @param enmKind2 The kind of the requested page.
1901 */
1902static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1903{
1904 switch (enmKind1)
1905 {
1906 /*
1907 * Never reuse them. There is no remapping in non-paging mode.
1908 */
1909 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1910 case PGMPOOLKIND_32BIT_PD_PHYS:
1911 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1912 case PGMPOOLKIND_PAE_PD_PHYS:
1913 case PGMPOOLKIND_PAE_PDPT_PHYS:
1914 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1915 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1916 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1917 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1918 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1919 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1920 return false;
1921
1922 /*
1923 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1924 */
1925 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1926 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1927 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1928 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1929 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1930 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1931 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1932 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1933 case PGMPOOLKIND_32BIT_PD:
1934 case PGMPOOLKIND_PAE_PDPT:
1935 switch (enmKind2)
1936 {
1937 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1938 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1939 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1940 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1941 case PGMPOOLKIND_64BIT_PML4:
1942 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1943 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1944 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1945 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1946 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1947 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1948 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1949 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1950 return true;
1951 default:
1952 return false;
1953 }
1954
1955 /*
1956 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1957 */
1958 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1959 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1960 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1961 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1962 case PGMPOOLKIND_64BIT_PML4:
1963 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1964 switch (enmKind2)
1965 {
1966 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1967 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1968 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1969 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1970 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1971 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1972 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1973 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1974 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1975 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1976 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1977 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1978 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1979 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1980 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1981 return true;
1982 default:
1983 return false;
1984 }
1985
1986 /*
1987 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1988 */
1989 case PGMPOOLKIND_ROOT_NESTED:
1990 return false;
1991
1992 default:
1993 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1994 }
1995}
1996
1997
1998/**
1999 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2000 *
2001 * @returns VBox status code.
2002 * @retval VINF_PGM_CACHED_PAGE on success.
2003 * @retval VERR_FILE_NOT_FOUND if not found.
2004 * @param pPool The pool.
2005 * @param GCPhys The GC physical address of the page we're gonna shadow.
2006 * @param enmKind The kind of mapping.
2007 * @param enmAccess Access type for the mapping (only relevant for big pages)
2008 * @param iUser The shadow page pool index of the user table.
2009 * @param iUserTable The index into the user table (shadowed).
2010 * @param ppPage Where to store the pointer to the page.
2011 */
2012static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2013{
2014#ifndef IN_RC
2015 const PVM pVM = pPool->CTX_SUFF(pVM);
2016#endif
2017 /*
2018 * Look up the GCPhys in the hash.
2019 */
2020 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2021 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2022 if (i != NIL_PGMPOOL_IDX)
2023 {
2024 do
2025 {
2026 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2027 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2028 if (pPage->GCPhys == GCPhys)
2029 {
2030 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2031 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2032 {
2033 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2034 * doesn't flush it in case there are no more free use records.
2035 */
2036 pgmPoolCacheUsed(pPool, pPage);
2037
2038 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2039 if (RT_SUCCESS(rc))
2040 {
2041 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2042 *ppPage = pPage;
2043 if (pPage->cModifications)
2044 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2045 STAM_COUNTER_INC(&pPool->StatCacheHits);
2046 return VINF_PGM_CACHED_PAGE;
2047 }
2048 return rc;
2049 }
2050
2051 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2052 {
2053 /*
2054 * The kind is different. In some cases we should now flush the page
2055 * as it has been reused, but in most cases this is normal remapping
2056 * of PDs as PT or big pages using the GCPhys field in a slightly
2057 * different way than the other kinds.
2058 */
2059 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2060 {
2061 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2062 pgmPoolFlushPage(pPool, pPage);
2063 break;
2064 }
2065 }
2066 }
2067
2068 /* next */
2069 i = pPage->iNext;
2070 } while (i != NIL_PGMPOOL_IDX);
2071 }
2072
2073 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2074 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2075 return VERR_FILE_NOT_FOUND;
2076}
2077
2078
2079/**
2080 * Inserts a page into the cache.
2081 *
2082 * @param pPool The pool.
2083 * @param pPage The cached page.
2084 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2085 */
2086static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2087{
2088 /*
2089 * Insert into the GCPhys hash if the page is fit for that.
2090 */
2091 Assert(!pPage->fCached);
2092 if (fCanBeCached)
2093 {
2094 pPage->fCached = true;
2095 pgmPoolHashInsert(pPool, pPage);
2096 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2097 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2098 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2099 }
2100 else
2101 {
2102 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2103 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2104 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2105 }
2106
2107 /*
2108 * Insert at the head of the age list.
2109 */
2110 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2111 pPage->iAgeNext = pPool->iAgeHead;
2112 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2113 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2114 else
2115 pPool->iAgeTail = pPage->idx;
2116 pPool->iAgeHead = pPage->idx;
2117}
2118
2119
2120/**
2121 * Flushes a cached page.
2122 *
2123 * @param pPool The pool.
2124 * @param pPage The cached page.
2125 */
2126static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2127{
2128 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2129
2130 /*
2131 * Remove the page from the hash.
2132 */
2133 if (pPage->fCached)
2134 {
2135 pPage->fCached = false;
2136 pgmPoolHashRemove(pPool, pPage);
2137 }
2138 else
2139 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2140
2141 /*
2142 * Remove it from the age list.
2143 */
2144 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2145 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2146 else
2147 pPool->iAgeTail = pPage->iAgePrev;
2148 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2149 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2150 else
2151 pPool->iAgeHead = pPage->iAgeNext;
2152 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2153 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2154}
2155
2156
2157/**
2158 * Looks for pages sharing the monitor.
2159 *
2160 * @returns Pointer to the head page.
2161 * @returns NULL if not found.
2162 * @param pPool The Pool
2163 * @param pNewPage The page which is going to be monitored.
2164 */
2165static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2166{
2167 /*
2168 * Look up the GCPhys in the hash.
2169 */
2170 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2171 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2172 if (i == NIL_PGMPOOL_IDX)
2173 return NULL;
2174 do
2175 {
2176 PPGMPOOLPAGE pPage = &pPool->aPages[i];
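 /* Ranged compare: pool pages that shadow only part of the guest page (e.g. the PAE PDs for a 32-bit PD) may carry a sub-page offset in GCPhys. */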
2177 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2178 && pPage != pNewPage)
2179 {
2180 switch (pPage->enmKind)
2181 {
2182 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2183 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2184 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2185 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2186 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2187 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2188 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2189 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2190 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2191 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2192 case PGMPOOLKIND_64BIT_PML4:
2193 case PGMPOOLKIND_32BIT_PD:
2194 case PGMPOOLKIND_PAE_PDPT:
2195 {
2196 /* find the head */
2197 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2198 {
2199 Assert(pPage->iMonitoredPrev != pPage->idx);
2200 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2201 }
2202 return pPage;
2203 }
2204
2205 /* ignore, no monitoring. */
2206 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2207 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2208 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2209 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2210 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2211 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2212 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2213 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2214 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2215 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2216 case PGMPOOLKIND_ROOT_NESTED:
2217 case PGMPOOLKIND_PAE_PD_PHYS:
2218 case PGMPOOLKIND_PAE_PDPT_PHYS:
2219 case PGMPOOLKIND_32BIT_PD_PHYS:
2220 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2221 break;
2222 default:
2223 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2224 }
2225 }
2226
2227 /* next */
2228 i = pPage->iNext;
2229 } while (i != NIL_PGMPOOL_IDX);
2230 return NULL;
2231}
2232
2233
2234/**
2235 * Enables write monitoring of a guest page.
2236 *
2237 * @returns VBox status code.
2238 * @retval VINF_SUCCESS on success.
2239 * @param pPool The pool.
2240 * @param pPage The cached page.
2241 */
2242static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2243{
2244 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2245
2246 /*
2247 * Filter out the relevant kinds.
2248 */
2249 switch (pPage->enmKind)
2250 {
2251 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2252 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2253 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2254 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2255 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2256 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2257 case PGMPOOLKIND_64BIT_PML4:
2258 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2259 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2260 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2261 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2262 case PGMPOOLKIND_32BIT_PD:
2263 case PGMPOOLKIND_PAE_PDPT:
2264 break;
2265
2266 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2267 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2268 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2269 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2270 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2271 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2272 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2273 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2274 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2275 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2276 case PGMPOOLKIND_ROOT_NESTED:
2277 /* Nothing to monitor here. */
2278 return VINF_SUCCESS;
2279
2280 case PGMPOOLKIND_32BIT_PD_PHYS:
2281 case PGMPOOLKIND_PAE_PDPT_PHYS:
2282 case PGMPOOLKIND_PAE_PD_PHYS:
2283 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2284 /* Nothing to monitor here. */
2285 return VINF_SUCCESS;
2286 default:
2287 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2288 }
2289
2290 /*
2291 * Install handler.
2292 */
2293 int rc;
2294 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2295 if (pPageHead)
2296 {
2297 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2298 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2299
2300#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2301 if (pPageHead->fDirty)
2302 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2303#endif
2304
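 /* Another pool page already monitors this guest page; link this page into its monitor chain instead of registering a second physical handler. */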
2305 pPage->iMonitoredPrev = pPageHead->idx;
2306 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2307 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2308 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2309 pPageHead->iMonitoredNext = pPage->idx;
2310 rc = VINF_SUCCESS;
2311 }
2312 else
2313 {
2314 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2315 PVM pVM = pPool->CTX_SUFF(pVM);
2316 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2317 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2318 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2319 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2320 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2321 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2322 pPool->pszAccessHandler);
2323 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2324 * the heap size should suffice. */
2325 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2326 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
2327 }
2328 pPage->fMonitored = true;
2329 return rc;
2330}
2331
2332
2333/**
2334 * Disables write monitoring of a guest page.
2335 *
2336 * @returns VBox status code.
2337 * @retval VINF_SUCCESS on success.
2338 * @param pPool The pool.
2339 * @param pPage The cached page.
2340 */
2341static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2342{
2343 /*
2344 * Filter out the relevant kinds.
2345 */
2346 switch (pPage->enmKind)
2347 {
2348 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2349 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2350 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2351 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2352 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2353 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2354 case PGMPOOLKIND_64BIT_PML4:
2355 case PGMPOOLKIND_32BIT_PD:
2356 case PGMPOOLKIND_PAE_PDPT:
2357 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2358 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2359 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2360 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2361 break;
2362
2363 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2364 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2365 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2366 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2367 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2368 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2369 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2370 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2371 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2372 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2373 case PGMPOOLKIND_ROOT_NESTED:
2374 case PGMPOOLKIND_PAE_PD_PHYS:
2375 case PGMPOOLKIND_PAE_PDPT_PHYS:
2376 case PGMPOOLKIND_32BIT_PD_PHYS:
2377 /* Nothing to monitor here. */
2378 return VINF_SUCCESS;
2379
2380 default:
2381 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2382 }
2383
2384 /*
2385 * Remove the page from the monitored list or uninstall it if last.
2386 */
2387 const PVM pVM = pPool->CTX_SUFF(pVM);
2388 int rc;
2389 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2390 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2391 {
2392 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2393 {
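 /* This page is the head of the monitor chain; promote the next page to head and re-point the physical handler callbacks at it. */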
2394 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2395 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2396 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2397 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2398 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2399 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2400 pPool->pszAccessHandler);
2401 AssertFatalRCSuccess(rc);
2402 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2403 }
2404 else
2405 {
2406 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2407 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2408 {
2409 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2410 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2411 }
2412 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2413 rc = VINF_SUCCESS;
2414 }
2415 }
2416 else
2417 {
2418 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2419 AssertFatalRC(rc);
2420#ifdef VBOX_STRICT
2421 PVMCPU pVCpu = VMMGetCpu(pVM);
2422#endif
2423 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2424 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2425 }
2426 pPage->fMonitored = false;
2427
2428 /*
2429 * Remove it from the list of modified pages (if in it).
2430 */
2431 pgmPoolMonitorModifiedRemove(pPool, pPage);
2432
2433 return rc;
2434}
2435
2436
2437/**
2438 * Inserts the page into the list of modified pages.
2439 *
2440 * @param pPool The pool.
2441 * @param pPage The page.
2442 */
2443void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2444{
2445 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2446 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2447 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2448 && pPool->iModifiedHead != pPage->idx,
2449 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2450 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2451 pPool->iModifiedHead, pPool->cModifiedPages));
2452
2453 pPage->iModifiedNext = pPool->iModifiedHead;
2454 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2455 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2456 pPool->iModifiedHead = pPage->idx;
2457 pPool->cModifiedPages++;
2458#ifdef VBOX_WITH_STATISTICS
2459 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2460 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2461#endif
2462}
2463
2464
2465/**
2466 * Removes the page from the list of modified pages and resets the
2467 * modification counter.
2468 *
2469 * @param pPool The pool.
2470 * @param pPage The page which is believed to be in the list of modified pages.
2471 */
2472static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2473{
2474 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2475 if (pPool->iModifiedHead == pPage->idx)
2476 {
2477 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2478 pPool->iModifiedHead = pPage->iModifiedNext;
2479 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2480 {
2481 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2482 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2483 }
2484 pPool->cModifiedPages--;
2485 }
2486 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2487 {
2488 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2489 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2490 {
2491 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2492 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2493 }
2494 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2495 pPool->cModifiedPages--;
2496 }
2497 else
2498 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2499 pPage->cModifications = 0;
2500}
2501
2502
2503/**
2504 * Zaps the list of modified pages, resetting their modification counters in the process.
2505 *
2506 * @param pVM The VM handle.
2507 */
2508static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2509{
2510 pgmLock(pVM);
2511 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2512 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2513
2514 unsigned cPages = 0; NOREF(cPages);
2515
2516#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2517 pgmPoolResetDirtyPages(pVM);
2518#endif
2519
2520 uint16_t idx = pPool->iModifiedHead;
2521 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2522 while (idx != NIL_PGMPOOL_IDX)
2523 {
2524 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2525 idx = pPage->iModifiedNext;
2526 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2527 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2528 pPage->cModifications = 0;
2529 Assert(++cPages);
2530 }
2531 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2532 pPool->cModifiedPages = 0;
2533 pgmUnlock(pVM);
2534}
2535
2536
2537/**
2538 * Handle SyncCR3 pool tasks
2539 *
2540 * @returns VBox status code.
2541 * @retval VINF_SUCCESS if successfully handled.
2542 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
2543 * @param pVCpu The VMCPU handle.
2544 * @remark Should only be used when monitoring is available, thus placed in
2545 * the PGMPOOL_WITH_MONITORING #ifdef.
2546 */
2547int pgmPoolSyncCR3(PVMCPU pVCpu)
2548{
2549 PVM pVM = pVCpu->CTX_SUFF(pVM);
2550 LogFlow(("pgmPoolSyncCR3\n"));
2551
2552 /*
2553 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2554 * Occasionally we will have to clear all the shadow page tables because we wanted
2555 * to monitor a page which was mapped by too many shadowed page tables. This operation
2556 * is sometimes referred to as a 'lightweight flush'.
2557 */
2558# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2559 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2560 pgmR3PoolClearAll(pVM);
2561# else /* !IN_RING3 */
2562 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2563 {
2564 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2565 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2566 return VINF_PGM_SYNC_CR3;
2567 }
2568# endif /* !IN_RING3 */
2569 else
2570 pgmPoolMonitorModifiedClearAll(pVM);
2571
2572 return VINF_SUCCESS;
2573}
2574
2575
2576/**
2577 * Frees up at least one user entry.
2578 *
2579 * @returns VBox status code.
2580 * @retval VINF_SUCCESS if successfully added.
2581 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2582 * @param pPool The pool.
2583 * @param iUser The user index.
2584 */
2585static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2586{
2587 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2588 /*
2589 * Just free cached pages in a braindead fashion.
2590 */
2591 /** @todo walk the age list backwards and free the first with usage. */
2592 int rc = VINF_SUCCESS;
2593 do
2594 {
2595 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2596 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2597 rc = rc2;
2598 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2599 return rc;
2600}
2601
2602
2603/**
2604 * Inserts a page into the cache.
2605 *
2606 * This will create a user node for the page, insert it into the GCPhys
2607 * hash, and insert it into the age list.
2608 *
2609 * @returns VBox status code.
2610 * @retval VINF_SUCCESS if successfully added.
2611 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2612 * @param pPool The pool.
2613 * @param pPage The cached page.
2614 * @param GCPhys The GC physical address of the page we're gonna shadow.
2615 * @param iUser The user index.
2616 * @param iUserTable The user table index.
2617 */
2618DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2619{
2620 int rc = VINF_SUCCESS;
2621 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2622
2623 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2624
2625#ifdef VBOX_STRICT
2626 /*
2627 * Check that the entry doesn't already exist.
2628 */
2629 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2630 {
2631 uint16_t i = pPage->iUserHead;
2632 do
2633 {
2634 Assert(i < pPool->cMaxUsers);
2635 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2636 i = paUsers[i].iNext;
2637 } while (i != NIL_PGMPOOL_USER_INDEX);
2638 }
2639#endif
2640
2641 /*
2642 * Find a free user node.
2643 */
2644 uint16_t i = pPool->iUserFreeHead;
2645 if (i == NIL_PGMPOOL_USER_INDEX)
2646 {
2647 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2648 if (RT_FAILURE(rc))
2649 return rc;
2650 i = pPool->iUserFreeHead;
2651 }
2652
2653 /*
2654 * Unlink the user node from the free list,
2655 * initialize and insert it into the user list.
2656 */
2657 pPool->iUserFreeHead = paUsers[i].iNext;
2658 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2659 paUsers[i].iUser = iUser;
2660 paUsers[i].iUserTable = iUserTable;
2661 pPage->iUserHead = i;
2662
2663 /*
2664 * Insert into cache and enable monitoring of the guest page if enabled.
2665 *
2666 * Until we implement caching of all levels, including the CR3 one, we'll
2667 * have to make sure we don't try to monitor & cache any recursive reuse of
2668 * a monitored CR3 page. Because all Windows versions do this, we'll
2669 * have to be able to do combined access monitoring: CR3 + PT and
2670 * PD + PT (guest PAE).
2671 *
2672 * Update:
2673 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2674 */
2675 const bool fCanBeMonitored = true;
2676 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2677 if (fCanBeMonitored)
2678 {
2679 rc = pgmPoolMonitorInsert(pPool, pPage);
2680 AssertRC(rc);
2681 }
2682 return rc;
2683}
2684
2685
2686/**
2687 * Adds a user reference to a page.
2688 *
2689 * This will move the page to the head of the cache age list.
2690 *
2691 * @returns VBox status code.
2692 * @retval VINF_SUCCESS if successfully added.
2693 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2694 * @param pPool The pool.
2695 * @param pPage The cached page.
2696 * @param iUser The user index.
2697 * @param iUserTable The user table.
2698 */
2699static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2700{
2701 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2702
2703 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2704
2705# ifdef VBOX_STRICT
2706 /*
2707 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2708 */
2709 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2710 {
2711 uint16_t i = pPage->iUserHead;
2712 do
2713 {
2714 Assert(i < pPool->cMaxUsers);
2715 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2716 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2717 i = paUsers[i].iNext;
2718 } while (i != NIL_PGMPOOL_USER_INDEX);
2719 }
2720# endif
2721
2722 /*
2723 * Allocate a user node.
2724 */
2725 uint16_t i = pPool->iUserFreeHead;
2726 if (i == NIL_PGMPOOL_USER_INDEX)
2727 {
2728 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2729 if (RT_FAILURE(rc))
2730 return rc;
2731 i = pPool->iUserFreeHead;
2732 }
2733 pPool->iUserFreeHead = paUsers[i].iNext;
2734
2735 /*
2736 * Initialize the user node and insert it.
2737 */
2738 paUsers[i].iNext = pPage->iUserHead;
2739 paUsers[i].iUser = iUser;
2740 paUsers[i].iUserTable = iUserTable;
2741 pPage->iUserHead = i;
2742
2743# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2744 if (pPage->fDirty)
2745 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2746# endif
2747
2748 /*
2749 * Tell the cache to update its replacement stats for this page.
2750 */
2751 pgmPoolCacheUsed(pPool, pPage);
2752 return VINF_SUCCESS;
2753}
2754
2755
2756/**
2757 * Frees a user record associated with a page.
2758 *
2759 * This does not clear the entry in the user table, it simply returns the
2760 * user record to the chain of free records.
2761 *
2762 * @param pPool The pool.
2763 * @param pPage The shadow page whose user record is being freed.
2764 * @param iUser The shadow page pool index of the user table.
2765 * @param iUserTable The index into the user table (shadowed).
2766 */
2767static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2768{
2769 /*
2770 * Unlink and free the specified user entry.
2771 */
2772 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2773
2774 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2775 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2776 uint16_t i = pPage->iUserHead;
2777 if ( i != NIL_PGMPOOL_USER_INDEX
2778 && paUsers[i].iUser == iUser
2779 && paUsers[i].iUserTable == iUserTable)
2780 {
2781 pPage->iUserHead = paUsers[i].iNext;
2782
2783 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2784 paUsers[i].iNext = pPool->iUserFreeHead;
2785 pPool->iUserFreeHead = i;
2786 return;
2787 }
2788
2789 /* General: Linear search. */
2790 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2791 while (i != NIL_PGMPOOL_USER_INDEX)
2792 {
2793 if ( paUsers[i].iUser == iUser
2794 && paUsers[i].iUserTable == iUserTable)
2795 {
2796 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2797 paUsers[iPrev].iNext = paUsers[i].iNext;
2798 else
2799 pPage->iUserHead = paUsers[i].iNext;
2800
2801 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2802 paUsers[i].iNext = pPool->iUserFreeHead;
2803 pPool->iUserFreeHead = i;
2804 return;
2805 }
2806 iPrev = i;
2807 i = paUsers[i].iNext;
2808 }
2809
2810 /* Fatal: didn't find it */
2811 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2812 iUser, iUserTable, pPage->GCPhys));
2813}
2814
2815
2816/**
2817 * Gets the entry size of a shadow table.
2818 *
2819 * @param enmKind The kind of page.
2820 *
2821 * @returns The size of the entry in bytes. That is, 4 or 8.
2822 * @returns If the kind is not for a table, an assertion is raised and 0 is
2823 * returned.
2824 */
2825DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2826{
2827 switch (enmKind)
2828 {
2829 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2830 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2831 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2832 case PGMPOOLKIND_32BIT_PD:
2833 case PGMPOOLKIND_32BIT_PD_PHYS:
2834 return 4;
2835
2836 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2837 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2838 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2839 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2840 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2841 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2842 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2843 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2844 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2845 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2846 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2847 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2848 case PGMPOOLKIND_64BIT_PML4:
2849 case PGMPOOLKIND_PAE_PDPT:
2850 case PGMPOOLKIND_ROOT_NESTED:
2851 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2852 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2853 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2854 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2855 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2856 case PGMPOOLKIND_PAE_PD_PHYS:
2857 case PGMPOOLKIND_PAE_PDPT_PHYS:
2858 return 8;
2859
2860 default:
2861 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2862 }
2863}
2864
2865
2866/**
2867 * Gets the entry size of a guest table.
2868 *
2869 * @param enmKind The kind of page.
2870 *
2871 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2872 * @returns If the kind is not for a table, an assertion is raised and 0 is
2873 * returned.
2874 */
2875DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2876{
2877 switch (enmKind)
2878 {
2879 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2880 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2881 case PGMPOOLKIND_32BIT_PD:
2882 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2883 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2884 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2885 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2886 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2887 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2888 return 4;
2889
2890 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2891 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2892 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2893 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2894 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2895 case PGMPOOLKIND_64BIT_PML4:
2896 case PGMPOOLKIND_PAE_PDPT:
2897 return 8;
2898
2899 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2900 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2901 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2902 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2903 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2904 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2905 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2906 case PGMPOOLKIND_ROOT_NESTED:
2907 case PGMPOOLKIND_PAE_PD_PHYS:
2908 case PGMPOOLKIND_PAE_PDPT_PHYS:
2909 case PGMPOOLKIND_32BIT_PD_PHYS:
2910 /** @todo can we return 0? (nobody is calling this...) */
2911 AssertFailed();
2912 return 0;
2913
2914 default:
2915 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2916 }
2917}
2918
2919
2920/**
2921 * Scans one shadow page table for mappings of a physical page.
2922 *
2923 * @returns true if any relevant PTEs were kept (merely updated), false if all were removed.
2924 * @param pVM The VM handle.
2925 * @param pPhysPage The guest page in question.
2926 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
2927 * @param iShw The shadow page table.
2928 * @param cRefs The number of references made in that PT.
2930 */
2931static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
2932{
2933 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
2934 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2935 bool bRet = false;
2936
2937 /*
2938 * Assert sanity.
2939 */
2940 Assert(cRefs == 1);
2941 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2942 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2943
2944 /*
2945 * Then, clear the actual mappings to the page in the shadow PT.
2946 */
2947 switch (pPage->enmKind)
2948 {
2949 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2950 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2951 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2952 {
2953 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2954 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2955 uint32_t u32AndMask, u32OrMask;
2956
2957 u32AndMask = 0;
2958 u32OrMask = 0;
2959
2960 if (!fFlushPTEs)
2961 {
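 /* Not flushing the PTE: keep the mapping, but either restore write access (no active handler) or revoke it (write handler still registered). */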
2962 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
2963 {
2964 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
2965 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
2966 u32OrMask = X86_PTE_RW;
2967 u32AndMask = UINT32_MAX;
2968 bRet = true;
2969 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2970 break;
2971
2972 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
2973 u32OrMask = 0;
2974 u32AndMask = ~X86_PTE_RW;
2975 bRet = true;
2976 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2977 break;
2978 default:
2979 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
2980 break;
2981 }
2982 }
2983 else
2984 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
2985
2986 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2987 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2988 {
2989 X86PTE Pte;
2990
2991 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2992 Pte.u = (pPT->a[i].u & u32AndMask) | u32OrMask;
2993 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
2994 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
2995
2996 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
2997 cRefs--;
2998 if (!cRefs)
2999 return bRet;
3000 }
3001#ifdef LOG_ENABLED
3002 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3003 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3004 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3005 {
3006 Log(("i=%d cRefs=%d\n", i, cRefs--));
3007 }
3008#endif
3009 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3010 break;
3011 }
3012
3013 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3014 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3015 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3016 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3017 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3018 {
3019 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3020 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3021 uint64_t u64AndMask, u64OrMask;
3022
3023 u64OrMask = 0;
3024 u64AndMask = 0;
3025 if (!fFlushPTEs)
3026 {
3027 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3028 {
3029 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3030 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3031 u64OrMask = X86_PTE_RW;
3032 u64AndMask = UINT64_MAX;
3033 bRet = true;
3034 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3035 break;
3036
3037 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3038 u64OrMask = 0;
3039 u64AndMask = ~((uint64_t)X86_PTE_RW);
3040 bRet = true;
3041 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3042 break;
3043
3044 default:
3045 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3046 break;
3047 }
3048 }
3049 else
3050 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3051
3052 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3053 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3054 {
3055 X86PTEPAE Pte;
3056
3057 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3058 Pte.u = (pPT->a[i].u & u64AndMask) | u64OrMask;
3059 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3060 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3061
3062 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3063 cRefs--;
3064 if (!cRefs)
3065 return bRet;
3066 }
3067#ifdef LOG_ENABLED
3068 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3069 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3070 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3071 {
3072 Log(("i=%d cRefs=%d\n", i, cRefs--));
3073 }
3074#endif
3075 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
3076 break;
3077 }
3078
3079 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3080 {
3081 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3082 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3083 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3084 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3085 {
3086 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3087 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3088 pPT->a[i].u = 0;
3089 cRefs--;
3090 if (!cRefs)
3091 return bRet;
3092 }
3093#ifdef LOG_ENABLED
3094 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3095 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3096 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3097 {
3098 Log(("i=%d cRefs=%d\n", i, cRefs--));
3099 }
3100#endif
3101 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3102 break;
3103 }
3104
3105 default:
3106 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3107 }
3108 return bRet;
3109}
3110
3111
3112/**
3113 * Scans one shadow page table for mappings of a physical page.
3114 *
3115 * @param pVM The VM handle.
3116 * @param pPhysPage The guest page in question.
3117 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3118 * @param iShw The shadow page table.
3119 * @param cRefs The number of references made in that PT.
3120 */
3121static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3122{
3123 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3124
3125 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3126 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3127 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, cRefs);
3128 if (!fKeptPTEs)
3129 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3130 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3131}
3132
3133
3134/**
3135 * Flushes a list of shadow page tables mapping the same physical page.
3136 *
3137 * @param pVM The VM handle.
3138 * @param pPhysPage The guest page in question.
3139 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3140 * @param iPhysExt The physical cross reference extent list to flush.
3141 */
3142static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3143{
3144 Assert(PGMIsLockOwner(pVM));
3145 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3146 bool fKeepList = false;
3147
3148 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3149 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3150
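 /* Walk the chain of physical cross-reference extents, flushing (or updating) the entries in each referenced shadow PT. */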
3151 const uint16_t iPhysExtStart = iPhysExt;
3152 PPGMPOOLPHYSEXT pPhysExt;
3153 do
3154 {
3155 Assert(iPhysExt < pPool->cMaxPhysExts);
3156 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3157 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3158 {
3159 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3160 {
3161 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], 1);
3162 if (!fKeptPTEs)
3163 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3164 else
3165 fKeepList = true;
3166 }
3167 }
3168 /* next */
3169 iPhysExt = pPhysExt->iNext;
3170 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3171
3172 if (!fKeepList)
3173 {
3174 /* insert the list into the free list and clear the ram range entry. */
3175 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3176 pPool->iPhysExtFreeHead = iPhysExtStart;
3177 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3178 }
3179
3180 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3181}
3182
3183
3184/**
3185 * Flushes all shadow page table mappings of the given guest page.
3186 *
3187 * This is typically called when the host page backing the guest one has been
3188 * replaced or when the page protection was changed due to an access handler.
3189 *
3190 * @returns VBox status code.
3191 * @retval VINF_SUCCESS if all references have been successfully cleared.
3192 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3193 * pool cleaning. FF and sync flags are set.
3194 *
3195 * @param pVM The VM handle.
3196 * @param pPhysPage The guest page in question.
3197 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3198 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3199 * flushed; it is NOT touched if this isn't necessary.
3200 * The caller MUST initialize this to @a false.
3201 */
3202int pgmPoolTrackUpdateGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3203{
3204 PVMCPU pVCpu = VMMGetCpu(pVM);
3205 pgmLock(pVM);
3206 int rc = VINF_SUCCESS;
3207 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3208 if (u16)
3209 {
3210 /*
3211 * The zero page is currently screwing up the tracking and we'll
3212 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3213 * is defined, zero pages won't normally be mapped. Some kind of solution
3214 * will be needed for this problem of course, but it will have to wait...
3215 */
3216 if (PGM_PAGE_IS_ZERO(pPhysPage))
3217 rc = VINF_PGM_GCPHYS_ALIASED;
3218 else
3219 {
3220# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3221 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3222 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3223 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3224# endif
3225
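 /* The tracking word either encodes a single shadow PT index with a small reference count, or refers to a physical cross-reference extent list; if even that overflowed, fall back to the slow full scan. */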
3226 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3227 pgmPoolTrackFlushGCPhysPT(pVM,
3228 pPhysPage,
3229 fFlushPTEs,
3230 PGMPOOL_TD_GET_IDX(u16),
3231 PGMPOOL_TD_GET_CREFS(u16));
3232 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3233 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3234 else
3235 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3236 *pfFlushTLBs = true;
3237
3238# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3239 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3240# endif
3241 }
3242 }
3243
3244 if (rc == VINF_PGM_GCPHYS_ALIASED)
3245 {
3246 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3247 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3248 rc = VINF_PGM_SYNC_CR3;
3249 }
3250 pgmUnlock(pVM);
3251 return rc;
3252}
3253
3254
3255/**
3256 * Scans all shadow page tables for mappings of a physical page.
3257 *
3258 * This may be slow, but it's most likely more efficient than cleaning
3259 * out the entire page pool / cache.
3260 *
3261 * @returns VBox status code.
3262 * @retval VINF_SUCCESS if all references have been successfully cleared.
3263 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3264 * a page pool cleaning.
3265 *
3266 * @param pVM The VM handle.
3267 * @param pPhysPage The guest page in question.
3268 */
3269int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3270{
3271 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3272 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3273 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3274 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3275
3276#if 1
3277 /*
3278 * There is a limit to what makes sense.
3279 */
3280 if (pPool->cPresent > 1024)
3281 {
3282 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3283 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3284 return VINF_PGM_GCPHYS_ALIASED;
3285 }
3286#endif
3287
3288 /*
3289 * Iterate all the pages until we've encountered all those in use.
3290 * This is a simple but not quite optimal solution.
3291 */
3292 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3293 const uint32_t u32 = u64;
3294 unsigned cLeft = pPool->cUsedPages;
3295 unsigned iPage = pPool->cCurPages;
3296 while (--iPage >= PGMPOOL_IDX_FIRST)
3297 {
3298 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3299 if (pPage->GCPhys != NIL_RTGCPHYS)
3300 {
3301 switch (pPage->enmKind)
3302 {
3303 /*
3304 * We only care about shadow page tables.
3305 */
3306 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3307 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3308 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3309 {
3310 unsigned cPresent = pPage->cPresent;
3311 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3312 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3313 if (pPT->a[i].n.u1Present)
3314 {
3315 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3316 {
3317 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3318 pPT->a[i].u = 0;
3319 }
3320 if (!--cPresent)
3321 break;
3322 }
3323 break;
3324 }
3325
3326 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3327 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3328 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3329 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3330 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3331 {
3332 unsigned cPresent = pPage->cPresent;
3333 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3334 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3335 if (pPT->a[i].n.u1Present)
3336 {
3337 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3338 {
3339 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3340 pPT->a[i].u = 0;
3341 }
3342 if (!--cPresent)
3343 break;
3344 }
3345 break;
3346 }
3347 }
3348 if (!--cLeft)
3349 break;
3350 }
3351 }
3352
3353 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3354 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3355 return VINF_SUCCESS;
3356}
3357
3358
3359/**
3360 * Clears the user entry in a user table.
3361 *
3362 * This is used to remove all references to a page when flushing it.
3363 */
3364static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3365{
3366 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3367 Assert(pUser->iUser < pPool->cCurPages);
3368 uint32_t iUserTable = pUser->iUserTable;
3369
3370 /*
3371 * Map the user page.
3372 */
3373 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3374 union
3375 {
3376 uint64_t *pau64;
3377 uint32_t *pau32;
3378 } u;
3379 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3380
3381 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3382
3383 /* Safety precaution in case we change the paging for other modes too in the future. */
3384 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3385
3386#ifdef VBOX_STRICT
3387 /*
3388 * Some sanity checks.
3389 */
3390 switch (pUserPage->enmKind)
3391 {
3392 case PGMPOOLKIND_32BIT_PD:
3393 case PGMPOOLKIND_32BIT_PD_PHYS:
3394 Assert(iUserTable < X86_PG_ENTRIES);
3395 break;
3396 case PGMPOOLKIND_PAE_PDPT:
3397 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3398 case PGMPOOLKIND_PAE_PDPT_PHYS:
3399 Assert(iUserTable < 4);
3400 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3401 break;
3402 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3403 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3404 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3405 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3406 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3407 case PGMPOOLKIND_PAE_PD_PHYS:
3408 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3409 break;
3410 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3411 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3412 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3413 break;
3414 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3415 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3416 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3417 break;
3418 case PGMPOOLKIND_64BIT_PML4:
3419 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3420 /* GCPhys >> PAGE_SHIFT is the index here */
3421 break;
3422 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3423 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3424 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3425 break;
3426
3427 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3428 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3429 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3430 break;
3431
3432 case PGMPOOLKIND_ROOT_NESTED:
3433 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3434 break;
3435
3436 default:
3437 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3438 break;
3439 }
3440#endif /* VBOX_STRICT */
3441
3442 /*
3443 * Clear the entry in the user page.
3444 */
3445 switch (pUserPage->enmKind)
3446 {
3447 /* 32-bit entries */
3448 case PGMPOOLKIND_32BIT_PD:
3449 case PGMPOOLKIND_32BIT_PD_PHYS:
3450 ASMAtomicWriteSize(&u.pau32[iUserTable], 0);
3451 break;
3452
3453 /* 64-bit entries */
3454 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3455 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3456 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3457 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3458 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3459#if defined(IN_RC)
3460 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3461 * non-present PDPT will continue to cause page faults.
3462 */
3463 ASMReloadCR3();
3464#endif
3465 /* no break */
3466 case PGMPOOLKIND_PAE_PD_PHYS:
3467 case PGMPOOLKIND_PAE_PDPT_PHYS:
3468 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3469 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3470 case PGMPOOLKIND_64BIT_PML4:
3471 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3472 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3473 case PGMPOOLKIND_PAE_PDPT:
3474 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3475 case PGMPOOLKIND_ROOT_NESTED:
3476 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3477 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3478 ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
3479 break;
3480
3481 default:
3482 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3483 }
3484}
3485
3486
3487/**
3488 * Clears all users of a page.
3489 */
3490static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3491{
3492 /*
3493 * Free all the user records.
3494 */
3495 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3496
3497 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3498 uint16_t i = pPage->iUserHead;
3499 while (i != NIL_PGMPOOL_USER_INDEX)
3500 {
3501 /* Clear the entry in the user table. */
3502 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3503
3504 /* Free it. */
3505 const uint16_t iNext = paUsers[i].iNext;
3506 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3507 paUsers[i].iNext = pPool->iUserFreeHead;
3508 pPool->iUserFreeHead = i;
3509
3510 /* Next. */
3511 i = iNext;
3512 }
3513 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3514}
3515
3516
3517/**
3518 * Allocates a new physical cross reference extent.
3519 *
3520 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3521 * @param pVM The VM handle.
3522 * @param piPhysExt Where to store the phys ext index.
3523 */
3524PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3525{
3526 Assert(PGMIsLockOwner(pVM));
3527 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3528 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3529 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3530 {
3531 STAM_COUNTER_INC(&pPool->StatTrackPhysExtAllocFailures);
3532 return NULL;
3533 }
3534 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3535 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3536 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3537 *piPhysExt = iPhysExt;
3538 return pPhysExt;
3539}
3540
3541
3542/**
3543 * Frees a physical cross reference extent.
3544 *
3545 * @param pVM The VM handle.
3546 * @param iPhysExt The extent to free.
3547 */
3548void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3549{
3550 Assert(PGMIsLockOwner(pVM));
3551 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3552 Assert(iPhysExt < pPool->cMaxPhysExts);
3553 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3554 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3555 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3556 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3557 pPool->iPhysExtFreeHead = iPhysExt;
3558}
3559
3560
3561/**
3562 * Frees a list of physical cross reference extents.
3563 *
3564 * @param pVM The VM handle.
3565 * @param iPhysExt The index of the list head (the first extent to free).
3566 */
3567void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3568{
3569 Assert(PGMIsLockOwner(pVM));
3570 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3571
3572 const uint16_t iPhysExtStart = iPhysExt;
3573 PPGMPOOLPHYSEXT pPhysExt;
3574 do
3575 {
3576 Assert(iPhysExt < pPool->cMaxPhysExts);
3577 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3578 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3579 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3580
3581 /* next */
3582 iPhysExt = pPhysExt->iNext;
3583 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3584
3585 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3586 pPool->iPhysExtFreeHead = iPhysExtStart;
3587}
3588
3589
3590/**
3591 * Insert a reference into a list of physical cross reference extents.
3592 *
3593 * @returns The new tracking data for PGMPAGE.
3594 *
3595 * @param pVM The VM handle.
3596 * @param iPhysExt The physical extent index of the list head.
3597 * @param iShwPT The shadow page table index.
3598 *
3599 */
3600static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3601{
3602 Assert(PGMIsLockOwner(pVM));
3603 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3604 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3605
3606 /* special common case. */
3607 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3608 {
3609 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3610 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3611 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3612 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3613 }
3614
3615 /* general treatment. */
3616 const uint16_t iPhysExtStart = iPhysExt;
3617 unsigned cMax = 15;
3618 for (;;)
3619 {
3620 Assert(iPhysExt < pPool->cMaxPhysExts);
3621 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3622 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3623 {
3624 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3625 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3626 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3627 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3628 }
3629 if (!--cMax)
3630 {
3631 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3632 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3633 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3634 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3635 }
 /* advance to the next extent; when the list is exhausted, fall through and append a new one. */
 iPhysExt = paPhysExts[iPhysExt].iNext;
 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
 break;
3636 }
3637
3638 /* add another extent to the list. */
3639 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3640 if (!pNew)
3641 {
3642 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3643 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3644 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3645 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3646 }
3647 pNew->iNext = iPhysExtStart;
3648 pNew->aidx[0] = iShwPT;
3649 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3650 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3651}
3652
3653
3654/**
3655 * Adds a reference to a guest physical page where extents are in use.
3656 *
3657 * @returns The new tracking data for PGMPAGE.
3658 *
3659 * @param pVM The VM handle.
3660 * @param u16 The ram range flags (top 16-bits).
3661 * @param iShwPT The shadow page table index.
3662 */
3663uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3664{
3665 pgmLock(pVM);
3666 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3667 {
3668 /*
3669 * Convert to extent list.
3670 */
3671 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3672 uint16_t iPhysExt;
3673 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3674 if (pPhysExt)
3675 {
3676 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3677 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3678 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3679 pPhysExt->aidx[1] = iShwPT;
3680 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3681 }
3682 else
3683 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3684 }
3685 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3686 {
3687 /*
3688 * Insert into the extent list.
3689 */
3690 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3691 }
3692 else
3693 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3694 pgmUnlock(pVM);
3695 return u16;
3696}
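
/*
 * Editorial note, not part of the original sources: a rough sketch of how the u16
 * tracking word produced above is decoded by its consumers (see for instance
 * pgmPoolTrackUpdateGCPhys earlier in this file).  pPhysPage is an assumed variable.
 */
#if 0
    uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
    if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
    {
        /* Single reference: the index is the owning shadow page table. */
        uint16_t iShwPT = PGMPOOL_TD_GET_IDX(u16);
    }
    else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
    {
        /* Aliased: the index is the head of a PGMPOOLPHYSEXT list. */
        uint16_t iPhysExt = PGMPOOL_TD_GET_IDX(u16);
    }
    else
    {
        /* Overflowed: too many references to track individually; a slow scan is needed. */
    }
#endif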
3697
3698
3699/**
3700 * Clear references to guest physical memory.
3701 *
3702 * @param pPool The pool.
3703 * @param pPage The page.
3704 * @param pPhysPage Pointer to the aPages entry in the ram range.
3705 */
3706void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3707{
3708 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3709 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3710
3711 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3712 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3713 {
3714 PVM pVM = pPool->CTX_SUFF(pVM);
3715 pgmLock(pVM);
3716
3717 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3718 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3719 do
3720 {
3721 Assert(iPhysExt < pPool->cMaxPhysExts);
3722
3723 /*
3724 * Look for the shadow page and check if it's all freed.
3725 */
3726 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3727 {
3728 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3729 {
3730 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3731
3732 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3733 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3734 {
3735 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3736 pgmUnlock(pVM);
3737 return;
3738 }
3739
3740 /* we can free the node. */
3741 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3742 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3743 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3744 {
3745 /* lonely node */
3746 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3747 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3748 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3749 }
3750 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3751 {
3752 /* head */
3753 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3754 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3755 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3756 }
3757 else
3758 {
3759 /* in list */
3760 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3761 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3762 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3763 }
3764 iPhysExt = iPhysExtNext;
3765 pgmUnlock(pVM);
3766 return;
3767 }
3768 }
3769
3770 /* next */
3771 iPhysExtPrev = iPhysExt;
3772 iPhysExt = paPhysExts[iPhysExt].iNext;
3773 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3774
3775 pgmUnlock(pVM);
3776 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3777 }
3778 else /* nothing to do */
3779 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3780}
3781
3782
3783/**
3784 * Clear references to guest physical memory.
3785 *
3786 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3787 * is assumed to be correct, so the linear search can be skipped and we can assert
3788 * at an earlier point.
3789 *
3790 * @param pPool The pool.
3791 * @param pPage The page.
3792 * @param HCPhys The host physical address corresponding to the guest page.
3793 * @param GCPhys The guest physical address corresponding to HCPhys.
3794 */
3795static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3796{
3797 /*
3798 * Walk range list.
3799 */
3800 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3801 while (pRam)
3802 {
3803 RTGCPHYS off = GCPhys - pRam->GCPhys;
3804 if (off < pRam->cb)
3805 {
3806 /* does it match? */
3807 const unsigned iPage = off >> PAGE_SHIFT;
3808 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3809#ifdef LOG_ENABLED
3810 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3811 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3812#endif
3813 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3814 {
3815 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3816 return;
3817 }
3818 break;
3819 }
3820 pRam = pRam->CTX_SUFF(pNext);
3821 }
3822 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3823}
3824
3825
3826/**
3827 * Clear references to guest physical memory.
3828 *
3829 * @param pPool The pool.
3830 * @param pPage The page.
3831 * @param HCPhys The host physical address corresponding to the guest page.
3832 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3833 */
3834void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3835{
3836 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3837
3838 /*
3839 * Walk range list.
3840 */
3841 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3842 while (pRam)
3843 {
3844 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3845 if (off < pRam->cb)
3846 {
3847 /* does it match? */
3848 const unsigned iPage = off >> PAGE_SHIFT;
3849 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3850 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3851 {
3852 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3853 return;
3854 }
3855 break;
3856 }
3857 pRam = pRam->CTX_SUFF(pNext);
3858 }
3859
3860 /*
3861 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3862 */
3863 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3864 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3865 while (pRam)
3866 {
3867 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3868 while (iPage-- > 0)
3869 {
3870 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3871 {
3872 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3873 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3874 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3875 return;
3876 }
3877 }
3878 pRam = pRam->CTX_SUFF(pNext);
3879 }
3880
3881 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3882}
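
/*
 * Editorial note, not part of the original sources: the GCPhysHint is typically the page
 * address taken from the matching guest PTE, as the DerefPT helpers below do.  If the
 * guest has since changed that PTE, the hint misses and the linear search above kicks in.
 * Sketch of the calling pattern; pShwPT, pGstPT and i are assumed from the callers below.
 */
#if 0
    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                               pShwPT->a[i].u & X86_PTE_PAE_PG_MASK,   /* HCPhys from the shadow PTE */
                               pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);  /* GCPhys hint from the guest PTE */
#endif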
3883
3884
3885/**
3886 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3887 *
3888 * @param pPool The pool.
3889 * @param pPage The page.
3890 * @param pShwPT The shadow page table (mapping of the page).
3891 * @param pGstPT The guest page table.
3892 */
3893DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3894{
3895 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3896 if (pShwPT->a[i].n.u1Present)
3897 {
3898 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3899 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3900 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3901 if (!--pPage->cPresent)
3902 break;
3903 }
3904}
3905
3906
3907/**
3908 * Clear references to guest physical memory in a PAE / 32-bit page table.
3909 *
3910 * @param pPool The pool.
3911 * @param pPage The page.
3912 * @param pShwPT The shadow page table (mapping of the page).
3913 * @param pGstPT The guest page table (just a half one).
3914 */
3915DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3916{
3917 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3918 if (pShwPT->a[i].n.u1Present)
3919 {
3920 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3921 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3922 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3923 if (!--pPage->cPresent)
3924 break;
3925 }
3926}
3927
3928
3929/**
3930 * Clear references to guest physical memory in a PAE / PAE page table.
3931 *
3932 * @param pPool The pool.
3933 * @param pPage The page.
3934 * @param pShwPT The shadow page table (mapping of the page).
3935 * @param pGstPT The guest page table.
3936 */
3937DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3938{
3939 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3940 if (pShwPT->a[i].n.u1Present)
3941 {
3942 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
3943 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3944 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3945 if (!--pPage->cPresent)
3946 break;
3947 }
3948}
3949
3950
3951/**
3952 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3953 *
3954 * @param pPool The pool.
3955 * @param pPage The page.
3956 * @param pShwPT The shadow page table (mapping of the page).
3957 */
3958DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3959{
3960 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
3961 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3962 if (pShwPT->a[i].n.u1Present)
3963 {
3964 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3965 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3966 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3967 if (!--pPage->cPresent)
3968 break;
3969 }
3970}
3971
3972
3973/**
3974 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3975 *
3976 * @param pPool The pool.
3977 * @param pPage The page.
3978 * @param pShwPT The shadow page table (mapping of the page).
3979 */
3980DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3981{
3982 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
3983 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3984 if (pShwPT->a[i].n.u1Present)
3985 {
3986 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3987 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3988 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3989 if (!--pPage->cPresent)
3990 break;
3991 }
3992}
3993
3994
3995/**
3996 * Clear references to shadowed pages in an EPT page table.
3997 *
3998 * @param pPool The pool.
3999 * @param pPage The page.
4000 * @param pShwPT The shadow page table (mapping of the page).
4001 */
4002DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4003{
4004 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4005 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4006 if (pShwPT->a[i].n.u1Present)
4007 {
4008 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4009 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4010 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4011 if (!--pPage->cPresent)
4012 break;
4013 }
4014}
4015
4016
4017
4018/**
4019 * Clear references to shadowed pages in a 32-bit page directory.
4020 *
4021 * @param pPool The pool.
4022 * @param pPage The page.
4023 * @param pShwPD The shadow page directory (mapping of the page).
4024 */
4025DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4026{
4027 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4028 {
4029 if ( pShwPD->a[i].n.u1Present
4030 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4031 )
4032 {
4033 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4034 if (pSubPage)
4035 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4036 else
4037 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4038 }
4039 }
4040}
4041
4042/**
4043 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4044 *
4045 * @param pPool The pool.
4046 * @param pPage The page.
4047 * @param pShwPD The shadow page directory (mapping of the page).
4048 */
4049DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4050{
4051 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4052 {
4053 if ( pShwPD->a[i].n.u1Present
4054 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4055 )
4056 {
4057 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4058 if (pSubPage)
4059 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4060 else
4061 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4062 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4063 }
4064 }
4065}
4066
4067/**
4068 * Clear references to shadowed pages in a PAE page directory pointer table.
4069 *
4070 * @param pPool The pool.
4071 * @param pPage The page.
4072 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4073 */
4074DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4075{
4076 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4077 {
4078 if ( pShwPDPT->a[i].n.u1Present
4079 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4080 )
4081 {
4082 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4083 if (pSubPage)
4084 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4085 else
4086 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4087 }
4088 }
4089}
4090
4091
4092/**
4093 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4094 *
4095 * @param pPool The pool.
4096 * @param pPage The page.
4097 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4098 */
4099DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4100{
4101 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4102 {
4103 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4104 if (pShwPDPT->a[i].n.u1Present)
4105 {
4106 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4107 if (pSubPage)
4108 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4109 else
4110 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4111 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4112 }
4113 }
4114}
4115
4116
4117/**
4118 * Clear references to shadowed pages in a 64-bit level 4 page table.
4119 *
4120 * @param pPool The pool.
4121 * @param pPage The page.
4122 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4123 */
4124DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4125{
4126 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4127 {
4128 if (pShwPML4->a[i].n.u1Present)
4129 {
4130 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4131 if (pSubPage)
4132 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4133 else
4134 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4135 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4136 }
4137 }
4138}
4139
4140
4141/**
4142 * Clear references to shadowed pages in an EPT page directory.
4143 *
4144 * @param pPool The pool.
4145 * @param pPage The page.
4146 * @param pShwPD The shadow page directory (mapping of the page).
4147 */
4148DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4149{
4150 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4151 {
4152 if (pShwPD->a[i].n.u1Present)
4153 {
4154 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4155 if (pSubPage)
4156 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4157 else
4158 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4159 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4160 }
4161 }
4162}
4163
4164
4165/**
4166 * Clear references to shadowed pages in an EPT page directory pointer table.
4167 *
4168 * @param pPool The pool.
4169 * @param pPage The page.
4170 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4171 */
4172DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4173{
4174 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4175 {
4176 if (pShwPDPT->a[i].n.u1Present)
4177 {
4178 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4179 if (pSubPage)
4180 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4181 else
4182 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4183 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4184 }
4185 }
4186}
4187
4188
4189/**
4190 * Clears all references made by this page.
4191 *
4192 * This includes other shadow pages and GC physical addresses.
4193 *
4194 * @param pPool The pool.
4195 * @param pPage The page.
4196 */
4197static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4198{
4199 /*
4200 * Map the shadow page and take action according to the page kind.
4201 */
4202 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4203 switch (pPage->enmKind)
4204 {
4205 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4206 {
4207 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4208 void *pvGst;
4209 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4210 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4211 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4212 break;
4213 }
4214
4215 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4216 {
4217 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4218 void *pvGst;
4219 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4220 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4221 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4222 break;
4223 }
4224
4225 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4226 {
4227 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4228 void *pvGst;
4229 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4230 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4231 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4232 break;
4233 }
4234
4235 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4236 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4237 {
4238 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4239 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4240 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4241 break;
4242 }
4243
4244 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4245 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4246 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4247 {
4248 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4249 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4250 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4251 break;
4252 }
4253
4254 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4255 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4256 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4257 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4258 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4259 case PGMPOOLKIND_PAE_PD_PHYS:
4260 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4261 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4262 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4263 break;
4264
4265 case PGMPOOLKIND_32BIT_PD_PHYS:
4266 case PGMPOOLKIND_32BIT_PD:
4267 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4268 break;
4269
4270 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4271 case PGMPOOLKIND_PAE_PDPT:
4272 case PGMPOOLKIND_PAE_PDPT_PHYS:
4273 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4274 break;
4275
4276 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4277 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4278 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4279 break;
4280
4281 case PGMPOOLKIND_64BIT_PML4:
4282 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4283 break;
4284
4285 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4286 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4287 break;
4288
4289 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4290 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4291 break;
4292
4293 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4294 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4295 break;
4296
4297 default:
4298 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4299 }
4300
4301 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4302 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4303 ASMMemZeroPage(pvShw);
4304 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4305 pPage->fZeroed = true;
4306 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4307}
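
/*
 * Editorial note, not part of the original sources: the mapping obtained with
 * PGMPOOL_PAGE_2_LOCKED_PTR above must always be released again with PGMPOOL_UNLOCK_PTR.
 * Minimal sketch of that pairing for a similar traversal:
 */
#if 0
    void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
    /* ... walk the shadow entries according to pPage->enmKind ... */
    PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
#endif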
4308
4309/**
4310 * Flushes a pool page.
4311 *
4312 * This moves the page to the free list after removing all user references to it.
4313 *
4314 * @returns VBox status code.
4315 * @retval VINF_SUCCESS on success.
4316 * @param pPool The pool.
4317 * @param pPage The pool page to flush.
4318 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4319 */
4320int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4321{
4322 PVM pVM = pPool->CTX_SUFF(pVM);
4323 bool fFlushRequired = false;
4324
4325 int rc = VINF_SUCCESS;
4326 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4327 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4328 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4329
4330 /*
4331 * Quietly reject any attempts at flushing any of the special root pages.
4332 */
4333 if (pPage->idx < PGMPOOL_IDX_FIRST)
4334 {
4335 AssertFailed(); /* can no longer happen */
4336 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4337 return VINF_SUCCESS;
4338 }
4339
4340 pgmLock(pVM);
4341
4342 /*
4343 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4344 */
4345 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4346 {
4347 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4348 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4349 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4350 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4351 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4352 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4353 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4354 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4355 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4356 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4357 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4358 pgmUnlock(pVM);
4359 return VINF_SUCCESS;
4360 }
4361
4362#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4363 /* Start a subset so we won't run out of mapping space. */
4364 PVMCPU pVCpu = VMMGetCpu(pVM);
4365 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4366#endif
4367
4368 /*
4369 * Mark the page as being in need of an ASMMemZeroPage().
4370 */
4371 pPage->fZeroed = false;
4372
4373#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4374 if (pPage->fDirty)
4375 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4376#endif
4377
4378 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4379 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4380 fFlushRequired = true;
4381
4382 /*
4383 * Clear the page.
4384 */
4385 pgmPoolTrackClearPageUsers(pPool, pPage);
4386 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4387 pgmPoolTrackDeref(pPool, pPage);
4388 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4389
4390 /*
4391 * Flush it from the cache.
4392 */
4393 pgmPoolCacheFlushPage(pPool, pPage);
4394
4395#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4396 /* Heavy stuff done. */
4397 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4398#endif
4399
4400 /*
4401 * Deregister the monitoring.
4402 */
4403 if (pPage->fMonitored)
4404 rc = pgmPoolMonitorFlush(pPool, pPage);
4405
4406 /*
4407 * Free the page.
4408 */
4409 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4410 pPage->iNext = pPool->iFreeHead;
4411 pPool->iFreeHead = pPage->idx;
4412 pPage->enmKind = PGMPOOLKIND_FREE;
4413 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4414 pPage->GCPhys = NIL_RTGCPHYS;
4415 pPage->fReusedFlushPending = false;
4416
4417 pPool->cUsedPages--;
4418
4419 /* Flush the TLBs of all VCPUs if required. */
4420 if ( fFlushRequired
4421 && fFlush)
4422 {
4423 PGM_INVL_ALL_VCPU_TLBS(pVM);
4424 }
4425
4426 pgmUnlock(pVM);
4427 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4428 return rc;
4429}
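
/*
 * Editorial note, not part of the original sources: pgmPoolFlushPage only performs the
 * cross-VCPU TLB flush itself when fFlush is true; a caller passing false takes over that
 * responsibility, e.g. when flushing several pages in a row.  Sketch only; pVM, pPage1
 * and pPage2 are placeholders.
 */
#if 0
    pgmPoolFlushPage(pPool, pPage1, false /*fFlush*/);
    pgmPoolFlushPage(pPool, pPage2, false /*fFlush*/);
    PGM_INVL_ALL_VCPU_TLBS(pVM);                /* one combined flush at the end */
#endif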
4430
4431
4432/**
4433 * Frees a usage of a pool page.
4434 *
4435 * The caller is responsible for updating the user table so that it no longer
4436 * references the shadow page.
4437 *
4438 * @param pPool The pool.
4439 * @param pPage The shadow page.
4440 * @param iUser The shadow page pool index of the user table.
4441 * @param iUserTable The index into the user table (shadowed).
4442 */
4443void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4444{
4445 PVM pVM = pPool->CTX_SUFF(pVM);
4446
4447 STAM_PROFILE_START(&pPool->StatFree, a);
4448 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4449 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4450 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4451 pgmLock(pVM);
4452 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4453 if (!pPage->fCached)
4454 pgmPoolFlushPage(pPool, pPage);
4455 pgmUnlock(pVM);
4456 STAM_PROFILE_STOP(&pPool->StatFree, a);
4457}
4458
4459
4460/**
4461 * Makes one or more pages free, growing the pool or evicting a cached page as needed.
4462 *
4463 * @returns VBox status code.
4464 * @retval VINF_SUCCESS on success.
4465 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4466 *
4467 * @param pPool The pool.
4468 * @param enmKind Page table kind
4469 * @param iUser The user of the page.
4470 */
4471static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4472{
4473 PVM pVM = pPool->CTX_SUFF(pVM);
4474
4475 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4476
4477 /*
4478 * If the pool isn't fully grown yet, expand it.
4479 */
4480 if ( pPool->cCurPages < pPool->cMaxPages
4481#if defined(IN_RC)
4482 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4483 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4484 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4485#endif
4486 )
4487 {
4488 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4489#ifdef IN_RING3
4490 int rc = PGMR3PoolGrow(pVM);
4491#else
4492 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4493#endif
4494 if (RT_FAILURE(rc))
4495 return rc;
4496 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4497 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4498 return VINF_SUCCESS;
4499 }
4500
4501 /*
4502 * Free one cached page.
4503 */
4504 return pgmPoolCacheFreeOne(pPool, iUser);
4505}
4506
4507/**
4508 * Allocates a page from the pool.
4509 *
4510 * This page may actually be a cached page and not in need of any processing
4511 * on the caller's part.
4512 *
4513 * @returns VBox status code.
4514 * @retval VINF_SUCCESS if a NEW page was allocated.
4515 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4516 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4517 * @param pVM The VM handle.
4518 * @param GCPhys The GC physical address of the page we're gonna shadow.
4519 * For 4MB and 2MB PD entries, it's the first address the
4520 * shadow PT is covering.
4521 * @param enmKind The kind of mapping.
4522 * @param enmAccess Access type for the mapping (only relevant for big pages)
4523 * @param iUser The shadow page pool index of the user table.
4524 * @param iUserTable The index into the user table (shadowed).
4525 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4526 * @param fLockPage Lock the page
4527 */
4528int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4529{
4530 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4531 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4532 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4533 *ppPage = NULL;
4534 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4535 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4536 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4537
4538 pgmLock(pVM);
4539
4540 if (pPool->fCacheEnabled)
4541 {
4542 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4543 if (RT_SUCCESS(rc2))
4544 {
4545 if (fLockPage)
4546 pgmPoolLockPage(pPool, *ppPage);
4547 pgmUnlock(pVM);
4548 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4549 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4550 return rc2;
4551 }
4552 }
4553
4554 /*
4555 * Allocate a new one.
4556 */
4557 int rc = VINF_SUCCESS;
4558 uint16_t iNew = pPool->iFreeHead;
4559 if (iNew == NIL_PGMPOOL_IDX)
4560 {
4561 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4562 if (RT_FAILURE(rc))
4563 {
4564 pgmUnlock(pVM);
4565 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4566 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4567 return rc;
4568 }
4569 iNew = pPool->iFreeHead;
4570 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4571 }
4572
4573 /* unlink the free head */
4574 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4575 pPool->iFreeHead = pPage->iNext;
4576 pPage->iNext = NIL_PGMPOOL_IDX;
4577
4578 /*
4579 * Initialize it.
4580 */
4581 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4582 pPage->enmKind = enmKind;
4583 pPage->enmAccess = enmAccess;
4584 pPage->GCPhys = GCPhys;
4585 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4586 pPage->fMonitored = false;
4587 pPage->fCached = false;
4588#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4589 pPage->fDirty = false;
4590#endif
4591 pPage->fReusedFlushPending = false;
4592 pPage->cModifications = 0;
4593 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4594 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4595 pPage->cPresent = 0;
4596 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4597 pPage->pvLastAccessHandlerFault = 0;
4598 pPage->cLastAccessHandlerCount = 0;
4599 pPage->pvLastAccessHandlerRip = 0;
4600
4601 /*
4602 * Insert into the tracking and cache. If this fails, free the page.
4603 */
4604 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4605 if (RT_FAILURE(rc3))
4606 {
4607 pPool->cUsedPages--;
4608 pPage->enmKind = PGMPOOLKIND_FREE;
4609 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4610 pPage->GCPhys = NIL_RTGCPHYS;
4611 pPage->iNext = pPool->iFreeHead;
4612 pPool->iFreeHead = pPage->idx;
4613 pgmUnlock(pVM);
4614 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4615 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4616 return rc3;
4617 }
4618
4619 /*
4620 * Commit the allocation, clear the page and return.
4621 */
4622#ifdef VBOX_WITH_STATISTICS
4623 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4624 pPool->cUsedPagesHigh = pPool->cUsedPages;
4625#endif
4626
4627 if (!pPage->fZeroed)
4628 {
4629 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4630 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4631 ASMMemZeroPage(pv);
4632 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4633 }
4634
4635 *ppPage = pPage;
4636 if (fLockPage)
4637 pgmPoolLockPage(pPool, pPage);
4638 pgmUnlock(pVM);
4639 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4640 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4641 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4642 return rc;
4643}
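
/*
 * Editorial note, not part of the original sources: a typical caller treats both
 * VINF_SUCCESS and VINF_PGM_CACHED_PAGE as success and later releases the reference
 * with pgmPoolFreeByPage.  All values below (GCPhys, iUser, iUserTable, the kind) are
 * placeholders for illustration.
 */
#if 0
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAllocEx(pVM, GCPhys, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
                            iUser, iUserTable, &pShwPage, false /*fLockPage*/);
    if (rc == VINF_SUCCESS || rc == VINF_PGM_CACHED_PAGE)
    {
        /* Install pShwPage->Core.Key (the HC physical address) in the parent table entry. */
    }
    /* Later, when the parent entry is torn down again: */
    pgmPoolFreeByPage(pPool, pShwPage, iUser, iUserTable);
#endif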
4644
4645
4646/**
4647 * Frees a usage of a pool page.
4648 *
4649 * @param pVM The VM handle.
4650 * @param HCPhys The HC physical address of the shadow page.
4651 * @param iUser The shadow page pool index of the user table.
4652 * @param iUserTable The index into the user table (shadowed).
4653 */
4654void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4655{
4656 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4657 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4658 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4659}
4660
4661/**
4662 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4663 *
4664 * @returns Pointer to the shadow page structure.
4665 * @param pPool The pool.
4666 * @param HCPhys The HC physical address of the shadow page.
4667 */
4668PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4669{
4670 PVM pVM = pPool->CTX_SUFF(pVM);
4671
4672 Assert(PGMIsLockOwner(pVM));
4673
4674 /*
4675 * Look up the page.
4676 */
4677 pgmLock(pVM);
4678 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4679 pgmUnlock(pVM);
4680
4681 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4682 return pPage;
4683}
4684
4685#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4686/**
4687 * Flushes the specified page if present.
4688 *
4689 * @param pVM The VM handle.
4690 * @param GCPhys Guest physical address of the page to flush
4691 */
4692void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4693{
4694 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4695
4696 VM_ASSERT_EMT(pVM);
4697
4698 /*
4699 * Look up the GCPhys in the hash.
4700 */
4701 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4702 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4703 if (i == NIL_PGMPOOL_IDX)
4704 return;
4705
4706 do
4707 {
4708 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4709 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4710 {
4711 switch (pPage->enmKind)
4712 {
4713 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4714 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4715 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4716 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4717 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4718 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4719 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4720 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4721 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4722 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4723 case PGMPOOLKIND_64BIT_PML4:
4724 case PGMPOOLKIND_32BIT_PD:
4725 case PGMPOOLKIND_PAE_PDPT:
4726 {
4727 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4728#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4729 if (pPage->fDirty)
4730 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4731 else
4732#endif
4733 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4734 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4735 pgmPoolMonitorChainFlush(pPool, pPage);
4736 return;
4737 }
4738
4739 /* ignore, no monitoring. */
4740 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4741 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4742 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4743 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4744 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4745 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4746 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4747 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4748 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4749 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4750 case PGMPOOLKIND_ROOT_NESTED:
4751 case PGMPOOLKIND_PAE_PD_PHYS:
4752 case PGMPOOLKIND_PAE_PDPT_PHYS:
4753 case PGMPOOLKIND_32BIT_PD_PHYS:
4754 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4755 break;
4756
4757 default:
4758 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4759 }
4760 }
4761
4762 /* next */
4763 i = pPage->iNext;
4764 } while (i != NIL_PGMPOOL_IDX);
4765 return;
4766}
4767#endif /* IN_RING3 */
4768
4769#ifdef IN_RING3
4770/**
4771 * Resets the shadow page pool and flushes the entire cache.
4772 *
4773 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
4774 * this and will execute the CR3 flush.
4775 *
4776 * @param pVM The VM handle.
4777 */
4778void pgmR3PoolReset(PVM pVM)
4779{
4780 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4781
4782 Assert(PGMIsLockOwner(pVM));
4783 STAM_PROFILE_START(&pPool->StatR3Reset, a);
4784 LogFlow(("pgmR3PoolReset:\n"));
4785
4786 /*
4787 * If there are no pages in the pool, there is nothing to do.
4788 */
4789 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4790 {
4791 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
4792 return;
4793 }
4794
4795 /*
4796 * Exit the shadow mode since we're going to clear everything,
4797 * including the root page.
4798 */
4799 for (VMCPUID i = 0; i < pVM->cCpus; i++)
4800 {
4801 PVMCPU pVCpu = &pVM->aCpus[i];
4802 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4803 }
4804
4805 /*
4806 * Nuke the free list and reinsert all pages into it.
4807 */
4808 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4809 {
4810 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4811
4812 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4813 if (pPage->fMonitored)
4814 pgmPoolMonitorFlush(pPool, pPage);
4815 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4816 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4817 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4818 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4819 pPage->cModifications = 0;
4820 pPage->GCPhys = NIL_RTGCPHYS;
4821 pPage->enmKind = PGMPOOLKIND_FREE;
4822 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4823 Assert(pPage->idx == i);
4824 pPage->iNext = i + 1;
4825 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4826 pPage->fSeenNonGlobal = false;
4827 pPage->fMonitored = false;
4828#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4829 pPage->fDirty = false;
4830#endif
4831 pPage->fCached = false;
4832 pPage->fReusedFlushPending = false;
4833 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4834 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4835 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4836 pPage->cLocked = 0;
4837 }
4838 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4839 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4840 pPool->cUsedPages = 0;
4841
4842 /*
4843 * Zap and reinitialize the user records.
4844 */
4845 pPool->cPresent = 0;
4846 pPool->iUserFreeHead = 0;
4847 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4848 const unsigned cMaxUsers = pPool->cMaxUsers;
4849 for (unsigned i = 0; i < cMaxUsers; i++)
4850 {
4851 paUsers[i].iNext = i + 1;
4852 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4853 paUsers[i].iUserTable = 0xfffffffe;
4854 }
4855 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4856
4857 /*
4858 * Clear all the GCPhys links and rebuild the phys ext free list.
4859 */
4860 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4861 pRam;
4862 pRam = pRam->CTX_SUFF(pNext))
4863 {
4864 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4865 while (iPage-- > 0)
4866 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4867 }
4868
4869 pPool->iPhysExtFreeHead = 0;
4870 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4871 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4872 for (unsigned i = 0; i < cMaxPhysExts; i++)
4873 {
4874 paPhysExts[i].iNext = i + 1;
4875 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4876 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4877 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4878 }
4879 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4880
4881 /*
4882 * Just zap the modified list.
4883 */
4884 pPool->cModifiedPages = 0;
4885 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4886
4887 /*
4888 * Clear the GCPhys hash and the age list.
4889 */
4890 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4891 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4892 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4893 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4894
4895#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4896 /* Clear all dirty pages. */
4897 pPool->idxFreeDirtyPage = 0;
4898 pPool->cDirtyPages = 0;
4899 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
4900 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
4901#endif
4902
4903 /*
4904 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4905 */
4906 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4907 {
4908 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4909 pPage->iNext = NIL_PGMPOOL_IDX;
4910 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4911 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4912 pPage->cModifications = 0;
4913 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4914 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4915 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4916 if (pPage->fMonitored)
4917 {
4918 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4919 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4920 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4921 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4922 pPool->pszAccessHandler);
4923 AssertFatalRCSuccess(rc);
4924 pgmPoolHashInsert(pPool, pPage);
4925 }
4926 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4927 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4928 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4929 }
4930
4931 for (VMCPUID i = 0; i < pVM->cCpus; i++)
4932 {
4933 /*
4934 * Re-enter the shadowing mode and assert Sync CR3 FF.
4935 */
4936 PVMCPU pVCpu = &pVM->aCpus[i];
4937 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
4938 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4939 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4940 }
4941
4942 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
4943}
4944#endif /* IN_RING3 */
4945
4946#ifdef LOG_ENABLED
4947static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4948{
4949 switch(enmKind)
4950 {
4951 case PGMPOOLKIND_INVALID:
4952 return "PGMPOOLKIND_INVALID";
4953 case PGMPOOLKIND_FREE:
4954 return "PGMPOOLKIND_FREE";
4955 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4956 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4957 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4958 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4959 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4960 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4961 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4962 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4963 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4964 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4965 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4966 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4967 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4968 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4969 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4970 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4971 case PGMPOOLKIND_32BIT_PD:
4972 return "PGMPOOLKIND_32BIT_PD";
4973 case PGMPOOLKIND_32BIT_PD_PHYS:
4974 return "PGMPOOLKIND_32BIT_PD_PHYS";
4975 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4976 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
4977 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4978 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
4979 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4980 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
4981 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4982 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
4983 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4984 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
4985 case PGMPOOLKIND_PAE_PD_PHYS:
4986 return "PGMPOOLKIND_PAE_PD_PHYS";
4987 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4988 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
4989 case PGMPOOLKIND_PAE_PDPT:
4990 return "PGMPOOLKIND_PAE_PDPT";
4991 case PGMPOOLKIND_PAE_PDPT_PHYS:
4992 return "PGMPOOLKIND_PAE_PDPT_PHYS";
4993 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4994 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
4995 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4996 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
4997 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4998 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
4999 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5000 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5001 case PGMPOOLKIND_64BIT_PML4:
5002 return "PGMPOOLKIND_64BIT_PML4";
5003 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5004 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5005 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5006 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5007 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5008 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5009 case PGMPOOLKIND_ROOT_NESTED:
5010 return "PGMPOOLKIND_ROOT_NESTED";
5011 }
5012 return "Unknown kind!";
5013}
5014#endif /* LOG_ENABLED */