VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 25547

Last change on this file since 25547 was 25547, checked in by vboxsync, 15 years ago

pgmPoolAccessHandlerFlush: TLB flush only necessary for raw mode

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 187.8 KB
1/* $Id: PGMAllPool.cpp 25547 2009-12-21 16:38:13Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
54static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
55#ifndef IN_RING3
56DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
57#endif
58#ifdef LOG_ENABLED
59static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
60#endif
61#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
62static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
63#endif
64
65int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
66PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
67void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
68void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
69
70RT_C_DECLS_END
71
72
73/**
74 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
75 *
76 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
77 * @param enmKind The page kind.
78 */
79DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
80{
81 switch (enmKind)
82 {
83 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
84 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
85 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
86 return true;
87 default:
88 return false;
89 }
90}
91
92/** @def PGMPOOL_PAGE_2_LOCKED_PTR
93 * Maps a pool page into the current context and locks it (RC only).
94 *
95 * @returns Pointer to the mapped pool page.
96 * @param pVM The VM handle.
97 * @param pPage The pool page.
98 *
99 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume one of the
100 * small page window slots employed by that function. Be careful.
101 * @remark There is no need to assert on the result.
102 */
103#if defined(IN_RC)
104DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
105{
106 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
107
108 /* Make sure the dynamic mapping will not be reused. */
109 if (pv)
110 PGMDynLockHCPage(pVM, (uint8_t *)pv);
111
112 return pv;
113}
114#else
115# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
116#endif
117
118/** @def PGMPOOL_UNLOCK_PTR
119 * Unlocks a previously locked dynamic mapping (RC only).
120 *
121 * @returns Nothing.
122 * @param pVM The VM handle.
123 * @param pvPage The mapping of the pool page.
124 *
125 * @remark In RC this releases the small page window slot consumed by
126 * PGMPOOL_PAGE_2_LOCKED_PTR(). Be careful.
127 * @remark There is no need to assert on the result.
128 */
129#if defined(IN_RC)
130DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
131{
132 if (pvPage)
133 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
134}
135#else
136# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
137#endif
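/* Typical usage of the two helpers above (this is how pgmPoolMonitorChainChanging()
 * below uses them):
 *
 *     void *pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
 *     ... read or modify the shadow page through pv ...
 *     PGMPOOL_UNLOCK_PTR(pVM, pv);
 *
 * In RC the lock pins the dynamic mapping so it is not recycled while in use; in ring-0
 * and ring-3 the pair degenerates to PGMPOOL_PAGE_2_PTR() and a no-op.
 */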
138
139
140/**
141 * Flushes a chain of pages sharing the same access monitor.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pPool The pool.
145 * @param pPage A page in the chain.
146 */
147int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
148{
149 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
150
151 /*
152 * Find the list head.
153 */
154 uint16_t idx = pPage->idx;
155 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
156 {
157 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
158 {
159 idx = pPage->iMonitoredPrev;
160 Assert(idx != pPage->idx);
161 pPage = &pPool->aPages[idx];
162 }
163 }
164
165 /*
166 * Iterate the list flushing each shadow page.
167 */
168 int rc = VINF_SUCCESS;
169 for (;;)
170 {
171 idx = pPage->iMonitoredNext;
172 Assert(idx != pPage->idx);
173 if (pPage->idx >= PGMPOOL_IDX_FIRST)
174 {
175 int rc2 = pgmPoolFlushPage(pPool, pPage);
176 AssertRC(rc2);
177 }
178 /* next */
179 if (idx == NIL_PGMPOOL_IDX)
180 break;
181 pPage = &pPool->aPages[idx];
182 }
183 return rc;
184}
185
186
187/**
188 * Wrapper for reading the guest table entry being modified, using the method appropriate for the current context.
189 *
190 * @returns VBox status code suitable for scheduling.
191 * @param pVM VM Handle.
192 * @param pvDst Destination address
193 * @param pvSrc Source guest virtual address.
194 * @param GCPhysSrc The source guest physical address.
195 * @param cb Size of data to read
196 */
197DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
198{
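    /* Note: cb is the size of one (power-of-two sized) table entry, so masking the source
       address with ~(cb - 1) below rounds it down to the start of the entry being read. */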
199#if defined(IN_RING3)
200 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
201 return VINF_SUCCESS;
202#else
203 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
204 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
205#endif
206}
207
208/**
209 * Process shadow entries before they are changed by the guest.
210 *
211 * For PT entries we will clear them. For PD entries, we'll simply check
212 * for mapping conflicts and set the SyncCR3 FF if found.
213 *
214 * @param pVCpu VMCPU handle
215 * @param pPool The pool.
216 * @param pPage The head page.
217 * @param GCPhysFault The guest physical fault address.
218 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
219 * In R3 this is the host context 'fault' address.
220 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries.
221 */
222void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
223{
224 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
225 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
226 PVM pVM = pPool->CTX_SUFF(pVM);
227
228 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, cbWrite));
229
230 for (;;)
231 {
232 union
233 {
234 void *pv;
235 PX86PT pPT;
236 PX86PTPAE pPTPae;
237 PX86PD pPD;
238 PX86PDPAE pPDPae;
239 PX86PDPT pPDPT;
240 PX86PML4 pPML4;
241 } uShw;
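        /* uShw provides typed views of one and the same shadow page mapping; which member
           is valid depends on pPage->enmKind, handled by the switch below. */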
242
243 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
244
245 uShw.pv = NULL;
246 switch (pPage->enmKind)
247 {
248 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
249 {
250 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
251 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
252 const unsigned iShw = off / sizeof(X86PTE);
253 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
254 if (uShw.pPT->a[iShw].n.u1Present)
255 {
256 X86PTE GstPte;
257
258 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
259 AssertRC(rc);
260 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
261 pgmPoolTracDerefGCPhysHint(pPool, pPage,
262 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
263 GstPte.u & X86_PTE_PG_MASK);
264 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
265 }
266 break;
267 }
268
269 /* page/2 sized */
270 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
271 {
272 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
273 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
274 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
275 {
276 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
277 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
278 if (uShw.pPTPae->a[iShw].n.u1Present)
279 {
280 X86PTE GstPte;
281 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
282 AssertRC(rc);
283
284 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
285 pgmPoolTracDerefGCPhysHint(pPool, pPage,
286 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
287 GstPte.u & X86_PTE_PG_MASK);
288 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
289 }
290 }
291 break;
292 }
293
294 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
295 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
296 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
297 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
298 {
299 unsigned iGst = off / sizeof(X86PDE);
300 unsigned iShwPdpt = iGst / 256;
301 unsigned iShw = (iGst % 256) * 2;
302 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
303
304 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
305 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
306 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
307 {
308 for (unsigned i = 0; i < 2; i++)
309 {
310# ifndef IN_RING0
311 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
312 {
313 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
314 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
315 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
316 break;
317 }
318 else
319# endif /* !IN_RING0 */
320 if (uShw.pPDPae->a[iShw+i].n.u1Present)
321 {
322 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
323 pgmPoolFree(pVM,
324 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
325 pPage->idx,
326 iShw + i);
327 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
328 }
329
330 /* paranoia / a bit assumptive. */
331 if ( (off & 3)
332 && (off & 3) + cbWrite > 4)
333 {
334 const unsigned iShw2 = iShw + 2 + i;
335 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
336 {
337# ifndef IN_RING0
338 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
339 {
340 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
341 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
342 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
343 break;
344 }
345 else
346# endif /* !IN_RING0 */
347 if (uShw.pPDPae->a[iShw2].n.u1Present)
348 {
349 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
350 pgmPoolFree(pVM,
351 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
352 pPage->idx,
353 iShw2);
354 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
355 }
356 }
357 }
358 }
359 }
360 break;
361 }
362
363 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
364 {
365 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
366 const unsigned iShw = off / sizeof(X86PTEPAE);
367 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
368 if (uShw.pPTPae->a[iShw].n.u1Present)
369 {
370 X86PTEPAE GstPte;
371 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
372 AssertRC(rc);
373
374 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
375 pgmPoolTracDerefGCPhysHint(pPool, pPage,
376 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
377 GstPte.u & X86_PTE_PAE_PG_MASK);
378 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
379 }
380
381 /* paranoia / a bit assumptive. */
382 if ( (off & 7)
383 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
384 {
385 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
386 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
387
388 if (uShw.pPTPae->a[iShw2].n.u1Present)
389 {
390 X86PTEPAE GstPte;
391# ifdef IN_RING3
392 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
393# else
394 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
395# endif
396 AssertRC(rc);
397 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
398 pgmPoolTracDerefGCPhysHint(pPool, pPage,
399 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
400 GstPte.u & X86_PTE_PAE_PG_MASK);
401 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u ,0);
402 }
403 }
404 break;
405 }
406
407 case PGMPOOLKIND_32BIT_PD:
408 {
409 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
410 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
411
412 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
413 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
414# ifndef IN_RING0
415 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
416 {
417 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
418 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
419 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
420 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
421 break;
422 }
423# endif /* !IN_RING0 */
424# ifndef IN_RING0
425 else
426# endif /* !IN_RING0 */
427 {
428 if (uShw.pPD->a[iShw].n.u1Present)
429 {
430 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
431 pgmPoolFree(pVM,
432 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
433 pPage->idx,
434 iShw);
435 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
436 }
437 }
438 /* paranoia / a bit assumptive. */
439 if ( (off & 3)
440 && (off & 3) + cbWrite > sizeof(X86PTE))
441 {
442 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
443 if ( iShw2 != iShw
444 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
445 {
446# ifndef IN_RING0
447 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
448 {
449 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
450 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
451 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
452 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
453 break;
454 }
455# endif /* !IN_RING0 */
456# ifndef IN_RING0
457 else
458# endif /* !IN_RING0 */
459 {
460 if (uShw.pPD->a[iShw2].n.u1Present)
461 {
462 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
463 pgmPoolFree(pVM,
464 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
465 pPage->idx,
466 iShw2);
467 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
468 }
469 }
470 }
471 }
472#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
473 if ( uShw.pPD->a[iShw].n.u1Present
474 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
475 {
476 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
477# ifdef IN_RC /* TLB load - we're pushing things a bit... */
478 ASMProbeReadByte(pvAddress);
479# endif
480 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
481 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
482 }
483#endif
484 break;
485 }
486
487 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
488 {
489 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
490 const unsigned iShw = off / sizeof(X86PDEPAE);
491 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
492#ifndef IN_RING0
493 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
494 {
495 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
496 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
497 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
498 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
499 break;
500 }
501#endif /* !IN_RING0 */
502 /*
503 * Causes trouble when the guest uses a PDE to refer to the whole page table level
504 * structure. (Invalidate here; faults later on when it tries to change the page
505 * table entries -> recheck; probably only applies to the RC case.)
506 */
507# ifndef IN_RING0
508 else
509# endif /* !IN_RING0 */
510 {
511 if (uShw.pPDPae->a[iShw].n.u1Present)
512 {
513 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
514 pgmPoolFree(pVM,
515 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
516 pPage->idx,
517 iShw);
518 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
519 }
520 }
521 /* paranoia / a bit assumptive. */
522 if ( (off & 7)
523 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
524 {
525 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
526 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
527
528#ifndef IN_RING0
529 if ( iShw2 != iShw
530 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
531 {
532 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
533 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
534 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
535 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
536 break;
537 }
538#endif /* !IN_RING0 */
539# ifndef IN_RING0
540 else
541# endif /* !IN_RING0 */
542 if (uShw.pPDPae->a[iShw2].n.u1Present)
543 {
544 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
545 pgmPoolFree(pVM,
546 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
547 pPage->idx,
548 iShw2);
549 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
550 }
551 }
552 break;
553 }
554
555 case PGMPOOLKIND_PAE_PDPT:
556 {
557 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
558 /*
559 * Hopefully this doesn't happen very often:
560 * - touching unused parts of the page
561 * - messing with the bits of pd pointers without changing the physical address
562 */
563 /* PDPT roots are not page aligned; 32 byte only! */
564 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
565
566 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
567 const unsigned iShw = offPdpt / sizeof(X86PDPE);
568 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
569 {
570# ifndef IN_RING0
571 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
572 {
573 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
574 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
575 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
576 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
577 break;
578 }
579# endif /* !IN_RING0 */
580# ifndef IN_RING0
581 else
582# endif /* !IN_RING0 */
583 if (uShw.pPDPT->a[iShw].n.u1Present)
584 {
585 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
586 pgmPoolFree(pVM,
587 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
588 pPage->idx,
589 iShw);
590 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
591 }
592
593 /* paranoia / a bit assumptive. */
594 if ( (offPdpt & 7)
595 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
596 {
597 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
598 if ( iShw2 != iShw
599 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
600 {
601# ifndef IN_RING0
602 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
603 {
604 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
605 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
606 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
607 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
608 break;
609 }
610# endif /* !IN_RING0 */
611# ifndef IN_RING0
612 else
613# endif /* !IN_RING0 */
614 if (uShw.pPDPT->a[iShw2].n.u1Present)
615 {
616 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
617 pgmPoolFree(pVM,
618 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
619 pPage->idx,
620 iShw2);
621 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
622 }
623 }
624 }
625 }
626 break;
627 }
628
629#ifndef IN_RC
630 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
631 {
632 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
633 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
634 const unsigned iShw = off / sizeof(X86PDEPAE);
635 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
636 if (uShw.pPDPae->a[iShw].n.u1Present)
637 {
638 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
639 pgmPoolFree(pVM,
640 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
641 pPage->idx,
642 iShw);
643 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
644 }
645 /* paranoia / a bit assumptive. */
646 if ( (off & 7)
647 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
648 {
649 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
650 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
651
652 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
653 if (uShw.pPDPae->a[iShw2].n.u1Present)
654 {
655 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
656 pgmPoolFree(pVM,
657 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
658 pPage->idx,
659 iShw2);
660 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
661 }
662 }
663 break;
664 }
665
666 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
667 {
668 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
669 /*
670 * Hopefully this doesn't happen very often:
671 * - messing with the bits of pd pointers without changing the physical address
672 */
673 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
674 const unsigned iShw = off / sizeof(X86PDPE);
675 if (uShw.pPDPT->a[iShw].n.u1Present)
676 {
677 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
678 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
679 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
680 }
681 /* paranoia / a bit assumptive. */
682 if ( (off & 7)
683 && (off & 7) + cbWrite > sizeof(X86PDPE))
684 {
685 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
686 if (uShw.pPDPT->a[iShw2].n.u1Present)
687 {
688 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
689 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
690 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PML4:
697 {
698 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
699 /*
700 * Hopefully this doesn't happen very often:
701 * - messing with the bits of pd pointers without changing the physical address
702 */
703 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
704 const unsigned iShw = off / sizeof(X86PDPE);
705 if (uShw.pPML4->a[iShw].n.u1Present)
706 {
707 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
708 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
709 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
710 }
711 /* paranoia / a bit assumptive. */
712 if ( (off & 7)
713 && (off & 7) + cbWrite > sizeof(X86PDPE))
714 {
715 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
716 if (uShw.pPML4->a[iShw2].n.u1Present)
717 {
718 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
719 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
720 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
721 }
722 }
723 break;
724 }
725#endif /* !IN_RC */
726
727 default:
728 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
729 }
730 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
731
732 /* next */
733 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
734 return;
735 pPage = &pPool->aPages[pPage->iMonitoredNext];
736 }
737}
738
739# ifndef IN_RING3
740/**
741 * Checks if an access could be a fork operation in progress.
742 *
743 * Meaning that the guest is setting up the parent process for Copy-On-Write.
744 *
745 * @returns true if it's likely that we're forking, otherwise false.
746 * @param pPool The pool.
747 * @param pDis The disassembled instruction.
748 * @param offFault The access offset.
749 */
750DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
751{
752 /*
753 * i386 linux is using btr to clear X86_PTE_RW.
754 * The functions involved are (2.6.16 source inspection):
755 * clear_bit
756 * ptep_set_wrprotect
757 * copy_one_pte
758 * copy_pte_range
759 * copy_pmd_range
760 * copy_pud_range
761 * copy_page_range
762 * dup_mmap
763 * dup_mm
764 * copy_mm
765 * copy_process
766 * do_fork
767 */
768 if ( pDis->pCurInstr->opcode == OP_BTR
769 && !(offFault & 4)
770 /** @todo Validate that the bit index is X86_PTE_RW. */
771 )
772 {
773 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
774 return true;
775 }
776 return false;
777}
778
779
780/**
781 * Determine whether the page is likely to have been reused.
782 *
783 * @returns true if we consider the page as being reused for a different purpose.
784 * @returns false if we consider it to still be a paging page.
785 * @param pVM VM Handle.
786 * @param pVCpu VMCPU Handle.
787 * @param pRegFrame Trap register frame.
788 * @param pDis The disassembly info for the faulting instruction.
789 * @param pvFault The fault address.
790 *
791 * @remark The REP prefix check is left to the caller because of STOSD/W.
792 */
793DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
794{
795#ifndef IN_RC
796 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
797 if ( HWACCMHasPendingIrq(pVM)
798 && (pRegFrame->rsp - pvFault) < 32)
799 {
800 /* Fault caused by stack writes while trying to inject an interrupt event. */
801 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
802 return true;
803 }
804#else
805 NOREF(pVM); NOREF(pvFault);
806#endif
807
808 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
809
810 /* Non-supervisor mode write means it's used for something else. */
811 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
812 return true;
813
814 switch (pDis->pCurInstr->opcode)
815 {
816 /* call implies the actual push of the return address faulted */
817 case OP_CALL:
818 Log4(("pgmPoolMonitorIsReused: CALL\n"));
819 return true;
820 case OP_PUSH:
821 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
822 return true;
823 case OP_PUSHF:
824 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
825 return true;
826 case OP_PUSHA:
827 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
828 return true;
829 case OP_FXSAVE:
830 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
831 return true;
832 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
833 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
834 return true;
835 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
836 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
837 return true;
838 case OP_MOVSWD:
839 case OP_STOSWD:
840 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
841 && pRegFrame->rcx >= 0x40
842 )
843 {
844 Assert(pDis->mode == CPUMODE_64BIT);
845
846 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
847 return true;
848 }
849 return false;
850 }
851 if ( ( (pDis->param1.flags & USE_REG_GEN32)
852 || (pDis->param1.flags & USE_REG_GEN64))
853 && (pDis->param1.base.reg_gen == USE_REG_ESP))
854 {
855 Log4(("pgmPoolMonitorIsReused: ESP\n"));
856 return true;
857 }
858
859 return false;
860}
861
862/**
863 * Flushes the page being accessed.
864 *
865 * @returns VBox status code suitable for scheduling.
866 * @param pVM The VM handle.
867 * @param pVCpu The VMCPU handle.
868 * @param pPool The pool.
869 * @param pPage The pool page (head).
870 * @param pDis The disassembly of the write instruction.
871 * @param pRegFrame The trap register frame.
872 * @param GCPhysFault The fault address as guest physical address.
873 * @param pvFault The fault address.
874 */
875static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
876 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
877{
878 /*
879 * First, do the flushing.
880 */
881 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
882
883 /*
884 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection). Must do this in raw mode (!); XP boot will fail otherwise
885 */
886 uint32_t cbWritten;
887 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
888 if (RT_SUCCESS(rc2))
889 pRegFrame->rip += pDis->opsize;
890 else if (rc2 == VERR_EM_INTERPRETER)
891 {
892#ifdef IN_RC
893 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
894 {
895 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
896 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
897 rc = VINF_SUCCESS;
898 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
899 }
900 else
901#endif
902 {
903 rc = VINF_EM_RAW_EMULATE_INSTR;
904 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
905 }
906 }
907 else
908 rc = rc2;
909
910#ifdef IN_RC
911 /* See use in pgmPoolAccessHandlerSimple(). */
912 PGM_INVL_VCPU_TLBS(pVCpu);
913#endif
914 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
915 return rc;
916}
917
918/**
919 * Handles the STOSD write accesses.
920 *
921 * @returns VBox status code suitable for scheduling.
922 * @param pVM The VM handle.
923 * @param pPool The pool.
924 * @param pPage The pool page (head).
925 * @param pDis The disassembly of the write instruction.
926 * @param pRegFrame The trap register frame.
927 * @param GCPhysFault The fault address as guest physical address.
928 * @param pvFault The fault address.
929 */
930DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
931 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
932{
933 unsigned uIncrement = pDis->param1.size;
934
935 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
936 Assert(pRegFrame->rcx <= 0x20);
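    /* The caller (pgmPoolAccessHandler) only forwards REP STOS variants with at most 0x20
       iterations and a destination that stays within one page, so the loop below touches
       at most 32 monitored entries. */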
937
938#ifdef VBOX_STRICT
939 if (pDis->opmode == CPUMODE_32BIT)
940 Assert(uIncrement == 4);
941 else
942 Assert(uIncrement == 8);
943#endif
944
945 Log3(("pgmPoolAccessHandlerSTOSD\n"));
946
947 /*
948 * Increment the modification counter and insert it into the list
949 * of modified pages the first time.
950 */
951 if (!pPage->cModifications++)
952 pgmPoolMonitorModifiedInsert(pPool, pPage);
953
954 /*
955 * Execute REP STOSD.
956 *
957 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
958 * write situation, meaning that it's safe to write here.
959 */
960 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
961 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
962 while (pRegFrame->rcx)
963 {
964#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
965 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
966 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
967 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
968#else
969 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
970#endif
971#ifdef IN_RC
972 *(uint32_t *)pu32 = pRegFrame->eax;
973#else
974 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
975#endif
976 pu32 += uIncrement;
977 GCPhysFault += uIncrement;
978 pRegFrame->rdi += uIncrement;
979 pRegFrame->rcx--;
980 }
981 pRegFrame->rip += pDis->opsize;
982
983#ifdef IN_RC
984 /* See use in pgmPoolAccessHandlerSimple(). */
985 PGM_INVL_VCPU_TLBS(pVCpu);
986#endif
987
988 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
989 return VINF_SUCCESS;
990}
991
992
993/**
994 * Handles the simple write accesses.
995 *
996 * @returns VBox status code suitable for scheduling.
997 * @param pVM The VM handle.
998 * @param pVCpu The VMCPU handle.
999 * @param pPool The pool.
1000 * @param pPage The pool page (head).
1001 * @param pDis The disassembly of the write instruction.
1002 * @param pRegFrame The trap register frame.
1003 * @param GCPhysFault The fault address as guest physical address.
1004 * @param pvFault The fault address.
1005 * @param pfReused Reused state (out)
1006 */
1007DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1008 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1009{
1010 Log3(("pgmPoolAccessHandlerSimple\n"));
1011 /*
1012 * Increment the modification counter and insert it into the list
1013 * of modified pages the first time.
1014 */
1015 if (!pPage->cModifications++)
1016 pgmPoolMonitorModifiedInsert(pPool, pPage);
1017
1018 /*
1019 * Clear all the pages. ASSUMES that pvFault is readable.
1020 */
1021#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1022 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1023 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1024 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1025#else
1026 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1027#endif
1028
1029 /*
1030 * Interpret the instruction.
1031 */
1032 uint32_t cb;
1033 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1034 if (RT_SUCCESS(rc))
1035 pRegFrame->rip += pDis->opsize;
1036 else if (rc == VERR_EM_INTERPRETER)
1037 {
1038 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1039 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1040 rc = VINF_EM_RAW_EMULATE_INSTR;
1041 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1042 }
1043
1044#if 0 /* experimental code */
1045 if (rc == VINF_SUCCESS)
1046 {
1047 switch (pPage->enmKind)
1048 {
1049 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1050 {
1051 X86PTEPAE GstPte;
1052 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1053 AssertRC(rc);
1054
1055 /* Check the new value written by the guest. If present and with a bogus physical address, then
1056 * it's fairly safe to assume the guest is reusing the PT.
1057 */
1058 if (GstPte.n.u1Present)
1059 {
1060 RTHCPHYS HCPhys = -1;
1061 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1062 if (rc != VINF_SUCCESS)
1063 {
1064 *pfReused = true;
1065 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1066 }
1067 }
1068 break;
1069 }
1070 }
1071 }
1072#endif
1073
1074#ifdef IN_RC
1075 /*
1076 * Quick hack, with logging enabled we're getting stale
1077 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1078 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1079 * have to be fixed to support this. But that'll have to wait till next week.
1080 *
1081 * An alternative is to keep track of the changed PTEs together with the
1082 * GCPhys from the guest PT. This may prove expensive though.
1083 *
1084 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1085 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1086 */
1087 PGM_INVL_VCPU_TLBS(pVCpu);
1088#endif
1089
1090 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1091 return rc;
1092}
1093
1094/**
1095 * \#PF Handler callback for PT write accesses.
1096 *
1097 * @returns VBox status code (appropriate for GC return).
1098 * @param pVM VM Handle.
1099 * @param uErrorCode CPU Error code.
1100 * @param pRegFrame Trap register frame.
1101 * NULL on DMA and other non CPU access.
1102 * @param pvFault The fault address (cr2).
1103 * @param GCPhysFault The GC physical address corresponding to pvFault.
1104 * @param pvUser User argument.
1105 */
1106DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1107{
1108 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1109 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1110 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1111 PVMCPU pVCpu = VMMGetCpu(pVM);
1112 unsigned cMaxModifications;
1113 bool fForcedFlush = false;
1114
1115 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1116
1117 pgmLock(pVM);
1118 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1119 {
1120 /* Pool page changed while we were waiting for the lock; ignore. */
1121 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1122 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1123 pgmUnlock(pVM);
1124 return VINF_SUCCESS;
1125 }
1126#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1127 if (pPage->fDirty)
1128 {
1129 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1130 pgmUnlock(pVM);
1131 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1132 }
1133#endif
1134
1135#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1136 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1137 {
1138 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1139 void *pvGst;
1140 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1141 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1142 }
1143#endif
1144
1145 /*
1146 * Disassemble the faulting instruction.
1147 */
1148 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1149 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1150 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1151 {
1152 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1153 pgmUnlock(pVM);
1154 return rc;
1155 }
1156
1157 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1158
1159 /*
1160 * We should ALWAYS have the list head as user parameter. This
1161 * is because we use that page to record the changes.
1162 */
1163 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1164
1165#ifdef IN_RING0
1166 /* Maximum nr of modifications depends on the page type. */
1167 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1168 cMaxModifications = 4;
1169 else
1170 cMaxModifications = 24;
1171#else
1172 cMaxModifications = 48;
1173#endif
1174
1175 /*
1176 * Incremental page table updates should weigh more than random ones.
1177 * (Only applies when started from offset 0)
1178 */
1179 pVCpu->pgm.s.cPoolAccessHandler++;
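    /* Heuristic: treat this as an incremental page table update when the instruction pointer
       is within +/- 0x40 of the previous monitored write, the fault address is exactly one
       operand size further on, and no other monitored access happened in between. */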
1180 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1181 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1182 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1183 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1184 {
1185 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1186 pPage->cModifications = pPage->cModifications * 2;
1187 pPage->pvLastAccessHandlerFault = pvFault;
1188 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1189 if (pPage->cModifications >= cMaxModifications)
1190 {
1191 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1192 fForcedFlush = true;
1193 }
1194 }
1195
1196 if (pPage->cModifications >= cMaxModifications)
1197 Log(("Mod overflow %VGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1198
1199 /*
1200 * Check if it's worth dealing with.
1201 */
1202 bool fReused = false;
1203 bool fNotReusedNotForking = false;
1204 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1205 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1206 )
1207 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1208 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1209 {
1210 /*
1211 * Simple instructions, no REP prefix.
1212 */
1213 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1214 {
1215 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1216 if (fReused)
1217 goto flushPage;
1218
1219 /* A mov instruction to change the first page table entry will be remembered so we can detect
1220 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1221 */
1222 if ( rc == VINF_SUCCESS
1223 && pDis->pCurInstr->opcode == OP_MOV
1224 && (pvFault & PAGE_OFFSET_MASK) == 0)
1225 {
1226 pPage->pvLastAccessHandlerFault = pvFault;
1227 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1228 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1229 /* Make sure we don't kick out a page too quickly. */
1230 if (pPage->cModifications > 8)
1231 pPage->cModifications = 2;
1232 }
1233 else
1234 if (pPage->pvLastAccessHandlerFault == pvFault)
1235 {
1236 /* ignore the 2nd write to this page table entry. */
1237 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1238 }
1239 else
1240 {
1241 pPage->pvLastAccessHandlerFault = 0;
1242 pPage->pvLastAccessHandlerRip = 0;
1243 }
1244
1245 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1246 pgmUnlock(pVM);
1247 return rc;
1248 }
1249
1250 /*
1251 * Windows is frequently doing small memset() operations (netio test 4k+).
1252 * We have to deal with these or we'll kill the cache and performance.
1253 */
1254 if ( pDis->pCurInstr->opcode == OP_STOSWD
1255 && !pRegFrame->eflags.Bits.u1DF
1256 && pDis->opmode == pDis->mode
1257 && pDis->addrmode == pDis->mode)
1258 {
1259 bool fValidStosd = false;
1260
1261 if ( pDis->mode == CPUMODE_32BIT
1262 && pDis->prefix == PREFIX_REP
1263 && pRegFrame->ecx <= 0x20
1264 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1265 && !((uintptr_t)pvFault & 3)
1266 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1267 )
1268 {
1269 fValidStosd = true;
1270 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1271 }
1272 else
1273 if ( pDis->mode == CPUMODE_64BIT
1274 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1275 && pRegFrame->rcx <= 0x20
1276 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1277 && !((uintptr_t)pvFault & 7)
1278 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1279 )
1280 {
1281 fValidStosd = true;
1282 }
1283
1284 if (fValidStosd)
1285 {
1286 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1287 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1288 pgmUnlock(pVM);
1289 return rc;
1290 }
1291 }
1292
1293 /* REP prefix, don't bother. */
1294 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1295 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1296 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1297 fNotReusedNotForking = true;
1298 }
1299
1300#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1301 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1302 * leads to pgm pool thrashing and an excessive number of write faults due to page monitoring.
1303 */
1304 if ( pPage->cModifications >= cMaxModifications
1305 && !fForcedFlush
1306 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1307 && ( fNotReusedNotForking
1308 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1309 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1310 )
1311 )
1312 {
1313 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1314 Assert(pPage->fDirty == false);
1315
1316 /* Flush any monitored duplicates as we will disable write protection. */
1317 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1318 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1319 {
1320 PPGMPOOLPAGE pPageHead = pPage;
1321
1322 /* Find the monitor head. */
1323 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1324 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1325
1326 while (pPageHead)
1327 {
1328 unsigned idxNext = pPageHead->iMonitoredNext;
1329
1330 if (pPageHead != pPage)
1331 {
1332 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1333 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1334 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1335 AssertRC(rc2);
1336 }
1337
1338 if (idxNext == NIL_PGMPOOL_IDX)
1339 break;
1340
1341 pPageHead = &pPool->aPages[idxNext];
1342 }
1343 }
1344
1345 /* The flushing above might fail for locked pages, so double check. */
1346 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1347 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1348 {
1349 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1350
1351 /* Temporarily allow write access to the page table again. */
1352 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1353 if (rc == VINF_SUCCESS)
1354 {
1355 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1356 AssertMsg(rc == VINF_SUCCESS
1357 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1358 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1359 || rc == VERR_PAGE_NOT_PRESENT,
1360 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1361
1362 pPage->pvDirtyFault = pvFault;
1363
1364 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1365 pgmUnlock(pVM);
1366 return rc;
1367 }
1368 }
1369 }
1370#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1371
1372 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1373flushPage:
1374 /*
1375 * Not worth it, so flush it.
1376 *
1377 * If we considered it to be reused, don't go back to ring-3
1378 * to emulate failed instructions since we usually cannot
1379 * interpret them. This may be a bit risky, in which case
1380 * the reuse detection must be fixed.
1381 */
1382 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1383 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1384 && fReused)
1385 {
1386 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1387 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1388 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1389 }
1390 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1391 pgmUnlock(pVM);
1392 return rc;
1393}
1394
1395# endif /* !IN_RING3 */
1396
1397# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1398
1399# ifdef VBOX_STRICT
1400/**
1401 * Check references to guest physical memory in a PAE / PAE page table.
1402 *
1403 * @param pPool The pool.
1404 * @param pPage The page.
1405 * @param pShwPT The shadow page table (mapping of the page).
1406 * @param pGstPT The guest page table.
1407 */
1408static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1409{
1410 unsigned cErrors = 0;
1411 int LastRc = -1; /* initialized to shut up gcc */
1412 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1413 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1414
1415#ifdef VBOX_STRICT
1416 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1417 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1418#endif
1419 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1420 {
1421 if (pShwPT->a[i].n.u1Present)
1422 {
1423 RTHCPHYS HCPhys = -1;
1424 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1425 if ( rc != VINF_SUCCESS
1426 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1427 {
1428 RTHCPHYS HCPhysPT = -1;
1429 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1430 LastPTE = i;
1431 LastRc = rc;
1432 LastHCPhys = HCPhys;
1433 cErrors++;
1434
1435 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1436 AssertRC(rc);
1437
1438 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1439 {
1440 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1441
1442 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1443 {
1444 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1445
1446 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1447 {
1448 if ( pShwPT2->a[j].n.u1Present
1449 && pShwPT2->a[j].n.u1Write
1450 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1451 {
1452 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1453 }
1454 }
1455 }
1456 }
1457 }
1458 }
1459 }
1460 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1461}
1462# endif /* VBOX_STRICT */
1463
1464/**
1465 * Clear references to guest physical memory in a PAE / PAE page table.
1466 *
1467 * @returns nr of changed PTEs
1468 * @param pPool The pool.
1469 * @param pPage The page.
1470 * @param pShwPT The shadow page table (mapping of the page).
1471 * @param pGstPT The guest page table.
1472 * @param pOldGstPT The old cached guest page table.
1473 * @param fAllowRemoval Allow bailing out early when an invalid guest PTE is encountered (the reused page table is then flagged for flushing).
1474 * @param pfFlush Flush reused page table (out)
1475 */
1476DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1477{
1478 unsigned cChanged = 0;
1479
1480#ifdef VBOX_STRICT
1481 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1482 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1483#endif
1484 *pfFlush = false;
1485
1486 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1487 {
1488 /* Check the new value written by the guest. If present and with a bogus physical address, then
1489 * it's fairly safe to assume the guest is reusing the PT.
1490 */
1491 if ( fAllowRemoval
1492 && pGstPT->a[i].n.u1Present)
1493 {
1494 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1495 {
1496 *pfFlush = true;
1497 return ++cChanged;
1498 }
1499 }
1500 if (pShwPT->a[i].n.u1Present)
1501 {
1502 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1503 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1504 {
1505#ifdef VBOX_STRICT
1506 RTHCPHYS HCPhys = -1;
1507 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1508 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1509#endif
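                /* Compare only the attribute bits we care about; the shadow entry is allowed to
                   be more restrictive on write access (fHostRW <= fGuestRW), typically because
                   of write monitoring or dirty page tracking, so that alone is no reason to
                   flush the entry. */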
1510 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1511 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1512 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1513 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1514
1515 if ( uHostAttr == uGuestAttr
1516 && fHostRW <= fGuestRW)
1517 continue;
1518 }
1519 cChanged++;
1520 /* Something was changed, so flush it. */
1521 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1522 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1523 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1524 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1525 }
1526 }
1527 return cChanged;
1528}
1529
1530
1531/**
1532 * Flush a dirty page
1533 *
1534 * @param pVM VM Handle.
1535 * @param pPool The pool.
1536 * @param idxSlot Dirty array slot index
1537 * @param fAllowRemoval Allow a reused page table to be removed
1538 */
1539static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1540{
1541 PPGMPOOLPAGE pPage;
1542 unsigned idxPage;
1543
1544 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1545 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1546 return;
1547
1548 idxPage = pPool->aIdxDirtyPages[idxSlot];
1549 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1550 pPage = &pPool->aPages[idxPage];
1551 Assert(pPage->idx == idxPage);
1552 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1553
1554 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1555 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1556
1557 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1558 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1559 Assert(rc == VINF_SUCCESS);
1560 pPage->fDirty = false;
1561
1562#ifdef VBOX_STRICT
1563 uint64_t fFlags = 0;
1564 RTHCPHYS HCPhys;
1565 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1566 AssertMsg( ( rc == VINF_SUCCESS
1567 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1568 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1569 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1570 || rc == VERR_PAGE_NOT_PRESENT,
1571 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1572#endif
1573
1574 /* Flush those PTEs that have changed. */
1575 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1576 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1577 void *pvGst;
1578 bool fFlush;
1579 rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1580 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1581 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1582 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1583
1584 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1585 Assert(pPage->cModifications);
1586 if (cChanges < 4)
1587 pPage->cModifications = 1; /* must use > 0 here */
1588 else
1589 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1590
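    /* If the dirty array was completely full, the slot we are about to free becomes the next
       one to hand out; otherwise idxFreeDirtyPage should already point at a free slot. */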
1591 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1592 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1593 pPool->idxFreeDirtyPage = idxSlot;
1594
1595 pPool->cDirtyPages--;
1596 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1597 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1598 if (fFlush)
1599 {
1600 Assert(fAllowRemoval);
1601 Log(("Flush reused page table!\n"));
1602 pgmPoolFlushPage(pPool, pPage);
1603 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1604 }
1605 else
1606 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1607}
1608
1609# ifndef IN_RING3
1610/**
1611 * Adds a new dirty page.
1612 *
1613 * @param pVM VM Handle.
1614 * @param pPool The pool.
1615 * @param pPage The page.
1616 */
1617void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1618{
1619 unsigned idxFree;
1620
1621 Assert(PGMIsLocked(pVM));
1622 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1623 Assert(!pPage->fDirty);
1624
1625 idxFree = pPool->idxFreeDirtyPage;
1626 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1627 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1628
1629 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1630 {
1631 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1632 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1633 }
1634 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1635 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1636
1637 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1638
1639 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1640 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1641 */
1642 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1643 void *pvGst;
1644 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1645 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1646#ifdef VBOX_STRICT
1647 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1648#endif
1649
1650 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1651 pPage->fDirty = true;
1652 pPage->idxDirty = idxFree;
1653 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1654 pPool->cDirtyPages++;
1655
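    /* Advance the free slot hint; the array size is a power of two, so the mask wraps the index
       around. If the new slot is occupied while the array still has room, search for a free one. */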
1656 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1657 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1658 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1659 {
1660 unsigned i;
1661 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1662 {
1663 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1664 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1665 {
1666 pPool->idxFreeDirtyPage = idxFree;
1667 break;
1668 }
1669 }
1670 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1671 }
1672
1673 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1674 return;
1675}
1676# endif /* !IN_RING3 */
1677
1678/**
1679 * Checks if the specified page is dirty (not write monitored).
1680 *
1681 * @returns true if dirty, false if not.
1682 * @param   pVM             VM Handle.
1683 * @param   GCPhys          Guest physical address.
1684 */
1685bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1686{
1687 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1688 Assert(PGMIsLocked(pVM));
1689 if (!pPool->cDirtyPages)
1690 return false;
1691
1692 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1693
1694 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1695 {
1696 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1697 {
1698 PPGMPOOLPAGE pPage;
1699 unsigned idxPage = pPool->aIdxDirtyPages[i];
1700
1701 pPage = &pPool->aPages[idxPage];
1702 if (pPage->GCPhys == GCPhys)
1703 return true;
1704 }
1705 }
1706 return false;
1707}
1708
1709/**
1710 * Reset all dirty pages by reinstating page monitoring.
1711 *
1712 * @param pVM VM Handle.
1713 */
1714void pgmPoolResetDirtyPages(PVM pVM)
1715{
1716 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1717 Assert(PGMIsLocked(pVM));
1718 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1719
1720 if (!pPool->cDirtyPages)
1721 return;
1722
1723 Log(("pgmPoolResetDirtyPages\n"));
1724 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1725 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1726
1727 pPool->idxFreeDirtyPage = 0;
1728 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1729 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1730 {
1731 unsigned i;
1732 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1733 {
1734 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1735 {
1736 pPool->idxFreeDirtyPage = i;
1737 break;
1738 }
1739 }
1740 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1741 }
1742
1743 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1744 return;
1745}
1746
1747/**
1748 * Flushes the dirty state of the specified page table (if present), reinstating its write monitoring.
1749 *
1750 * @param pVM VM Handle.
1751 * @param GCPhysPT Physical address of the page table
1752 */
1753void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1754{
1755 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1756 Assert(PGMIsLocked(pVM));
1757 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1758 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aIdxDirtyPages);
1759
1760 if (!pPool->cDirtyPages)
1761 return;
1762
1763 GCPhysPT = GCPhysPT & ~(RTGCPHYS)(PAGE_SIZE - 1);
1764
1765 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1766 {
1767 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1768 {
1769 unsigned idxPage = pPool->aIdxDirtyPages[i];
1770
1771 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1772 if (pPage->GCPhys == GCPhysPT)
1773 {
1774 idxDirtyPage = i;
1775 break;
1776 }
1777 }
1778 }
1779
1780 if (idxDirtyPage != RT_ELEMENTS(pPool->aIdxDirtyPages))
1781 {
1782 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1783 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1784 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1785 {
1786 unsigned i;
1787 for (i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1788 {
1789 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1790 {
1791 pPool->idxFreeDirtyPage = i;
1792 break;
1793 }
1794 }
1795 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1796 }
1797 }
1798}
1799
1800# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1801
1802/**
1803 * Inserts a page into the GCPhys hash table.
1804 *
1805 * @param pPool The pool.
1806 * @param pPage The page.
1807 */
1808DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1809{
1810 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1811 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1812 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1813 pPage->iNext = pPool->aiHash[iHash];
1814 pPool->aiHash[iHash] = pPage->idx;
1815}
1816
1817
1818/**
1819 * Removes a page from the GCPhys hash table.
1820 *
1821 * @param pPool The pool.
1822 * @param pPage The page.
1823 */
1824DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1825{
1826 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1827 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1828 if (pPool->aiHash[iHash] == pPage->idx)
1829 pPool->aiHash[iHash] = pPage->iNext;
1830 else
1831 {
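        /* Not the bucket head: walk the singly linked hash chain to find the
           predecessor of pPage and unlink it from there. */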
1832 uint16_t iPrev = pPool->aiHash[iHash];
1833 for (;;)
1834 {
1835 const int16_t i = pPool->aPages[iPrev].iNext;
1836 if (i == pPage->idx)
1837 {
1838 pPool->aPages[iPrev].iNext = pPage->iNext;
1839 break;
1840 }
1841 if (i == NIL_PGMPOOL_IDX)
1842 {
1843 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1844 break;
1845 }
1846 iPrev = i;
1847 }
1848 }
1849 pPage->iNext = NIL_PGMPOOL_IDX;
1850}
1851
1852
1853/**
1854 * Frees up one cache page.
1855 *
1856 * @returns VBox status code.
1857 * @retval VINF_SUCCESS on success.
1858 * @param pPool The pool.
1859 * @param iUser The user index.
1860 */
1861static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1862{
1863#ifndef IN_RC
1864 const PVM pVM = pPool->CTX_SUFF(pVM);
1865#endif
1866    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1867 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1868
1869 /*
1870 * Select one page from the tail of the age list.
1871 */
1872 PPGMPOOLPAGE pPage;
1873 for (unsigned iLoop = 0; ; iLoop++)
1874 {
1875 uint16_t iToFree = pPool->iAgeTail;
1876 if (iToFree == iUser)
1877 iToFree = pPool->aPages[iToFree].iAgePrev;
1878/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1879 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1880 {
1881 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1882 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1883 {
1884 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1885 continue;
1886 iToFree = i;
1887 break;
1888 }
1889 }
1890*/
1891 Assert(iToFree != iUser);
1892 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1893 pPage = &pPool->aPages[iToFree];
1894
1895 /*
1896 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1897 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1898 */
1899 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1900 break;
1901 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1902 pgmPoolCacheUsed(pPool, pPage);
1903 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1904 }
1905
1906 /*
1907 * Found a usable page, flush it and return.
1908 */
1909 int rc = pgmPoolFlushPage(pPool, pPage);
1910 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1911    /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
1912 if (rc == VINF_SUCCESS)
1913 PGM_INVL_ALL_VCPU_TLBS(pVM);
1914 return rc;
1915}
1916
1917
1918/**
1919 * Checks if a kind mismatch is really a page being reused
1920 * or if it's just normal remappings.
1921 *
1922 * @returns true if reused and the cached page (enmKind1) should be flushed
1923 * @returns false if not reused.
1924 * @param enmKind1 The kind of the cached page.
1925 * @param enmKind2 The kind of the requested page.
1926 */
1927static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1928{
1929 switch (enmKind1)
1930 {
1931 /*
1932 * Never reuse them. There is no remapping in non-paging mode.
1933 */
1934 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1935 case PGMPOOLKIND_32BIT_PD_PHYS:
1936 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1937 case PGMPOOLKIND_PAE_PD_PHYS:
1938 case PGMPOOLKIND_PAE_PDPT_PHYS:
1939 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1940 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1941 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1942 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1943 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1944 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1945 return false;
1946
1947 /*
1948 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1949 */
1950 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1951 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1952 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1953 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1954 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1955 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1956 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1957 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1958 case PGMPOOLKIND_32BIT_PD:
1959 case PGMPOOLKIND_PAE_PDPT:
1960 switch (enmKind2)
1961 {
1962 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1963 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1964 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1965 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1966 case PGMPOOLKIND_64BIT_PML4:
1967 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1968 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1969 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1970 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1971 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1972 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1973 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1974 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1975 return true;
1976 default:
1977 return false;
1978 }
1979
1980 /*
1981 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1982 */
1983 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1984 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1985 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1986 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1987 case PGMPOOLKIND_64BIT_PML4:
1988 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1989 switch (enmKind2)
1990 {
1991 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1992 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1993 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1994 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1995 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1996 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1997 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1998 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1999 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2000 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2001 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2002 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2003 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2004 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2005 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2006 return true;
2007 default:
2008 return false;
2009 }
2010
2011 /*
2012 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2013 */
2014 case PGMPOOLKIND_ROOT_NESTED:
2015 return false;
2016
2017 default:
2018 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2019 }
2020}
2021
2022
2023/**
2024 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2025 *
2026 * @returns VBox status code.
2027 * @retval VINF_PGM_CACHED_PAGE on success.
2028 * @retval VERR_FILE_NOT_FOUND if not found.
2029 * @param pPool The pool.
2030 * @param GCPhys The GC physical address of the page we're gonna shadow.
2031 * @param enmKind The kind of mapping.
2032 * @param enmAccess Access type for the mapping (only relevant for big pages)
2033 * @param iUser The shadow page pool index of the user table.
2034 * @param iUserTable The index into the user table (shadowed).
2035 * @param ppPage Where to store the pointer to the page.
2036 */
2037static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2038{
2039#ifndef IN_RC
2040 const PVM pVM = pPool->CTX_SUFF(pVM);
2041#endif
2042 /*
2043 * Look up the GCPhys in the hash.
2044 */
2045 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2046 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2047 if (i != NIL_PGMPOOL_IDX)
2048 {
2049 do
2050 {
2051 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2052 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2053 if (pPage->GCPhys == GCPhys)
2054 {
2055 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2056 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2057 {
2058 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2059 * doesn't flush it in case there are no more free use records.
2060 */
2061 pgmPoolCacheUsed(pPool, pPage);
2062
2063 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2064 if (RT_SUCCESS(rc))
2065 {
2066 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2067 *ppPage = pPage;
2068 if (pPage->cModifications)
2069 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2070 STAM_COUNTER_INC(&pPool->StatCacheHits);
2071 return VINF_PGM_CACHED_PAGE;
2072 }
2073 return rc;
2074 }
2075
2076 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2077 {
2078 /*
2079 * The kind is different. In some cases we should now flush the page
2080 * as it has been reused, but in most cases this is normal remapping
2081 * of PDs as PT or big pages using the GCPhys field in a slightly
2082 * different way than the other kinds.
2083 */
2084 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2085 {
2086 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2087 pgmPoolFlushPage(pPool, pPage);
2088 break;
2089 }
2090 }
2091 }
2092
2093 /* next */
2094 i = pPage->iNext;
2095 } while (i != NIL_PGMPOOL_IDX);
2096 }
2097
2098 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2099 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2100 return VERR_FILE_NOT_FOUND;
2101}
2102
2103
2104/**
2105 * Inserts a page into the cache.
2106 *
2107 * @param pPool The pool.
2108 * @param pPage The cached page.
2109 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2110 */
2111static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2112{
2113 /*
2114 * Insert into the GCPhys hash if the page is fit for that.
2115 */
2116 Assert(!pPage->fCached);
2117 if (fCanBeCached)
2118 {
2119 pPage->fCached = true;
2120 pgmPoolHashInsert(pPool, pPage);
2121 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2122 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2123 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2124 }
2125 else
2126 {
2127 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2128 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2129 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2130 }
2131
2132 /*
2133 * Insert at the head of the age list.
2134 */
2135 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2136 pPage->iAgeNext = pPool->iAgeHead;
2137 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2138 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2139 else
2140 pPool->iAgeTail = pPage->idx;
2141 pPool->iAgeHead = pPage->idx;
2142}
2143
2144
2145/**
2146 * Flushes a cached page.
2147 *
2148 * @param pPool The pool.
2149 * @param pPage The cached page.
2150 */
2151static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2152{
2153 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2154
2155 /*
2156 * Remove the page from the hash.
2157 */
2158 if (pPage->fCached)
2159 {
2160 pPage->fCached = false;
2161 pgmPoolHashRemove(pPool, pPage);
2162 }
2163 else
2164 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2165
2166 /*
2167 * Remove it from the age list.
2168 */
2169 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2170 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2171 else
2172 pPool->iAgeTail = pPage->iAgePrev;
2173 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2174 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2175 else
2176 pPool->iAgeHead = pPage->iAgeNext;
2177 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2178 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2179}
2180
2181
2182/**
2183 * Looks for pages sharing the monitor.
2184 *
2185 * @returns Pointer to the head page.
2186 * @returns NULL if not found.
2187 * @param   pPool       The pool.
2188 * @param pNewPage The page which is going to be monitored.
2189 */
2190static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2191{
2192 /*
2193 * Look up the GCPhys in the hash.
2194 */
2195 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2196 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2197 if (i == NIL_PGMPOOL_IDX)
2198 return NULL;
2199 do
2200 {
2201 PPGMPOOLPAGE pPage = &pPool->aPages[i];
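        /* Unsigned compare: this matches any pool page whose GCPhys lies within the same guest
           page, presumably because some kinds (e.g. the PAE PDs shadowing a 32-bit PD) keep a
           sub-page offset in GCPhys. */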
2202 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2203 && pPage != pNewPage)
2204 {
2205 switch (pPage->enmKind)
2206 {
2207 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2208 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2209 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2210 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2211 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2212 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2213 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2214 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2215 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2216 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2217 case PGMPOOLKIND_64BIT_PML4:
2218 case PGMPOOLKIND_32BIT_PD:
2219 case PGMPOOLKIND_PAE_PDPT:
2220 {
2221 /* find the head */
2222 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2223 {
2224 Assert(pPage->iMonitoredPrev != pPage->idx);
2225 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2226 }
2227 return pPage;
2228 }
2229
2230 /* ignore, no monitoring. */
2231 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2232 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2233 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2234 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2235 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2236 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2237 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2238 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2239 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2240 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2241 case PGMPOOLKIND_ROOT_NESTED:
2242 case PGMPOOLKIND_PAE_PD_PHYS:
2243 case PGMPOOLKIND_PAE_PDPT_PHYS:
2244 case PGMPOOLKIND_32BIT_PD_PHYS:
2245 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2246 break;
2247 default:
2248 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2249 }
2250 }
2251
2252 /* next */
2253 i = pPage->iNext;
2254 } while (i != NIL_PGMPOOL_IDX);
2255 return NULL;
2256}
2257
2258
2259/**
2260 * Enables write monitoring of a guest page.
2261 *
2262 * @returns VBox status code.
2263 * @retval VINF_SUCCESS on success.
2264 * @param pPool The pool.
2265 * @param pPage The cached page.
2266 */
2267static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2268{
2269 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2270
2271 /*
2272 * Filter out the relevant kinds.
2273 */
2274 switch (pPage->enmKind)
2275 {
2276 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2277 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2278 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2279 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2280 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2281 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2282 case PGMPOOLKIND_64BIT_PML4:
2283 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2284 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2285 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2286 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2287 case PGMPOOLKIND_32BIT_PD:
2288 case PGMPOOLKIND_PAE_PDPT:
2289 break;
2290
2291 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2292 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2293 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2294 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2295 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2296 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2297 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2298 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2299 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2300 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2301 case PGMPOOLKIND_ROOT_NESTED:
2302 /* Nothing to monitor here. */
2303 return VINF_SUCCESS;
2304
2305 case PGMPOOLKIND_32BIT_PD_PHYS:
2306 case PGMPOOLKIND_PAE_PDPT_PHYS:
2307 case PGMPOOLKIND_PAE_PD_PHYS:
2308 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2309 /* Nothing to monitor here. */
2310 return VINF_SUCCESS;
2311 default:
2312 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2313 }
2314
2315 /*
2316 * Install handler.
2317 */
2318 int rc;
2319 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2320 if (pPageHead)
2321 {
2322 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2323 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2324
2325#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2326 if (pPageHead->fDirty)
2327 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2328#endif
2329
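        /* Share the existing physical access handler: link this page into the monitored chain
           right after the current head instead of registering a second handler for the page. */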
2330 pPage->iMonitoredPrev = pPageHead->idx;
2331 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2332 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2333 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2334 pPageHead->iMonitoredNext = pPage->idx;
2335 rc = VINF_SUCCESS;
2336 }
2337 else
2338 {
2339 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2340 PVM pVM = pPool->CTX_SUFF(pVM);
2341 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2342 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2343 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2344 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2345 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2346 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2347 pPool->pszAccessHandler);
2348 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2349 * the heap size should suffice. */
2350 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2351 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
2352 }
2353 pPage->fMonitored = true;
2354 return rc;
2355}
2356
2357
2358/**
2359 * Disables write monitoring of a guest page.
2360 *
2361 * @returns VBox status code.
2362 * @retval VINF_SUCCESS on success.
2363 * @param pPool The pool.
2364 * @param pPage The cached page.
2365 */
2366static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2367{
2368 /*
2369 * Filter out the relevant kinds.
2370 */
2371 switch (pPage->enmKind)
2372 {
2373 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2374 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2375 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2376 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2377 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2378 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2379 case PGMPOOLKIND_64BIT_PML4:
2380 case PGMPOOLKIND_32BIT_PD:
2381 case PGMPOOLKIND_PAE_PDPT:
2382 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2383 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2384 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2385 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2386 break;
2387
2388 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2389 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2390 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2391 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2392 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2393 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2394 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2395 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2396 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2397 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2398 case PGMPOOLKIND_ROOT_NESTED:
2399 case PGMPOOLKIND_PAE_PD_PHYS:
2400 case PGMPOOLKIND_PAE_PDPT_PHYS:
2401 case PGMPOOLKIND_32BIT_PD_PHYS:
2402 /* Nothing to monitor here. */
2403 return VINF_SUCCESS;
2404
2405 default:
2406 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2407 }
2408
2409 /*
2410 * Remove the page from the monitored list or uninstall it if last.
2411 */
2412 const PVM pVM = pPool->CTX_SUFF(pVM);
2413 int rc;
2414 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2415 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2416 {
2417 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2418 {
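            /* This page is the chain head: promote the next page to head and re-point the
               physical handler's user data at it, keeping the handler registration intact. */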
2419 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2420 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2421 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2422 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2423 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2424 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2425 pPool->pszAccessHandler);
2426 AssertFatalRCSuccess(rc);
2427 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2428 }
2429 else
2430 {
2431 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2432 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2433 {
2434 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2435 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2436 }
2437 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2438 rc = VINF_SUCCESS;
2439 }
2440 }
2441 else
2442 {
2443 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2444 AssertFatalRC(rc);
2445#ifdef VBOX_STRICT
2446 PVMCPU pVCpu = VMMGetCpu(pVM);
2447#endif
2448 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2449 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2450 }
2451 pPage->fMonitored = false;
2452
2453 /*
2454 * Remove it from the list of modified pages (if in it).
2455 */
2456 pgmPoolMonitorModifiedRemove(pPool, pPage);
2457
2458 return rc;
2459}
2460
2461
2462/**
2463 * Inserts the page into the list of modified pages.
2464 *
2465 * @param pPool The pool.
2466 * @param pPage The page.
2467 */
2468void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2469{
2470 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2471 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2472 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2473 && pPool->iModifiedHead != pPage->idx,
2474 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2475 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2476 pPool->iModifiedHead, pPool->cModifiedPages));
2477
2478 pPage->iModifiedNext = pPool->iModifiedHead;
2479 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2480 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2481 pPool->iModifiedHead = pPage->idx;
2482 pPool->cModifiedPages++;
2483#ifdef VBOX_WITH_STATISTICS
2484 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2485 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2486#endif
2487}
2488
2489
2490/**
2491 * Removes the page from the list of modified pages and resets the
2492 * modification counter.
2493 *
2494 * @param pPool The pool.
2495 * @param pPage The page which is believed to be in the list of modified pages.
2496 */
2497static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2498{
2499 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2500 if (pPool->iModifiedHead == pPage->idx)
2501 {
2502 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2503 pPool->iModifiedHead = pPage->iModifiedNext;
2504 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2505 {
2506 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2507 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2508 }
2509 pPool->cModifiedPages--;
2510 }
2511 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2512 {
2513 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2514 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2515 {
2516 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2517 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2518 }
2519 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2520 pPool->cModifiedPages--;
2521 }
2522 else
2523 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2524 pPage->cModifications = 0;
2525}
2526
2527
2528/**
2529 * Zaps the list of modified pages, resetting their modification counters in the process.
2530 *
2531 * @param pVM The VM handle.
2532 */
2533static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2534{
2535 pgmLock(pVM);
2536 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2537 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2538
2539 unsigned cPages = 0; NOREF(cPages);
2540
2541#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2542 pgmPoolResetDirtyPages(pVM);
2543#endif
2544
2545 uint16_t idx = pPool->iModifiedHead;
2546 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2547 while (idx != NIL_PGMPOOL_IDX)
2548 {
2549 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2550 idx = pPage->iModifiedNext;
2551 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2552 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2553 pPage->cModifications = 0;
2554 Assert(++cPages);
2555 }
2556 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2557 pPool->cModifiedPages = 0;
2558 pgmUnlock(pVM);
2559}
2560
2561
2562/**
2563 * Handles SyncCR3 pool tasks.
2564 *
2565 * @returns VBox status code.
2566 * @retval  VINF_SUCCESS on success.
2567 * @retval  VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2568 * @param pVCpu The VMCPU handle.
2569 * @remark Should only be used when monitoring is available, thus placed in
2570 * the PGMPOOL_WITH_MONITORING #ifdef.
2571 */
2572int pgmPoolSyncCR3(PVMCPU pVCpu)
2573{
2574 PVM pVM = pVCpu->CTX_SUFF(pVM);
2575 LogFlow(("pgmPoolSyncCR3\n"));
2576
2577 /*
2578 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2579 * Occasionally we will have to clear all the shadow page tables because we wanted
2580 * to monitor a page which was mapped by too many shadowed page tables. This operation is
2581 * sometimes referred to as a 'lightweight flush'.
2582 */
2583# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2584 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2585 pgmR3PoolClearAll(pVM);
2586# else /* !IN_RING3 */
2587 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2588 {
2589 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2590 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2591 return VINF_PGM_SYNC_CR3;
2592 }
2593# endif /* !IN_RING3 */
2594 else
2595 pgmPoolMonitorModifiedClearAll(pVM);
2596
2597 return VINF_SUCCESS;
2598}
2599
2600
2601/**
2602 * Frees up at least one user entry.
2603 *
2604 * @returns VBox status code.
2605 * @retval  VINF_SUCCESS on success.
2606 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2607 * @param pPool The pool.
2608 * @param iUser The user index.
2609 */
2610static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2611{
2612 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2613 /*
2614 * Just free cached pages in a braindead fashion.
2615 */
2616 /** @todo walk the age list backwards and free the first with usage. */
2617 int rc = VINF_SUCCESS;
2618 do
2619 {
2620 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2621 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2622 rc = rc2;
2623 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2624 return rc;
2625}
2626
2627
2628/**
2629 * Inserts a page into the cache.
2630 *
2631 * This will create a user node for the page, insert it into the GCPhys
2632 * hash, and insert it into the age list.
2633 *
2634 * @returns VBox status code.
2635 * @retval VINF_SUCCESS if successfully added.
2636 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2637 * @param pPool The pool.
2638 * @param pPage The cached page.
2639 * @param GCPhys The GC physical address of the page we're gonna shadow.
2640 * @param iUser The user index.
2641 * @param iUserTable The user table index.
2642 */
2643DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2644{
2645 int rc = VINF_SUCCESS;
2646 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2647
2648 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2649
2650#ifdef VBOX_STRICT
2651 /*
2652     * Check that the entry doesn't already exist.
2653 */
2654 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2655 {
2656 uint16_t i = pPage->iUserHead;
2657 do
2658 {
2659 Assert(i < pPool->cMaxUsers);
2660 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2661 i = paUsers[i].iNext;
2662 } while (i != NIL_PGMPOOL_USER_INDEX);
2663 }
2664#endif
2665
2666 /*
2667     * Find a free user node.
2668 */
2669 uint16_t i = pPool->iUserFreeHead;
2670 if (i == NIL_PGMPOOL_USER_INDEX)
2671 {
2672 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2673 if (RT_FAILURE(rc))
2674 return rc;
2675 i = pPool->iUserFreeHead;
2676 }
2677
2678 /*
2679 * Unlink the user node from the free list,
2680 * initialize and insert it into the user list.
2681 */
2682 pPool->iUserFreeHead = paUsers[i].iNext;
2683 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2684 paUsers[i].iUser = iUser;
2685 paUsers[i].iUserTable = iUserTable;
2686 pPage->iUserHead = i;
2687
2688 /*
2689 * Insert into cache and enable monitoring of the guest page if enabled.
2690 *
2691 * Until we implement caching of all levels, including the CR3 one, we'll
2692     * have to make sure we don't try to monitor & cache any recursive reuse of
2693     * a monitored CR3 page. Because all Windows versions are doing this we'll
2694 * have to be able to do combined access monitoring, CR3 + PT and
2695 * PD + PT (guest PAE).
2696 *
2697 * Update:
2698 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2699 */
2700 const bool fCanBeMonitored = true;
2701 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2702 if (fCanBeMonitored)
2703 {
2704 rc = pgmPoolMonitorInsert(pPool, pPage);
2705 AssertRC(rc);
2706 }
2707 return rc;
2708}
2709
2710
2711/**
2712 * Adds a user reference to a page.
2713 *
2714 * This will move the page to the head of the age list.
2715 *
2716 * @returns VBox status code.
2717 * @retval VINF_SUCCESS if successfully added.
2718 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2719 * @param pPool The pool.
2720 * @param pPage The cached page.
2721 * @param iUser The user index.
2722 * @param iUserTable The user table.
2723 */
2724static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2725{
2726 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2727
2728 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2729
2730# ifdef VBOX_STRICT
2731 /*
2732     * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2733 */
2734 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2735 {
2736 uint16_t i = pPage->iUserHead;
2737 do
2738 {
2739 Assert(i < pPool->cMaxUsers);
2740 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2741 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2742 i = paUsers[i].iNext;
2743 } while (i != NIL_PGMPOOL_USER_INDEX);
2744 }
2745# endif
2746
2747 /*
2748 * Allocate a user node.
2749 */
2750 uint16_t i = pPool->iUserFreeHead;
2751 if (i == NIL_PGMPOOL_USER_INDEX)
2752 {
2753 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2754 if (RT_FAILURE(rc))
2755 return rc;
2756 i = pPool->iUserFreeHead;
2757 }
2758 pPool->iUserFreeHead = paUsers[i].iNext;
2759
2760 /*
2761 * Initialize the user node and insert it.
2762 */
2763 paUsers[i].iNext = pPage->iUserHead;
2764 paUsers[i].iUser = iUser;
2765 paUsers[i].iUserTable = iUserTable;
2766 pPage->iUserHead = i;
2767
2768# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2769 if (pPage->fDirty)
2770 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2771# endif
2772
2773 /*
2774 * Tell the cache to update its replacement stats for this page.
2775 */
2776 pgmPoolCacheUsed(pPool, pPage);
2777 return VINF_SUCCESS;
2778}
2779
2780
2781/**
2782 * Frees a user record associated with a page.
2783 *
2784 * This does not clear the entry in the user table, it simply returns the
2785 * user record to the chain of free records.
2786 *
2787 * @param   pPool       The pool.
2788 * @param   pPage       The shadow page.
2789 * @param iUser The shadow page pool index of the user table.
2790 * @param iUserTable The index into the user table (shadowed).
2791 */
2792static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2793{
2794 /*
2795 * Unlink and free the specified user entry.
2796 */
2797 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2798
2799 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2800 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2801 uint16_t i = pPage->iUserHead;
2802 if ( i != NIL_PGMPOOL_USER_INDEX
2803 && paUsers[i].iUser == iUser
2804 && paUsers[i].iUserTable == iUserTable)
2805 {
2806 pPage->iUserHead = paUsers[i].iNext;
2807
2808 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2809 paUsers[i].iNext = pPool->iUserFreeHead;
2810 pPool->iUserFreeHead = i;
2811 return;
2812 }
2813
2814 /* General: Linear search. */
2815 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2816 while (i != NIL_PGMPOOL_USER_INDEX)
2817 {
2818 if ( paUsers[i].iUser == iUser
2819 && paUsers[i].iUserTable == iUserTable)
2820 {
2821 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2822 paUsers[iPrev].iNext = paUsers[i].iNext;
2823 else
2824 pPage->iUserHead = paUsers[i].iNext;
2825
2826 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2827 paUsers[i].iNext = pPool->iUserFreeHead;
2828 pPool->iUserFreeHead = i;
2829 return;
2830 }
2831 iPrev = i;
2832 i = paUsers[i].iNext;
2833 }
2834
2835 /* Fatal: didn't find it */
2836 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2837 iUser, iUserTable, pPage->GCPhys));
2838}
2839
2840
2841/**
2842 * Gets the entry size of a shadow table.
2843 *
2844 * @param enmKind The kind of page.
2845 *
2846 * @returns The size of the entry in bytes. That is, 4 or 8.
2847 * @returns If the kind is not for a table, an assertion is raised and 0 is
2848 * returned.
2849 */
2850DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2851{
2852 switch (enmKind)
2853 {
2854 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2855 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2856 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2857 case PGMPOOLKIND_32BIT_PD:
2858 case PGMPOOLKIND_32BIT_PD_PHYS:
2859 return 4;
2860
2861 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2862 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2863 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2864 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2865 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2866 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2867 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2868 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2869 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2870 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2871 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2872 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2873 case PGMPOOLKIND_64BIT_PML4:
2874 case PGMPOOLKIND_PAE_PDPT:
2875 case PGMPOOLKIND_ROOT_NESTED:
2876 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2877 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2878 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2879 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2880 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2881 case PGMPOOLKIND_PAE_PD_PHYS:
2882 case PGMPOOLKIND_PAE_PDPT_PHYS:
2883 return 8;
2884
2885 default:
2886 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2887 }
2888}
2889
2890
2891/**
2892 * Gets the entry size of a guest table.
2893 *
2894 * @param enmKind The kind of page.
2895 *
2896 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2897 * @returns If the kind is not for a table, an assertion is raised and 0 is
2898 * returned.
2899 */
2900DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2901{
2902 switch (enmKind)
2903 {
2904 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2905 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2906 case PGMPOOLKIND_32BIT_PD:
2907 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2908 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2909 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2910 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2911 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2912 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2913 return 4;
2914
2915 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2916 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2917 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2918 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2919 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2920 case PGMPOOLKIND_64BIT_PML4:
2921 case PGMPOOLKIND_PAE_PDPT:
2922 return 8;
2923
2924 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2925 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2926 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2927 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2928 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2929 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2930 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2931 case PGMPOOLKIND_ROOT_NESTED:
2932 case PGMPOOLKIND_PAE_PD_PHYS:
2933 case PGMPOOLKIND_PAE_PDPT_PHYS:
2934 case PGMPOOLKIND_32BIT_PD_PHYS:
2935 /** @todo can we return 0? (nobody is calling this...) */
2936 AssertFailed();
2937 return 0;
2938
2939 default:
2940 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2941 }
2942}
2943
2944
2945/**
2946 * Scans one shadow page table for mappings of a physical page.
2947 *
2948 * @returns true if one or more relevant PTEs were kept (only updated), false if all of them were removed.
2949 * @param   pVM         The VM handle.
2950 * @param   pPhysPage   The guest page in question.
2951 * @param   fFlushPTEs  Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
2952 * @param   iShw        The shadow page table.
2953 * @param   cRefs       The number of references made in that PT.
2955 */
2956static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
2957{
2958 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
2959 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2960 bool bRet = false;
2961
2962 /*
2963 * Assert sanity.
2964 */
2965 Assert(cRefs == 1);
2966 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2967 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2968
2969 /*
2970 * Then, clear the actual mappings to the page in the shadow PT.
2971 */
2972 switch (pPage->enmKind)
2973 {
2974 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2975 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2976 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2977 {
2978 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2979 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2980 uint32_t u32AndMask, u32OrMask;
2981
2982 u32AndMask = 0;
2983 u32OrMask = 0;
2984
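            /* When the caller allows keeping the PTEs, only the R/W bit is adjusted to match the
               current handler state of the physical page; otherwise the entries are zapped below. */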
2985 if (!fFlushPTEs)
2986 {
2987 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
2988 {
2989 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
2990 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
2991 u32OrMask = X86_PTE_RW;
2992 u32AndMask = UINT32_MAX;
2993 bRet = true;
2994 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2995 break;
2996
2997 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
2998 u32OrMask = 0;
2999 u32AndMask = ~X86_PTE_RW;
3000 bRet = true;
3001 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3002 break;
3003 default:
3004 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3005 break;
3006 }
3007 }
3008 else
3009 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3010
3011 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3012 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3013 {
3014 X86PTE Pte;
3015
3016 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
3017 Pte.u = (pPT->a[i].u & u32AndMask) | u32OrMask;
3018 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3019 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3020
3021 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3022 cRefs--;
3023 if (!cRefs)
3024 return bRet;
3025 }
3026#ifdef LOG_ENABLED
3027 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3028 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3029 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3030 {
3031 Log(("i=%d cRefs=%d\n", i, cRefs--));
3032 }
3033#endif
3034 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3035 break;
3036 }
3037
3038 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3039 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3040 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3041 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3042 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3043 {
3044 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3045 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3046 uint64_t u64AndMask, u64OrMask;
3047
3048 u64OrMask = 0;
3049 u64AndMask = 0;
3050 if (!fFlushPTEs)
3051 {
3052 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3053 {
3054 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3055 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3056 u64OrMask = X86_PTE_RW;
3057 u64AndMask = UINT64_MAX;
3058 bRet = true;
3059 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3060 break;
3061
3062 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3063 u64OrMask = 0;
3064 u64AndMask = ~((uint64_t)X86_PTE_RW);
3065 bRet = true;
3066 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3067 break;
3068
3069 default:
3070 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3071 break;
3072 }
3073 }
3074 else
3075 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3076
3077 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3078 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3079 {
3080 X86PTEPAE Pte;
3081
3082 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3083 Pte.u = (pPT->a[i].u & u64AndMask) | u64OrMask;
3084 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3085 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3086
3087 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3088 cRefs--;
3089 if (!cRefs)
3090 return bRet;
3091 }
3092#ifdef LOG_ENABLED
3093 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3094 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3095 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3096 {
3097 Log(("i=%d cRefs=%d\n", i, cRefs--));
3098 }
3099#endif
3100 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
3101 break;
3102 }
3103
3104 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3105 {
3106 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3107 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3108 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3109 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3110 {
3111 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3112 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3113 pPT->a[i].u = 0;
3114 cRefs--;
3115 if (!cRefs)
3116 return bRet;
3117 }
3118#ifdef LOG_ENABLED
3119 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3120 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3121 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3122 {
3123 Log(("i=%d cRefs=%d\n", i, cRefs--));
3124 }
3125#endif
3126 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3127 break;
3128 }
3129
3130 default:
3131 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3132 }
3133 return bRet;
3134}
3135
3136
3137/**
3138 * Scans one shadow page table for mappings of a physical page.
3139 *
3140 * @param pVM The VM handle.
3141 * @param pPhysPage The guest page in question.
3142 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3143 * @param iShw The shadow page table.
3144 * @param cRefs The number of references made in that PT.
3145 */
3146static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3147{
3148 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3149
3150 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3151 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3152 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, cRefs);
3153 if (!fKeptPTEs)
3154 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3155 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3156}
3157
3158
3159/**
3160 * Flushes a list of shadow page tables mapping the same physical page.
3161 *
3162 * @param pVM The VM handle.
3163 * @param pPhysPage The guest page in question.
3164 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3165 * @param iPhysExt The physical cross reference extent list to flush.
3166 */
3167static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3168{
3169 Assert(PGMIsLockOwner(pVM));
3170 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3171 bool fKeepList = false;
3172
3173 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3174    Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3175
3176 const uint16_t iPhysExtStart = iPhysExt;
3177 PPGMPOOLPHYSEXT pPhysExt;
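    /* Walk the chain of physical extent records; each record carries a small array (aidx) of
       shadow page table indices that reference this physical page. */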
3178 do
3179 {
3180 Assert(iPhysExt < pPool->cMaxPhysExts);
3181 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3182 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3183 {
3184 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3185 {
3186 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], 1);
3187 if (!fKeptPTEs)
3188 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3189 else
3190 fKeepList = true;
3191 }
3192 }
3193 /* next */
3194 iPhysExt = pPhysExt->iNext;
3195 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3196
3197 if (!fKeepList)
3198 {
3199 /* insert the list into the free list and clear the ram range entry. */
3200 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3201 pPool->iPhysExtFreeHead = iPhysExtStart;
3202 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3203 }
3204
3205 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3206}
3207
3208
3209/**
3210 * Flushes all shadow page table mappings of the given guest page.
3211 *
3212 * This is typically called when the host page backing the guest one has been
3213 * replaced or when the page protection was changed due to an access handler.
3214 *
3215 * @returns VBox status code.
3216 * @retval  VINF_SUCCESS if all references have been successfully cleared.
3217 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3218 * pool cleaning. FF and sync flags are set.
3219 *
3220 * @param pVM The VM handle.
3221 * @param pPhysPage The guest page in question.
3222 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3223 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3224 * flushed, it is NOT touched if this isn't necessary.
3225 *                      The caller MUST initialize this to @a false.
3226 */
3227int pgmPoolTrackUpdateGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3228{
3229 PVMCPU pVCpu = VMMGetCpu(pVM);
3230 pgmLock(pVM);
3231 int rc = VINF_SUCCESS;
3232 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3233 if (u16)
3234 {
3235 /*
3236 * The zero page is currently screwing up the tracking and we'll
3237 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3238 * is defined, zero pages won't normally be mapped. Some kind of solution
3239 * will be needed for this problem of course, but it will have to wait...
3240 */
3241 if (PGM_PAGE_IS_ZERO(pPhysPage))
3242 rc = VINF_PGM_GCPHYS_ALIASED;
3243 else
3244 {
3245# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3246 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3247 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3248 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3249# endif
3250
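            /* Three tracking cases: a plain cRefs + index pair pointing at a single
               shadow page table, a physical extent list (cRefs == PGMPOOL_TD_CREFS_PHYSEXT)
               whose index is the list head, or the overflowed marker which forces the
               slow full scan. */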
3251 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3252 pgmPoolTrackFlushGCPhysPT(pVM,
3253 pPhysPage,
3254 fFlushPTEs,
3255 PGMPOOL_TD_GET_IDX(u16),
3256 PGMPOOL_TD_GET_CREFS(u16));
3257 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3258 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3259 else
3260 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3261 *pfFlushTLBs = true;
3262
3263# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3264 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3265# endif
3266 }
3267 }
3268
3269 if (rc == VINF_PGM_GCPHYS_ALIASED)
3270 {
3271 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3272 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3273 rc = VINF_PGM_SYNC_CR3;
3274 }
3275 pgmUnlock(pVM);
3276 return rc;
3277}
3278
3279
3280/**
3281 * Scans all shadow page tables for mappings of a physical page.
3282 *
3283 * This may be slow, but it's most likely more efficient than cleaning
3284 * out the entire page pool / cache.
3285 *
3286 * @returns VBox status code.
3287 * @retval VINF_SUCCESS if all references have been successfully cleared.
3288 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3289 * a page pool cleaning.
3290 *
3291 * @param pVM The VM handle.
3292 * @param pPhysPage The guest page in question.
3293 */
3294int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3295{
3296 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3297 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3298 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3299 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3300
3301#if 1
3302 /*
3303 * There is a limit to what makes sense.
3304 */
3305 if (pPool->cPresent > 1024)
3306 {
3307 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3308 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3309 return VINF_PGM_GCPHYS_ALIASED;
3310 }
3311#endif
3312
3313 /*
3314 * Iterate all the pages until we've encountered all that are in use.
3315 * This is a simple but not quite optimal solution.
3316 */
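    /* The values to look for in the shadow PTEs: the host physical address with
       the present bit set, used as a 64-bit value for PAE page tables and
       truncated to 32 bits for legacy 32-bit page tables. */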
3317 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3318 const uint32_t u32 = u64;
3319 unsigned cLeft = pPool->cUsedPages;
3320 unsigned iPage = pPool->cCurPages;
3321 while (--iPage >= PGMPOOL_IDX_FIRST)
3322 {
3323 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3324 if (pPage->GCPhys != NIL_RTGCPHYS)
3325 {
3326 switch (pPage->enmKind)
3327 {
3328 /*
3329 * We only care about shadow page tables.
3330 */
3331 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3332 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3333 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3334 {
3335 unsigned cPresent = pPage->cPresent;
3336 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3337 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3338 if (pPT->a[i].n.u1Present)
3339 {
3340 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3341 {
3342 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3343 pPT->a[i].u = 0;
3344 }
3345 if (!--cPresent)
3346 break;
3347 }
3348 break;
3349 }
3350
3351 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3352 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3353 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3354 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3355 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3356 {
3357 unsigned cPresent = pPage->cPresent;
3358 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3359 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3360 if (pPT->a[i].n.u1Present)
3361 {
3362 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3363 {
3364 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3365 pPT->a[i].u = 0;
3366 }
3367 if (!--cPresent)
3368 break;
3369 }
3370 break;
3371 }
3372 }
3373 if (!--cLeft)
3374 break;
3375 }
3376 }
3377
3378 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3379 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3380 return VINF_SUCCESS;
3381}
3382
3383
3384/**
3385 * Clears the user entry in a user table.
3386 *
3387 * This is used to remove all references to a page when flushing it.
3388 */
3389static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3390{
3391 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3392 Assert(pUser->iUser < pPool->cCurPages);
3393 uint32_t iUserTable = pUser->iUserTable;
3394
3395 /*
3396 * Map the user page.
3397 */
3398 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3399 union
3400 {
3401 uint64_t *pau64;
3402 uint32_t *pau32;
3403 } u;
3404 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
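    /* The union lets the entry be cleared as either a 32-bit or a 64-bit value,
       depending on the kind of the user table handled below. */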
3405
3406 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3407
3408 /* Safety precaution in case we change the paging for other modes too in the future. */
3409 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3410
3411#ifdef VBOX_STRICT
3412 /*
3413 * Some sanity checks.
3414 */
3415 switch (pUserPage->enmKind)
3416 {
3417 case PGMPOOLKIND_32BIT_PD:
3418 case PGMPOOLKIND_32BIT_PD_PHYS:
3419 Assert(iUserTable < X86_PG_ENTRIES);
3420 break;
3421 case PGMPOOLKIND_PAE_PDPT:
3422 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3423 case PGMPOOLKIND_PAE_PDPT_PHYS:
3424 Assert(iUserTable < 4);
3425 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3426 break;
3427 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3428 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3429 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3430 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3431 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3432 case PGMPOOLKIND_PAE_PD_PHYS:
3433 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3434 break;
3435 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3436 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3437 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3438 break;
3439 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3440 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3441 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3442 break;
3443 case PGMPOOLKIND_64BIT_PML4:
3444 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3445 /* GCPhys >> PAGE_SHIFT is the index here */
3446 break;
3447 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3448 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3449 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3450 break;
3451
3452 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3453 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3454 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3455 break;
3456
3457 case PGMPOOLKIND_ROOT_NESTED:
3458 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3459 break;
3460
3461 default:
3462 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3463 break;
3464 }
3465#endif /* VBOX_STRICT */
3466
3467 /*
3468 * Clear the entry in the user page.
3469 */
3470 switch (pUserPage->enmKind)
3471 {
3472 /* 32-bit entries */
3473 case PGMPOOLKIND_32BIT_PD:
3474 case PGMPOOLKIND_32BIT_PD_PHYS:
3475 ASMAtomicWriteSize(&u.pau32[iUserTable], 0);
3476 break;
3477
3478 /* 64-bit entries */
3479 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3480 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3481 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3482 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3483 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3484#if defined(IN_RC)
3485 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during cr3 load, so any
3486 * non-present PDPT entry will continue to cause page faults.
3487 */
3488 ASMReloadCR3();
3489#endif
3490 /* no break */
3491 case PGMPOOLKIND_PAE_PD_PHYS:
3492 case PGMPOOLKIND_PAE_PDPT_PHYS:
3493 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3494 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3495 case PGMPOOLKIND_64BIT_PML4:
3496 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3497 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3498 case PGMPOOLKIND_PAE_PDPT:
3499 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3500 case PGMPOOLKIND_ROOT_NESTED:
3501 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3502 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3503 ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
3504 break;
3505
3506 default:
3507 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3508 }
3509}
3510
3511
3512/**
3513 * Clears all users of a page.
3514 */
3515static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3516{
3517 /*
3518 * Free all the user records.
3519 */
3520 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3521
3522 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3523 uint16_t i = pPage->iUserHead;
3524 while (i != NIL_PGMPOOL_USER_INDEX)
3525 {
3526 /* Clear the entry in the user table. */
3527 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3528
3529 /* Free it. */
3530 const uint16_t iNext = paUsers[i].iNext;
3531 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3532 paUsers[i].iNext = pPool->iUserFreeHead;
3533 pPool->iUserFreeHead = i;
3534
3535 /* Next. */
3536 i = iNext;
3537 }
3538 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3539}
3540
3541
3542/**
3543 * Allocates a new physical cross reference extent.
3544 *
3545 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3546 * @param pVM The VM handle.
3547 * @param piPhysExt Where to store the phys ext index.
3548 */
3549PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3550{
3551 Assert(PGMIsLockOwner(pVM));
3552 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3553 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3554 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3555 {
3556 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3557 return NULL;
3558 }
3559 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3560 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3561 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3562 *piPhysExt = iPhysExt;
3563 return pPhysExt;
3564}
3565
3566
3567/**
3568 * Frees a physical cross reference extent.
3569 *
3570 * @param pVM The VM handle.
3571 * @param iPhysExt The extent to free.
3572 */
3573void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3574{
3575 Assert(PGMIsLockOwner(pVM));
3576 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3577 Assert(iPhysExt < pPool->cMaxPhysExts);
3578 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3579 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3580 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3581 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3582 pPool->iPhysExtFreeHead = iPhysExt;
3583}
3584
3585
3586/**
3587 * Frees a list of physical cross reference extents.
3588 *
3589 * @param pVM The VM handle.
3590 * @param iPhysExt The index of the first extent in the list to free.
3591 */
3592void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3593{
3594 Assert(PGMIsLockOwner(pVM));
3595 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3596
3597 const uint16_t iPhysExtStart = iPhysExt;
3598 PPGMPOOLPHYSEXT pPhysExt;
3599 do
3600 {
3601 Assert(iPhysExt < pPool->cMaxPhysExts);
3602 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3603 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3604 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3605
3606 /* next */
3607 iPhysExt = pPhysExt->iNext;
3608 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3609
3610 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3611 pPool->iPhysExtFreeHead = iPhysExtStart;
3612}
3613
3614
3615/**
3616 * Insert a reference into a list of physical cross reference extents.
3617 *
3618 * @returns The new tracking data for PGMPAGE.
3619 *
3620 * @param pVM The VM handle.
3621 * @param iPhysExt The physical extent index of the list head.
3622 * @param iShwPT The shadow page table index.
3623 *
3624 */
3625static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3626{
3627 Assert(PGMIsLockOwner(pVM));
3628 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3629 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3630
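    /* An extent holds a small fixed number of shadow page table indices (aidx).
       When tracking was just converted from a single reference, slots 0 and 1 of
       the head extent are taken, so slot 2 is checked first as the common case. */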
3631 /* special common case. */
3632 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3633 {
3634 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3635 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3636 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3637 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3638 }
3639
3640 /* general treatment. */
3641 const uint16_t iPhysExtStart = iPhysExt;
3642 unsigned cMax = 15;
3643 for (;;)
3644 {
3645 Assert(iPhysExt < pPool->cMaxPhysExts);
3646 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3647 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3648 {
3649 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3650 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3651 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3652 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3653 }
3654 if (!--cMax)
3655 {
3656 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3657 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3658 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3659 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3660 }
3661 }
3662
3663 /* add another extent to the list. */
3664 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3665 if (!pNew)
3666 {
3667 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3668 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3669 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3670 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3671 }
3672 pNew->iNext = iPhysExtStart;
3673 pNew->aidx[0] = iShwPT;
3674 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3675 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3676}
3677
3678
3679/**
3680 * Add a reference to a guest physical page where extents are in use.
3681 *
3682 * @returns The new tracking data for PGMPAGE.
3683 *
3684 * @param pVM The VM handle.
3685 * @param u16 The ram range flags (top 16-bits).
3686 * @param iShwPT The shadow page table index.
3687 */
3688uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3689{
3690 pgmLock(pVM);
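    /* u16 is the PGMPAGE tracking word: either a cRefs + index pair for a single
       shadow page table, an extent list head (cRefs == PGMPOOL_TD_CREFS_PHYSEXT),
       or the overflowed marker when even the extent list has been given up on. */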
3691 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3692 {
3693 /*
3694 * Convert to extent list.
3695 */
3696 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3697 uint16_t iPhysExt;
3698 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3699 if (pPhysExt)
3700 {
3701 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3702 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3703 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3704 pPhysExt->aidx[1] = iShwPT;
3705 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3706 }
3707 else
3708 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3709 }
3710 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3711 {
3712 /*
3713 * Insert into the extent list.
3714 */
3715 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3716 }
3717 else
3718 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3719 pgmUnlock(pVM);
3720 return u16;
3721}
3722
3723
3724/**
3725 * Clear references to guest physical memory.
3726 *
3727 * @param pPool The pool.
3728 * @param pPage The page.
3729 * @param pPhysPage Pointer to the aPages entry in the ram range.
3730 */
3731void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3732{
3733 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3734 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3735
3736 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3737 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3738 {
3739 PVM pVM = pPool->CTX_SUFF(pVM);
3740 pgmLock(pVM);
3741
3742 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3743 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
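        /* Walk the extent list, clear our index, and free any extent that ends up
           empty, relinking the list or clearing the page tracking data as needed. */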
3744 do
3745 {
3746 Assert(iPhysExt < pPool->cMaxPhysExts);
3747
3748 /*
3749 * Look for the shadow page and check if it's all freed.
3750 */
3751 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3752 {
3753 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3754 {
3755 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3756
3757 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3758 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3759 {
3760 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3761 pgmUnlock(pVM);
3762 return;
3763 }
3764
3765 /* we can free the node. */
3766 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3767 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3768 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3769 {
3770 /* lonely node */
3771 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3772 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3773 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3774 }
3775 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3776 {
3777 /* head */
3778 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3779 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3780 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3781 }
3782 else
3783 {
3784 /* in list */
3785 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3786 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3787 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3788 }
3789 iPhysExt = iPhysExtNext;
3790 pgmUnlock(pVM);
3791 return;
3792 }
3793 }
3794
3795 /* next */
3796 iPhysExtPrev = iPhysExt;
3797 iPhysExt = paPhysExts[iPhysExt].iNext;
3798 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3799
3800 pgmUnlock(pVM);
3801 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3802 }
3803 else /* nothing to do */
3804 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3805}
3806
3807
3808/**
3809 * Clear references to guest physical memory.
3810 *
3811 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3812 * is assumed to be correct, so the linear search can be skipped and we can assert
3813 * at an earlier point.
3814 *
3815 * @param pPool The pool.
3816 * @param pPage The page.
3817 * @param HCPhys The host physical address corresponding to the guest page.
3818 * @param GCPhys The guest physical address corresponding to HCPhys.
3819 */
3820static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3821{
3822 /*
3823 * Walk range list.
3824 */
3825 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3826 while (pRam)
3827 {
3828 RTGCPHYS off = GCPhys - pRam->GCPhys;
3829 if (off < pRam->cb)
3830 {
3831 /* does it match? */
3832 const unsigned iPage = off >> PAGE_SHIFT;
3833 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3834#ifdef LOG_ENABLED
3835 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3836 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3837#endif
3838 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3839 {
3840 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3841 return;
3842 }
3843 break;
3844 }
3845 pRam = pRam->CTX_SUFF(pNext);
3846 }
3847 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3848}
3849
3850
3851/**
3852 * Clear references to guest physical memory.
3853 *
3854 * @param pPool The pool.
3855 * @param pPage The page.
3856 * @param HCPhys The host physical address corresponding to the guest page.
3857 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3858 */
3859void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3860{
3861 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3862
3863 /*
3864 * Walk range list.
3865 */
3866 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3867 while (pRam)
3868 {
3869 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3870 if (off < pRam->cb)
3871 {
3872 /* does it match? */
3873 const unsigned iPage = off >> PAGE_SHIFT;
3874 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3875 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3876 {
3877 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3878 return;
3879 }
3880 break;
3881 }
3882 pRam = pRam->CTX_SUFF(pNext);
3883 }
3884
3885 /*
3886 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3887 */
3888 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3889 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3890 while (pRam)
3891 {
3892 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3893 while (iPage-- > 0)
3894 {
3895 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3896 {
3897 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3898 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3899 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3900 return;
3901 }
3902 }
3903 pRam = pRam->CTX_SUFF(pNext);
3904 }
3905
3906 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3907}
3908
3909
3910/**
3911 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3912 *
3913 * @param pPool The pool.
3914 * @param pPage The page.
3915 * @param pShwPT The shadow page table (mapping of the page).
3916 * @param pGstPT The guest page table.
3917 */
3918DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3919{
3920 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3921 if (pShwPT->a[i].n.u1Present)
3922 {
3923 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3924 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3925 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3926 if (!--pPage->cPresent)
3927 break;
3928 }
3929}
3930
3931
3932/**
3933 * Clear references to guest physical memory in a PAE / 32-bit page table.
3934 *
3935 * @param pPool The pool.
3936 * @param pPage The page.
3937 * @param pShwPT The shadow page table (mapping of the page).
3938 * @param pGstPT The guest page table (just a half one).
3939 */
3940DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3941{
3942 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3943 if (pShwPT->a[i].n.u1Present)
3944 {
3945 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3946 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3947 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3948 if (!--pPage->cPresent)
3949 break;
3950 }
3951}
3952
3953
3954/**
3955 * Clear references to guest physical memory in a PAE / PAE page table.
3956 *
3957 * @param pPool The pool.
3958 * @param pPage The page.
3959 * @param pShwPT The shadow page table (mapping of the page).
3960 * @param pGstPT The guest page table.
3961 */
3962DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3963{
3964 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3965 if (pShwPT->a[i].n.u1Present)
3966 {
3967 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3968 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3969 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3970 if (!--pPage->cPresent)
3971 break;
3972 }
3973}
3974
3975
3976/**
3977 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3978 *
3979 * @param pPool The pool.
3980 * @param pPage The page.
3981 * @param pShwPT The shadow page table (mapping of the page).
3982 */
3983DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3984{
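    /* Big-page shadows have no guest PT to consult; the guest physical address of
       each shadow PTE is simply pPage->GCPhys plus the PTE's 4KB offset. */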
3985 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
3986 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3987 if (pShwPT->a[i].n.u1Present)
3988 {
3989 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3990 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3991 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3992 if (!--pPage->cPresent)
3993 break;
3994 }
3995}
3996
3997
3998/**
3999 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4000 *
4001 * @param pPool The pool.
4002 * @param pPage The page.
4003 * @param pShwPT The shadow page table (mapping of the page).
4004 */
4005DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4006{
4007 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4008 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4009 if (pShwPT->a[i].n.u1Present)
4010 {
4011 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4012 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4013 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
4014 if (!--pPage->cPresent)
4015 break;
4016 }
4017}
4018
4019
4020/**
4021 * Clear references to shadowed pages in an EPT page table.
4022 *
4023 * @param pPool The pool.
4024 * @param pPage The page.
4025 * @param pShwPT The shadow page table (mapping of the page).
4026 */
4027DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4028{
4029 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4030 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4031 if (pShwPT->a[i].n.u1Present)
4032 {
4033 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4034 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4035 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4036 if (!--pPage->cPresent)
4037 break;
4038 }
4039}
4040
4041
4042
4043/**
4044 * Clear references to shadowed pages in a 32-bit page directory.
4045 *
4046 * @param pPool The pool.
4047 * @param pPage The page.
4048 * @param pShwPD The shadow page directory (mapping of the page).
4049 */
4050DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4051{
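    /* Entries flagged PGM_PDFLAGS_MAPPING belong to the hypervisor mappings and do
       not reference pool pages, so they are skipped. */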
4052 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4053 {
4054 if ( pShwPD->a[i].n.u1Present
4055 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4056 )
4057 {
4058 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4059 if (pSubPage)
4060 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4061 else
4062 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4063 }
4064 }
4065}
4066
4067/**
4068 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4069 *
4070 * @param pPool The pool.
4071 * @param pPage The page.
4072 * @param pShwPD The shadow page directory (mapping of the page).
4073 */
4074DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4075{
4076 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4077 {
4078 if ( pShwPD->a[i].n.u1Present
4079 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4080 )
4081 {
4082 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4083 if (pSubPage)
4084 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4085 else
4086 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4087 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4088 }
4089 }
4090}
4091
4092/**
4093 * Clear references to shadowed pages in a PAE page directory pointer table.
4094 *
4095 * @param pPool The pool.
4096 * @param pPage The page.
4097 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4098 */
4099DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4100{
4101 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4102 {
4103 if ( pShwPDPT->a[i].n.u1Present
4104 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4105 )
4106 {
4107 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4108 if (pSubPage)
4109 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4110 else
4111 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4112 }
4113 }
4114}
4115
4116
4117/**
4118 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4119 *
4120 * @param pPool The pool.
4121 * @param pPage The page.
4122 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4123 */
4124DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4125{
4126 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4127 {
4128 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4129 if (pShwPDPT->a[i].n.u1Present)
4130 {
4131 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4132 if (pSubPage)
4133 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4134 else
4135 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4136 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4137 }
4138 }
4139}
4140
4141
4142/**
4143 * Clear references to shadowed pages in a 64-bit level 4 page table.
4144 *
4145 * @param pPool The pool.
4146 * @param pPage The page.
4147 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4148 */
4149DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4150{
4151 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4152 {
4153 if (pShwPML4->a[i].n.u1Present)
4154 {
4155 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4156 if (pSubPage)
4157 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4158 else
4159 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4160 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4161 }
4162 }
4163}
4164
4165
4166/**
4167 * Clear references to shadowed pages in an EPT page directory.
4168 *
4169 * @param pPool The pool.
4170 * @param pPage The page.
4171 * @param pShwPD The shadow page directory (mapping of the page).
4172 */
4173DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4174{
4175 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4176 {
4177 if (pShwPD->a[i].n.u1Present)
4178 {
4179 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4180 if (pSubPage)
4181 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4182 else
4183 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4184 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4185 }
4186 }
4187}
4188
4189
4190/**
4191 * Clear references to shadowed pages in an EPT page directory pointer table.
4192 *
4193 * @param pPool The pool.
4194 * @param pPage The page.
4195 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4196 */
4197DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4198{
4199 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4200 {
4201 if (pShwPDPT->a[i].n.u1Present)
4202 {
4203 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4204 if (pSubPage)
4205 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4206 else
4207 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4208 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4209 }
4210 }
4211}
4212
4213
4214/**
4215 * Clears all references made by this page.
4216 *
4217 * This includes other shadow pages and GC physical addresses.
4218 *
4219 * @param pPool The pool.
4220 * @param pPage The page.
4221 */
4222static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4223{
4224 /*
4225 * Map the shadow page and take action according to the page kind.
4226 */
4227 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
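    /* Page table kinds deref the guest physical pages they map, while directory
       style kinds (PD/PDPT/PML4) deref their child pool pages looked up by host
       physical address. */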
4228 switch (pPage->enmKind)
4229 {
4230 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4231 {
4232 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4233 void *pvGst;
4234 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4235 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4236 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4237 break;
4238 }
4239
4240 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4241 {
4242 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4243 void *pvGst;
4244 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4245 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4246 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4247 break;
4248 }
4249
4250 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4251 {
4252 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4253 void *pvGst;
4254 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4255 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4256 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4257 break;
4258 }
4259
4260 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4261 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4262 {
4263 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4264 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4265 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4266 break;
4267 }
4268
4269 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4270 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4271 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4272 {
4273 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4274 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4275 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4276 break;
4277 }
4278
4279 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4280 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4281 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4282 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4283 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4284 case PGMPOOLKIND_PAE_PD_PHYS:
4285 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4286 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4287 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4288 break;
4289
4290 case PGMPOOLKIND_32BIT_PD_PHYS:
4291 case PGMPOOLKIND_32BIT_PD:
4292 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4293 break;
4294
4295 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4296 case PGMPOOLKIND_PAE_PDPT:
4297 case PGMPOOLKIND_PAE_PDPT_PHYS:
4298 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4299 break;
4300
4301 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4302 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4303 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4304 break;
4305
4306 case PGMPOOLKIND_64BIT_PML4:
4307 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4308 break;
4309
4310 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4311 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4312 break;
4313
4314 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4315 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4316 break;
4317
4318 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4319 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4320 break;
4321
4322 default:
4323 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4324 }
4325
4326 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4327 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4328 ASMMemZeroPage(pvShw);
4329 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4330 pPage->fZeroed = true;
4331 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4332}
4333
4334/**
4335 * Flushes a pool page.
4336 *
4337 * This moves the page to the free list after removing all user references to it.
4338 *
4339 * @returns VBox status code.
4340 * @retval VINF_SUCCESS on success.
4341 * @param pPool The pool.
4342 * @param pPage The shadow page.
4343 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4344 */
4345int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4346{
4347 PVM pVM = pPool->CTX_SUFF(pVM);
4348 bool fFlushRequired = false;
4349
4350 int rc = VINF_SUCCESS;
4351 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4352 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4353 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4354
4355 /*
4356 * Quietly reject any attempts at flushing any of the special root pages.
4357 */
4358 if (pPage->idx < PGMPOOL_IDX_FIRST)
4359 {
4360 AssertFailed(); /* can no longer happen */
4361 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4362 return VINF_SUCCESS;
4363 }
4364
4365 pgmLock(pVM);
4366
4367 /*
4368 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4369 */
4370 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4371 {
4372 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4373 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4374 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4375 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4376 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4377 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4378 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4379 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4380 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4381 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4382 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4383 pgmUnlock(pVM);
4384 return VINF_SUCCESS;
4385 }
4386
4387#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4388 /* Start a subset so we won't run out of mapping space. */
4389 PVMCPU pVCpu = VMMGetCpu(pVM);
4390 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4391#endif
4392
4393 /*
4394 * Mark the page as being in need of an ASMMemZeroPage().
4395 */
4396 pPage->fZeroed = false;
4397
4398#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4399 if (pPage->fDirty)
4400 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4401#endif
4402
4403 /* If there are any users of this table, then we *must* issue a TLB flush on all VCPUs. */
4404 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4405 fFlushRequired = true;
4406
4407 /*
4408 * Clear the page.
4409 */
4410 pgmPoolTrackClearPageUsers(pPool, pPage);
4411 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4412 pgmPoolTrackDeref(pPool, pPage);
4413 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4414
4415 /*
4416 * Flush it from the cache.
4417 */
4418 pgmPoolCacheFlushPage(pPool, pPage);
4419
4420#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4421 /* Heavy stuff done. */
4422 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4423#endif
4424
4425 /*
4426 * Deregister the monitoring.
4427 */
4428 if (pPage->fMonitored)
4429 rc = pgmPoolMonitorFlush(pPool, pPage);
4430
4431 /*
4432 * Free the page.
4433 */
4434 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4435 pPage->iNext = pPool->iFreeHead;
4436 pPool->iFreeHead = pPage->idx;
4437 pPage->enmKind = PGMPOOLKIND_FREE;
4438 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4439 pPage->GCPhys = NIL_RTGCPHYS;
4440 pPage->fReusedFlushPending = false;
4441
4442 pPool->cUsedPages--;
4443
4444 /* Flush the TLBs of all VCPUs if required. */
4445 if ( fFlushRequired
4446 && fFlush)
4447 {
4448 PGM_INVL_ALL_VCPU_TLBS(pVM);
4449 }
4450
4451 pgmUnlock(pVM);
4452 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4453 return rc;
4454}
4455
4456
4457/**
4458 * Frees a usage of a pool page.
4459 *
4460 * The caller is responsible for updating the user table so that it no longer
4461 * references the shadow page.
4462 *
4463 * @param pPool The pool.
4464 * @param pPage The shadow page.
4465 * @param iUser The shadow page pool index of the user table.
4466 * @param iUserTable The index into the user table (shadowed).
4467 */
4468void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4469{
4470 PVM pVM = pPool->CTX_SUFF(pVM);
4471
4472 STAM_PROFILE_START(&pPool->StatFree, a);
4473 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4474 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4475 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4476 pgmLock(pVM);
4477 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4478 if (!pPage->fCached)
4479 pgmPoolFlushPage(pPool, pPage);
4480 pgmUnlock(pVM);
4481 STAM_PROFILE_STOP(&pPool->StatFree, a);
4482}
4483
4484
4485/**
4486 * Makes one or more free pages available.
4487 *
4488 * @returns VBox status code.
4489 * @retval VINF_SUCCESS on success.
4490 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4491 *
4492 * @param pPool The pool.
4493 * @param enmKind Page table kind
4494 * @param iUser The user of the page.
4495 */
4496static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4497{
4498 PVM pVM = pPool->CTX_SUFF(pVM);
4499
4500 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4501
4502 /*
4503 * If the pool isn't fully grown yet, expand it.
4504 */
4505 if ( pPool->cCurPages < pPool->cMaxPages
4506#if defined(IN_RC)
4507 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4508 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4509 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4510#endif
4511 )
4512 {
4513 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4514#ifdef IN_RING3
4515 int rc = PGMR3PoolGrow(pVM);
4516#else
4517 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4518#endif
4519 if (RT_FAILURE(rc))
4520 return rc;
4521 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4522 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4523 return VINF_SUCCESS;
4524 }
4525
4526 /*
4527 * Free one cached page.
4528 */
4529 return pgmPoolCacheFreeOne(pPool, iUser);
4530}
4531
4532/**
4533 * Allocates a page from the pool.
4534 *
4535 * This page may actually be a cached page and not in need of any processing
4536 * on the caller's part.
4537 *
4538 * @returns VBox status code.
4539 * @retval VINF_SUCCESS if a NEW page was allocated.
4540 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4541 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4542 * @param pVM The VM handle.
4543 * @param GCPhys The GC physical address of the page we're gonna shadow.
4544 * For 4MB and 2MB PD entries, it's the first address the
4545 * shadow PT is covering.
4546 * @param enmKind The kind of mapping.
4547 * @param enmAccess Access type for the mapping (only relevant for big pages)
4548 * @param iUser The shadow page pool index of the user table.
4549 * @param iUserTable The index into the user table (shadowed).
4550 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4551 * @param fLockPage Lock the page
4552 */
4553int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4554{
4555 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4556 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4557 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4558 *ppPage = NULL;
4559 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4560 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4561 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4562
4563 pgmLock(pVM);
4564
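    /* Try the cache first; a hit returns VINF_PGM_CACHED_PAGE and skips the
       allocation, tracking insertion and zeroing below. */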
4565 if (pPool->fCacheEnabled)
4566 {
4567 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4568 if (RT_SUCCESS(rc2))
4569 {
4570 if (fLockPage)
4571 pgmPoolLockPage(pPool, *ppPage);
4572 pgmUnlock(pVM);
4573 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4574 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4575 return rc2;
4576 }
4577 }
4578
4579 /*
4580 * Allocate a new one.
4581 */
4582 int rc = VINF_SUCCESS;
4583 uint16_t iNew = pPool->iFreeHead;
4584 if (iNew == NIL_PGMPOOL_IDX)
4585 {
4586 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4587 if (RT_FAILURE(rc))
4588 {
4589 pgmUnlock(pVM);
4590 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4591 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4592 return rc;
4593 }
4594 iNew = pPool->iFreeHead;
4595 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4596 }
4597
4598 /* unlink the free head */
4599 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4600 pPool->iFreeHead = pPage->iNext;
4601 pPage->iNext = NIL_PGMPOOL_IDX;
4602
4603 /*
4604 * Initialize it.
4605 */
4606 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4607 pPage->enmKind = enmKind;
4608 pPage->enmAccess = enmAccess;
4609 pPage->GCPhys = GCPhys;
4610 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4611 pPage->fMonitored = false;
4612 pPage->fCached = false;
4613#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4614 pPage->fDirty = false;
4615#endif
4616 pPage->fReusedFlushPending = false;
4617 pPage->cModifications = 0;
4618 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4619 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4620 pPage->cPresent = 0;
4621 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4622 pPage->pvLastAccessHandlerFault = 0;
4623 pPage->cLastAccessHandlerCount = 0;
4624 pPage->pvLastAccessHandlerRip = 0;
4625
4626 /*
4627 * Insert into the tracking and cache. If this fails, free the page.
4628 */
4629 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4630 if (RT_FAILURE(rc3))
4631 {
4632 pPool->cUsedPages--;
4633 pPage->enmKind = PGMPOOLKIND_FREE;
4634 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4635 pPage->GCPhys = NIL_RTGCPHYS;
4636 pPage->iNext = pPool->iFreeHead;
4637 pPool->iFreeHead = pPage->idx;
4638 pgmUnlock(pVM);
4639 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4640 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4641 return rc3;
4642 }
4643
4644 /*
4645 * Commit the allocation, clear the page and return.
4646 */
4647#ifdef VBOX_WITH_STATISTICS
4648 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4649 pPool->cUsedPagesHigh = pPool->cUsedPages;
4650#endif
4651
4652 if (!pPage->fZeroed)
4653 {
4654 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4655 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4656 ASMMemZeroPage(pv);
4657 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4658 }
4659
4660 *ppPage = pPage;
4661 if (fLockPage)
4662 pgmPoolLockPage(pPool, pPage);
4663 pgmUnlock(pVM);
4664 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4665 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4666 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4667 return rc;
4668}
4669
4670
4671/**
4672 * Frees a usage of a pool page.
4673 *
4674 * @param pVM The VM handle.
4675 * @param HCPhys The HC physical address of the shadow page.
4676 * @param iUser The shadow page pool index of the user table.
4677 * @param iUserTable The index into the user table (shadowed).
4678 */
4679void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4680{
4681 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4682 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4683 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4684}
4685
4686/**
4687 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4688 *
4689 * @returns Pointer to the shadow page structure.
4690 * @param pPool The pool.
4691 * @param HCPhys The HC physical address of the shadow page.
4692 */
4693PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4694{
4695 PVM pVM = pPool->CTX_SUFF(pVM);
4696
4697 Assert(PGMIsLockOwner(pVM));
4698
4699 /*
4700 * Look up the page.
4701 */
4702 pgmLock(pVM);
4703 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4704 pgmUnlock(pVM);
4705
4706 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4707 return pPage;
4708}
4709
4710#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4711/**
4712 * Flushes the specified page if present.
4713 *
4714 * @param pVM The VM handle.
4715 * @param GCPhys Guest physical address of the page to flush
4716 */
4717void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4718{
4719 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4720
4721 VM_ASSERT_EMT(pVM);
4722
4723 /*
4724 * Look up the GCPhys in the hash.
4725 */
4726 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4727 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4728 if (i == NIL_PGMPOOL_IDX)
4729 return;
4730
4731 do
4732 {
4733 PPGMPOOLPAGE pPage = &pPool->aPages[i];
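        /* GCPhys was masked to the page boundary above; the unsigned subtraction
           matches any pool page whose GCPhys falls within that guest page. */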
4734 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4735 {
4736 switch (pPage->enmKind)
4737 {
4738 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4739 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4740 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4741 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4742 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4743 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4744 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4745 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4746 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4747 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4748 case PGMPOOLKIND_64BIT_PML4:
4749 case PGMPOOLKIND_32BIT_PD:
4750 case PGMPOOLKIND_PAE_PDPT:
4751 {
4752 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4753#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4754 if (pPage->fDirty)
4755 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4756 else
4757#endif
4758 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4759 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4760 pgmPoolMonitorChainFlush(pPool, pPage);
4761 return;
4762 }
4763
4764 /* ignore, no monitoring. */
4765 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4766 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4767 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4768 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4769 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4770 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4771 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4772 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4773 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4774 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4775 case PGMPOOLKIND_ROOT_NESTED:
4776 case PGMPOOLKIND_PAE_PD_PHYS:
4777 case PGMPOOLKIND_PAE_PDPT_PHYS:
4778 case PGMPOOLKIND_32BIT_PD_PHYS:
4779 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4780 break;
4781
4782 default:
4783 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4784 }
4785 }
4786
4787 /* next */
4788 i = pPage->iNext;
4789 } while (i != NIL_PGMPOOL_IDX);
4790 return;
4791}
4792#endif /* IN_RING3 */
4793
4794#ifdef IN_RING3
4795/**
4796 * Resets the entire pool.
4797 *
4798 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
4799 * this and will execute the CR3 flush.
4800 *
4801 * @param pVM The VM handle.
4802 */
4803void pgmR3PoolReset(PVM pVM)
4804{
4805 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4806
4807 Assert(PGMIsLockOwner(pVM));
4808 STAM_PROFILE_START(&pPool->StatR3Reset, a);
4809 LogFlow(("pgmR3PoolReset:\n"));
4810
4811 /*
4812 * If there are no pages in the pool, there is nothing to do.
4813 */
4814 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4815 {
4816 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
4817 return;
4818 }
4819
4820 /*
4821 * Exit the shadow mode since we're going to clear everything,
4822 * including the root page.
4823 */
4824 for (VMCPUID i = 0; i < pVM->cCpus; i++)
4825 {
4826 PVMCPU pVCpu = &pVM->aCpus[i];
4827 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4828 }
4829
4830 /*
4831 * Nuke the free list and reinsert all pages into it.
4832 */
4833 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4834 {
4835 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4836
4837 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4838 if (pPage->fMonitored)
4839 pgmPoolMonitorFlush(pPool, pPage);
4840 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4841 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4842 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4843 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4844 pPage->cModifications = 0;
4845 pPage->GCPhys = NIL_RTGCPHYS;
4846 pPage->enmKind = PGMPOOLKIND_FREE;
4847 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4848 Assert(pPage->idx == i);
4849 pPage->iNext = i + 1;
4850 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4851 pPage->fSeenNonGlobal = false;
4852 pPage->fMonitored = false;
4853#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4854 pPage->fDirty = false;
4855#endif
4856 pPage->fCached = false;
4857 pPage->fReusedFlushPending = false;
4858 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4859 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4860 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4861 pPage->cLocked = 0;
4862 }
4863 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4864 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4865 pPool->cUsedPages = 0;
4866
4867 /*
4868 * Zap and reinitialize the user records.
4869 */
4870 pPool->cPresent = 0;
4871 pPool->iUserFreeHead = 0;
4872 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4873 const unsigned cMaxUsers = pPool->cMaxUsers;
4874 for (unsigned i = 0; i < cMaxUsers; i++)
4875 {
4876 paUsers[i].iNext = i + 1;
4877 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4878 paUsers[i].iUserTable = 0xfffffffe;
4879 }
4880 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4881
4882 /*
4883 * Clear all the GCPhys links and rebuild the phys ext free list.
4884 */
4885 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4886 pRam;
4887 pRam = pRam->CTX_SUFF(pNext))
4888 {
4889 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4890 while (iPage-- > 0)
4891 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4892 }
4893
4894 pPool->iPhysExtFreeHead = 0;
4895 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4896 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4897 for (unsigned i = 0; i < cMaxPhysExts; i++)
4898 {
4899 paPhysExts[i].iNext = i + 1;
4900 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4901 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4902 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4903 }
4904 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4905
4906 /*
4907 * Just zap the modified list.
4908 */
4909 pPool->cModifiedPages = 0;
4910 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4911
4912 /*
4913 * Clear the GCPhys hash and the age list.
4914 */
4915 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4916 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4917 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4918 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4919
4920#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4921 /* Clear all dirty pages. */
4922 pPool->idxFreeDirtyPage = 0;
4923 pPool->cDirtyPages = 0;
4924 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
4925 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
4926#endif
4927
4928 /*
4929 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4930 */
4931 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4932 {
4933 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4934 pPage->iNext = NIL_PGMPOOL_IDX;
4935 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4936 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4937 pPage->cModifications = 0;
4938 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4939 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4940 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
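        /* A monitored special page keeps its physical access handler; re-register the
           handler callbacks for this pool page and put it back into the (now empty)
           GCPhys hash. */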
4941 if (pPage->fMonitored)
4942 {
4943 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4944 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4945 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4946 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4947 pPool->pszAccessHandler);
4948 AssertFatalRCSuccess(rc);
4949 pgmPoolHashInsert(pPool, pPage);
4950 }
4951 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4952 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4953 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4954 }
4955
4956 for (VMCPUID i = 0; i < pVM->cCpus; i++)
4957 {
4958 /*
4959 * Re-enter the shadowing mode and assert Sync CR3 FF.
4960 */
4961 PVMCPU pVCpu = &pVM->aCpus[i];
4962 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
4963 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4964 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4965 }
4966
4967 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
4968}
4969#endif /* IN_RING3 */
4970
4971#ifdef LOG_ENABLED
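/**
 * Converts a page pool kind to a string (for logging).
 *
 * @returns Read-only name of the page pool kind.
 * @param   enmKind     The page pool kind.
 */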
4972static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4973{
4974 switch(enmKind)
4975 {
4976 case PGMPOOLKIND_INVALID:
4977 return "PGMPOOLKIND_INVALID";
4978 case PGMPOOLKIND_FREE:
4979 return "PGMPOOLKIND_FREE";
4980 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4981 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4982 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4983 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4984 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4985 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4986 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4987 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4988 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4989 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4990 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4991 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4992 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4993 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4994 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4995 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4996 case PGMPOOLKIND_32BIT_PD:
4997 return "PGMPOOLKIND_32BIT_PD";
4998 case PGMPOOLKIND_32BIT_PD_PHYS:
4999 return "PGMPOOLKIND_32BIT_PD_PHYS";
5000 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5001 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5002 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5003 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5004 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5005 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5006 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5007 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5008 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5009 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5010 case PGMPOOLKIND_PAE_PD_PHYS:
5011 return "PGMPOOLKIND_PAE_PD_PHYS";
5012 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5013 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5014 case PGMPOOLKIND_PAE_PDPT:
5015 return "PGMPOOLKIND_PAE_PDPT";
5016 case PGMPOOLKIND_PAE_PDPT_PHYS:
5017 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5018 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5019 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5020 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5021 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5022 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5023 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5024 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5025 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5026 case PGMPOOLKIND_64BIT_PML4:
5027 return "PGMPOOLKIND_64BIT_PML4";
5028 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5029 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5030 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5031 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5032 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5033 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5034 case PGMPOOLKIND_ROOT_NESTED:
5035 return "PGMPOOLKIND_ROOT_NESTED";
5036 }
5037 return "Unknown kind!";
5038}
5039#endif /* LOG_ENABLED */