VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@23794

Last change on this file since 23794 was 23597, checked in by vboxsync, 15 years ago

Fix QNX paging loop

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 190.2 KB
1/* $Id: PGMAllPool.cpp 23597 2009-10-07 14:51:47Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50#ifdef PGMPOOL_WITH_USER_TRACKING
51DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
52DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
53static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
54#endif
55#ifdef PGMPOOL_WITH_CACHE
56static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
57#endif
58#ifdef PGMPOOL_WITH_MONITORING
59static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
60#endif
61#ifndef IN_RING3
62DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
63#endif
64#ifdef LOG_ENABLED
65static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
66#endif
67#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
68static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
69#endif
70
71int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
72PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
73void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
74void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
75
76RT_C_DECLS_END
77
78
79/**
80 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
81 *
82 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
83 * @param enmKind The page kind.
84 */
85DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
86{
87 switch (enmKind)
88 {
89 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
90 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
91 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
92 return true;
93 default:
94 return false;
95 }
96}
97
98/** @def PGMPOOL_PAGE_2_LOCKED_PTR
99 * Maps a pool page into the current context and locks it (RC only).
100 *
101 * @returns VBox status code.
102 * @param pVM The VM handle.
103 * @param pPage The pool page.
104 *
105 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
106 * small page window employed by that function. Be careful.
107 * @remark There is no need to assert on the result.
108 */
109#if defined(IN_RC)
110DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
111{
112 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
113
114 /* Make sure the dynamic mapping will not be reused. */
115 if (pv)
116 PGMDynLockHCPage(pVM, (uint8_t *)pv);
117
118 return pv;
119}
120#else
121# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
122#endif
123
124/** @def PGMPOOL_UNLOCK_PTR
125 * Unlocks a previously locked dynamic mapping (RC only).
126 *
127 * @returns VBox status code.
128 * @param pVM The VM handle.
129 * @param pPage The pool page.
130 *
131 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
132 * small page window employed by that function. Be careful.
133 * @remark There is no need to assert on the result.
134 */
135#if defined(IN_RC)
136DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
137{
138 if (pvPage)
139 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
140}
141#else
142# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
143#endif
144
145
146#ifdef PGMPOOL_WITH_MONITORING
147/**
148 * Determines the size of a write instruction.
149 * @returns number of bytes written.
150 * @param pDis The disassembler state.
151 */
152static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
153{
154 /*
155 * This is very crude and possibly wrong for some opcodes,
156 * but since it's not really supposed to be called we can
157 * probably live with that.
158 */
159 return DISGetParamSize(pDis, &pDis->param1);
160}
161
162
163/**
164 * Flushes a chain of pages sharing the same access monitor.
165 *
166 * @returns VBox status code suitable for scheduling.
167 * @param pPool The pool.
168 * @param pPage A page in the chain.
169 */
170int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
171{
172 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
173
174 /*
175 * Find the list head.
176 */
177 uint16_t idx = pPage->idx;
178 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
179 {
180 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
181 {
182 idx = pPage->iMonitoredPrev;
183 Assert(idx != pPage->idx);
184 pPage = &pPool->aPages[idx];
185 }
186 }
187
188 /*
189 * Iterate the list flushing each shadow page.
190 */
191 int rc = VINF_SUCCESS;
192 for (;;)
193 {
194 idx = pPage->iMonitoredNext;
195 Assert(idx != pPage->idx);
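 /* Indexes below PGMPOOL_IDX_FIRST denote the special root pages; they are skipped here and not flushed. */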
196 if (pPage->idx >= PGMPOOL_IDX_FIRST)
197 {
198 int rc2 = pgmPoolFlushPage(pPool, pPage);
199 AssertRC(rc2);
200 }
201 /* next */
202 if (idx == NIL_PGMPOOL_IDX)
203 break;
204 pPage = &pPool->aPages[idx];
205 }
206 return rc;
207}
208
209
210/**
211 * Wrapper for reading the guest entry being modified, using whichever access method suits the current context.
212 *
213 * @returns VBox status code suitable for scheduling.
214 * @param pVM VM Handle.
215 * @param pvDst Destination address
216 * @param pvSrc Source guest virtual address.
217 * @param GCPhysSrc The source guest physical address.
218 * @param cb Size of data to read
219 */
220DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
221{
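 /* The source is aligned down to the entry size so the whole, naturally aligned guest entry is read. */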
222#if defined(IN_RING3)
223 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
224 return VINF_SUCCESS;
225#else
226 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
227 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
228#endif
229}
230
231/**
232 * Process shadow entries before they are changed by the guest.
233 *
234 * For PT entries we will clear them. For PD entries, we'll simply check
235 * for mapping conflicts and set the SyncCR3 FF if found.
236 *
237 * @param pVCpu VMCPU handle
238 * @param pPool The pool.
239 * @param pPage The head page.
240 * @param GCPhysFault The guest physical fault address.
241 * @param uAddress In R0 and GC this is the guest context fault address (flat).
242 * In R3 this is the host context 'fault' address.
243 * @param pDis The disassembler state for figuring out the write size.
244 * This need not be specified if the caller knows we won't do cross entry accesses.
245 */
246void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, PDISCPUSTATE pDis)
247{
248 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
249 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
250 const unsigned cbWrite = pDis ? pgmPoolDisasWriteSize(pDis) : 0;
251 PVM pVM = pPool->CTX_SUFF(pVM);
252
253 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, cbWrite));
254
255 for (;;)
256 {
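 /* uShw lets the mapped shadow page be viewed as whichever paging structure type this pool page shadows (PT, PAE PT, PD, PAE PD, PDPT or PML4). */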
257 union
258 {
259 void *pv;
260 PX86PT pPT;
261 PX86PTPAE pPTPae;
262 PX86PD pPD;
263 PX86PDPAE pPDPae;
264 PX86PDPT pPDPT;
265 PX86PML4 pPML4;
266 } uShw;
267
268 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s cbWrite=%d\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
269
270 uShw.pv = NULL;
271 switch (pPage->enmKind)
272 {
273 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
274 {
275 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
276 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
277 const unsigned iShw = off / sizeof(X86PTE);
278 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
279 if (uShw.pPT->a[iShw].n.u1Present)
280 {
281# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
282 X86PTE GstPte;
283
284 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
285 AssertRC(rc);
286 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
287 pgmPoolTracDerefGCPhysHint(pPool, pPage,
288 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
289 GstPte.u & X86_PTE_PG_MASK);
290# endif
291 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
292 }
293 break;
294 }
295
296 /* page/2 sized */
297 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
298 {
299 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
300 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
301 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
302 {
303 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
304 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
305 if (uShw.pPTPae->a[iShw].n.u1Present)
306 {
307# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
308 X86PTE GstPte;
309 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
310 AssertRC(rc);
311
312 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
313 pgmPoolTracDerefGCPhysHint(pPool, pPage,
314 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
315 GstPte.u & X86_PTE_PG_MASK);
316# endif
317 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
318 }
319 }
320 break;
321 }
322
323 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
324 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
325 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
326 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
327 {
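 /* A 32-bit guest PD (1024 entries) is shadowed by four PAE PDs of 512 entries each, so each shadow PD covers 256 guest PDEs and every guest PDE (4MB of address space) corresponds to two 2MB shadow PDEs. */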
328 unsigned iGst = off / sizeof(X86PDE);
329 unsigned iShwPdpt = iGst / 256;
330 unsigned iShw = (iGst % 256) * 2;
331 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
332
333 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
334 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
335 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
336 {
337 for (unsigned i = 0; i < 2; i++)
338 {
339# ifndef IN_RING0
340 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
341 {
342 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
343 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
344 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
345 break;
346 }
347 else
348# endif /* !IN_RING0 */
349 if (uShw.pPDPae->a[iShw+i].n.u1Present)
350 {
351 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
352 pgmPoolFree(pVM,
353 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
354 pPage->idx,
355 iShw + i);
356 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
357 }
358
359 /* paranoia / a bit assumptive. */
360 if ( pDis
361 && (off & 3)
362 && (off & 3) + cbWrite > 4)
363 {
364 const unsigned iShw2 = iShw + 2 + i;
365 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
366 {
367# ifndef IN_RING0
368 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
369 {
370 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
371 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
372 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
373 break;
374 }
375 else
376# endif /* !IN_RING0 */
377 if (uShw.pPDPae->a[iShw2].n.u1Present)
378 {
379 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
380 pgmPoolFree(pVM,
381 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
382 pPage->idx,
383 iShw2);
384 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
385 }
386 }
387 }
388 }
389 }
390 break;
391 }
392
393 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
394 {
395 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
396 const unsigned iShw = off / sizeof(X86PTEPAE);
397 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
398 if (uShw.pPTPae->a[iShw].n.u1Present)
399 {
400# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
401 X86PTEPAE GstPte;
402 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
403 AssertRC(rc);
404
405 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
406 pgmPoolTracDerefGCPhysHint(pPool, pPage,
407 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
408 GstPte.u & X86_PTE_PAE_PG_MASK);
409# endif
410 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
411 }
412
413 /* paranoia / a bit assumptive. */
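 /* If a misaligned write spills past this PTE, clear the following entry as well. */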
414 if ( pDis
415 && (off & 7)
416 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
417 {
418 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
419 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
420
421 if (uShw.pPTPae->a[iShw2].n.u1Present)
422 {
423# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
424 X86PTEPAE GstPte;
425# ifdef IN_RING3
426 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
427# else
428 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
429# endif
430 AssertRC(rc);
431 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
432 pgmPoolTracDerefGCPhysHint(pPool, pPage,
433 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
434 GstPte.u & X86_PTE_PAE_PG_MASK);
435# endif
436 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u, 0);
437 }
438 }
439 break;
440 }
441
442 case PGMPOOLKIND_32BIT_PD:
443 {
444 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
445 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
446
447 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
448 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
449# ifndef IN_RING0
450 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
451 {
452 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
453 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
454 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
455 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
456 break;
457 }
458# endif /* !IN_RING0 */
459# ifndef IN_RING0
460 else
461# endif /* !IN_RING0 */
462 {
463 if (uShw.pPD->a[iShw].n.u1Present)
464 {
465 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
466 pgmPoolFree(pVM,
467 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
468 pPage->idx,
469 iShw);
470 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
471 }
472 }
473 /* paranoia / a bit assumptive. */
474 if ( pDis
475 && (off & 3)
476 && (off & 3) + cbWrite > sizeof(X86PTE))
477 {
478 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
479 if ( iShw2 != iShw
480 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
481 {
482# ifndef IN_RING0
483 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
484 {
485 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
486 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
487 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
488 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
489 break;
490 }
491# endif /* !IN_RING0 */
492# ifndef IN_RING0
493 else
494# endif /* !IN_RING0 */
495 {
496 if (uShw.pPD->a[iShw2].n.u1Present)
497 {
498 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
499 pgmPoolFree(pVM,
500 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
501 pPage->idx,
502 iShw2);
503 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
504 }
505 }
506 }
507 }
508#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
509 if ( uShw.pPD->a[iShw].n.u1Present
510 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
511 {
512 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
513# ifdef IN_RC /* TLB load - we're pushing things a bit... */
514 ASMProbeReadByte(pvAddress);
515# endif
516 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
517 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
518 }
519#endif
520 break;
521 }
522
523 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
524 {
525 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
526 const unsigned iShw = off / sizeof(X86PDEPAE);
527 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
528#ifndef IN_RING0
529 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
530 {
531 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
532 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
533 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
534 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
535 break;
536 }
537#endif /* !IN_RING0 */
538 /*
539 * Causes trouble when the guest uses a PDE to refer to the whole page table level
540 * structure. (Invalidate here; faults later on when it tries to change the page
541 * table entries -> recheck; probably only applies to the RC case.)
542 */
543# ifndef IN_RING0
544 else
545# endif /* !IN_RING0 */
546 {
547 if (uShw.pPDPae->a[iShw].n.u1Present)
548 {
549 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
550 pgmPoolFree(pVM,
551 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
552 pPage->idx,
553 iShw);
554 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
555 }
556 }
557 /* paranoia / a bit assumptive. */
558 if ( pDis
559 && (off & 7)
560 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
561 {
562 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
563 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
564
565#ifndef IN_RING0
566 if ( iShw2 != iShw
567 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
568 {
569 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
570 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
571 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
572 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
573 break;
574 }
575#endif /* !IN_RING0 */
576# ifndef IN_RING0
577 else
578# endif /* !IN_RING0 */
579 if (uShw.pPDPae->a[iShw2].n.u1Present)
580 {
581 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
582 pgmPoolFree(pVM,
583 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
584 pPage->idx,
585 iShw2);
586 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
587 }
588 }
589 break;
590 }
591
592 case PGMPOOLKIND_PAE_PDPT:
593 {
594 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
595 /*
596 * Hopefully this doesn't happen very often:
597 * - touching unused parts of the page
598 * - messing with the bits of pd pointers without changing the physical address
599 */
600 /* PDPT roots are not page aligned; 32 byte only! */
601 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
602
603 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
604 const unsigned iShw = offPdpt / sizeof(X86PDPE);
605 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
606 {
607# ifndef IN_RING0
608 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
609 {
610 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
611 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
612 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
613 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
614 break;
615 }
616# endif /* !IN_RING0 */
617# ifndef IN_RING0
618 else
619# endif /* !IN_RING0 */
620 if (uShw.pPDPT->a[iShw].n.u1Present)
621 {
622 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
623 pgmPoolFree(pVM,
624 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
625 pPage->idx,
626 iShw);
627 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
628 }
629
630 /* paranoia / a bit assumptive. */
631 if ( pDis
632 && (offPdpt & 7)
633 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
634 {
635 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
636 if ( iShw2 != iShw
637 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
638 {
639# ifndef IN_RING0
640 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
641 {
642 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
643 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
644 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
645 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
646 break;
647 }
648# endif /* !IN_RING0 */
649# ifndef IN_RING0
650 else
651# endif /* !IN_RING0 */
652 if (uShw.pPDPT->a[iShw2].n.u1Present)
653 {
654 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
655 pgmPoolFree(pVM,
656 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
657 pPage->idx,
658 iShw2);
659 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
660 }
661 }
662 }
663 }
664 break;
665 }
666
667#ifndef IN_RC
668 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
669 {
670 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
671 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
672 const unsigned iShw = off / sizeof(X86PDEPAE);
673 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
674 if (uShw.pPDPae->a[iShw].n.u1Present)
675 {
676 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
677 pgmPoolFree(pVM,
678 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
679 pPage->idx,
680 iShw);
681 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
682 }
683 /* paranoia / a bit assumptive. */
684 if ( pDis
685 && (off & 7)
686 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
687 {
688 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
689 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
690
691 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
692 if (uShw.pPDPae->a[iShw2].n.u1Present)
693 {
694 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
695 pgmPoolFree(pVM,
696 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
697 pPage->idx,
698 iShw2);
699 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
700 }
701 }
702 break;
703 }
704
705 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
706 {
707 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
708 /*
709 * Hopefully this doesn't happen very often:
710 * - messing with the bits of pd pointers without changing the physical address
711 */
712 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
713 {
714 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
715 const unsigned iShw = off / sizeof(X86PDPE);
716 if (uShw.pPDPT->a[iShw].n.u1Present)
717 {
718 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
719 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
720 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
721 }
722 /* paranoia / a bit assumptive. */
723 if ( pDis
724 && (off & 7)
725 && (off & 7) + cbWrite > sizeof(X86PDPE))
726 {
727 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
728 if (uShw.pPDPT->a[iShw2].n.u1Present)
729 {
730 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
731 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
732 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
733 }
734 }
735 }
736 break;
737 }
738
739 case PGMPOOLKIND_64BIT_PML4:
740 {
741 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
742 /*
743 * Hopefully this doesn't happen very often:
744 * - messing with the bits of pd pointers without changing the physical address
745 */
746 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
747 {
748 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
749 const unsigned iShw = off / sizeof(X86PDPE);
750 if (uShw.pPML4->a[iShw].n.u1Present)
751 {
752 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
753 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
754 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
755 }
756 /* paranoia / a bit assumptive. */
757 if ( pDis
758 && (off & 7)
759 && (off & 7) + cbWrite > sizeof(X86PDPE))
760 {
761 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
762 if (uShw.pPML4->a[iShw2].n.u1Present)
763 {
764 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
765 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
766 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
767 }
768 }
769 }
770 break;
771 }
772#endif /* !IN_RC */
773
774 default:
775 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
776 }
777 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
778
779 /* next */
780 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
781 return;
782 pPage = &pPool->aPages[pPage->iMonitoredNext];
783 }
784}
785
786# ifndef IN_RING3
787/**
788 * Checks if an access could be a fork operation in progress.
789 *
790 * Meaning that the guest is setting up the parent process for Copy-On-Write.
791 *
792 * @returns true if it's likely that we're forking, otherwise false.
793 * @param pPool The pool.
794 * @param pDis The disassembled instruction.
795 * @param offFault The access offset.
796 */
797DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
798{
799 /*
800 * i386 Linux uses btr to clear X86_PTE_RW.
801 * The functions involved are (2.6.16 source inspection):
802 * clear_bit
803 * ptep_set_wrprotect
804 * copy_one_pte
805 * copy_pte_range
806 * copy_pmd_range
807 * copy_pud_range
808 * copy_page_range
809 * dup_mmap
810 * dup_mm
811 * copy_mm
812 * copy_process
813 * do_fork
814 */
815 if ( pDis->pCurInstr->opcode == OP_BTR
816 && !(offFault & 4)
817 /** @todo Validate that the bit index is X86_PTE_RW. */
818 )
819 {
820 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
821 return true;
822 }
823 return false;
824}
825
826
827/**
828 * Determine whether the page is likely to have been reused.
829 *
830 * @returns true if we consider the page as being reused for a different purpose.
831 * @returns false if we consider it to still be a paging page.
832 * @param pVM VM Handle.
833 * @param pVCpu VMCPU Handle.
834 * @param pRegFrame Trap register frame.
835 * @param pDis The disassembly info for the faulting instruction.
836 * @param pvFault The fault address.
837 *
838 * @remark The REP prefix check is left to the caller because of STOSD/W.
839 */
840DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
841{
842#ifndef IN_RC
843 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
844 if ( HWACCMHasPendingIrq(pVM)
845 && (pRegFrame->rsp - pvFault) < 32)
846 {
847 /* Fault caused by stack writes while trying to inject an interrupt event. */
848 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
849 return true;
850 }
851#else
852 NOREF(pVM); NOREF(pvFault);
853#endif
854
855 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
856
857 /* Non-supervisor mode write means it's used for something else. */
858 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
859 return true;
860
861 switch (pDis->pCurInstr->opcode)
862 {
863 /* call implies the actual push of the return address faulted */
864 case OP_CALL:
865 Log4(("pgmPoolMonitorIsReused: CALL\n"));
866 return true;
867 case OP_PUSH:
868 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
869 return true;
870 case OP_PUSHF:
871 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
872 return true;
873 case OP_PUSHA:
874 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
875 return true;
876 case OP_FXSAVE:
877 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
878 return true;
879 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
880 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
881 return true;
882 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
883 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
884 return true;
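 /* A 64-bit REP MOVS/STOS with a large count (rcx >= 0x40) is treated as the page being reused; shorter string writes are not. */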
885 case OP_MOVSWD:
886 case OP_STOSWD:
887 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
888 && pRegFrame->rcx >= 0x40
889 )
890 {
891 Assert(pDis->mode == CPUMODE_64BIT);
892
893 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
894 return true;
895 }
896 return false;
897 }
898 if ( ( (pDis->param1.flags & USE_REG_GEN32)
899 || (pDis->param1.flags & USE_REG_GEN64))
900 && (pDis->param1.base.reg_gen == USE_REG_ESP))
901 {
902 Log4(("pgmPoolMonitorIsReused: ESP\n"));
903 return true;
904 }
905
906 return false;
907}
908
909/**
910 * Flushes the page being accessed.
911 *
912 * @returns VBox status code suitable for scheduling.
913 * @param pVM The VM handle.
914 * @param pVCpu The VMCPU handle.
915 * @param pPool The pool.
916 * @param pPage The pool page (head).
917 * @param pDis The disassembly of the write instruction.
918 * @param pRegFrame The trap register frame.
919 * @param GCPhysFault The fault address as guest physical address.
920 * @param pvFault The fault address.
921 */
922static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
923 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
924{
925 /*
926 * First, do the flushing.
927 */
928 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
929
930 /*
931 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection). Must do this in raw mode (!); XP boot will fail otherwise.
932 */
933 uint32_t cbWritten;
934 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
935 if (RT_SUCCESS(rc2))
936 pRegFrame->rip += pDis->opsize;
937 else if (rc2 == VERR_EM_INTERPRETER)
938 {
939#ifdef IN_RC
940 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
941 {
942 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
943 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
944 rc = VINF_SUCCESS;
945 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
946 }
947 else
948#endif
949 {
950 rc = VINF_EM_RAW_EMULATE_INSTR;
951 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
952 }
953 }
954 else
955 rc = rc2;
956
957 /* See use in pgmPoolAccessHandlerSimple(). */
958 PGM_INVL_VCPU_TLBS(pVCpu);
959 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
960 return rc;
961}
962
963/**
964 * Handles the STOSD write accesses.
965 *
966 * @returns VBox status code suitable for scheduling.
967 * @param pVM The VM handle.
968 * @param pPool The pool.
969 * @param pPage The pool page (head).
970 * @param pDis The disassembly of the write instruction.
971 * @param pRegFrame The trap register frame.
972 * @param GCPhysFault The fault address as guest physical address.
973 * @param pvFault The fault address.
974 */
975DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
976 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
977{
978 unsigned uIncrement = pDis->param1.size;
979
980 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
981 Assert(pRegFrame->rcx <= 0x20);
982
983#ifdef VBOX_STRICT
984 if (pDis->opmode == CPUMODE_32BIT)
985 Assert(uIncrement == 4);
986 else
987 Assert(uIncrement == 8);
988#endif
989
990 Log3(("pgmPoolAccessHandlerSTOSD\n"));
991
992 /*
993 * Increment the modification counter and insert it into the list
994 * of modified pages the first time.
995 */
996 if (!pPage->cModifications++)
997 pgmPoolMonitorModifiedInsert(pPool, pPage);
998
999 /*
1000 * Execute REP STOSD.
1001 *
1002 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
1003 * write situation, meaning that it's safe to write here.
1004 */
1005 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
1006 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
1007 while (pRegFrame->rcx)
1008 {
1009#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1010 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1011 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1012 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1013#else
1014 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
1015#endif
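 /* In RC we run within the guest's context, so the flat fault address can be written directly; in R0/R3 the write goes through the physical access helper instead. */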
1016#ifdef IN_RC
1017 *(uint32_t *)pu32 = pRegFrame->eax;
1018#else
1019 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
1020#endif
1021 pu32 += uIncrement;
1022 GCPhysFault += uIncrement;
1023 pRegFrame->rdi += uIncrement;
1024 pRegFrame->rcx--;
1025 }
1026 pRegFrame->rip += pDis->opsize;
1027
1028#ifdef IN_RC
1029 /* See use in pgmPoolAccessHandlerSimple(). */
1030 PGM_INVL_VCPU_TLBS(pVCpu);
1031#endif
1032
1033 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
1034 return VINF_SUCCESS;
1035}
1036
1037
1038/**
1039 * Handles the simple write accesses.
1040 *
1041 * @returns VBox status code suitable for scheduling.
1042 * @param pVM The VM handle.
1043 * @param pVCpu The VMCPU handle.
1044 * @param pPool The pool.
1045 * @param pPage The pool page (head).
1046 * @param pDis The disassembly of the write instruction.
1047 * @param pRegFrame The trap register frame.
1048 * @param GCPhysFault The fault address as guest physical address.
1049 * @param pvFault The fault address.
1050 * @param pfReused Reused state (out)
1051 */
1052DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
1053 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1054{
1055 Log3(("pgmPoolAccessHandlerSimple\n"));
1056 /*
1057 * Increment the modification counter and insert it into the list
1058 * of modified pages the first time.
1059 */
1060 if (!pPage->cModifications++)
1061 pgmPoolMonitorModifiedInsert(pPool, pPage);
1062
1063 /*
1064 * Clear the affected entries in all pages of the monitored chain. ASSUMES that pvFault is readable.
1065 */
1066#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1067 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1068 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1069 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1070#else
1071 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, pDis);
1072#endif
1073
1074 /*
1075 * Interpret the instruction.
1076 */
1077 uint32_t cb;
1078 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1079 if (RT_SUCCESS(rc))
1080 pRegFrame->rip += pDis->opsize;
1081 else if (rc == VERR_EM_INTERPRETER)
1082 {
1083 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1084 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1085 rc = VINF_EM_RAW_EMULATE_INSTR;
1086 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1087 }
1088
1089#if 0 /* experimental code */
1090 if (rc == VINF_SUCCESS)
1091 {
1092 switch (pPage->enmKind)
1093 {
1094 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1095 {
1096 X86PTEPAE GstPte;
1097 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1098 AssertRC(rc);
1099
1100 /* Check the new value written by the guest. If present and with a bogus physical address, then
1101 * it's fairly safe to assume the guest is reusing the PT.
1102 */
1103 if (GstPte.n.u1Present)
1104 {
1105 RTHCPHYS HCPhys = -1;
1106 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1107 if (rc != VINF_SUCCESS)
1108 {
1109 *pfReused = true;
1110 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1111 }
1112 }
1113 break;
1114 }
1115 }
1116 }
1117#endif
1118
1119#ifdef IN_RC
1120 /*
1121 * Quick hack, with logging enabled we're getting stale
1122 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
1123 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
1124 * have to be fixed to support this. But that'll have to wait till next week.
1125 *
1126 * An alternative is to keep track of the changed PTEs together with the
1127 * GCPhys from the guest PT. This may prove expensive though.
1128 *
1129 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
1130 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
1131 */
1132 PGM_INVL_VCPU_TLBS(pVCpu);
1133#endif
1134
1135 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1136 return rc;
1137}
1138
1139/**
1140 * \#PF Handler callback for PT write accesses.
1141 *
1142 * @returns VBox status code (appropriate for GC return).
1143 * @param pVM VM Handle.
1144 * @param uErrorCode CPU Error code.
1145 * @param pRegFrame Trap register frame.
1146 * NULL on DMA and other non CPU access.
1147 * @param pvFault The fault address (cr2).
1148 * @param GCPhysFault The GC physical address corresponding to pvFault.
1149 * @param pvUser User argument.
1150 */
1151DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1152{
1153 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1154 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1155 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1156 PVMCPU pVCpu = VMMGetCpu(pVM);
1157 unsigned cMaxModifications;
1158 bool fForcedFlush = false;
1159
1160 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1161
1162 pgmLock(pVM);
1163 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1164 {
1165 /* Pool page changed while we were waiting for the lock; ignore. */
1166 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1167 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1168 pgmUnlock(pVM);
1169 return VINF_SUCCESS;
1170 }
1171#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1172 if (pPage->fDirty)
1173 {
1174 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1175 pgmUnlock(pVM); return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1176 }
1177#endif
1178
1179#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1180 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1181 {
1182 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1183 void *pvGst;
1184 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1185 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1186 }
1187#endif
1188
1189 /*
1190 * Disassemble the faulting instruction.
1191 */
1192 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1193 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1194 AssertReturnStmt(rc == VINF_SUCCESS, pgmUnlock(pVM), rc);
1195
1196 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1197
1198 /*
1199 * We should ALWAYS have the list head as user parameter. This
1200 * is because we use that page to record the changes.
1201 */
1202 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1203
1204#ifdef IN_RING0
1205 /* Maximum nr of modifications depends on the page type. */
1206 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1207 cMaxModifications = 4;
1208 else
1209 cMaxModifications = 24;
1210#else
1211 cMaxModifications = 48;
1212#endif
1213
1214 /*
1215 * Incremental page table updates should weigh more heavily than random ones.
1216 * (Only applies when started from offset 0)
1217 */
1218 pVCpu->pgm.s.cPoolAccessHandler++;
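 /* Detect a sequential update run: the faulting RIP is within +/-0x40 of the previous one, the fault address immediately follows the previously written entry, and this is the very next access-handler invocation for this page. */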
1219 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1220 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1221 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1222 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1223 {
1224 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1225 pPage->cModifications = pPage->cModifications * 2;
1226 pPage->pvLastAccessHandlerFault = pvFault;
1227 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1228 if (pPage->cModifications >= cMaxModifications)
1229 {
1230 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1231 fForcedFlush = true;
1232 }
1233 }
1234
1235 if (pPage->cModifications >= cMaxModifications)
1236 Log(("Mod overflow %VGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1237
1238 /*
1239 * Check if it's worth dealing with.
1240 */
1241 bool fReused = false;
1242 bool fNotReusedNotForking = false;
1243 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1244 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1245 )
1246 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1247 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1248 {
1249 /*
1250 * Simple instructions, no REP prefix.
1251 */
1252 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1253 {
1254 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1255 if (fReused)
1256 goto flushPage;
1257
1258 /* A mov instruction to change the first page table entry will be remembered so we can detect
1259 * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
1260 */
1261 if ( rc == VINF_SUCCESS
1262 && pDis->pCurInstr->opcode == OP_MOV
1263 && (pvFault & PAGE_OFFSET_MASK) == 0)
1264 {
1265 pPage->pvLastAccessHandlerFault = pvFault;
1266 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1267 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1268 /* Make sure we don't kick out a page too quickly. */
1269 if (pPage->cModifications > 8)
1270 pPage->cModifications = 2;
1271 }
1272 else
1273 if (pPage->pvLastAccessHandlerFault == pvFault)
1274 {
1275 /* ignore the 2nd write to this page table entry. */
1276 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1277 }
1278 else
1279 {
1280 pPage->pvLastAccessHandlerFault = 0;
1281 pPage->pvLastAccessHandlerRip = 0;
1282 }
1283
1284 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1285 pgmUnlock(pVM);
1286 return rc;
1287 }
1288
1289 /*
1290 * Windows is frequently doing small memset() operations (netio test 4k+).
1291 * We have to deal with these or we'll kill the cache and performance.
1292 */
1293 if ( pDis->pCurInstr->opcode == OP_STOSWD
1294 && !pRegFrame->eflags.Bits.u1DF
1295 && pDis->opmode == pDis->mode
1296 && pDis->addrmode == pDis->mode)
1297 {
1298 bool fValidStosd = false;
1299
1300 if ( pDis->mode == CPUMODE_32BIT
1301 && pDis->prefix == PREFIX_REP
1302 && pRegFrame->ecx <= 0x20
1303 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1304 && !((uintptr_t)pvFault & 3)
1305 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1306 )
1307 {
1308 fValidStosd = true;
1309 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1310 }
1311 else
1312 if ( pDis->mode == CPUMODE_64BIT
1313 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1314 && pRegFrame->rcx <= 0x20
1315 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1316 && !((uintptr_t)pvFault & 7)
1317 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1318 )
1319 {
1320 fValidStosd = true;
1321 }
1322
1323 if (fValidStosd)
1324 {
1325 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1326 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1327 pgmUnlock(pVM);
1328 return rc;
1329 }
1330 }
1331
1332 /* REP prefix, don't bother. */
1333 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1334 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1335 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1336 fNotReusedNotForking = true;
1337 }
1338
1339#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1340 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1341 * leads to pgm pool trashing and an excessive number of write faults due to page monitoring.
1342 */
1343 if ( pPage->cModifications >= cMaxModifications
1344 && !fForcedFlush
1345 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1346 && ( fNotReusedNotForking
1347 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1348 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1349 )
1350 )
1351 {
1352 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1353 Assert(pPage->fDirty == false);
1354
1355 /* Flush any monitored duplicates as we will disable write protection. */
1356 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1357 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1358 {
1359 PPGMPOOLPAGE pPageHead = pPage;
1360
1361 /* Find the monitor head. */
1362 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1363 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1364
1365 while (pPageHead)
1366 {
1367 unsigned idxNext = pPageHead->iMonitoredNext;
1368
1369 if (pPageHead != pPage)
1370 {
1371 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1372 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1373 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1374 AssertRC(rc2);
1375 }
1376
1377 if (idxNext == NIL_PGMPOOL_IDX)
1378 break;
1379
1380 pPageHead = &pPool->aPages[idxNext];
1381 }
1382 }
1383
1384 /* The flushing above might fail for locked pages, so double check. */
1385 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1386 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1387 {
1388 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1389
1390 /* Temporarily allow write access to the page table again. */
1391 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1392 if (rc == VINF_SUCCESS)
1393 {
1394 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1395 AssertMsg(rc == VINF_SUCCESS
1396 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1397 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1398 || rc == VERR_PAGE_NOT_PRESENT,
1399 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1400
1401 pPage->pvDirtyFault = pvFault;
1402
1403 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1404 pgmUnlock(pVM);
1405 return rc;
1406 }
1407 }
1408 }
1409#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1410
1411 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1412flushPage:
1413 /*
1414 * Not worth it, so flush it.
1415 *
1416 * If we considered it to be reused, don't go back to ring-3
1417 * to emulate failed instructions since we usually cannot
1418 * interpret them. This may be a bit risky, in which case
1419 * the reuse detection must be fixed.
1420 */
1421 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1422 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1423 && fReused)
1424 {
1425 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1426 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1427 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1428 }
1429 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1430 pgmUnlock(pVM);
1431 return rc;
1432}
1433
1434# endif /* !IN_RING3 */
1435
1436# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1437
1438# ifdef VBOX_STRICT
1439/**
1440 * Check references to guest physical memory in a PAE / PAE page table.
1441 *
1442 * @param pPool The pool.
1443 * @param pPage The page.
1444 * @param pShwPT The shadow page table (mapping of the page).
1445 * @param pGstPT The guest page table.
1446 */
1447static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1448{
1449 unsigned cErrors = 0;
1450 int LastRc;
1451 unsigned LastPTE;
1452 RTHCPHYS LastHCPhys;
1453
1454#ifdef VBOX_STRICT
1455 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1456 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1457#endif
1458 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1459 {
1460 if (pShwPT->a[i].n.u1Present)
1461 {
1462 RTHCPHYS HCPhys = -1;
1463 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1464 if ( rc != VINF_SUCCESS
1465 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1466 {
1467 RTHCPHYS HCPhysPT = -1;
1468 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1469 LastPTE = i;
1470 LastRc = rc;
1471 LastHCPhys = HCPhys;
1472 cErrors++;
1473
1474 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1475 AssertRC(rc);
1476
1477 for (unsigned i = 0; i < pPool->cCurPages; i++)
1478 {
1479 PPGMPOOLPAGE pTempPage = &pPool->aPages[i];
1480
1481 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1482 {
1483 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1484
1485 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1486 {
1487 if ( pShwPT2->a[j].n.u1Present
1488 && pShwPT2->a[j].n.u1Write
1489 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1490 {
1491 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1492 }
1493 }
1494 }
1495 }
1496 }
1497 }
1498 }
1499 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1500}
1501# endif /* VBOX_STRICT */
1502
1503/**
1504 * Clear references to guest physical memory in a PAE / PAE page table.
1505 *
1506 * @returns nr of changed PTEs
1507 * @param pPool The pool.
1508 * @param pPage The page.
1509 * @param pShwPT The shadow page table (mapping of the page).
1510 * @param pGstPT The guest page table.
1511 * @param pOldGstPT The old cached guest page table.
1512 * @param fAllowRemoval Whether we may bail out (and request a flush) as soon as an invalid guest PTE is encountered.
1513 * @param pfFlush Flush reused page table (out)
1514 */
1515DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1516{
1517 unsigned cChanged = 0;
1518
1519#ifdef VBOX_STRICT
1520 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1521 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1522#endif
1523 *pfFlush = false;
1524
1525 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1526 {
1527 /* Check the new value written by the guest. If present and with a bogus physical address, then
1528 * it's fairly safe to assume the guest is reusing the PT.
1529 */
1530 if ( fAllowRemoval
1531 && pGstPT->a[i].n.u1Present)
1532 {
1533 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1534 {
1535 *pfFlush = true;
1536 return ++cChanged;
1537 }
1538 }
1539 if (pShwPT->a[i].n.u1Present)
1540 {
1541 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1542 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1543 {
1544#ifdef VBOX_STRICT
1545 RTHCPHYS HCPhys = -1;
1546 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1547 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1548#endif
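 /* Same physical page; if the attribute bits match as well and the shadow entry is not more writable than the guest one, the shadow PTE can be left untouched. */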
1549 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1550 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1551 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1552 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1553
1554 if ( uHostAttr == uGuestAttr
1555 && fHostRW <= fGuestRW)
1556 continue;
1557 }
1558 cChanged++;
1559 /* Something was changed, so flush it. */
1560 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1561 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1562 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1563 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1564 }
1565 }
1566 return cChanged;
1567}
1568
1569
1570/**
1571 * Flushes a dirty page.
1572 *
1573 * @param pVM VM Handle.
1574 * @param pPool The pool.
1575 * @param idxSlot Dirty array slot index
1576 * @param fAllowRemoval Allow a reused page table to be removed
1577 */
1578static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1579{
1580 PPGMPOOLPAGE pPage;
1581 unsigned idxPage;
1582
1583 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1584 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1585 return;
1586
1587 idxPage = pPool->aIdxDirtyPages[idxSlot];
1588 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1589 pPage = &pPool->aPages[idxPage];
1590 Assert(pPage->idx == idxPage);
1591 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1592
1593 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1594 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1595
1596 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1597 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1598 Assert(rc == VINF_SUCCESS);
1599 pPage->fDirty = false;
1600
1601#ifdef VBOX_STRICT
1602 uint64_t fFlags = 0;
1603 RTHCPHYS HCPhys;
1604 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1605 AssertMsg( ( rc == VINF_SUCCESS
1606 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1607 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1608 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1609 || rc == VERR_PAGE_NOT_PRESENT,
1610 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1611#endif
1612
1613 /* Flush those PTEs that have changed. */
1614 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1615 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1616 void *pvGst;
1617 bool fFlush;
1618 rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1619 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1620 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1621 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1622
1623 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1624 Assert(pPage->cModifications);
1625 if (cChanges < 4)
1626 pPage->cModifications = 1; /* must use > 0 here */
1627 else
1628 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1629
1630 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1631 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1632 pPool->idxFreeDirtyPage = idxSlot;
1633
1634 pPool->cDirtyPages--;
1635 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1636 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1637 if (fFlush)
1638 {
1639 Assert(fAllowRemoval);
1640 Log(("Flush reused page table!\n"));
1641 pgmPoolFlushPage(pPool, pPage);
1642 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1643 }
1644 else
1645 Log(("Removed dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1646}
1647
1648# ifndef IN_RING3
1649/**
1650 * Adds a new dirty page.
1651 *
1652 * @param pVM VM Handle.
1653 * @param pPool The pool.
1654 * @param pPage The page.
1655 */
1656void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1657{
1658 unsigned idxFree;
1659
1660 Assert(PGMIsLocked(pVM));
1661 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1662 Assert(!pPage->fDirty);
1663
1664 idxFree = pPool->idxFreeDirtyPage;
1665 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1666 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1667
1668 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1669 {
1670 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1671 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1672 }
1673 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1674 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1675
1676 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1677
1678 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1679 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1680 */
1681 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1682 void *pvGst;
1683 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1684 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1685#ifdef VBOX_STRICT
1686 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1687#endif
1688
1689 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1690 pPage->fDirty = true;
1691 pPage->idxDirty = idxFree;
1692 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1693 pPool->cDirtyPages++;
1694
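    /* Advance the free-slot hint. aIdxDirtyPages is a power-of-two sized ring (8 or 16
     * entries, see the AssertCompile above), so the AND below acts as a cheap modulo; if
     * the next slot is still occupied, the loop further down scans the ring for a free one. */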
1695 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1696 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1697 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1698 {
1699 unsigned i;
1700 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1701 {
1702 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1703 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1704 {
1705 pPool->idxFreeDirtyPage = idxFree;
1706 break;
1707 }
1708 }
1709 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1710 }
1711
1712 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1713 return;
1714}
1715# endif /* !IN_RING3 */
1716
1717/**
1718 * Checks if the specified page is dirty (not write monitored).
1719 *
1720 * @returns true if dirty, false if not.
1721 * @param pVM VM Handle.
1722 * @param GCPhys Guest physical address.
1723 */
1724bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1725{
1726 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1727 Assert(PGMIsLocked(pVM));
1728 if (!pPool->cDirtyPages)
1729 return false;
1730
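    /* Only whole pages are tracked; mask off the page offset so any address within the
     * monitored guest page table matches the GCPhys recorded for the dirty page. */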
1731 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1732
1733 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1734 {
1735 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1736 {
1737 PPGMPOOLPAGE pPage;
1738 unsigned idxPage = pPool->aIdxDirtyPages[i];
1739
1740 pPage = &pPool->aPages[idxPage];
1741 if (pPage->GCPhys == GCPhys)
1742 return true;
1743 }
1744 }
1745 return false;
1746}
1747
1748/**
1749 * Resets all dirty pages by reinstating page monitoring.
1750 *
1751 * @param pVM VM Handle.
1752 */
1753void pgmPoolResetDirtyPages(PVM pVM)
1754{
1755 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1756 Assert(PGMIsLocked(pVM));
1757 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1758
1759 if (!pPool->cDirtyPages)
1760 return;
1761
1762 Log(("pgmPoolResetDirtyPages\n"));
1763 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1764 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1765
1766 pPool->idxFreeDirtyPage = 0;
1767 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1768 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1769 {
1770 unsigned i;
1771 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1772 {
1773 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1774 {
1775 pPool->idxFreeDirtyPage = i;
1776 break;
1777 }
1778 }
1779 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1780 }
1781
1782 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1783 return;
1784}
1785# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1786#endif /* PGMPOOL_WITH_MONITORING */
1787
1788#ifdef PGMPOOL_WITH_CACHE
1789
1790/**
1791 * Inserts a page into the GCPhys hash table.
1792 *
1793 * @param pPool The pool.
1794 * @param pPage The page.
1795 */
1796DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1797{
1798 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1799 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1800 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1801 pPage->iNext = pPool->aiHash[iHash];
1802 pPool->aiHash[iHash] = pPage->idx;
1803}
1804
1805
1806/**
1807 * Removes a page from the GCPhys hash table.
1808 *
1809 * @param pPool The pool.
1810 * @param pPage The page.
1811 */
1812DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1813{
1814 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1815 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1816 if (pPool->aiHash[iHash] == pPage->idx)
1817 pPool->aiHash[iHash] = pPage->iNext;
1818 else
1819 {
1820 uint16_t iPrev = pPool->aiHash[iHash];
1821 for (;;)
1822 {
1823 const int16_t i = pPool->aPages[iPrev].iNext;
1824 if (i == pPage->idx)
1825 {
1826 pPool->aPages[iPrev].iNext = pPage->iNext;
1827 break;
1828 }
1829 if (i == NIL_PGMPOOL_IDX)
1830 {
1831 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1832 break;
1833 }
1834 iPrev = i;
1835 }
1836 }
1837 pPage->iNext = NIL_PGMPOOL_IDX;
1838}
1839
1840
1841/**
1842 * Frees up one cache page.
1843 *
1844 * @returns VBox status code.
1845 * @retval VINF_SUCCESS on success.
1846 * @param pPool The pool.
1847 * @param iUser The user index.
1848 */
1849static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1850{
1851#ifndef IN_RC
1852 const PVM pVM = pPool->CTX_SUFF(pVM);
1853#endif
1854    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1855 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1856
1857 /*
1858 * Select one page from the tail of the age list.
1859 */
1860 PPGMPOOLPAGE pPage;
1861 for (unsigned iLoop = 0; ; iLoop++)
1862 {
1863 uint16_t iToFree = pPool->iAgeTail;
1864 if (iToFree == iUser)
1865 iToFree = pPool->aPages[iToFree].iAgePrev;
1866/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1867 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1868 {
1869 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1870 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1871 {
1872 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1873 continue;
1874 iToFree = i;
1875 break;
1876 }
1877 }
1878*/
1879 Assert(iToFree != iUser);
1880 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1881 pPage = &pPool->aPages[iToFree];
1882
1883 /*
1884 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1885 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1886 */
1887 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1888 break;
1889 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1890 pgmPoolCacheUsed(pPool, pPage);
1891 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1892 }
1893
1894 /*
1895 * Found a usable page, flush it and return.
1896 */
1897 return pgmPoolFlushPage(pPool, pPage);
1898}
1899
1900
1901/**
1902 * Checks if a kind mismatch is really a page being reused
1903 * or if it's just a normal remapping.
1904 *
1905 * @returns true if reused and the cached page (enmKind1) should be flushed.
1906 * @returns false if not reused.
1907 * @param enmKind1 The kind of the cached page.
1908 * @param enmKind2 The kind of the requested page.
1909 */
1910static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1911{
1912 switch (enmKind1)
1913 {
1914 /*
1915 * Never reuse them. There is no remapping in non-paging mode.
1916 */
1917 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1918 case PGMPOOLKIND_32BIT_PD_PHYS:
1919 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1920 case PGMPOOLKIND_PAE_PD_PHYS:
1921 case PGMPOOLKIND_PAE_PDPT_PHYS:
1922 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1923 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1924 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1925 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1926 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1927 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1928 return false;
1929
1930 /*
1931 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1932 */
1933 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1934 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1935 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1936 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1937 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1938 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1939 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1940 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1941 case PGMPOOLKIND_32BIT_PD:
1942 case PGMPOOLKIND_PAE_PDPT:
1943 switch (enmKind2)
1944 {
1945 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1946 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1947 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1948 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1949 case PGMPOOLKIND_64BIT_PML4:
1950 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1951 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1952 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1953 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1954 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1955 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1956 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1957 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1958 return true;
1959 default:
1960 return false;
1961 }
1962
1963 /*
1964 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1965 */
1966 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1967 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1968 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1969 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1970 case PGMPOOLKIND_64BIT_PML4:
1971 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1972 switch (enmKind2)
1973 {
1974 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1975 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1976 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1977 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1978 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1979 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1980 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1981 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1982 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1983 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1984 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1985 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1986 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1987 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1988 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1989 return true;
1990 default:
1991 return false;
1992 }
1993
1994 /*
1995 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1996 */
1997 case PGMPOOLKIND_ROOT_NESTED:
1998 return false;
1999
2000 default:
2001 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2002 }
2003}
2004
2005
2006/**
2007 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2008 *
2009 * @returns VBox status code.
2010 * @retval VINF_PGM_CACHED_PAGE on success.
2011 * @retval VERR_FILE_NOT_FOUND if not found.
2012 * @param pPool The pool.
2013 * @param GCPhys The GC physical address of the page we're gonna shadow.
2014 * @param enmKind The kind of mapping.
2015 * @param enmAccess Access type for the mapping (only relevant for big pages)
2016 * @param iUser The shadow page pool index of the user table.
2017 * @param iUserTable The index into the user table (shadowed).
2018 * @param ppPage Where to store the pointer to the page.
2019 */
2020static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2021{
2022#ifndef IN_RC
2023 const PVM pVM = pPool->CTX_SUFF(pVM);
2024#endif
2025 /*
2026 * Look up the GCPhys in the hash.
2027 */
2028 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2029 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2030 if (i != NIL_PGMPOOL_IDX)
2031 {
2032 do
2033 {
2034 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2035 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2036 if (pPage->GCPhys == GCPhys)
2037 {
2038 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2039 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2040 {
2041 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2042 * doesn't flush it in case there are no more free use records.
2043 */
2044 pgmPoolCacheUsed(pPool, pPage);
2045
2046 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2047 if (RT_SUCCESS(rc))
2048 {
2049 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2050 *ppPage = pPage;
2051 if (pPage->cModifications)
2052 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2053 STAM_COUNTER_INC(&pPool->StatCacheHits);
2054 return VINF_PGM_CACHED_PAGE;
2055 }
2056 return rc;
2057 }
2058
2059 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2060 {
2061 /*
2062 * The kind is different. In some cases we should now flush the page
2063 * as it has been reused, but in most cases this is normal remapping
2064 * of PDs as PT or big pages using the GCPhys field in a slightly
2065 * different way than the other kinds.
2066 */
2067 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2068 {
2069 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2070 pgmPoolFlushPage(pPool, pPage);
2071 break;
2072 }
2073 }
2074 }
2075
2076 /* next */
2077 i = pPage->iNext;
2078 } while (i != NIL_PGMPOOL_IDX);
2079 }
2080
2081 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2082 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2083 return VERR_FILE_NOT_FOUND;
2084}
2085
2086
2087/**
2088 * Inserts a page into the cache.
2089 *
2090 * @param pPool The pool.
2091 * @param pPage The cached page.
2092 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2093 */
2094static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2095{
2096 /*
2097 * Insert into the GCPhys hash if the page is fit for that.
2098 */
2099 Assert(!pPage->fCached);
2100 if (fCanBeCached)
2101 {
2102 pPage->fCached = true;
2103 pgmPoolHashInsert(pPool, pPage);
2104 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2105 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2106 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2107 }
2108 else
2109 {
2110 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2111 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2112 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2113 }
2114
2115 /*
2116 * Insert at the head of the age list.
2117 */
2118 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2119 pPage->iAgeNext = pPool->iAgeHead;
2120 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2121 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2122 else
2123 pPool->iAgeTail = pPage->idx;
2124 pPool->iAgeHead = pPage->idx;
2125}
2126
2127
2128/**
2129 * Flushes a cached page.
2130 *
2131 * @param pPool The pool.
2132 * @param pPage The cached page.
2133 */
2134static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2135{
2136 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2137
2138 /*
2139 * Remove the page from the hash.
2140 */
2141 if (pPage->fCached)
2142 {
2143 pPage->fCached = false;
2144 pgmPoolHashRemove(pPool, pPage);
2145 }
2146 else
2147 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2148
2149 /*
2150 * Remove it from the age list.
2151 */
2152 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2153 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2154 else
2155 pPool->iAgeTail = pPage->iAgePrev;
2156 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2157 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2158 else
2159 pPool->iAgeHead = pPage->iAgeNext;
2160 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2161 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2162}
2163
2164#endif /* PGMPOOL_WITH_CACHE */
2165#ifdef PGMPOOL_WITH_MONITORING
2166
2167/**
2168 * Looks for pages sharing the monitor.
2169 *
2170 * @returns Pointer to the head page.
2171 * @returns NULL if not found.
2172 * @param pPool The Pool
2173 * @param pNewPage The page which is going to be monitored.
2174 */
2175static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2176{
2177#ifdef PGMPOOL_WITH_CACHE
2178 /*
2179 * Look up the GCPhys in the hash.
2180 */
2181 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2182 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2183 if (i == NIL_PGMPOOL_IDX)
2184 return NULL;
2185 do
2186 {
2187 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2188 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2189 && pPage != pNewPage)
2190 {
2191 switch (pPage->enmKind)
2192 {
2193 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2194 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2195 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2196 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2197 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2198 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2199 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2200 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2201 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2202 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2203 case PGMPOOLKIND_64BIT_PML4:
2204 case PGMPOOLKIND_32BIT_PD:
2205 case PGMPOOLKIND_PAE_PDPT:
2206 {
2207 /* find the head */
2208 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2209 {
2210 Assert(pPage->iMonitoredPrev != pPage->idx);
2211 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2212 }
2213 return pPage;
2214 }
2215
2216 /* ignore, no monitoring. */
2217 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2218 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2219 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2220 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2221 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2222 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2223 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2224 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2225 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2226 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2227 case PGMPOOLKIND_ROOT_NESTED:
2228 case PGMPOOLKIND_PAE_PD_PHYS:
2229 case PGMPOOLKIND_PAE_PDPT_PHYS:
2230 case PGMPOOLKIND_32BIT_PD_PHYS:
2231 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2232 break;
2233 default:
2234 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2235 }
2236 }
2237
2238 /* next */
2239 i = pPage->iNext;
2240 } while (i != NIL_PGMPOOL_IDX);
2241#endif
2242 return NULL;
2243}
2244
2245
2246/**
2247 * Enables write monitoring of a guest page.
2248 *
2249 * @returns VBox status code.
2250 * @retval VINF_SUCCESS on success.
2251 * @param pPool The pool.
2252 * @param pPage The cached page.
2253 */
2254static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2255{
2256 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2257
2258 /*
2259 * Filter out the relevant kinds.
2260 */
2261 switch (pPage->enmKind)
2262 {
2263 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2264 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2265 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2266 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2267 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2268 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2269 case PGMPOOLKIND_64BIT_PML4:
2270 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2271 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2272 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2273 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2274 case PGMPOOLKIND_32BIT_PD:
2275 case PGMPOOLKIND_PAE_PDPT:
2276 break;
2277
2278 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2279 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2280 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2281 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2282 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2283 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2284 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2285 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2286 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2287 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2288 case PGMPOOLKIND_ROOT_NESTED:
2289 /* Nothing to monitor here. */
2290 return VINF_SUCCESS;
2291
2292 case PGMPOOLKIND_32BIT_PD_PHYS:
2293 case PGMPOOLKIND_PAE_PDPT_PHYS:
2294 case PGMPOOLKIND_PAE_PD_PHYS:
2295 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2296 /* Nothing to monitor here. */
2297 return VINF_SUCCESS;
2298#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2299 break;
2300#else
2301 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2302#endif
2303 default:
2304 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2305 }
2306
2307 /*
2308 * Install handler.
2309 */
2310 int rc;
2311 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2312 if (pPageHead)
2313 {
2314 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2315 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2316
2317#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2318 if (pPageHead->fDirty)
2319 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2320#endif
2321
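        /* Another shadow page already monitors this guest page: link this page into its
         * doubly linked monitor chain instead of registering a second physical handler. */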
2322 pPage->iMonitoredPrev = pPageHead->idx;
2323 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2324 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2325 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2326 pPageHead->iMonitoredNext = pPage->idx;
2327 rc = VINF_SUCCESS;
2328 }
2329 else
2330 {
2331 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2332 PVM pVM = pPool->CTX_SUFF(pVM);
2333 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2334 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2335 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2336 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2337 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2338 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2339 pPool->pszAccessHandler);
2340 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2341 * the heap size should suffice. */
2342 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2343 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
2344 }
2345 pPage->fMonitored = true;
2346 return rc;
2347}
2348
2349
2350/**
2351 * Disables write monitoring of a guest page.
2352 *
2353 * @returns VBox status code.
2354 * @retval VINF_SUCCESS on success.
2355 * @param pPool The pool.
2356 * @param pPage The cached page.
2357 */
2358static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2359{
2360 /*
2361 * Filter out the relevant kinds.
2362 */
2363 switch (pPage->enmKind)
2364 {
2365 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2366 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2367 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2368 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2369 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2370 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2371 case PGMPOOLKIND_64BIT_PML4:
2372 case PGMPOOLKIND_32BIT_PD:
2373 case PGMPOOLKIND_PAE_PDPT:
2374 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2375 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2376 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2377 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2378 break;
2379
2380 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2381 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2382 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2383 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2384 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2385 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2386 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2387 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2388 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2389 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2390 case PGMPOOLKIND_ROOT_NESTED:
2391 case PGMPOOLKIND_PAE_PD_PHYS:
2392 case PGMPOOLKIND_PAE_PDPT_PHYS:
2393 case PGMPOOLKIND_32BIT_PD_PHYS:
2394 /* Nothing to monitor here. */
2395 return VINF_SUCCESS;
2396
2397#ifdef PGMPOOL_WITH_MIXED_PT_CR3
2398 break;
2399#endif
2400 default:
2401 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2402 }
2403
2404 /*
2405 * Remove the page from the monitored list or uninstall it if last.
2406 */
2407 const PVM pVM = pPool->CTX_SUFF(pVM);
2408 int rc;
2409 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2410 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2411 {
2412 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2413 {
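            /* This page is the head of the monitor chain: promote the next page to head and
             * re-point the physical handler callbacks' user argument at it. */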
2414 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2415 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2416 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2417 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2418 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2419 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2420 pPool->pszAccessHandler);
2421 AssertFatalRCSuccess(rc);
2422 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2423 }
2424 else
2425 {
2426 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2427 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2428 {
2429 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2430 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2431 }
2432 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2433 rc = VINF_SUCCESS;
2434 }
2435 }
2436 else
2437 {
2438 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2439 AssertFatalRC(rc);
2440#ifdef VBOX_STRICT
2441 PVMCPU pVCpu = VMMGetCpu(pVM);
2442#endif
2443 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2444 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2445 }
2446 pPage->fMonitored = false;
2447
2448 /*
2449 * Remove it from the list of modified pages (if in it).
2450 */
2451 pgmPoolMonitorModifiedRemove(pPool, pPage);
2452
2453 return rc;
2454}
2455
2456
2457/**
2458 * Inserts the page into the list of modified pages.
2459 *
2460 * @param pPool The pool.
2461 * @param pPage The page.
2462 */
2463void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2464{
2465 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2466 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2467 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2468 && pPool->iModifiedHead != pPage->idx,
2469 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2470 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2471 pPool->iModifiedHead, pPool->cModifiedPages));
2472
2473 pPage->iModifiedNext = pPool->iModifiedHead;
2474 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2475 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2476 pPool->iModifiedHead = pPage->idx;
2477 pPool->cModifiedPages++;
2478#ifdef VBOX_WITH_STATISTICS
2479 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2480 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2481#endif
2482}
2483
2484
2485/**
2486 * Removes the page from the list of modified pages and resets the
2487 * modification counter.
2488 *
2489 * @param pPool The pool.
2490 * @param pPage The page which is believed to be in the list of modified pages.
2491 */
2492static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2493{
2494 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2495 if (pPool->iModifiedHead == pPage->idx)
2496 {
2497 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2498 pPool->iModifiedHead = pPage->iModifiedNext;
2499 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2500 {
2501 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2502 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2503 }
2504 pPool->cModifiedPages--;
2505 }
2506 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2507 {
2508 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2509 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2510 {
2511 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2512 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2513 }
2514 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2515 pPool->cModifiedPages--;
2516 }
2517 else
2518 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2519 pPage->cModifications = 0;
2520}
2521
2522
2523/**
2524 * Zaps the list of modified pages, resetting their modification counters in the process.
2525 *
2526 * @param pVM The VM handle.
2527 */
2528static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2529{
2530 pgmLock(pVM);
2531 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2532 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2533
2534 unsigned cPages = 0; NOREF(cPages);
2535
2536#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2537 pgmPoolResetDirtyPages(pVM);
2538#endif
2539
2540 uint16_t idx = pPool->iModifiedHead;
2541 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2542 while (idx != NIL_PGMPOOL_IDX)
2543 {
2544 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2545 idx = pPage->iModifiedNext;
2546 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2547 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2548 pPage->cModifications = 0;
2549 Assert(++cPages);
2550 }
2551 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2552 pPool->cModifiedPages = 0;
2553 pgmUnlock(pVM);
2554}
2555
2556
2557/**
2558 * Handles SyncCR3 pool tasks.
2559 *
2560 * @returns VBox status code.
2561 * @retval VINF_SUCCESS on success.
2562 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2563 * @param pVCpu The VMCPU handle.
2564 * @remark Should only be used when monitoring is available, thus placed in
2565 * the PGMPOOL_WITH_MONITORING #ifdef.
2566 */
2567int pgmPoolSyncCR3(PVMCPU pVCpu)
2568{
2569 PVM pVM = pVCpu->CTX_SUFF(pVM);
2570 LogFlow(("pgmPoolSyncCR3\n"));
2571
2572 /*
2573 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2574 * Occasionally we will have to clear all the shadow page tables because we wanted
2575 * to monitor a page which was mapped by too many shadowed page tables. This operation is
2576 * sometimes referred to as a 'lightweight flush'.
2577 */
2578# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2579 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2580 pgmR3PoolClearAll(pVM);
2581# else /* !IN_RING3 */
2582 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2583 {
2584 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2585 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2586 return VINF_PGM_SYNC_CR3;
2587 }
2588# endif /* !IN_RING3 */
2589 else
2590 pgmPoolMonitorModifiedClearAll(pVM);
2591
2592 return VINF_SUCCESS;
2593}
2594
2595#endif /* PGMPOOL_WITH_MONITORING */
2596#ifdef PGMPOOL_WITH_USER_TRACKING
2597
2598/**
2599 * Frees up at least one user entry.
2600 *
2601 * @returns VBox status code.
2602 * @retval VINF_SUCCESS on success.
2603 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2604 * @param pPool The pool.
2605 * @param iUser The user index.
2606 */
2607static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2608{
2609 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2610#ifdef PGMPOOL_WITH_CACHE
2611 /*
2612 * Just free cached pages in a braindead fashion.
2613 */
2614 /** @todo walk the age list backwards and free the first with usage. */
2615 int rc = VINF_SUCCESS;
2616 do
2617 {
2618 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2619 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2620 rc = rc2;
2621 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2622 return rc;
2623#else
2624 /*
2625 * Lazy approach.
2626 */
2627 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
2628 AssertCompileFailed();
2629 Assert(!CPUMIsGuestInLongMode(pVM));
2630 pgmPoolFlushAllInt(pPool);
2631 return VERR_PGM_POOL_FLUSHED;
2632#endif
2633}
2634
2635
2636/**
2637 * Inserts a page into the cache.
2638 *
2639 * This will create a user node for the page, insert it into the GCPhys
2640 * hash, and insert it into the age list.
2641 *
2642 * @returns VBox status code.
2643 * @retval VINF_SUCCESS if successfully added.
2644 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2645 * @param pPool The pool.
2646 * @param pPage The cached page.
2647 * @param GCPhys The GC physical address of the page we're gonna shadow.
2648 * @param iUser The user index.
2649 * @param iUserTable The user table index.
2650 */
2651DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2652{
2653 int rc = VINF_SUCCESS;
2654 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2655
2656 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2657
2658#ifdef VBOX_STRICT
2659 /*
2660 * Check that the entry doesn't already exist.
2661 */
2662 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2663 {
2664 uint16_t i = pPage->iUserHead;
2665 do
2666 {
2667 Assert(i < pPool->cMaxUsers);
2668 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2669 i = paUsers[i].iNext;
2670 } while (i != NIL_PGMPOOL_USER_INDEX);
2671 }
2672#endif
2673
2674 /*
2675 * Find a free user node.
2676 */
2677 uint16_t i = pPool->iUserFreeHead;
2678 if (i == NIL_PGMPOOL_USER_INDEX)
2679 {
2680 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2681 if (RT_FAILURE(rc))
2682 return rc;
2683 i = pPool->iUserFreeHead;
2684 }
2685
2686 /*
2687 * Unlink the user node from the free list,
2688 * initialize and insert it into the user list.
2689 */
2690 pPool->iUserFreeHead = paUsers[i].iNext;
2691 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2692 paUsers[i].iUser = iUser;
2693 paUsers[i].iUserTable = iUserTable;
2694 pPage->iUserHead = i;
2695
2696 /*
2697 * Insert into cache and enable monitoring of the guest page if enabled.
2698 *
2699 * Until we implement caching of all levels, including the CR3 one, we'll
2700 * have to make sure we don't try to monitor & cache any recursive reuse of
2701 * a monitored CR3 page. Because all Windows versions do this, we'll
2702 * have to be able to do combined access monitoring, CR3 + PT and
2703 * PD + PT (guest PAE).
2704 *
2705 * Update:
2706 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2707 */
2708#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2709# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2710 const bool fCanBeMonitored = true;
2711# else
2712 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2713 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2714 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2715# endif
2716# ifdef PGMPOOL_WITH_CACHE
2717 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2718# endif
2719 if (fCanBeMonitored)
2720 {
2721# ifdef PGMPOOL_WITH_MONITORING
2722 rc = pgmPoolMonitorInsert(pPool, pPage);
2723 AssertRC(rc);
2724 }
2725# endif
2726#endif /* PGMPOOL_WITH_MONITORING */
2727 return rc;
2728}
2729
2730
2731# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2732/**
2733 * Adds a user reference to a page.
2734 *
2735 * This will move the page to the head of the age list.
2736 *
2737 * @returns VBox status code.
2738 * @retval VINF_SUCCESS if successfully added.
2739 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2740 * @param pPool The pool.
2741 * @param pPage The cached page.
2742 * @param iUser The user index.
2743 * @param iUserTable The user table.
2744 */
2745static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2746{
2747 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2748
2749 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2750
2751# ifdef VBOX_STRICT
2752 /*
2753 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2754 */
2755 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2756 {
2757 uint16_t i = pPage->iUserHead;
2758 do
2759 {
2760 Assert(i < pPool->cMaxUsers);
2761 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2762 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2763 i = paUsers[i].iNext;
2764 } while (i != NIL_PGMPOOL_USER_INDEX);
2765 }
2766# endif
2767
2768 /*
2769 * Allocate a user node.
2770 */
2771 uint16_t i = pPool->iUserFreeHead;
2772 if (i == NIL_PGMPOOL_USER_INDEX)
2773 {
2774 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2775 if (RT_FAILURE(rc))
2776 return rc;
2777 i = pPool->iUserFreeHead;
2778 }
2779 pPool->iUserFreeHead = paUsers[i].iNext;
2780
2781 /*
2782 * Initialize the user node and insert it.
2783 */
2784 paUsers[i].iNext = pPage->iUserHead;
2785 paUsers[i].iUser = iUser;
2786 paUsers[i].iUserTable = iUserTable;
2787 pPage->iUserHead = i;
2788
2789# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2790 if (pPage->fDirty)
2791 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2792# endif
2793
2794# ifdef PGMPOOL_WITH_CACHE
2795 /*
2796 * Tell the cache to update its replacement stats for this page.
2797 */
2798 pgmPoolCacheUsed(pPool, pPage);
2799# endif
2800 return VINF_SUCCESS;
2801}
2802# endif /* PGMPOOL_WITH_CACHE */
2803
2804
2805/**
2806 * Frees a user record associated with a page.
2807 *
2808 * This does not clear the entry in the user table, it simply returns the
2809 * user record to the chain of free records.
2810 *
2811 * @param pPool The pool.
2812 * @param pPage The shadow page.
2813 * @param iUser The shadow page pool index of the user table.
2814 * @param iUserTable The index into the user table (shadowed).
2815 */
2816static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2817{
2818 /*
2819 * Unlink and free the specified user entry.
2820 */
2821 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2822
2823 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2824 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2825 uint16_t i = pPage->iUserHead;
2826 if ( i != NIL_PGMPOOL_USER_INDEX
2827 && paUsers[i].iUser == iUser
2828 && paUsers[i].iUserTable == iUserTable)
2829 {
2830 pPage->iUserHead = paUsers[i].iNext;
2831
2832 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2833 paUsers[i].iNext = pPool->iUserFreeHead;
2834 pPool->iUserFreeHead = i;
2835 return;
2836 }
2837
2838 /* General: Linear search. */
2839 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2840 while (i != NIL_PGMPOOL_USER_INDEX)
2841 {
2842 if ( paUsers[i].iUser == iUser
2843 && paUsers[i].iUserTable == iUserTable)
2844 {
2845 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2846 paUsers[iPrev].iNext = paUsers[i].iNext;
2847 else
2848 pPage->iUserHead = paUsers[i].iNext;
2849
2850 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2851 paUsers[i].iNext = pPool->iUserFreeHead;
2852 pPool->iUserFreeHead = i;
2853 return;
2854 }
2855 iPrev = i;
2856 i = paUsers[i].iNext;
2857 }
2858
2859 /* Fatal: didn't find it */
2860 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2861 iUser, iUserTable, pPage->GCPhys));
2862}
2863
2864
2865/**
2866 * Gets the entry size of a shadow table.
2867 *
2868 * @param enmKind The kind of page.
2869 *
2870 * @returns The size of the entry in bytes. That is, 4 or 8.
2871 * @returns If the kind is not for a table, an assertion is raised and 0 is
2872 * returned.
2873 */
2874DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2875{
2876 switch (enmKind)
2877 {
2878 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2879 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2880 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2881 case PGMPOOLKIND_32BIT_PD:
2882 case PGMPOOLKIND_32BIT_PD_PHYS:
2883 return 4;
2884
2885 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2886 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2887 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2888 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2889 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2890 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2891 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2892 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2893 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2894 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2895 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2896 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2897 case PGMPOOLKIND_64BIT_PML4:
2898 case PGMPOOLKIND_PAE_PDPT:
2899 case PGMPOOLKIND_ROOT_NESTED:
2900 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2901 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2902 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2903 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2904 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2905 case PGMPOOLKIND_PAE_PD_PHYS:
2906 case PGMPOOLKIND_PAE_PDPT_PHYS:
2907 return 8;
2908
2909 default:
2910 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2911 }
2912}
2913
2914
2915/**
2916 * Gets the entry size of a guest table.
2917 *
2918 * @param enmKind The kind of page.
2919 *
2920 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2921 * @returns If the kind is not for a table, an assertion is raised and 0 is
2922 * returned.
2923 */
2924DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2925{
2926 switch (enmKind)
2927 {
2928 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2929 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2930 case PGMPOOLKIND_32BIT_PD:
2931 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2932 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2933 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2934 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2935 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2936 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2937 return 4;
2938
2939 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2940 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2941 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2942 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2943 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2944 case PGMPOOLKIND_64BIT_PML4:
2945 case PGMPOOLKIND_PAE_PDPT:
2946 return 8;
2947
2948 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2949 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2950 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2951 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2952 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2953 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2954 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2955 case PGMPOOLKIND_ROOT_NESTED:
2956 case PGMPOOLKIND_PAE_PD_PHYS:
2957 case PGMPOOLKIND_PAE_PDPT_PHYS:
2958 case PGMPOOLKIND_32BIT_PD_PHYS:
2959 /** @todo can we return 0? (nobody is calling this...) */
2960 AssertFailed();
2961 return 0;
2962
2963 default:
2964 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2965 }
2966}
2967
2968#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2969
2970/**
2971 * Scans one shadow page table for mappings of a physical page.
2972 *
2973 * @returns true if any relevant PTEs were kept (only updated), false if all of them were removed.
2974 * @param pVM The VM handle.
2975 * @param pPhysPage The guest page in question.
2976 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change).
2977 * @param iShw The shadow page table.
2978 * @param cRefs The number of references made in that PT.
2980 */
2981static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
2982{
2983 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
2984 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2985 bool bRet = false;
2986
2987 /*
2988 * Assert sanity.
2989 */
2990 Assert(cRefs == 1);
2991 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2992 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2993
2994 /*
2995 * Then, clear the actual mappings to the page in the shadow PT.
2996 */
2997 switch (pPage->enmKind)
2998 {
2999 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3000 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3001 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3002 {
3003 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3004 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3005 uint32_t u32AndMask, u32OrMask;
3006
3007 u32AndMask = 0;
3008 u32OrMask = 0;
3009
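            /* When the caller allows PTEs to be kept (fFlushPTEs == false), the masks below
             * either restore write access (no or disabled handler) or strip it (write handler)
             * while leaving the mapping present; bRet then signals that PTEs were kept. */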
3010 if (!fFlushPTEs)
3011 {
3012 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3013 {
3014 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3015 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3016 u32OrMask = X86_PTE_RW;
3017 u32AndMask = UINT32_MAX;
3018 bRet = true;
3019 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3020 break;
3021
3022 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3023 u32OrMask = 0;
3024 u32AndMask = ~X86_PTE_RW;
3025 bRet = true;
3026 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3027 break;
3028 default:
3029 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3030 break;
3031 }
3032 }
3033 else
3034 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3035
3036 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3037 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3038 {
3039 X86PTE Pte;
3040
3041 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
3042 Pte.u = (pPT->a[i].u & u32AndMask) | u32OrMask;
3043 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3044 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3045
3046 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3047 cRefs--;
3048 if (!cRefs)
3049 return bRet;
3050 }
3051#ifdef LOG_ENABLED
3052 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3053 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3054 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3055 {
3056 Log(("i=%d cRefs=%d\n", i, cRefs--));
3057 }
3058#endif
3059 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3060 break;
3061 }
3062
3063 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3064 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3065 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3066 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3067 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3068 {
3069 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3070 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3071 uint64_t u64AndMask, u64OrMask;
3072
3073 u64OrMask = 0;
3074 u64AndMask = 0;
3075 if (!fFlushPTEs)
3076 {
3077 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3078 {
3079 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3080 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3081 u64OrMask = X86_PTE_RW;
3082 u64AndMask = UINT64_MAX;
3083 bRet = true;
3084 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3085 break;
3086
3087 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3088 u64OrMask = 0;
3089 u64AndMask = ~((uint64_t)X86_PTE_RW);
3090 bRet = true;
3091 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3092 break;
3093
3094 default:
3095 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3096 break;
3097 }
3098 }
3099 else
3100 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3101
3102 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3103 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3104 {
3105 X86PTEPAE Pte;
3106
3107 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3108 Pte.u = (pPT->a[i].u & u64AndMask) | u64OrMask;
3109 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3110 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3111
3112 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3113 cRefs--;
3114 if (!cRefs)
3115 return bRet;
3116 }
3117#ifdef LOG_ENABLED
3118 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3119 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3120 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3121 {
3122 Log(("i=%d cRefs=%d\n", i, cRefs--));
3123 }
3124#endif
3125 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
3126 break;
3127 }
3128
3129 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3130 {
3131 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3132 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3133 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3134 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3135 {
3136 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3137 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3138 pPT->a[i].u = 0;
3139 cRefs--;
3140 if (!cRefs)
3141 return bRet;
3142 }
3143#ifdef LOG_ENABLED
3144 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3145 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3146 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3147 {
3148 Log(("i=%d cRefs=%d\n", i, cRefs--));
3149 }
3150#endif
3151 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3152 break;
3153 }
3154
3155 default:
3156 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3157 }
3158 return bRet;
3159}
3160
3161
3162/**
3163 * Scans one shadow page table for mappings of a physical page.
3164 *
3165 * @param pVM The VM handle.
3166 * @param pPhysPage The guest page in question.
3167 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3168 * @param iShw The shadow page table.
3169 * @param cRefs The number of references made in that PT.
3170 */
3171static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3172{
3173 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3174
3175 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3176 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3177 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, cRefs);
3178 if (!fKeptPTEs)
3179 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3180 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3181}
3182
3183
3184/**
3185 * Flushes a list of shadow page tables mapping the same physical page.
3186 *
3187 * @param pVM The VM handle.
3188 * @param pPhysPage The guest page in question.
3189 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3190 * @param iPhysExt The physical cross reference extent list to flush.
3191 */
3192static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3193{
3194 Assert(PGMIsLockOwner(pVM));
3195 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3196 bool fKeepList = false;
3197
3198 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3199    Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3200
3201 const uint16_t iPhysExtStart = iPhysExt;
3202 PPGMPOOLPHYSEXT pPhysExt;
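    /* Walk the chain of physical cross-reference extents; each extent holds a small array
     * of shadow page table indices (aidx) referencing this guest page. */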
3203 do
3204 {
3205 Assert(iPhysExt < pPool->cMaxPhysExts);
3206 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3207 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3208 {
3209 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3210 {
3211 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], 1);
3212 if (!fKeptPTEs)
3213 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3214 else
3215 fKeepList = true;
3216 }
3217 }
3218 /* next */
3219 iPhysExt = pPhysExt->iNext;
3220 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3221
3222 if (!fKeepList)
3223 {
3224 /* insert the list into the free list and clear the ram range entry. */
3225 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3226 pPool->iPhysExtFreeHead = iPhysExtStart;
3227 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3228 }
3229
3230 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3231}
3232
3233#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3234
3235/**
3236 * Flushes all shadow page table mappings of the given guest page.
3237 *
3238 * This is typically called when the host page backing the guest one has been
3239 * replaced or when the page protection was changed due to an access handler.
3240 *
3241 * @returns VBox status code.
3242 * @retval VINF_SUCCESS if all references have been successfully cleared.
3243 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3244 * pool cleaning. FF and sync flags are set.
3245 *
3246 * @param pVM The VM handle.
3247 * @param pPhysPage The guest page in question.
3248 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3249 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3250 * flushed, it is NOT touched if this isn't necessary.
3251 * The caller MUST initialize this to @a false.
3252 */
3253int pgmPoolTrackUpdateGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3254{
3255 PVMCPU pVCpu = VMMGetCpu(pVM);
3256 pgmLock(pVM);
3257 int rc = VINF_SUCCESS;
3258#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3259 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3260 if (u16)
3261 {
3262 /*
3263 * The zero page is currently screwing up the tracking and we'll
3264 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3265 * is defined, zero pages won't normally be mapped. Some kind of solution
3266 * will be needed for this problem of course, but it will have to wait...
3267 */
3268 if (PGM_PAGE_IS_ZERO(pPhysPage))
3269 rc = VINF_PGM_GCPHYS_ALIASED;
3270 else
3271 {
3272# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3273 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3274 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3275 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3276# endif
3277
3278 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3279 pgmPoolTrackFlushGCPhysPT(pVM,
3280 pPhysPage,
3281 fFlushPTEs,
3282 PGMPOOL_TD_GET_IDX(u16),
3283 PGMPOOL_TD_GET_CREFS(u16));
3284 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3285 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3286 else
3287 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3288 *pfFlushTLBs = true;
3289
3290# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3291 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3292# endif
3293 }
3294 }
3295
3296#elif defined(PGMPOOL_WITH_CACHE)
3297 if (PGM_PAGE_IS_ZERO(pPhysPage))
3298 rc = VINF_PGM_GCPHYS_ALIASED;
3299 else
3300 {
3301# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3302 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow kills the pool otherwise. */
3303 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3304# endif
3305 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3306 if (rc == VINF_SUCCESS)
3307 *pfFlushTLBs = true;
3308 # ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3309 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3310 # endif
3311 }
3312 
3313
3314#else
3315 rc = VINF_PGM_GCPHYS_ALIASED;
3316#endif
3317
3318 if (rc == VINF_PGM_GCPHYS_ALIASED)
3319 {
3320 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3321 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3322 rc = VINF_PGM_SYNC_CR3;
3323 }
3324 pgmUnlock(pVM);
3325 return rc;
3326}
3327
3328
3329/**
3330 * Scans all shadow page tables for mappings of a physical page.
3331 *
3332 * This may be slow, but it's most likely more efficient than cleaning
3333 * out the entire page pool / cache.
3334 *
3335 * @returns VBox status code.
3336 * @retval VINF_SUCCESS if all references have been successfully cleared.
3337 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3338 * a page pool cleaning.
3339 *
3340 * @param pVM The VM handle.
3341 * @param pPhysPage The guest page in question.
3342 */
3343int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3344{
3345 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3346 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3347 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3348 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3349
3350#if 1
3351 /*
3352 * There is a limit to what makes sense.
3353 */
3354 if (pPool->cPresent > 1024)
3355 {
3356 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3357 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3358 return VINF_PGM_GCPHYS_ALIASED;
3359 }
3360#endif
3361
3362 /*
3363 * Iterate all the pages until we've encountered all that are in use.
3364 * This is a simple but not quite optimal solution.
3365 */
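    /* The shadow PTE value to look for: the host physical address of the page plus the present bit,
       in 64-bit (PAE) and 32-bit (legacy) forms. */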
3366 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3367 const uint32_t u32 = u64;
3368 unsigned cLeft = pPool->cUsedPages;
3369 unsigned iPage = pPool->cCurPages;
3370 while (--iPage >= PGMPOOL_IDX_FIRST)
3371 {
3372 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3373 if (pPage->GCPhys != NIL_RTGCPHYS)
3374 {
3375 switch (pPage->enmKind)
3376 {
3377 /*
3378 * We only care about shadow page tables.
3379 */
3380 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3381 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3382 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3383 {
3384 unsigned cPresent = pPage->cPresent;
3385 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3386 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3387 if (pPT->a[i].n.u1Present)
3388 {
3389 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3390 {
3391 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3392 pPT->a[i].u = 0;
3393 }
3394 if (!--cPresent)
3395 break;
3396 }
3397 break;
3398 }
3399
3400 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3401 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3402 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3403 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3404 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3405 {
3406 unsigned cPresent = pPage->cPresent;
3407 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3408 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3409 if (pPT->a[i].n.u1Present)
3410 {
3411 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3412 {
3413 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3414 pPT->a[i].u = 0;
3415 }
3416 if (!--cPresent)
3417 break;
3418 }
3419 break;
3420 }
3421 }
3422 if (!--cLeft)
3423 break;
3424 }
3425 }
3426
3427 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3428 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3429 return VINF_SUCCESS;
3430}
3431
3432
3433/**
3434 * Clears the user entry in a user table.
3435 *
3436 * This is used to remove all references to a page when flushing it.
3437 */
3438static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3439{
3440 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3441 Assert(pUser->iUser < pPool->cCurPages);
3442 uint32_t iUserTable = pUser->iUserTable;
3443
3444 /*
3445 * Map the user page.
3446 */
3447 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3448 union
3449 {
3450 uint64_t *pau64;
3451 uint32_t *pau32;
3452 } u;
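    /* The union gives both 32-bit and 64-bit views of the parent (user) page so the correct entry size can be cleared below. */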
3453 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3454
3455 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3456
3457 /* Safety precaution in case we change the paging for other modes too in the future. */
3458 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3459
3460#ifdef VBOX_STRICT
3461 /*
3462 * Some sanity checks.
3463 */
3464 switch (pUserPage->enmKind)
3465 {
3466 case PGMPOOLKIND_32BIT_PD:
3467 case PGMPOOLKIND_32BIT_PD_PHYS:
3468 Assert(iUserTable < X86_PG_ENTRIES);
3469 break;
3470 case PGMPOOLKIND_PAE_PDPT:
3471 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3472 case PGMPOOLKIND_PAE_PDPT_PHYS:
3473 Assert(iUserTable < 4);
3474 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3475 break;
3476 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3477 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3478 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3479 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3480 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3481 case PGMPOOLKIND_PAE_PD_PHYS:
3482 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3483 break;
3484 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3485 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3486 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3487 break;
3488 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3489 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3490 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3491 break;
3492 case PGMPOOLKIND_64BIT_PML4:
3493 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3494 /* GCPhys >> PAGE_SHIFT is the index here */
3495 break;
3496 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3497 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3498 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3499 break;
3500
3501 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3502 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3503 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3504 break;
3505
3506 case PGMPOOLKIND_ROOT_NESTED:
3507 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3508 break;
3509
3510 default:
3511 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3512 break;
3513 }
3514#endif /* VBOX_STRICT */
3515
3516 /*
3517 * Clear the entry in the user page.
3518 */
3519 switch (pUserPage->enmKind)
3520 {
3521 /* 32-bit entries */
3522 case PGMPOOLKIND_32BIT_PD:
3523 case PGMPOOLKIND_32BIT_PD_PHYS:
3524 u.pau32[iUserTable] = 0;
3525 break;
3526
3527 /* 64-bit entries */
3528 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3529 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3530 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3531 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3532 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3533#if defined(IN_RC)
3534 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during CR3 load, so any
3535 * non-present PDPT entry will continue to cause page faults.
3536 */
3537 ASMReloadCR3();
3538#endif
3539 /* no break */
3540 case PGMPOOLKIND_PAE_PD_PHYS:
3541 case PGMPOOLKIND_PAE_PDPT_PHYS:
3542 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3543 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3544 case PGMPOOLKIND_64BIT_PML4:
3545 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3546 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3547 case PGMPOOLKIND_PAE_PDPT:
3548 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3549 case PGMPOOLKIND_ROOT_NESTED:
3550 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3551 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3552 u.pau64[iUserTable] = 0;
3553 break;
3554
3555 default:
3556 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3557 }
3558}
3559
3560
3561/**
3562 * Clears all users of a page.
3563 */
3564static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3565{
3566 /*
3567 * Free all the user records.
3568 */
3569 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3570
3571 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3572 uint16_t i = pPage->iUserHead;
3573 while (i != NIL_PGMPOOL_USER_INDEX)
3574 {
3575 /* Clear entry in the user table. */
3576 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3577
3578 /* Free it. */
3579 const uint16_t iNext = paUsers[i].iNext;
3580 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3581 paUsers[i].iNext = pPool->iUserFreeHead;
3582 pPool->iUserFreeHead = i;
3583
3584 /* Next. */
3585 i = iNext;
3586 }
3587 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3588}
3589
3590#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3591
3592/**
3593 * Allocates a new physical cross reference extent.
3594 *
3595 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3596 * @param pVM The VM handle.
3597 * @param piPhysExt Where to store the phys ext index.
3598 */
3599PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3600{
3601 Assert(PGMIsLockOwner(pVM));
3602 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3603 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3604 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3605 {
3606 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3607 return NULL;
3608 }
3609 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3610 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3611 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3612 *piPhysExt = iPhysExt;
3613 return pPhysExt;
3614}
3615
3616
3617/**
3618 * Frees a physical cross reference extent.
3619 *
3620 * @param pVM The VM handle.
3621 * @param iPhysExt The extent to free.
3622 */
3623void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3624{
3625 Assert(PGMIsLockOwner(pVM));
3626 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3627 Assert(iPhysExt < pPool->cMaxPhysExts);
3628 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
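    /* Reset every slot in the extent and push it onto the free list. */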
3629 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3630 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3631 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3632 pPool->iPhysExtFreeHead = iPhysExt;
3633}
3634
3635
3636/**
3637 * Frees a list of physical cross reference extents.
3638 *
3639 * @param pVM The VM handle.
3640 * @param iPhysExt The index of the head of the extent list to free.
3641 */
3642void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3643{
3644 Assert(PGMIsLockOwner(pVM));
3645 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3646
3647 const uint16_t iPhysExtStart = iPhysExt;
3648 PPGMPOOLPHYSEXT pPhysExt;
3649 do
3650 {
3651 Assert(iPhysExt < pPool->cMaxPhysExts);
3652 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3653 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3654 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3655
3656 /* next */
3657 iPhysExt = pPhysExt->iNext;
3658 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3659
3660 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3661 pPool->iPhysExtFreeHead = iPhysExtStart;
3662}
3663
3664
3665/**
3666 * Insert a reference into a list of physical cross reference extents.
3667 *
3668 * @returns The new tracking data for PGMPAGE.
3669 *
3670 * @param pVM The VM handle.
3671 * @param iPhysExt The physical extent index of the list head.
3672 * @param iShwPT The shadow page table index.
3673 *
3674 */
3675static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3676{
3677 Assert(PGMIsLockOwner(pVM));
3678 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3679 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3680
3681 /* special common case. */
3682 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3683 {
3684 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3685 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3686 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3687 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3688 }
3689
3690 /* general treatment. */
3691 const uint16_t iPhysExtStart = iPhysExt;
3692 unsigned cMax = 15;
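    /* cMax bounds the search; if no free slot is found the whole extent list is discarded
       and the page is marked as overflowed (too many references to track individually). */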
3693 for (;;)
3694 {
3695 Assert(iPhysExt < pPool->cMaxPhysExts);
3696 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3697 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3698 {
3699 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3700 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3701 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3702 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3703 }
3704 if (!--cMax)
3705 {
3706 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3707 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3708 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3709 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3710 }
3711 }
3712
3713 /* add another extent to the list. */
3714 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3715 if (!pNew)
3716 {
3717 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3718 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3719 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3720 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3721 }
3722 pNew->iNext = iPhysExtStart;
3723 pNew->aidx[0] = iShwPT;
3724 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3725 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3726}
3727
3728
3729/**
3730 * Adds a reference to a guest physical page where extents are in use.
3731 *
3732 * @returns The new tracking data for PGMPAGE.
3733 *
3734 * @param pVM The VM handle.
3735 * @param u16 The ram range flags (top 16-bits).
3736 * @param iShwPT The shadow page table index.
3737 */
3738uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3739{
3740 pgmLock(pVM);
3741 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3742 {
3743 /*
3744 * Convert to extent list.
3745 */
3746 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3747 uint16_t iPhysExt;
3748 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3749 if (pPhysExt)
3750 {
3751 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3752 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3753 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3754 pPhysExt->aidx[1] = iShwPT;
3755 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3756 }
3757 else
3758 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3759 }
3760 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3761 {
3762 /*
3763 * Insert into the extent list.
3764 */
3765 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3766 }
3767 else
3768 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3769 pgmUnlock(pVM);
3770 return u16;
3771}
3772
3773
3774/**
3775 * Clear references to guest physical memory.
3776 *
3777 * @param pPool The pool.
3778 * @param pPage The page.
3779 * @param pPhysPage Pointer to the aPages entry in the ram range.
3780 */
3781void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3782{
3783 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3784 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3785
3786 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3787 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3788 {
3789 PVM pVM = pPool->CTX_SUFF(pVM);
3790 pgmLock(pVM);
3791
3792 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3793 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3794 do
3795 {
3796 Assert(iPhysExt < pPool->cMaxPhysExts);
3797
3798 /*
3799 * Look for the shadow page and check if it's all freed.
3800 */
3801 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3802 {
3803 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3804 {
3805 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3806
3807 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3808 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3809 {
3810 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3811 pgmUnlock(pVM);
3812 return;
3813 }
3814
3815 /* we can free the node. */
3816 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3817 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3818 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3819 {
3820 /* lonely node */
3821 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3822 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3823 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3824 }
3825 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3826 {
3827 /* head */
3828 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3829 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3830 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3831 }
3832 else
3833 {
3834 /* in list */
3835 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3836 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3837 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3838 }
3839 iPhysExt = iPhysExtNext;
3840 pgmUnlock(pVM);
3841 return;
3842 }
3843 }
3844
3845 /* next */
3846 iPhysExtPrev = iPhysExt;
3847 iPhysExt = paPhysExts[iPhysExt].iNext;
3848 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3849
3850 pgmUnlock(pVM);
3851 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3852 }
3853 else /* nothing to do */
3854 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3855}
3856
3857
3858/**
3859 * Clear references to guest physical memory.
3860 *
3861 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3862 * is assumed to be correct, so the linear search can be skipped and we can assert
3863 * at an earlier point.
3864 *
3865 * @param pPool The pool.
3866 * @param pPage The page.
3867 * @param HCPhys The host physical address corresponding to the guest page.
3868 * @param GCPhys The guest physical address corresponding to HCPhys.
3869 */
3870static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3871{
3872 /*
3873 * Walk range list.
3874 */
3875 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3876 while (pRam)
3877 {
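        /* Unsigned arithmetic: if GCPhys is below the range start, the subtraction wraps and the range check fails,
           so only a containing range matches. */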
3878 RTGCPHYS off = GCPhys - pRam->GCPhys;
3879 if (off < pRam->cb)
3880 {
3881 /* does it match? */
3882 const unsigned iPage = off >> PAGE_SHIFT;
3883 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3884#ifdef LOG_ENABLED
3885RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3886Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3887#endif
3888 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3889 {
3890 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3891 return;
3892 }
3893 break;
3894 }
3895 pRam = pRam->CTX_SUFF(pNext);
3896 }
3897 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3898}
3899
3900
3901/**
3902 * Clear references to guest physical memory.
3903 *
3904 * @param pPool The pool.
3905 * @param pPage The page.
3906 * @param HCPhys The host physical address corresponding to the guest page.
3907 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3908 */
3909void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3910{
3911 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3912
3913 /*
3914 * Walk range list.
3915 */
3916 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3917 while (pRam)
3918 {
3919 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3920 if (off < pRam->cb)
3921 {
3922 /* does it match? */
3923 const unsigned iPage = off >> PAGE_SHIFT;
3924 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3925 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3926 {
3927 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3928 return;
3929 }
3930 break;
3931 }
3932 pRam = pRam->CTX_SUFF(pNext);
3933 }
3934
3935 /*
3936 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3937 */
3938 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3939 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3940 while (pRam)
3941 {
3942 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3943 while (iPage-- > 0)
3944 {
3945 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3946 {
3947 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3948 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3949 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3950 return;
3951 }
3952 }
3953 pRam = pRam->CTX_SUFF(pNext);
3954 }
3955
3956 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3957}
3958
3959
3960/**
3961 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3962 *
3963 * @param pPool The pool.
3964 * @param pPage The page.
3965 * @param pShwPT The shadow page table (mapping of the page).
3966 * @param pGstPT The guest page table.
3967 */
3968DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3969{
3970 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3971 if (pShwPT->a[i].n.u1Present)
3972 {
3973 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3974 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3975 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3976 if (!--pPage->cPresent)
3977 break;
3978 }
3979}
3980
3981
3982/**
3983 * Clear references to guest physical memory in a PAE / 32-bit page table.
3984 *
3985 * @param pPool The pool.
3986 * @param pPage The page.
3987 * @param pShwPT The shadow page table (mapping of the page).
3988 * @param pGstPT The guest page table (just a half one).
3989 */
3990DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3991{
3992 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3993 if (pShwPT->a[i].n.u1Present)
3994 {
3995 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3996 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3997 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3998 if (!--pPage->cPresent)
3999 break;
4000 }
4001}
4002
4003
4004/**
4005 * Clear references to guest physical memory in a PAE / PAE page table.
4006 *
4007 * @param pPool The pool.
4008 * @param pPage The page.
4009 * @param pShwPT The shadow page table (mapping of the page).
4010 * @param pGstPT The guest page table.
4011 */
4012DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
4013{
4014 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4015 if (pShwPT->a[i].n.u1Present)
4016 {
4017 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4018 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4019 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
4020 if (!--pPage->cPresent)
4021 break;
4022 }
4023}
4024
4025
4026/**
4027 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4028 *
4029 * @param pPool The pool.
4030 * @param pPage The page.
4031 * @param pShwPT The shadow page table (mapping of the page).
4032 */
4033DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4034{
4035 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4036 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4037 if (pShwPT->a[i].n.u1Present)
4038 {
4039 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4040 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
4041 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
4042 if (!--pPage->cPresent)
4043 break;
4044 }
4045}
4046
4047
4048/**
4049 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4050 *
4051 * @param pPool The pool.
4052 * @param pPage The page.
4053 * @param pShwPT The shadow page table (mapping of the page).
4054 */
4055DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
4056{
4057 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4058 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4059 if (pShwPT->a[i].n.u1Present)
4060 {
4061 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4062 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
4063 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
4064 if (!--pPage->cPresent)
4065 break;
4066 }
4067}
4068
4069
4070/**
4071 * Clear references to guest physical memory in an EPT page table.
4072 *
4073 * @param pPool The pool.
4074 * @param pPage The page.
4075 * @param pShwPT The shadow page table (mapping of the page).
4076 */
4077DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4078{
4079 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4080 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4081 if (pShwPT->a[i].n.u1Present)
4082 {
4083 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4084 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4085 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4086 if (!--pPage->cPresent)
4087 break;
4088 }
4089}
4090
4091#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
4092
4093
4094/**
4095 * Clear references to shadowed pages in a 32-bit page directory.
4096 *
4097 * @param pPool The pool.
4098 * @param pPage The page.
4099 * @param pShwPD The shadow page directory (mapping of the page).
4100 */
4101DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4102{
4103 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4104 {
4105 if ( pShwPD->a[i].n.u1Present
4106 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4107 )
4108 {
4109 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4110 if (pSubPage)
4111 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4112 else
4113 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4114 }
4115 }
4116}
4117
4118/**
4119 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
4120 *
4121 * @param pPool The pool.
4122 * @param pPage The page.
4123 * @param pShwPD The shadow page directory (mapping of the page).
4124 */
4125DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4126{
4127 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4128 {
4129 if ( pShwPD->a[i].n.u1Present
4130 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4131 )
4132 {
4133 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4134 if (pSubPage)
4135 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4136 else
4137 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4138 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4139 }
4140 }
4141}
4142
4143/**
4144 * Clear references to shadowed pages in a PAE page directory pointer table.
4145 *
4146 * @param pPool The pool.
4147 * @param pPage The page.
4148 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4149 */
4150DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4151{
4152 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4153 {
4154 if ( pShwPDPT->a[i].n.u1Present
4155 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4156 )
4157 {
4158 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4159 if (pSubPage)
4160 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4161 else
4162 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4163 }
4164 }
4165}
4166
4167
4168/**
4169 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4170 *
4171 * @param pPool The pool.
4172 * @param pPage The page.
4173 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4174 */
4175DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4176{
4177 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4178 {
4179 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4180 if (pShwPDPT->a[i].n.u1Present)
4181 {
4182 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4183 if (pSubPage)
4184 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4185 else
4186 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4187 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4188 }
4189 }
4190}
4191
4192
4193/**
4194 * Clear references to shadowed pages in a 64-bit level 4 page table.
4195 *
4196 * @param pPool The pool.
4197 * @param pPage The page.
4198 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4199 */
4200DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4201{
4202 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4203 {
4204 if (pShwPML4->a[i].n.u1Present)
4205 {
4206 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4207 if (pSubPage)
4208 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4209 else
4210 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4211 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4212 }
4213 }
4214}
4215
4216
4217/**
4218 * Clear references to shadowed pages in an EPT page directory.
4219 *
4220 * @param pPool The pool.
4221 * @param pPage The page.
4222 * @param pShwPD The shadow page directory (mapping of the page).
4223 */
4224DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4225{
4226 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4227 {
4228 if (pShwPD->a[i].n.u1Present)
4229 {
4230 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4231 if (pSubPage)
4232 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4233 else
4234 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4235 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4236 }
4237 }
4238}
4239
4240
4241/**
4242 * Clear references to shadowed pages in an EPT page directory pointer table.
4243 *
4244 * @param pPool The pool.
4245 * @param pPage The page.
4246 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4247 */
4248DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4249{
4250 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4251 {
4252 if (pShwPDPT->a[i].n.u1Present)
4253 {
4254 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4255 if (pSubPage)
4256 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4257 else
4258 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4259 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4260 }
4261 }
4262}
4263
4264
4265/**
4266 * Clears all references made by this page.
4267 *
4268 * This includes other shadow pages and GC physical addresses.
4269 *
4270 * @param pPool The pool.
4271 * @param pPage The page.
4272 */
4273static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4274{
4275 /*
4276 * Map the shadow page and take action according to the page kind.
4277 */
4278 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4279 switch (pPage->enmKind)
4280 {
4281#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
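    /* Shadow page tables: the guest PT (or, for big pages, the covered GCPhys range) is needed to know which guest pages to dereference. */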
4282 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4283 {
4284 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4285 void *pvGst;
4286 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4287 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4288 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4289 break;
4290 }
4291
4292 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4293 {
4294 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4295 void *pvGst;
4296 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4297 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4298 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4299 break;
4300 }
4301
4302 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4303 {
4304 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4305 void *pvGst;
4306 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4307 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4308 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4309 break;
4310 }
4311
4312 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4313 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4314 {
4315 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4316 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4317 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4318 break;
4319 }
4320
4321 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4322 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4323 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4324 {
4325 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4326 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4327 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4328 break;
4329 }
4330
4331#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4332 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4333 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4334 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4335 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4336 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4337 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4338 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4339 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4340 break;
4341#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
4342
4343 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4344 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4345 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4346 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4347 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4348 case PGMPOOLKIND_PAE_PD_PHYS:
4349 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4350 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4351 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4352 break;
4353
4354 case PGMPOOLKIND_32BIT_PD_PHYS:
4355 case PGMPOOLKIND_32BIT_PD:
4356 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4357 break;
4358
4359 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4360 case PGMPOOLKIND_PAE_PDPT:
4361 case PGMPOOLKIND_PAE_PDPT_PHYS:
4362 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4363 break;
4364
4365 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4366 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4367 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4368 break;
4369
4370 case PGMPOOLKIND_64BIT_PML4:
4371 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4372 break;
4373
4374 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4375 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4376 break;
4377
4378 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4379 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4380 break;
4381
4382 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4383 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4384 break;
4385
4386 default:
4387 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4388 }
4389
4390 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4391 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4392 ASMMemZeroPage(pvShw);
4393 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4394 pPage->fZeroed = true;
4395 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4396}
4397#endif /* PGMPOOL_WITH_USER_TRACKING */
4398
4399/**
4400 * Flushes a pool page.
4401 *
4402 * This moves the page to the free list after removing all user references to it.
4403 *
4404 * @returns VBox status code.
4405 * @retval VINF_SUCCESS on success.
4406 * @param pPool The pool.
4407 * @param pPage The shadow page to flush.
4408 */
4409int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4410{
4411 PVM pVM = pPool->CTX_SUFF(pVM);
4412
4413 int rc = VINF_SUCCESS;
4414 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4415 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4416 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4417
4418 /*
4419 * Quietly reject any attempts at flushing any of the special root pages.
4420 */
4421 if (pPage->idx < PGMPOOL_IDX_FIRST)
4422 {
4423 AssertFailed(); /* can no longer happen */
4424 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4425 return VINF_SUCCESS;
4426 }
4427
4428 pgmLock(pVM);
4429
4430 /*
4431 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4432 */
4433 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4434 {
4435 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4436 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4437 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4438 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4439 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4440 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4441 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4442 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4443 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4444 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4445 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4446 pgmUnlock(pVM);
4447 return VINF_SUCCESS;
4448 }
4449
4450#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4451 /* Start a subset so we won't run out of mapping space. */
4452 PVMCPU pVCpu = VMMGetCpu(pVM);
4453 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4454#endif
4455
4456 /*
4457 * Mark the page as being in need of an ASMMemZeroPage().
4458 */
4459 pPage->fZeroed = false;
4460
4461#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4462 if (pPage->fDirty)
4463 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4464#endif
4465
4466#ifdef PGMPOOL_WITH_USER_TRACKING
4467 /*
4468 * Clear the page.
4469 */
4470 pgmPoolTrackClearPageUsers(pPool, pPage);
4471 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4472 pgmPoolTrackDeref(pPool, pPage);
4473 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4474#endif
4475
4476#ifdef PGMPOOL_WITH_CACHE
4477 /*
4478 * Flush it from the cache.
4479 */
4480 pgmPoolCacheFlushPage(pPool, pPage);
4481#endif /* PGMPOOL_WITH_CACHE */
4482
4483#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4484 /* Heavy stuff done. */
4485 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4486#endif
4487
4488#ifdef PGMPOOL_WITH_MONITORING
4489 /*
4490 * Deregister the monitoring.
4491 */
4492 if (pPage->fMonitored)
4493 rc = pgmPoolMonitorFlush(pPool, pPage);
4494#endif
4495
4496 /*
4497 * Free the page.
4498 */
4499 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4500 pPage->iNext = pPool->iFreeHead;
4501 pPool->iFreeHead = pPage->idx;
4502 pPage->enmKind = PGMPOOLKIND_FREE;
4503 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4504 pPage->GCPhys = NIL_RTGCPHYS;
4505 pPage->fReusedFlushPending = false;
4506
4507 pPool->cUsedPages--;
4508 pgmUnlock(pVM);
4509 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4510 return rc;
4511}
4512
4513
4514/**
4515 * Frees a usage of a pool page.
4516 *
4517 * The caller is responsible for updating the user table so that it no longer
4518 * references the shadow page.
4519 *
4520 * @param pPool The pool.
4521 * @param pPage The shadow page.
4522 * @param iUser The shadow page pool index of the user table.
4523 * @param iUserTable The index into the user table (shadowed).
4524 */
4525void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4526{
4527 PVM pVM = pPool->CTX_SUFF(pVM);
4528
4529 STAM_PROFILE_START(&pPool->StatFree, a);
4530 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4531 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4532 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4533 pgmLock(pVM);
4534#ifdef PGMPOOL_WITH_USER_TRACKING
4535 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4536#endif
4537#ifdef PGMPOOL_WITH_CACHE
4538 if (!pPage->fCached)
4539#endif
4540 pgmPoolFlushPage(pPool, pPage);
4541 pgmUnlock(pVM);
4542 STAM_PROFILE_STOP(&pPool->StatFree, a);
4543}
4544
4545
4546/**
4547 * Makes one or more free pages available.
4548 *
4549 * @returns VBox status code.
4550 * @retval VINF_SUCCESS on success.
4551 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4552 *
4553 * @param pPool The pool.
4554 * @param enmKind The page table kind.
4555 * @param iUser The user of the page.
4556 */
4557static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4558{
4559 PVM pVM = pPool->CTX_SUFF(pVM);
4560
4561 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4562
4563 /*
4564 * If the pool isn't fully grown yet, expand it.
4565 */
4566 if ( pPool->cCurPages < pPool->cMaxPages
4567#if defined(IN_RC)
4568 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4569 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4570 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4571#endif
4572 )
4573 {
4574 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4575#ifdef IN_RING3
4576 int rc = PGMR3PoolGrow(pVM);
4577#else
4578 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4579#endif
4580 if (RT_FAILURE(rc))
4581 return rc;
4582 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4583 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4584 return VINF_SUCCESS;
4585 }
4586
4587#ifdef PGMPOOL_WITH_CACHE
4588 /*
4589 * Free one cached page.
4590 */
4591 return pgmPoolCacheFreeOne(pPool, iUser);
4592#else
4593 /*
4594 * Flush the pool.
4595 *
4596 * If we have tracking enabled, it should be possible to come up with
4597 * a cheap replacement strategy...
4598 */
4599 /* @todo This path no longer works (CR3 root pages will be flushed)!! */
4600 AssertCompileFailed();
4601 Assert(!CPUMIsGuestInLongMode(pVM));
4602 pgmPoolFlushAllInt(pPool);
4603 return VERR_PGM_POOL_FLUSHED;
4604#endif
4605}
4606
4607/**
4608 * Allocates a page from the pool.
4609 *
4610 * This page may actually be a cached page and not in need of any processing
4611 * on the caller's part.
4612 *
4613 * @returns VBox status code.
4614 * @retval VINF_SUCCESS if a NEW page was allocated.
4615 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4616 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4617 * @param pVM The VM handle.
4618 * @param GCPhys The GC physical address of the page we're going to shadow.
4619 * For 4MB and 2MB PD entries, it's the first address the
4620 * shadow PT is covering.
4621 * @param enmKind The kind of mapping.
4622 * @param enmAccess Access type for the mapping (only relevant for big pages)
4623 * @param iUser The shadow page pool index of the user table.
4624 * @param iUserTable The index into the user table (shadowed).
4625 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4626 * @param fLockPage Lock the page
4627 */
4628int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4629{
4630 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4631 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4632 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4633 *ppPage = NULL;
4634 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4635 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4636 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4637
4638 pgmLock(pVM);
4639
4640#ifdef PGMPOOL_WITH_CACHE
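    /* Try the cache first; a hit returns an existing shadow page for this GCPhys/kind and skips allocation entirely. */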
4641 if (pPool->fCacheEnabled)
4642 {
4643 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4644 if (RT_SUCCESS(rc2))
4645 {
4646 if (fLockPage)
4647 pgmPoolLockPage(pPool, *ppPage);
4648 pgmUnlock(pVM);
4649 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4650 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4651 return rc2;
4652 }
4653 }
4654#endif
4655
4656 /*
4657 * Allocate a new one.
4658 */
4659 int rc = VINF_SUCCESS;
4660 uint16_t iNew = pPool->iFreeHead;
4661 if (iNew == NIL_PGMPOOL_IDX)
4662 {
4663 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4664 if (RT_FAILURE(rc))
4665 {
4666 pgmUnlock(pVM);
4667 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4668 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4669 return rc;
4670 }
4671 iNew = pPool->iFreeHead;
4672 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4673 }
4674
4675 /* unlink the free head */
4676 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4677 pPool->iFreeHead = pPage->iNext;
4678 pPage->iNext = NIL_PGMPOOL_IDX;
4679
4680 /*
4681 * Initialize it.
4682 */
4683 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4684 pPage->enmKind = enmKind;
4685 pPage->enmAccess = enmAccess;
4686 pPage->GCPhys = GCPhys;
4687 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4688 pPage->fMonitored = false;
4689 pPage->fCached = false;
4690#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4691 pPage->fDirty = false;
4692#endif
4693 pPage->fReusedFlushPending = false;
4694#ifdef PGMPOOL_WITH_MONITORING
4695 pPage->cModifications = 0;
4696 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4697 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4698#else
4699 pPage->fCR3Mix = false;
4700#endif
4701#ifdef PGMPOOL_WITH_USER_TRACKING
4702 pPage->cPresent = 0;
4703 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4704 pPage->pvLastAccessHandlerFault = 0;
4705 pPage->cLastAccessHandlerCount = 0;
4706 pPage->pvLastAccessHandlerRip = 0;
4707
4708 /*
4709 * Insert into the tracking and cache. If this fails, free the page.
4710 */
4711 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4712 if (RT_FAILURE(rc3))
4713 {
4714 pPool->cUsedPages--;
4715 pPage->enmKind = PGMPOOLKIND_FREE;
4716 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4717 pPage->GCPhys = NIL_RTGCPHYS;
4718 pPage->iNext = pPool->iFreeHead;
4719 pPool->iFreeHead = pPage->idx;
4720 pgmUnlock(pVM);
4721 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4722 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4723 return rc3;
4724 }
4725#endif /* PGMPOOL_WITH_USER_TRACKING */
4726
4727 /*
4728 * Commit the allocation, clear the page and return.
4729 */
4730#ifdef VBOX_WITH_STATISTICS
4731 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4732 pPool->cUsedPagesHigh = pPool->cUsedPages;
4733#endif
4734
4735 if (!pPage->fZeroed)
4736 {
4737 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4738 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4739 ASMMemZeroPage(pv);
4740 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4741 }
4742
4743 *ppPage = pPage;
4744 if (fLockPage)
4745 pgmPoolLockPage(pPool, pPage);
4746 pgmUnlock(pVM);
4747 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4748 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4749 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4750 return rc;
4751}
4752
4753
4754/**
4755 * Frees a usage of a pool page.
4756 *
4757 * @param pVM The VM handle.
4758 * @param HCPhys The HC physical address of the shadow page.
4759 * @param iUser The shadow page pool index of the user table.
4760 * @param iUserTable The index into the user table (shadowed).
4761 */
4762void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4763{
4764 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4765 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4766 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4767}
4768
4769/**
4770 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4771 *
4772 * @returns Pointer to the shadow page structure.
4773 * @param pPool The pool.
4774 * @param HCPhys The HC physical address of the shadow page.
4775 */
4776PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4777{
4778 PVM pVM = pPool->CTX_SUFF(pVM);
4779
4780 Assert(PGMIsLockOwner(pVM));
4781
4782 /*
4783 * Look up the page.
4784 */
4785 pgmLock(pVM);
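    /* The AVL tree is keyed by the page-aligned host physical address, so strip any PTE flag bits from HCPhys before the lookup. */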
4786 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4787 pgmUnlock(pVM);
4788
4789 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4790 return pPage;
4791}
4792
4793#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4794/**
4795 * Flush the specified page if present
4796 *
4797 * @param pVM The VM handle.
4798 * @param GCPhys Guest physical address of the page to flush
4799 */
4800void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4801{
4802#ifdef PGMPOOL_WITH_CACHE
4803 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4804
4805 VM_ASSERT_EMT(pVM);
4806
4807 /*
4808 * Look up the GCPhys in the hash.
4809 */
4810 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4811 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4812 if (i == NIL_PGMPOOL_IDX)
4813 return;
4814
4815 do
4816 {
4817 PPGMPOOLPAGE pPage = &pPool->aPages[i];
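        /* Unsigned compare: matches any pool page whose recorded GCPhys falls within this (page-aligned) guest page. */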
4818 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4819 {
4820 switch (pPage->enmKind)
4821 {
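            /* These kinds shadow guest paging structures and are write-monitored; flush the whole monitored chain. */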
4822 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4823 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4824 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4825 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4826 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4827 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4828 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4829 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4830 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4831 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4832 case PGMPOOLKIND_64BIT_PML4:
4833 case PGMPOOLKIND_32BIT_PD:
4834 case PGMPOOLKIND_PAE_PDPT:
4835 {
4836 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4837#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4838 if (pPage->fDirty)
4839 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4840 else
4841#endif
4842 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4843 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4844 pgmPoolMonitorChainFlush(pPool, pPage);
4845 return;
4846 }
4847
4848 /* ignore, no monitoring. */
4849 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4850 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4851 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4852 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4853 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4854 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4855 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4856 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4857 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4858 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4859 case PGMPOOLKIND_ROOT_NESTED:
4860 case PGMPOOLKIND_PAE_PD_PHYS:
4861 case PGMPOOLKIND_PAE_PDPT_PHYS:
4862 case PGMPOOLKIND_32BIT_PD_PHYS:
4863 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4864 break;
4865
4866 default:
4867 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4868 }
4869 }
4870
4871 /* next */
4872 i = pPage->iNext;
4873 } while (i != NIL_PGMPOOL_IDX);
4874#endif
4875 return;
4876}
4877#endif /* IN_RING3 */
4878
4879#ifdef IN_RING3
4880/**
4881 * Flushes the entire cache.
4882 *
4883 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
4884 * this and will execute the CR3 flush.
4885 *
4886 * @param pVM The VM handle.
4887 */
4888void pgmR3PoolReset(PVM pVM)
4889{
4890 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4891
4892 Assert(PGMIsLockOwner(pVM));
4893 STAM_PROFILE_START(&pPool->StatR3Reset, a);
4894 LogFlow(("pgmR3PoolReset:\n"));
4895
4896 /*
4897 * If there are no pages in the pool, there is nothing to do.
4898 */
4899 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4900 {
4901 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
4902 return;
4903 }
4904
4905 /*
4906 * Exit the shadow mode since we're going to clear everything,
4907 * including the root page.
4908 */
4909 for (VMCPUID i = 0; i < pVM->cCpus; i++)
4910 {
4911 PVMCPU pVCpu = &pVM->aCpus[i];
4912 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4913 }
4914
4915 /*
4916 * Nuke the free list and reinsert all pages into it.
4917 */
4918 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4919 {
4920 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4921
4922 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4923#ifdef PGMPOOL_WITH_MONITORING
4924 if (pPage->fMonitored)
4925 pgmPoolMonitorFlush(pPool, pPage);
4926 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4927 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4928 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4929 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4930 pPage->cModifications = 0;
4931#endif
4932 pPage->GCPhys = NIL_RTGCPHYS;
4933 pPage->enmKind = PGMPOOLKIND_FREE;
4934 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4935 Assert(pPage->idx == i);
4936 pPage->iNext = i + 1;
4937 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4938 pPage->fSeenNonGlobal = false;
4939 pPage->fMonitored = false;
4940#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4941 pPage->fDirty = false;
4942#endif
4943 pPage->fCached = false;
4944 pPage->fReusedFlushPending = false;
4945#ifdef PGMPOOL_WITH_USER_TRACKING
4946 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4947#else
4948 pPage->fCR3Mix = false;
4949#endif
4950#ifdef PGMPOOL_WITH_CACHE
4951 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4952 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4953#endif
4954 pPage->cLocked = 0;
4955 }
4956 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4957 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4958 pPool->cUsedPages = 0;
4959
4960#ifdef PGMPOOL_WITH_USER_TRACKING
4961 /*
4962 * Zap and reinitialize the user records.
4963 */
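    /* Chain every user record into the free list and give it an obviously invalid
       user index / table index (0xfffffffe) so stale references are easy to spot. */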
4964 pPool->cPresent = 0;
4965 pPool->iUserFreeHead = 0;
4966 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4967 const unsigned cMaxUsers = pPool->cMaxUsers;
4968 for (unsigned i = 0; i < cMaxUsers; i++)
4969 {
4970 paUsers[i].iNext = i + 1;
4971 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4972 paUsers[i].iUserTable = 0xfffffffe;
4973 }
4974 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4975#endif
4976
4977#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
4978 /*
4979 * Clear all the GCPhys links and rebuild the phys ext free list.
4980 */
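    /* Zero the tracking word of every page in every RAM range, then rebuild the
       physical extent free list with all three index slots set to NIL. */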
4981 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4982 pRam;
4983 pRam = pRam->CTX_SUFF(pNext))
4984 {
4985 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4986 while (iPage-- > 0)
4987 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4988 }
4989
4990 pPool->iPhysExtFreeHead = 0;
4991 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4992 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4993 for (unsigned i = 0; i < cMaxPhysExts; i++)
4994 {
4995 paPhysExts[i].iNext = i + 1;
4996 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4997 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4998 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4999 }
5000 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5001#endif
5002
5003#ifdef PGMPOOL_WITH_MONITORING
5004 /*
5005 * Just zap the modified list.
5006 */
5007 pPool->cModifiedPages = 0;
5008 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5009#endif
5010
5011#ifdef PGMPOOL_WITH_CACHE
5012 /*
5013 * Clear the GCPhys hash and the age list.
5014 */
5015 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5016 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5017 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5018 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5019#endif
5020
5021#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5022 /* Clear all dirty pages. */
5023 pPool->idxFreeDirtyPage = 0;
5024 pPool->cDirtyPages = 0;
5025 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
5026 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
5027#endif
5028
5029 /*
5030 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5031 */
5032 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
5033 {
5034 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5035 pPage->iNext = NIL_PGMPOOL_IDX;
5036#ifdef PGMPOOL_WITH_MONITORING
5037 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5038 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5039 pPage->cModifications = 0;
5040 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
5041 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5042 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
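        /* Re-register the access handler callbacks for special pages that were
           monitored before the reset, and put them back into the GCPhys hash
           (when caching is enabled). */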
5043 if (pPage->fMonitored)
5044 {
5045 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
5046 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
5047 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
5048 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
5049 pPool->pszAccessHandler);
5050 AssertFatalRCSuccess(rc);
5051# ifdef PGMPOOL_WITH_CACHE
5052 pgmPoolHashInsert(pPool, pPage);
5053# endif
5054 }
5055#endif
5056#ifdef PGMPOOL_WITH_USER_TRACKING
5057 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
5058#endif
5059#ifdef PGMPOOL_WITH_CACHE
5060 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
5061 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
5062#endif
5063 }
5064
5065 for (VMCPUID i = 0; i < pVM->cCpus; i++)
5066 {
5067 /*
5068 * Re-enter the shadowing mode and assert Sync CR3 FF.
5069 */
5070 PVMCPU pVCpu = &pVM->aCpus[i];
5071 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5072 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5073 }
5074
5075 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5076}
5077#endif /* IN_RING3 */
5078
5079#ifdef LOG_ENABLED
5080static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5081{
5082 switch(enmKind)
5083 {
5084 case PGMPOOLKIND_INVALID:
5085 return "PGMPOOLKIND_INVALID";
5086 case PGMPOOLKIND_FREE:
5087 return "PGMPOOLKIND_FREE";
5088 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5089 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5090 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5091 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5092 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5093 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5094 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5095 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5096 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5097 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5098 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5099 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5100 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5101 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5102 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5103 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5104 case PGMPOOLKIND_32BIT_PD:
5105 return "PGMPOOLKIND_32BIT_PD";
5106 case PGMPOOLKIND_32BIT_PD_PHYS:
5107 return "PGMPOOLKIND_32BIT_PD_PHYS";
5108 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5109 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5110 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5111 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5112 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5113 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5114 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5115 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5116 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5117 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5118 case PGMPOOLKIND_PAE_PD_PHYS:
5119 return "PGMPOOLKIND_PAE_PD_PHYS";
5120 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5121 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5122 case PGMPOOLKIND_PAE_PDPT:
5123 return "PGMPOOLKIND_PAE_PDPT";
5124 case PGMPOOLKIND_PAE_PDPT_PHYS:
5125 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5126 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5127 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5128 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5129 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5130 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5131 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5132 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5133 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5134 case PGMPOOLKIND_64BIT_PML4:
5135 return "PGMPOOLKIND_64BIT_PML4";
5136 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5137 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5138 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5139 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5140 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5141 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5142 case PGMPOOLKIND_ROOT_NESTED:
5143 return "PGMPOOLKIND_ROOT_NESTED";
5144 }
5145 return "Unknown kind!";
5146}
5147#endif /* LOG_ENABLED */