VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@13299

Last change on this file since 13299 was 13202, checked in by vboxsync, 16 years ago

Missing EPT case in pgmPoolTrackFlushGCPhysPTInt.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 147.2 KB
/* $Id: PGMAllPool.cpp 13202 2008-10-13 09:40:14Z vboxsync $ */
/** @file
 * PGM Shadow Page Pool.
 */

/*
 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 USA or visit http://www.sun.com if you need
 * additional information or have any questions.
 */


/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM_POOL
#include <VBox/pgm.h>
#include <VBox/mm.h>
#include <VBox/em.h>
#include <VBox/cpum.h>
#ifdef IN_GC
# include <VBox/patm.h>
#endif
#include "PGMInternal.h"
#include <VBox/vm.h>
#include <VBox/disopcode.h>
#include <VBox/hwacc_vmx.h>

#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/asm.h>


/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
__BEGIN_DECLS
static void pgmPoolFlushAllInt(PPGMPOOL pPool);
#ifdef PGMPOOL_WITH_USER_TRACKING
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
#endif
#ifdef PGMPOOL_WITH_CACHE
static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
#endif
#ifdef PGMPOOL_WITH_MONITORING
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifndef IN_RING3
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
#endif
__END_DECLS


/**
 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
 *
 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
 * @param   enmKind     The page kind.
 */
DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            return true;
        default:
            return false;
    }
}

#if defined(IN_GC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
/**
 * Maps a pool page into the current context.
 *
 * @returns Pointer to the mapping.
 * @param   pVM     The VM handle.
 * @param   pPage   The page to map.
 */
void *pgmPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
{
    /* general pages. */
    if (pPage->idx >= PGMPOOL_IDX_FIRST)
    {
        Assert(pPage->idx < pVM->pgm.s.CTX_SUFF(pPool)->cCurPages);
        void *pv;
        int rc = PGMDynMapHCPage(pVM, pPage->Core.Key, &pv);
        AssertReleaseRC(rc);
        return pv;
    }

    /* special pages. */
# ifdef IN_GC
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:
            return pVM->pgm.s.pGC32BitPD;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:
            return pVM->pgm.s.apGCPaePDs[0];
        case PGMPOOL_IDX_PAE_PD_1:
            return pVM->pgm.s.apGCPaePDs[1];
        case PGMPOOL_IDX_PAE_PD_2:
            return pVM->pgm.s.apGCPaePDs[2];
        case PGMPOOL_IDX_PAE_PD_3:
            return pVM->pgm.s.apGCPaePDs[3];
        case PGMPOOL_IDX_PDPT:
            return pVM->pgm.s.pGCPaePDPT;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }

# else /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
    RTHCPHYS HCPhys;
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:
            HCPhys = pVM->pgm.s.HCPhys32BitPD;
            break;
        case PGMPOOL_IDX_PAE_PD:
        case PGMPOOL_IDX_PAE_PD_0:
            HCPhys = pVM->pgm.s.aHCPhysPaePDs[0];
            break;
        case PGMPOOL_IDX_PAE_PD_1:
            HCPhys = pVM->pgm.s.aHCPhysPaePDs[1];
            break;
        case PGMPOOL_IDX_PAE_PD_2:
            HCPhys = pVM->pgm.s.aHCPhysPaePDs[2];
            break;
        case PGMPOOL_IDX_PAE_PD_3:
            HCPhys = pVM->pgm.s.aHCPhysPaePDs[3];
            break;
        case PGMPOOL_IDX_PDPT:
            HCPhys = pVM->pgm.s.HCPhysPaePDPT;
            break;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }
    void *pv;
    int rc = PGMDynMapHCPage(pVM, HCPhys, &pv);
    AssertReleaseRC(rc);
    return pv;
# endif /* VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
}
#endif /* IN_GC || VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Determines the size of a write instruction.
 *
 * @returns number of bytes written.
 * @param   pDis    The disassembler state.
 */
static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
{
    /*
     * This is very crude and possibly wrong for some opcodes,
     * but since it's not really supposed to be called we can
     * probably live with that.
     */
    return DISGetParamSize(pDis, &pDis->param1);
}


/**
 * Flushes a chain of pages sharing the same access monitor.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pPool   The pool.
 * @param   pPage   A page in the chain.
 */
int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorChainFlush: Flush page %VGp type=%d\n", pPage->GCPhys, pPage->enmKind));

    /*
     * Find the list head.
     */
    uint16_t idx = pPage->idx;
    if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        {
            idx = pPage->iMonitoredPrev;
            Assert(idx != pPage->idx);
            pPage = &pPool->aPages[idx];
        }
    }

    /*
     * Iterate the list flushing each shadow page.
     */
    int rc = VINF_SUCCESS;
    for (;;)
    {
        idx = pPage->iMonitoredNext;
        Assert(idx != pPage->idx);
        if (pPage->idx >= PGMPOOL_IDX_FIRST)
        {
            int rc2 = pgmPoolFlushPage(pPool, pPage);
            if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
                rc = VINF_PGM_SYNC_CR3;
        }
        /* next */
        if (idx == NIL_PGMPOOL_IDX)
            break;
        pPage = &pPool->aPages[idx];
    }
    return rc;
}


/**
 * Wrapper for getting the current context pointer to the entry being modified.
 *
 * @returns Pointer to the current context mapping of the entry.
 * @param   pPool       The pool.
 * @param   pvFault     The fault virtual address.
 * @param   GCPhysFault The fault physical address.
 * @param   cbEntry     The entry size.
 */
#ifdef IN_RING3
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#else
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#endif
{
#ifdef IN_GC
    return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));

#elif defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
    void *pvRet;
    int rc = PGMDynMapGCPageOff(pPool->pVMR0, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING0)
    void *pvRet;
    int rc = pgmRamGCPhys2HCPtr(&pPool->pVMR0->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING3)
    return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
#else
# error "huh?"
#endif
}


/**
 * Process shadow entries before they are changed by the guest.
 *
 * For PT entries we will clear them. For PD entries, we'll simply check
 * for mapping conflicts and set the SyncCR3 FF if found.
 *
 * @param   pPool       The pool.
 * @param   pPage       The head page.
 * @param   GCPhysFault The guest physical fault address.
 * @param   pvAddress   In R0 and GC this is the guest context fault address (flat).
 *                      In R3 this is the host context 'fault' address.
 * @param   pCpu        The disassembler state for figuring out the write size.
 *                      This need not be specified if the caller knows we won't do cross entry accesses.
 */
#ifdef IN_RING3
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
#else
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
#endif
{
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
    const unsigned off     = GCPhysFault & PAGE_OFFSET_MASK;
    const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;

    LogFlow(("pgmPoolMonitorChainChanging: %VGv phys=%VGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));

    for (;;)
    {
        union
        {
            void       *pv;
            PX86PT      pPT;
            PX86PTPAE   pPTPae;
            PX86PD      pPD;
            PX86PDPAE   pPDPae;
            PX86PDPT    pPDPT;
            PX86PML4    pPML4;
        } uShw;
        uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);

        switch (pPage->enmKind)
        {
            case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
            {
                const unsigned iShw = off / sizeof(X86PTE);
                if (uShw.pPT->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PG_MASK);
# endif
                    uShw.pPT->a[iShw].u = 0;
                }
                break;
            }

            /* page/2 sized */
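            /* A 32-bit guest PT covers 4MB while a PAE shadow PT covers only
             * 2MB, so each guest PT is shadowed by two PAE PTs; the xor test
             * below checks whether the write falls into the half of the guest
             * page that this particular shadow page covers. */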
            case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
                {
                    const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
                    if (uShw.pPTPae->a[iShw].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                        Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                                   pGstPte->u & X86_PTE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw].u = 0;
                    }
                }
                break;

            case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
            {
                const unsigned iShw = off / sizeof(X86PTEPAE);
                if (uShw.pPTPae->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PAE_PG_MASK);
# endif
                    uShw.pPTPae->a[iShw].u = 0;
                }

                /* paranoia / a bit assumptive. */
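                /* A misaligned write may straddle two PTEs; when the
                 * disassembler state is available, also clear the second
                 * entry touched by the write. */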
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PTEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));

                    if (uShw.pPTPae->a[iShw2].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                        Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
                                                   pGstPte->u & X86_PTE_PAE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw2].u = 0;
                    }
                }

                break;
            }

            case PGMPOOLKIND_ROOT_32BIT_PD:
            {
                const unsigned iShw = off / sizeof(X86PTE);         // ASSUMING 32-bit guest paging!
                if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
                /* paranoia / a bit assumptive. */
                else if (   pCpu
                         && (off & 3)
                         && (off & 3) + cbWrite > sizeof(X86PTE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
                    if (    iShw2 != iShw
                        &&  iShw2 < RT_ELEMENTS(uShw.pPD->a)
                        &&  uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
                }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                if (    uShw.pPD->a[iShw].n.u1Present
                    &&  !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
# ifdef IN_GC /* TLB load - we're pushing things a bit... */
                    ASMProbeReadByte(pvAddress);
# endif
                    pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
                    uShw.pPD->a[iShw].u = 0;
                }
#endif
                break;
            }

            case PGMPOOLKIND_ROOT_PAE_PD:
            {
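                /* One 32-bit guest PDE maps 4MB and is shadowed by two PAE
                 * PDEs of 2MB each, hence the index is scaled by two and both
                 * shadow entries are processed in the loop below. */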
                unsigned iShw = (off / sizeof(X86PTE)) * 2;         // ASSUMING 32-bit guest paging!
                for (unsigned i = 0; i < 2; i++, iShw++)
                {
                    if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 3)
                             && (off & 3) + cbWrite > 4)
                    {
                        const unsigned iShw2 = iShw + 2;
                        if (    iShw2 < RT_ELEMENTS(uShw.pPDPae->a)
                            &&  (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
                    }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                    if (    uShw.pPDPae->a[iShw].n.u1Present
                        &&  !VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
# ifdef IN_GC /* TLB load - we're pushing things a bit... */
                        ASMProbeReadByte(pvAddress);
# endif
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
#endif
                }
                break;
            }

            case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
            {
                const unsigned iShw = off / sizeof(X86PDEPAE);
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                /* Causes trouble when the guest uses a PDE to refer to the whole page table
                 * level structure (invalidate here; faults occur later on when it tries to
                 * change the page table entries).
                 * -> recheck; probably only applies to the GC case.
                 */
                else
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
#endif
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
                    else if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    /* Note: hardcoded PAE implementation dependency */
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
                                    (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
#endif
                }
                break;
            }

            case PGMPOOLKIND_ROOT_PDPT:
            {
                /* Hopefully this doesn't happen very often:
                 * - touching unused parts of the page
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                const unsigned iShw = off / sizeof(X86PDPE);
                if (iShw < X86_PG_PAE_PDPE_ENTRIES)   /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
                {
                    if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 7)
                             && (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                        if (    iShw2 != iShw
                            &&  iShw2 < X86_PG_PAE_PDPE_ENTRIES
                            &&  uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
                        {
                            Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                            VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
                    }
                }
                break;
            }

#ifndef IN_GC
            case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
            {
                Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);

                const unsigned iShw = off / sizeof(X86PDEPAE);
                if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                    VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
                else
                {
                    if (uShw.pPDPae->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
                }
                /* paranoia / a bit assumptive. */
                if (    pCpu
                    &&  (off & 7)
                    &&  (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    if (    iShw2 != iShw
                        &&  uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pPool->CTX_SUFF(pVM)->pgm.s));
                        VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
                    else if (uShw.pPDPae->a[iShw2].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM),
                                    uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
                                    pPage->idx,
                                    iShw2);
                        uShw.pPDPae->a[iShw2].u = 0;
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
            {
                /* Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPDPT->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPT->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                        if (uShw.pPDPT->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
                            pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
                            uShw.pPDPT->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
            {
                /* Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                if (!VM_FF_ISSET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3))
                {
                    const unsigned iShw = off / sizeof(X86PDPE);
                    if (uShw.pPML4->a[iShw].n.u1Present)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
                        pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
                        uShw.pPML4->a[iShw].u = 0;
                    }
                    /* paranoia / a bit assumptive. */
                    if (    pCpu
                        &&  (off & 7)
                        &&  (off & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
                        if (uShw.pPML4->a[iShw2].n.u1Present)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
                            pgmPoolFree(pPool->CTX_SUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
                            uShw.pPML4->a[iShw2].u = 0;
                        }
                    }
                }
                break;
            }
#endif /* !IN_GC */

            default:
                AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
        }

        /* next */
        if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
            return;
        pPage = &pPool->aPages[pPage->iMonitoredNext];
    }
}


# ifndef IN_RING3
/**
 * Checks if an access could be a fork operation in progress.
 *
 * Meaning that the guest is setting up the parent process for copy-on-write.
 *
 * @returns true if it's likely that we're forking, otherwise false.
 * @param   pPool       The pool.
 * @param   pCpu        The disassembled instruction.
 * @param   offFault    The access offset.
 */
DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
{
    /*
     * i386 linux is using btr to clear X86_PTE_RW.
     * The functions involved are (2.6.16 source inspection):
     *     clear_bit
     *     ptep_set_wrprotect
     *     copy_one_pte
     *     copy_pte_range
     *     copy_pmd_range
     *     copy_pud_range
     *     copy_page_range
     *     dup_mmap
     *     dup_mm
     *     copy_mm
     *     copy_process
     *     do_fork
     */
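    /*
     * Heuristic: clearing X86_PTE_RW (bit 1) with btr writes the low dword of
     * the (PAE) page table entry, hence the !(offFault & 4) check below; the
     * bit index itself isn't validated yet (see the todo).
     */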
    if (    pCpu->pCurInstr->opcode == OP_BTR
        &&  !(offFault & 4)
        /** @todo Validate that the bit index is X86_PTE_RW. */
            )
    {
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
        return true;
    }
    return false;
}


/**
 * Determine whether the page is likely to have been reused.
 *
 * @returns true if we consider the page as being reused for a different purpose.
 * @returns false if we consider it to still be a paging page.
 * @param   pVM         VM Handle.
 * @param   pPage       The page in question.
 * @param   pRegFrame   Trap register frame.
 * @param   pCpu        The disassembly info for the faulting instruction.
 * @param   pvFault     The fault address.
 *
 * @remark  The REP prefix check is left to the caller because of STOSD/W.
 */
DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
{
#ifndef IN_GC
    if (   HWACCMHasPendingIrq(pVM)
        && (pRegFrame->rsp - pvFault) < 32)
    {
        /* Fault caused by stack writes while trying to inject an interrupt event. */
        Log(("pgmPoolMonitorIsReused: reused %VGv for interrupt stack (rsp=%VGv).\n", pvFault, pRegFrame->rsp));
        return true;
    }
#else
    NOREF(pVM);
#endif

    switch (pCpu->pCurInstr->opcode)
    {
        /* call implies the actual push of the return address faulted */
        case OP_CALL:
            Log4(("pgmPoolMonitorIsReused: CALL\n"));
            return true;
        case OP_PUSH:
            Log4(("pgmPoolMonitorIsReused: PUSH\n"));
            return true;
        case OP_PUSHF:
            Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
            return true;
        case OP_PUSHA:
            Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
            return true;
        case OP_FXSAVE:
            Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
            return true;
        case OP_MOVNTI:     /* solaris - block_zero_no_xmm */
            Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
            return true;
        case OP_MOVNTDQ:    /* solaris - hwblkclr & hwblkpagecopy */
            Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
            return true;
        case OP_MOVSWD:
        case OP_STOSWD:
            if (    pCpu->prefix == (PREFIX_REP|PREFIX_REX)
                &&  pRegFrame->rcx >= 0x40
               )
            {
                Assert(pCpu->mode == CPUMODE_64BIT);

                Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
                return true;
            }
            return false;
    }
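    /* Writes relative to ESP are almost certainly stack operations, and a
     * page being used as a stack won't still be an active paging page. */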
    if (    (pCpu->param1.flags & USE_REG_GEN32)
        &&  (pCpu->param1.base.reg_gen == USE_REG_ESP))
    {
        Log4(("pgmPoolMonitorIsReused: ESP\n"));
        return true;
    }

    //if (pPage->fCR3Mix)
    //    return false;
    return false;
}


/**
 * Flushes the page being accessed.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                     PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * First, do the flushing.
     */
    int rc = pgmPoolMonitorChainFlush(pPool, pPage);

    /*
     * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
     */
    uint32_t cbWritten;
    int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
    if (VBOX_SUCCESS(rc2))
        pRegFrame->rip += pCpu->opsize;
    else if (rc2 == VERR_EM_INTERPRETER)
    {
#ifdef IN_GC
        if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
        {
            LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
                     pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
            rc = VINF_SUCCESS;
            STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
        }
        else
#endif
        {
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
        }
    }
    else
        rc = rc2;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
    return rc;
}


/**
 * Handles the STOSD write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                          PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    Assert(pCpu->mode == CPUMODE_32BIT);

    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Execute REP STOSD.
     *
     * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
     * write situation, meaning that it's safe to write here.
     */
    RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
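    /* Replay the REP STOSD one dword at a time, letting the monitor chain
     * clear the affected shadow entries before each guest write is made. */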
    while (pRegFrame->ecx)
    {
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
#ifdef IN_GC
        *(uint32_t *)pu32 = pRegFrame->eax;
#else
        PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
#endif
        pu32           += 4;
        GCPhysFault    += 4;
        pRegFrame->edi += 4;
        pRegFrame->ecx--;
    }
    pRegFrame->rip += pCpu->opsize;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
    return VINF_SUCCESS;
}


/**
 * Handles the simple write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                           PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Clear all the pages. ASSUMES that pvFault is readable.
     */
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);

    /*
     * Interpret the instruction.
     */
    uint32_t cb;
    int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
    if (VBOX_SUCCESS(rc))
        pRegFrame->rip += pCpu->opsize;
    else if (rc == VERR_EM_INTERPRETER)
    {
        LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
                 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
        rc = VINF_EM_RAW_EMULATE_INSTR;
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
    }

    /*
     * Quick hack, with logging enabled we're getting stale
     * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
     * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
     * have to be fixed to support this. But that'll have to wait till next week.
     *
     * An alternative is to keep track of the changed PTEs together with the
     * GCPhys from the guest PT. This may prove expensive though.
     *
     * At the moment, it's VITAL that it's done AFTER the instruction interpreting
     * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
     */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
    return rc;
}


/**
 * \#PF Handler callback for PT write accesses.
 *
 * @returns VBox status code (appropriate for GC return).
 * @param   pVM         VM Handle.
 * @param   uErrorCode  CPU Error code.
 * @param   pRegFrame   Trap register frame.
 *                      NULL on DMA and other non CPU access.
 * @param   pvFault     The fault address (cr2).
 * @param   GCPhysFault The GC physical address corresponding to pvFault.
 * @param   pvUser      User argument.
 */
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
    PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
    PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
    LogFlow(("pgmPoolAccessHandler: pvFault=%VGv pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

    /*
     * Disassemble the faulting instruction.
     */
    DISCPUSTATE Cpu;
    int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
    AssertRCReturn(rc, rc);

    /*
     * Check if it's worth dealing with.
     */
    bool fReused = false;
    if (    (   pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
             || pPage->fCR3Mix)
        &&  !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
        &&  !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
        {
            rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         */
        if (    Cpu.pCurInstr->opcode == OP_STOSWD
            &&  CPUMGetGuestCPL(pVM, pRegFrame) == 0
            &&  pRegFrame->ecx <= 0x20
            &&  pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
            &&  !((uintptr_t)pvFault & 3)
            &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
            &&  Cpu.mode == CPUMODE_32BIT
            &&  Cpu.opmode == CPUMODE_32BIT
            &&  Cpu.addrmode == CPUMODE_32BIT
            &&  Cpu.prefix == PREFIX_REP
            &&  !pRegFrame->eflags.Bits.u1DF
            )
        {
            rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
            return rc;
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
        Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%VGv opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
    }

1058 * Not worth it, so flush it.
1059 *
1060 * If we considered it to be reused, don't to back to ring-3
1061 * to emulate failed instructions since we usually cannot
1062 * interpret then. This may be a bit risky, in which case
1063 * the reuse detection must be fixed.
1064 */
    rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
    if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
        rc = VINF_SUCCESS;
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
    return rc;
}

# endif /* !IN_RING3 */
#endif /* PGMPOOL_WITH_MONITORING */



#ifdef PGMPOOL_WITH_CACHE
/**
 * Inserts a page into the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashInsert: %VGp\n", pPage->GCPhys));
    Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    pPage->iNext = pPool->aiHash[iHash];
    pPool->aiHash[iHash] = pPage->idx;
}


/**
 * Removes a page from the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolHashRemove: %VGp\n", pPage->GCPhys));
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    if (pPool->aiHash[iHash] == pPage->idx)
        pPool->aiHash[iHash] = pPage->iNext;
    else
    {
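        /* pPage isn't the chain head; walk the singly linked hash chain to
         * find its predecessor and unlink it. */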
        uint16_t iPrev = pPool->aiHash[iHash];
        for (;;)
        {
            const int16_t i = pPool->aPages[iPrev].iNext;
            if (i == pPage->idx)
            {
                pPool->aPages[iPrev].iNext = pPage->iNext;
                break;
            }
            if (i == NIL_PGMPOOL_IDX)
            {
                AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
                break;
            }
            iPrev = i;
        }
    }
    pPage->iNext = NIL_PGMPOOL_IDX;
}


/**
 * Frees up one cache page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
 * @param   pPool   The pool.
 * @param   iUser   The user index.
 */
static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
{
#ifndef IN_GC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
    STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);

    /*
     * Select one page from the tail of the age list.
     */
    uint16_t iToFree = pPool->iAgeTail;
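    /* Don't pick the page that is about to become the user table of the new
     * allocation; fall back to the next oldest entry on the age list. */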
    if (iToFree == iUser)
        iToFree = pPool->aPages[iToFree].iAgePrev;
/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
    if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        uint16_t i = pPool->aPages[iToFree].iAgePrev;
        for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
        {
            if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
                continue;
            iToFree = i;
            break;
        }
    }
*/
    Assert(iToFree != iUser);
    AssertRelease(iToFree != NIL_PGMPOOL_IDX);

    int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
    if (rc == VINF_SUCCESS)
        PGM_INVL_GUEST_TLBS(); /* see PT handler. */
    return rc;
}


/**
 * Checks if a kind mismatch is really a page being reused
 * or if it's just normal remappings.
 *
 * @returns true if reused and the cached page (enmKind1) should be flushed
 * @returns false if not reused.
 * @param   enmKind1    The kind of the cached page.
 * @param   enmKind2    The kind of the requested page.
 */
static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
{
    switch (enmKind1)
    {
        /*
         * Never reuse them. There is no remapping in non-paging mode.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
            return true;

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * These cannot be flushed, and it's common to reuse the PDs as PTs.
         */
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPT:
        case PGMPOOLKIND_ROOT_NESTED:
            return false;

        default:
            AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
    }
}


/**
 * Attempts to satisfy a pgmPoolAlloc request from the cache.
 *
 * @returns VBox status code.
 * @retval  VINF_PGM_CACHED_PAGE on success.
 * @retval  VERR_FILE_NOT_FOUND if not found.
 * @param   pPool       The pool.
 * @param   GCPhys      The GC physical address of the page we're gonna shadow.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page.
 */
static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
{
#ifndef IN_GC
    const PVM pVM = pPool->CTX_SUFF(pVM);
#endif
    /*
     * Look up the GCPhys in the hash.
     */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    Log3(("pgmPoolCacheAlloc: %VGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
    if (i != NIL_PGMPOOL_IDX)
    {
        do
        {
            PPGMPOOLPAGE pPage = &pPool->aPages[i];
            Log3(("pgmPoolCacheAlloc: slot %d found page %VGp\n", i, pPage->GCPhys));
            if (pPage->GCPhys == GCPhys)
            {
                if ((PGMPOOLKIND)pPage->enmKind == enmKind)
                {
                    int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
                    if (VBOX_SUCCESS(rc))
                    {
                        *ppPage = pPage;
                        STAM_COUNTER_INC(&pPool->StatCacheHits);
                        return VINF_PGM_CACHED_PAGE;
                    }
                    return rc;
                }

                /*
                 * The kind is different. In some cases we should now flush the page
                 * as it has been reused, but in most cases this is normal remapping
                 * of PDs as PT or big pages using the GCPhys field in a slightly
                 * different way than the other kinds.
                 */
                if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
                {
                    STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
                    pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
                    PGM_INVL_GUEST_TLBS(); /* see PT handler. */
                    break;
                }
            }

            /* next */
            i = pPage->iNext;
        } while (i != NIL_PGMPOOL_IDX);
    }

    Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
    STAM_COUNTER_INC(&pPool->StatCacheMisses);
    return VERR_FILE_NOT_FOUND;
}


/**
 * Inserts a page into the cache.
 *
 * @param   pPool           The pool.
 * @param   pPage           The cached page.
 * @param   fCanBeCached    Set if the page is fit for caching from the caller's point of view.
 */
static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
{
    /*
     * Insert into the GCPhys hash if the page is fit for that.
     */
    Assert(!pPage->fCached);
    if (fCanBeCached)
    {
        pPage->fCached = true;
        pgmPoolHashInsert(pPool, pPage);
        Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheCacheable);
    }
    else
    {
        Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
    }

    /*
     * Insert at the head of the age list.
     */
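    /* The age list is kept in LRU order: the head is the most recently used
     * page and the tail is the next eviction candidate for pgmPoolCacheFreeOne(). */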
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
    pPage->iAgeNext = pPool->iAgeHead;
    if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
        pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
    else
        pPool->iAgeTail = pPage->idx;
    pPool->iAgeHead = pPage->idx;
}


/**
 * Flushes a cached page.
 *
 * @param   pPool   The pool.
 * @param   pPage   The cached page.
 */
static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolCacheFlushPage: %VGp\n", pPage->GCPhys));

    /*
     * Remove the page from the hash.
     */
    if (pPage->fCached)
    {
        pPage->fCached = false;
        pgmPoolHashRemove(pPool, pPage);
    }
    else
        Assert(pPage->iNext == NIL_PGMPOOL_IDX);

    /*
     * Remove it from the age list.
     */
    if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
    else
        pPool->iAgeTail = pPage->iAgePrev;
    if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
    else
        pPool->iAgeHead = pPage->iAgeNext;
    pPage->iAgeNext = NIL_PGMPOOL_IDX;
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
}
#endif /* PGMPOOL_WITH_CACHE */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Looks for pages sharing the monitor.
 *
 * @returns Pointer to the head page.
 * @returns NULL if not found.
 * @param   pPool       The Pool
 * @param   pNewPage    The page which is going to be monitored.
 */
static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
{
#ifdef PGMPOOL_WITH_CACHE
    /*
     * Look up the GCPhys in the hash.
     */
    RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    if (i == NIL_PGMPOOL_IDX)
        return NULL;
    do
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
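        /* Unsigned underflow makes this a single-compare range check: true
         * iff pPage->GCPhys lies within the 4KB page starting at GCPhys. */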
        if (    pPage->GCPhys - GCPhys < PAGE_SIZE
            &&  pPage != pNewPage)
        {
            switch (pPage->enmKind)
            {
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
                case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
                case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
                case PGMPOOLKIND_ROOT_32BIT_PD:
                case PGMPOOLKIND_ROOT_PAE_PD:
                case PGMPOOLKIND_ROOT_PDPT:
                {
                    /* find the head */
                    while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
                    {
                        Assert(pPage->iMonitoredPrev != pPage->idx);
                        pPage = &pPool->aPages[pPage->iMonitoredPrev];
                    }
                    return pPage;
                }

                /* ignore, no monitoring. */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
                case PGMPOOLKIND_EPT_PD_FOR_PHYS:
                case PGMPOOLKIND_EPT_PT_FOR_PHYS:
                case PGMPOOLKIND_ROOT_NESTED:
                    break;
                default:
                    AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
            }
        }

        /* next */
        i = pPage->iNext;
    } while (i != NIL_PGMPOOL_IDX);
#endif
    return NULL;
}

/**
 * Enables write monitoring of a guest page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
 * @param   pPool   The pool.
 * @param   pPage   The cached page.
 */
static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorInsert %VGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));

    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PDPT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Install handler.
     */
    int rc;
    PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
    if (pPageHead)
    {
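        /* Another pool page already monitors this guest page; link this page
         * into the existing monitor chain instead of installing a second
         * physical handler. */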
        Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
        Assert(pPageHead->iMonitoredPrev != pPage->idx);
        pPage->iMonitoredPrev = pPageHead->idx;
        pPage->iMonitoredNext = pPageHead->iMonitoredNext;
        if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
            pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
        pPageHead->iMonitoredNext = pPage->idx;
        rc = VINF_SUCCESS;
    }
    else
    {
        Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
        PVM pVM = pPool->CTX_SUFF(pVM);
        const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
        rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
                                          GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
                                          pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
                                          pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
                                          pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
                                          pPool->pszAccessHandler);
        /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
         * the heap size should suffice. */
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = true;
    return rc;
}


/**
 * Disables write monitoring of a guest page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
 * @param   pPool   The pool.
 * @param   pPage   The cached page.
 */
static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
        case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
        case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
        case PGMPOOLKIND_ROOT_PDPT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
        case PGMPOOLKIND_EPT_PD_FOR_PHYS:
        case PGMPOOLKIND_EPT_PT_FOR_PHYS:
        case PGMPOOLKIND_ROOT_NESTED:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Remove the page from the monitored list or uninstall it if last.
     */
    const PVM pVM = pPool->CTX_SUFF(pVM);
    int rc;
    if (    pPage->iMonitoredNext != NIL_PGMPOOL_IDX
        ||  pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
        {
            PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
            pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
            pNewHead->fCR3Mix = pPage->fCR3Mix;
            rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
                                                   pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
                                                   pPool->pszAccessHandler);
            AssertFatalRCSuccess(rc);
            pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
        }
        else
        {
            pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
            if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
            {
                pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
                pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
            }
            pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
            rc = VINF_SUCCESS;
        }
    }
    else
    {
        rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = false;

    /*
     * Remove it from the list of modified pages (if in it).
     */
    pgmPoolMonitorModifiedRemove(pPool, pPage);

    return rc;
}


#ifdef PGMPOOL_WITH_MIXED_PT_CR3
/**
 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
 *
 * @param   pPool       The Pool.
 * @param   pPage       A page in the chain.
 * @param   fCR3Mix     The new fCR3Mix value.
 */
static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
{
    /* current */
    pPage->fCR3Mix = fCR3Mix;

    /* before */
    int16_t idx = pPage->iMonitoredPrev;
    while (idx != NIL_PGMPOOL_IDX)
    {
        pPool->aPages[idx].fCR3Mix = fCR3Mix;
        idx = pPool->aPages[idx].iMonitoredPrev;
    }

    /* after */
    idx = pPage->iMonitoredNext;
    while (idx != NIL_PGMPOOL_IDX)
    {
        pPool->aPages[idx].fCR3Mix = fCR3Mix;
        idx = pPool->aPages[idx].iMonitoredNext;
    }
}


1714/**
1715 * Installs or modifies monitoring of a CR3 page (special).
1716 *
1717 * We're pretending the CR3 page is shadowed by the pool so we can use the
1718 * generic mechanisms for detecting chained monitoring. (This also gives us a
1719 * taste of what code changes are required to really pool CR3 shadow pages.)
1720 *
1721 * @returns VBox status code.
1722 * @param pPool The pool.
1723 * @param idxRoot The CR3 (root) page index.
1724 * @param GCPhysCR3 The (new) CR3 value.
1725 */
1726int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1727{
1728 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1729 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1730 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1731 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1732
1733 /*
1734 * The unlikely case where it already matches.
1735 */
1736 if (pPage->GCPhys == GCPhysCR3)
1737 {
1738 Assert(pPage->fMonitored);
1739 return VINF_SUCCESS;
1740 }
1741
1742 /*
1743 * Flush the current monitoring and remove it from the hash.
1744 */
1745 int rc = VINF_SUCCESS;
1746 if (pPage->fMonitored)
1747 {
1748 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1749 rc = pgmPoolMonitorFlush(pPool, pPage);
1750 if (rc == VERR_PGM_POOL_CLEARED)
1751 rc = VINF_SUCCESS;
1752 else
1753 AssertFatalRC(rc);
1754 pgmPoolHashRemove(pPool, pPage);
1755 }
1756
1757 /*
1758 * Monitor the page at the new location and insert it into the hash.
1759 */
1760 pPage->GCPhys = GCPhysCR3;
1761 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1762 if (rc2 != VERR_PGM_POOL_CLEARED)
1763 {
1764 AssertFatalRC(rc2);
1765 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1766 rc = rc2;
1767 }
1768 pgmPoolHashInsert(pPool, pPage);
1769 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1770 return rc;
1771}
1772
1773
1774/**
1775 * Removes the monitoring of a CR3 page (special).
1776 *
1777 * @returns VBox status code.
1778 * @param pPool The pool.
1779 * @param idxRoot The CR3 (root) page index.
1780 */
1781int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1782{
1783 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1784 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1785 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1786 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1787
1788 if (!pPage->fMonitored)
1789 return VINF_SUCCESS;
1790
1791 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1792 int rc = pgmPoolMonitorFlush(pPool, pPage);
1793 if (rc != VERR_PGM_POOL_CLEARED)
1794 AssertFatalRC(rc);
1795 else
1796 rc = VINF_SUCCESS;
1797 pgmPoolHashRemove(pPool, pPage);
1798 Assert(!pPage->fMonitored);
1799 pPage->GCPhys = NIL_RTGCPHYS;
1800 return rc;
1801}
1802#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1803
1804
1805/**
1806 * Inserts the page into the list of modified pages.
1807 *
1808 * @param pPool The pool.
1809 * @param pPage The page.
1810 */
1811void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1812{
1813 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1814 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1815 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1816 && pPool->iModifiedHead != pPage->idx,
1817 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1818 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1819 pPool->iModifiedHead, pPool->cModifiedPages));
1820
1821 pPage->iModifiedNext = pPool->iModifiedHead;
1822 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1823 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1824 pPool->iModifiedHead = pPage->idx;
1825 pPool->cModifiedPages++;
1826#ifdef VBOX_WITH_STATISTICS
1827 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1828 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1829#endif
1830}
1831
1832
1833/**
1834 * Removes the page from the list of modified pages and resets the
1835 * modification counter.
1836 *
1837 * @param pPool The pool.
1838 * @param pPage The page which is believed to be in the list of modified pages.
1839 */
1840static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1841{
1842 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1843 if (pPool->iModifiedHead == pPage->idx)
1844 {
1845 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1846 pPool->iModifiedHead = pPage->iModifiedNext;
1847 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1848 {
1849 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1850 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1851 }
1852 pPool->cModifiedPages--;
1853 }
1854 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1855 {
1856 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1857 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1858 {
1859 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1860 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1861 }
1862 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1863 pPool->cModifiedPages--;
1864 }
1865 else
1866 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1867 pPage->cModifications = 0;
1868}
1869
1870
1871/**
1872 * Zaps the list of modified pages, resetting their modification counters in the process.
1873 *
1874 * @param pVM The VM handle.
1875 */
1876void pgmPoolMonitorModifiedClearAll(PVM pVM)
1877{
1878 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1879 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1880
1881 unsigned cPages = 0; NOREF(cPages);
1882 uint16_t idx = pPool->iModifiedHead;
1883 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1884 while (idx != NIL_PGMPOOL_IDX)
1885 {
1886 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1887 idx = pPage->iModifiedNext;
1888 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1889 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1890 pPage->cModifications = 0;
1891 Assert(++cPages);
1892 }
1893 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1894 pPool->cModifiedPages = 0;
1895}
1896
1897
1898/**
1899 * Clear all shadow pages and clear all modification counters.
1900 *
1901 * @param pVM The VM handle.
1902 * @remark Should only be used when monitoring is available, thus placed in
1903 * the PGMPOOL_WITH_MONITORING #ifdef.
1904 */
1905void pgmPoolClearAll(PVM pVM)
1906{
1907 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1908 STAM_PROFILE_START(&pPool->StatClearAll, c);
1909 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1910
1911 /*
1912 * Iterate all the pages until we've encountered all that are in use.
1913 * This is a simple but not quite optimal solution.
1914 */
1915 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1916 unsigned cLeft = pPool->cUsedPages;
1917 unsigned iPage = pPool->cCurPages;
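 /* cLeft counts the pages still unaccounted for; once every used page has
    been visited we can stop instead of walking the remainder of aPages. */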
1918 while (--iPage >= PGMPOOL_IDX_FIRST)
1919 {
1920 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1921 if (pPage->GCPhys != NIL_RTGCPHYS)
1922 {
1923 switch (pPage->enmKind)
1924 {
1925 /*
1926 * We only care about shadow page tables.
1927 */
1928 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1929 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1930 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1931 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1932 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1933 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1934 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1935 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1936 {
1937#ifdef PGMPOOL_WITH_USER_TRACKING
1938 if (pPage->cPresent)
1939#endif
1940 {
1941 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
1942 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1943 ASMMemZeroPage(pvShw);
1944 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1945#ifdef PGMPOOL_WITH_USER_TRACKING
1946 pPage->cPresent = 0;
1947 pPage->iFirstPresent = ~0;
1948#endif
1949 }
1950 }
1951 /* fall thru */
1952
1953 default:
1954 Assert(!pPage->cModifications || ++cModifiedPages);
1955 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1956 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1957 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1958 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1959 pPage->cModifications = 0;
1960 break;
1961
1962 }
1963 if (!--cLeft)
1964 break;
1965 }
1966 }
1967
1968 /* sweep the special pages too. */
1969 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1970 {
1971 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1972 if (pPage->GCPhys != NIL_RTGCPHYS)
1973 {
1974 Assert(!pPage->cModifications || ++cModifiedPages);
1975 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1976 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1977 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1978 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1979 pPage->cModifications = 0;
1980 }
1981 }
1982
1983#ifndef DEBUG_michael
1984 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1985#endif
1986 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1987 pPool->cModifiedPages = 0;
1988
1989#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1990 /*
1991 * Clear all the GCPhys links and rebuild the phys ext free list.
1992 */
1993 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
1994 pRam;
1995 pRam = pRam->CTX_SUFF(pNext))
1996 {
1997 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1998 while (iPage-- > 0)
1999 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2000 }
2001
2002 pPool->iPhysExtFreeHead = 0;
2003 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
2004 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
2005 for (unsigned i = 0; i < cMaxPhysExts; i++)
2006 {
2007 paPhysExts[i].iNext = i + 1;
2008 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
2009 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
2010 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
2011 }
2012 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2013#endif
2014
2015
2016 pPool->cPresent = 0;
2017 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
2018}
2019
2020/**
2021 * Handle SyncCR3 pool tasks
2022 *
2023 * @returns VBox status code.
2024 * @retval VINF_SUCCESS on success.
2025 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2026 * @param pVM The VM handle.
2027 * @remark Should only be used when monitoring is available, thus placed in
2028 * the PGMPOOL_WITH_MONITORING #ifdef.
2029 */
2030int pgmPoolSyncCR3(PVM pVM)
2031{
2032 /*
2033 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2034 * Occasionally we will have to clear all the shadow page tables because we wanted
2035 * to monitor a page which was mapped by too many shadowed page tables. This operation
2036 * is sometimes referred to as a 'lightweight flush'.
2037 */
2038 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
2039 pgmPoolMonitorModifiedClearAll(pVM);
2040 else
2041 {
2042# ifndef IN_GC
2043 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
2044 pgmPoolClearAll(pVM);
2045# else
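 /* In GC we can't do the full clear here, so raise the sync-CR3
    force-action flag and let ring-3 handle it (see the retval docs above). */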
2046 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2047 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2048 return VINF_PGM_SYNC_CR3;
2049# endif
2050 }
2051 return VINF_SUCCESS;
2052}
2053#endif /* PGMPOOL_WITH_MONITORING */
2054
2055#ifdef PGMPOOL_WITH_USER_TRACKING
2056/**
2057 * Frees up at least one user entry.
2058 *
2059 * @returns VBox status code.
2060 * @retval VINF_SUCCESS if at least one user entry was freed.
2061 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2062 * @param pPool The pool.
2063 * @param iUser The user index.
2064 */
2065static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2066{
2067 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2068#ifdef PGMPOOL_WITH_CACHE
2069 /*
2070 * Just free cached pages in a braindead fashion.
2071 */
2072 /** @todo walk the age list backwards and free the first with usage. */
2073 int rc = VINF_SUCCESS;
2074 do
2075 {
2076 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2077 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
2078 rc = rc2;
2079 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2080 return rc;
2081#else
2082 /*
2083 * Lazy approach.
2084 */
2085 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2086 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
2087 pgmPoolFlushAllInt(pPool);
2088 return VERR_PGM_POOL_FLUSHED;
2089#endif
2090}
2091
2092
2093/**
2094 * Inserts a page into the cache.
2095 *
2096 * This will create a user node for the page, insert it into the GCPhys
2097 * hash, and insert it into the age list.
2098 *
2099 * @returns VBox status code.
2100 * @retval VINF_SUCCESS if successfully added.
2101 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2102 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
2103 * @param pPool The pool.
2104 * @param pPage The cached page.
2105 * @param GCPhys The GC physical address of the page we're gonna shadow.
2106 * @param iUser The user index.
2107 * @param iUserTable The user table index.
2108 */
2109DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2110{
2111 int rc = VINF_SUCCESS;
2112 PPGMPOOLUSER pUser = pPool->CTX_SUFF(paUsers);
2113
2114 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2115
2116 /*
2117 * Find a free user node.
2118 */
2119 uint16_t i = pPool->iUserFreeHead;
2120 if (i == NIL_PGMPOOL_USER_INDEX)
2121 {
2122 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2123 if (VBOX_FAILURE(rc))
2124 return rc;
2125 i = pPool->iUserFreeHead;
2126 }
2127
2128 /*
2129 * Unlink the user node from the free list,
2130 * initialize and insert it into the user list.
2131 */
2132 pPool->iUserFreeHead = pUser[i].iNext;
2133 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2134 pUser[i].iUser = iUser;
2135 pUser[i].iUserTable = iUserTable;
2136 pPage->iUserHead = i;
2137
2138 /*
2139 * Insert into cache and enable monitoring of the guest page if enabled.
2140 *
2141 * Until we implement caching of all levels, including the CR3 one, we'll
2142 * have to make sure we don't try to monitor & cache any recursive reuse of
2143 * a monitored CR3 page. Because all Windows versions do this, we'll
2144 * have to be able to do combined access monitoring, CR3 + PT and
2145 * PD + PT (guest PAE).
2146 *
2147 * Update:
2148 * We're now cooperating with the CR3 monitor if an uncacheable page is found.
2149 */
2150#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2151# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2152 const bool fCanBeMonitored = true;
2153# else
2154 bool fCanBeMonitored = pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2155 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTX_SUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2156 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2157# endif
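 /* Note that monitoring is skipped when the page shares its physical address
    with the guest CR3 that is already being monitored; big pages are the
    exception, presumably because their shadow PTs don't conflict with the
    CR3 monitoring. */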
2158# ifdef PGMPOOL_WITH_CACHE
2159 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2160# endif
2161 if (fCanBeMonitored)
2162 {
2163# ifdef PGMPOOL_WITH_MONITORING
2164 rc = pgmPoolMonitorInsert(pPool, pPage);
2165 if (rc == VERR_PGM_POOL_CLEARED)
2166 {
2167 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2168# ifndef PGMPOOL_WITH_CACHE
2169 pgmPoolMonitorFlush(pPool, pPage);
2170 rc = VERR_PGM_POOL_FLUSHED;
2171# endif
2172 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2173 pUser[i].iNext = pPool->iUserFreeHead;
2174 pUser[i].iUser = NIL_PGMPOOL_IDX;
2175 pPool->iUserFreeHead = i;
2176 }
2177 # endif
2178 }
2179#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2180 return rc;
2181}
2182
2183
2184# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2185/**
2186 * Adds a user reference to a page.
2187 *
2188 * This will also move the page to the head of the cache's page age list
2189 * by updating its replacement stats.
2190 *
2191 * @returns VBox status code.
2192 * @retval VINF_SUCCESS if successfully added.
2193 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2194 * @param pPool The pool.
2195 * @param pPage The cached page.
2196 * @param iUser The user index.
2197 * @param iUserTable The user table.
2198 */
2199static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2200{
2201 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2202
2203 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2204# ifdef VBOX_STRICT
2205 /*
2206 * Check that the entry doesn't already exist.
2207 */
2208 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2209 {
2210 uint16_t i = pPage->iUserHead;
2211 do
2212 {
2213 Assert(i < pPool->cMaxUsers);
2214 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2215 i = paUsers[i].iNext;
2216 } while (i != NIL_PGMPOOL_USER_INDEX);
2217 }
2218# endif
2219
2220 /*
2221 * Allocate a user node.
2222 */
2223 uint16_t i = pPool->iUserFreeHead;
2224 if (i == NIL_PGMPOOL_USER_INDEX)
2225 {
2226 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2227 if (VBOX_FAILURE(rc))
2228 return rc;
2229 i = pPool->iUserFreeHead;
2230 }
2231 pPool->iUserFreeHead = paUsers[i].iNext;
2232
2233 /*
2234 * Initialize the user node and insert it.
2235 */
2236 paUsers[i].iNext = pPage->iUserHead;
2237 paUsers[i].iUser = iUser;
2238 paUsers[i].iUserTable = iUserTable;
2239 pPage->iUserHead = i;
2240
2241# ifdef PGMPOOL_WITH_CACHE
2242 /*
2243 * Tell the cache to update its replacement stats for this page.
2244 */
2245 pgmPoolCacheUsed(pPool, pPage);
2246# endif
2247 return VINF_SUCCESS;
2248}
2249# endif /* PGMPOOL_WITH_CACHE */
2250
2251
2252/**
2253 * Frees a user record associated with a page.
2254 *
2255 * This does not clear the entry in the user table, it simply returns the
2256 * user record to the chain of free records.
2257 *
2258 * @param pPool The pool.
2259 * @param pPage The shadow page.
2260 * @param iUser The shadow page pool index of the user table.
2261 * @param iUserTable The index into the user table (shadowed).
2262 */
2263static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2264{
2265 /*
2266 * Unlink and free the specified user entry.
2267 */
2268 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2269
2270 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2271 uint16_t i = pPage->iUserHead;
2272 if ( i != NIL_PGMPOOL_USER_INDEX
2273 && paUsers[i].iUser == iUser
2274 && paUsers[i].iUserTable == iUserTable)
2275 {
2276 pPage->iUserHead = paUsers[i].iNext;
2277
2278 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2279 paUsers[i].iNext = pPool->iUserFreeHead;
2280 pPool->iUserFreeHead = i;
2281 return;
2282 }
2283
2284 /* General: Linear search. */
2285 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2286 while (i != NIL_PGMPOOL_USER_INDEX)
2287 {
2288 if ( paUsers[i].iUser == iUser
2289 && paUsers[i].iUserTable == iUserTable)
2290 {
2291 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2292 paUsers[iPrev].iNext = paUsers[i].iNext;
2293 else
2294 pPage->iUserHead = paUsers[i].iNext;
2295
2296 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2297 paUsers[i].iNext = pPool->iUserFreeHead;
2298 pPool->iUserFreeHead = i;
2299 return;
2300 }
2301 iPrev = i;
2302 i = paUsers[i].iNext;
2303 }
2304
2305 /* Fatal: didn't find it */
2306 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
2307 iUser, iUserTable, pPage->GCPhys));
2308}
2309
2310
2311/**
2312 * Gets the entry size of a shadow table.
2313 *
2314 * @param enmKind The kind of page.
2315 *
2316 * @returns The size of the entry in bytes. That is, 4 or 8.
2317 * @returns If the kind is not for a table, a fatal assertion is raised.
2319 */
2320DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2321{
2322 switch (enmKind)
2323 {
2324 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2325 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2326 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2327 case PGMPOOLKIND_ROOT_32BIT_PD:
2328 return 4;
2329
2330 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2331 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2332 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2333 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2334 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2335 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2336 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2337 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2338 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2339 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2340 case PGMPOOLKIND_ROOT_PAE_PD:
2341 case PGMPOOLKIND_ROOT_PDPT:
2342 case PGMPOOLKIND_ROOT_NESTED:
2343 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2344 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2345 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2346 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2347 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2348 return 8;
2349
2350 default:
2351 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2352 }
2353}
2354
2355
2356/**
2357 * Gets the entry size of a guest table.
2358 *
2359 * @param enmKind The kind of page.
2360 *
2361 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2362 * @returns If the kind is not for a table, an assertion is raised and 0 is
2363 * returned.
2364 */
2365DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2366{
2367 switch (enmKind)
2368 {
2369 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2370 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2371 case PGMPOOLKIND_ROOT_32BIT_PD:
2372 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2373 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2374 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2375 return 4;
2376
2377 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2378 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2379 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2380 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2381 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2382 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2383 case PGMPOOLKIND_ROOT_PAE_PD:
2384 case PGMPOOLKIND_ROOT_PDPT:
2385 return 8;
2386
2387 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2388 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2389 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2390 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2391 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2392 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2393 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2394 case PGMPOOLKIND_ROOT_NESTED:
2395 /** @todo can we return 0? (nobody is calling this...) */
2396 AssertFailed();
2397 return 0;
2398
2399 default:
2400 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2401 }
2402}
2403
2404
2405#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2406/**
2407 * Scans one shadow page table for mappings of a physical page.
2408 *
2409 * @param pVM The VM handle.
2410 * @param pPhysPage The guest page in question.
2411 * @param iShw The shadow page table.
2412 * @param cRefs The number of references made in that PT.
2413 */
2414static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2415{
2416 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2417 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2418
2419 /*
2420 * Assert sanity.
2421 */
2422 Assert(cRefs == 1);
2423 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2424 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2425
2426 /*
2427 * Then, clear the actual mappings to the page in the shadow PT.
2428 */
2429 switch (pPage->enmKind)
2430 {
2431 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2432 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2433 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2434 {
2435 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
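 /* u32 is the pattern of a present PTE mapping pPhysPage; any entry whose
    address bits and P bit match it references the page and is cleared. */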
2436 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2437 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2438 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2439 {
2440 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2441 pPT->a[i].u = 0;
2442 cRefs--;
2443 if (!cRefs)
2444 return;
2445 }
2446#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2447 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2448 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2449 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2450 {
2451 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2452 pPT->a[i].u = 0;
2453 }
2454#endif
2455 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2456 break;
2457 }
2458
2459 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2460 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2461 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2462 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2463 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2464 {
2465 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2466 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2467 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2468 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2469 {
2470 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2471 pPT->a[i].u = 0;
2472 cRefs--;
2473 if (!cRefs)
2474 return;
2475 }
2476#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2477 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2478 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2479 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2480 {
2481 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2482 pPT->a[i].u = 0;
2483 }
2484#endif
2485 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2486 break;
2487 }
2488
2489 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2490 {
2491 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2492 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2493 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2494 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2495 {
2496 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2497 pPT->a[i].u = 0;
2498 cRefs--;
2499 if (!cRefs)
2500 return;
2501 }
2502#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2503 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2504 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2505 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
2506 {
2507 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2508 pPT->a[i].u = 0;
2509 }
2510#endif
2511 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2512 break;
2513 }
2514
2515 default:
2516 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2517 }
2518}
2519
2520
2521/**
2522 * Scans one shadow page table for mappings of a physical page.
2523 *
2524 * @param pVM The VM handle.
2525 * @param pPhysPage The guest page in question.
2526 * @param iShw The shadow page table.
2527 * @param cRefs The number of references made in that PT.
2528 */
2529void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2530{
2531 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2532 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2533 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2534 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2535 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2536 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2537}
2538
2539
2540/**
2541 * Flushes a list of shadow page tables mapping the same physical page.
2542 *
2543 * @param pVM The VM handle.
2544 * @param pPhysPage The guest page in question.
2545 * @param iPhysExt The physical cross reference extent list to flush.
2546 */
2547void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2548{
2549 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2550 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2551 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt\n", pPhysPage->HCPhys, iPhysExt));
2552
2553 const uint16_t iPhysExtStart = iPhysExt;
2554 PPGMPOOLPHYSEXT pPhysExt;
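 /* Walk the extent chain; every occupied aidx slot names a shadow page
    table that maps this physical page and must be flushed. */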
2555 do
2556 {
2557 Assert(iPhysExt < pPool->cMaxPhysExts);
2558 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2559 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2560 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2561 {
2562 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2563 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2564 }
2565
2566 /* next */
2567 iPhysExt = pPhysExt->iNext;
2568 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2569
2570 /* insert the list into the free list and clear the ram range entry. */
2571 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2572 pPool->iPhysExtFreeHead = iPhysExtStart;
2573 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2574
2575 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2576}
2577#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2578
2579
2580/**
2581 * Scans all shadow page tables for mappings of a physical page.
2582 *
2583 * This may be slow, but it's most likely more efficient than cleaning
2584 * out the entire page pool / cache.
2585 *
2586 * @returns VBox status code.
2587 * @retval VINF_SUCCESS if all references have been successfully cleared.
2588 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2589 * a page pool cleaning.
2590 *
2591 * @param pVM The VM handle.
2592 * @param pPhysPage The guest page in question.
2593 */
2594int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2595{
2596 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2597 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2598 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2599 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2600
2601#if 1
2602 /*
2603 * There is a limit to what makes sense.
2604 */
2605 if (pPool->cPresent > 1024)
2606 {
2607 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2608 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2609 return VINF_PGM_GCPHYS_ALIASED;
2610 }
2611#endif
2612
2613 /*
2614 * Iterate all the pages until we've encountered all that are in use.
2615 * This is a simple but not quite optimal solution.
2616 */
2617 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2618 const uint32_t u32 = u64;
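 /* A 32-bit shadow PT can only hold page frame addresses below 4GB, so the
    truncated pattern is what such a PT would contain for this page. */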
2619 unsigned cLeft = pPool->cUsedPages;
2620 unsigned iPage = pPool->cCurPages;
2621 while (--iPage >= PGMPOOL_IDX_FIRST)
2622 {
2623 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2624 if (pPage->GCPhys != NIL_RTGCPHYS)
2625 {
2626 switch (pPage->enmKind)
2627 {
2628 /*
2629 * We only care about shadow page tables.
2630 */
2631 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2632 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2633 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2634 {
2635 unsigned cPresent = pPage->cPresent;
2636 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2637 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2638 if (pPT->a[i].n.u1Present)
2639 {
2640 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2641 {
2642 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2643 pPT->a[i].u = 0;
2644 }
2645 if (!--cPresent)
2646 break;
2647 }
2648 break;
2649 }
2650
2651 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2652 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2653 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2654 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2655 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2656 {
2657 unsigned cPresent = pPage->cPresent;
2658 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2659 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2660 if (pPT->a[i].n.u1Present)
2661 {
2662 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2663 {
2664 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2665 pPT->a[i].u = 0;
2666 }
2667 if (!--cPresent)
2668 break;
2669 }
2670 break;
2671 }
2672 }
2673 if (!--cLeft)
2674 break;
2675 }
2676 }
2677
2678 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2679 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2680 return VINF_SUCCESS;
2681}
2682
2683
2684/**
2685 * Clears the user entry in a user table.
2686 *
2687 * This is used to remove all references to a page when flushing it.
2688 */
2689static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2690{
2691 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2692 Assert(pUser->iUser < pPool->cCurPages);
2693
2694 /*
2695 * Map the user page.
2696 */
2697 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2698 union
2699 {
2700 uint64_t *pau64;
2701 uint32_t *pau32;
2702 } u;
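 /* The user table holds 32-bit or 64-bit entries depending on its kind, so
    map it once and pick the view matching pUserPage->enmKind below. */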
2703 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
2704
2705 /* Safety precaution in case we change the paging for other modes too in the future. */
2706 Assert(PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) != pPage->Core.Key);
2707
2708#ifdef VBOX_STRICT
2709 /*
2710 * Some sanity checks.
2711 */
2712 switch (pUserPage->enmKind)
2713 {
2714 case PGMPOOLKIND_ROOT_32BIT_PD:
2715 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2716 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2717 break;
2718 case PGMPOOLKIND_ROOT_PAE_PD:
2719 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2720 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2721 break;
2722 case PGMPOOLKIND_ROOT_PDPT:
2723 Assert(pUser->iUserTable < 4);
2724 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2725 break;
2726 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2727 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2728 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2729 break;
2730 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2731 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2732 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2733 break;
2734 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2735 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2736 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2737 break;
2738 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2739 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2740 /* GCPhys >> PAGE_SHIFT is the index here */
2741 break;
2742 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2743 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2744 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2745 break;
2746
2747 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2748 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2749 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2750 break;
2751
2752 case PGMPOOLKIND_ROOT_NESTED:
2753 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2754 break;
2755
2756 default:
2757 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2758 break;
2759 }
2760#endif /* VBOX_STRICT */
2761
2762 /*
2763 * Clear the entry in the user page.
2764 */
2765 switch (pUserPage->enmKind)
2766 {
2767 /* 32-bit entries */
2768 case PGMPOOLKIND_ROOT_32BIT_PD:
2769 u.pau32[pUser->iUserTable] = 0;
2770 break;
2771
2772 /* 64-bit entries */
2773 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2774 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2775 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2776 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2777 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2778 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2779 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2780 case PGMPOOLKIND_ROOT_PAE_PD:
2781 case PGMPOOLKIND_ROOT_PDPT:
2782 case PGMPOOLKIND_ROOT_NESTED:
2783 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2784 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2785 u.pau64[pUser->iUserTable] = 0;
2786 break;
2787
2788 default:
2789 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2790 }
2791}
2792
2793
2794/**
2795 * Clears all users of a page.
2796 */
2797static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2798{
2799 /*
2800 * Free all the user records.
2801 */
2802 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2803 uint16_t i = pPage->iUserHead;
2804 while (i != NIL_PGMPOOL_USER_INDEX)
2805 {
2806 /* Clear the entry in the user table. */
2807 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2808
2809 /* Free it. */
2810 const uint16_t iNext = paUsers[i].iNext;
2811 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2812 paUsers[i].iNext = pPool->iUserFreeHead;
2813 pPool->iUserFreeHead = i;
2814
2815 /* Next. */
2816 i = iNext;
2817 }
2818 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2819}
2820
2821
2822#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2823/**
2824 * Allocates a new physical cross reference extent.
2825 *
2826 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2827 * @param pVM The VM handle.
2828 * @param piPhysExt Where to store the phys ext index.
2829 */
2830PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2831{
2832 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2833 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2834 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2835 {
2836 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2837 return NULL;
2838 }
2839 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2840 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2841 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2842 *piPhysExt = iPhysExt;
2843 return pPhysExt;
2844}
2845
2846
2847/**
2848 * Frees a physical cross reference extent.
2849 *
2850 * @param pVM The VM handle.
2851 * @param iPhysExt The extent to free.
2852 */
2853void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2854{
2855 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2856 Assert(iPhysExt < pPool->cMaxPhysExts);
2857 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2858 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2859 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2860 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2861 pPool->iPhysExtFreeHead = iPhysExt;
2862}
2863
2864
2865/**
2866 * Frees a whole list of physical cross reference extents.
2867 *
2868 * @param pVM The VM handle.
2869 * @param iPhysExt The index of the head of the extent list to free.
2870 */
2871void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2872{
2873 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2874
2875 const uint16_t iPhysExtStart = iPhysExt;
2876 PPGMPOOLPHYSEXT pPhysExt;
2877 do
2878 {
2879 Assert(iPhysExt < pPool->cMaxPhysExts);
2880 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
2881 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2882 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2883
2884 /* next */
2885 iPhysExt = pPhysExt->iNext;
2886 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2887
2888 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2889 pPool->iPhysExtFreeHead = iPhysExtStart;
2890}
2891
2892/**
2893 * Insert a reference into a list of physical cross reference extents.
2894 *
2895 * @returns The new ram range flags (top 16-bits).
2896 *
2897 * @param pVM The VM handle.
2898 * @param iPhysExt The physical extent index of the list head.
2899 * @param iShwPT The shadow page table index.
2900 *
2901 */
2902static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2903{
2904 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2905 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
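 /* The value returned from here becomes the top 16 bits of the ram range
    entry: the IDX field holds the extent list head (or the overflowed
    marker) and the CREFS field holds MM_RAM_FLAGS_CREFS_PHYSEXT. */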
2906
2907 /* special common case. */
2908 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2909 {
2910 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2911 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2912 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2913 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2914 }
2915
2916 /* general treatment. */
2917 const uint16_t iPhysExtStart = iPhysExt;
2918 unsigned cMax = 15;
2919 for (;;)
2920 {
2921 Assert(iPhysExt < pPool->cMaxPhysExts);
2922 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2923 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2924 {
2925 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2926 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2927 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2928 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2929 }
2930 if (!--cMax)
2931 {
2932 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2933 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2934 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2935 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2936 }
 /* Advance to the next extent in the chain; when we reach the end, fall
    out of the loop and prepend a fresh extent below. */
 iPhysExt = paPhysExts[iPhysExt].iNext;
 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
 break;
2937 }
2938
2939 /* add another extent to the list. */
2940 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2941 if (!pNew)
2942 {
2943 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2944 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2945 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2946 }
2947 pNew->iNext = iPhysExtStart;
2948 pNew->aidx[0] = iShwPT;
2949 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2950 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2951}
2952
2953
2954/**
2955 * Add a reference to guest physical page where extents are in use.
2956 *
2957 * @returns The new ram range flags (top 16-bits).
2958 *
2959 * @param pVM The VM handle.
2960 * @param u16 The ram range flags (top 16-bits).
2961 * @param iShwPT The shadow page table index.
2962 */
2963uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2964{
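 /* u16 is the current tracking state: a plain shadow PT index with CREFS
    equal to 1, an extent list head with CREFS equal to PHYSEXT, or the
    PHYSEXT/IDX_OVERFLOWED combination once we've stopped tracking. */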
2965 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2966 {
2967 /*
2968 * Convert to extent list.
2969 */
2970 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2971 uint16_t iPhysExt;
2972 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2973 if (pPhysExt)
2974 {
2975 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2976 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2977 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2978 pPhysExt->aidx[1] = iShwPT;
2979 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2980 }
2981 else
2982 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2983 }
2984 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2985 {
2986 /*
2987 * Insert into the extent list.
2988 */
2989 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2990 }
2991 else
2992 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2993 return u16;
2994}
2995
2996
2997/**
2998 * Clear references to guest physical memory.
2999 *
3000 * @param pPool The pool.
3001 * @param pPage The page.
3002 * @param pPhysPage Pointer to the aPages entry in the ram range.
3003 */
3004void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3005{
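 /* The tracking state is packed into the top of HCPhys: the CREFS field
    (expected to be the PHYSEXT marker here) and the IDX field giving the
    head of the extent chain or MM_RAM_FLAGS_IDX_OVERFLOWED. */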
3006 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
3007 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3008
3009 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
3010 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
3011 {
3012 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3013 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3014 do
3015 {
3016 Assert(iPhysExt < pPool->cMaxPhysExts);
3017
3018 /*
3019 * Look for the shadow page and check if it's all freed.
3020 */
3021 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3022 {
3023 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3024 {
3025 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3026
3027 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3028 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3029 {
3030 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3031 return;
3032 }
3033
3034 /* we can free the node. */
3035 PVM pVM = pPool->CTX_SUFF(pVM);
3036 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3037 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3038 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3039 {
3040 /* lonely node */
3041 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3042 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
3043 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3044 }
3045 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3046 {
3047 /* head */
3048 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
3049 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
3050 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
3051 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
3052 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3053 }
3054 else
3055 {
3056 /* in list */
3057 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
3058 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3059 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3060 }
3061 iPhysExt = iPhysExtNext;
3062 return;
3063 }
3064 }
3065
3066 /* next */
3067 iPhysExtPrev = iPhysExt;
3068 iPhysExt = paPhysExts[iPhysExt].iNext;
3069 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3070
3071 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
3072 }
3073 else /* nothing to do */
3074 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
3075}
3076
3077
3078
3079/**
3080 * Clear references to guest physical memory.
3081 *
3082 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3083 * is assumed to be correct, so the linear search can be skipped and we can assert
3084 * at an earlier point.
3085 *
3086 * @param pPool The pool.
3087 * @param pPage The page.
3088 * @param HCPhys The host physical address corresponding to the guest page.
3089 * @param GCPhys The guest physical address corresponding to HCPhys.
3090 */
3091static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3092{
3093 /*
3094 * Walk range list.
3095 */
3096 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3097 while (pRam)
3098 {
3099 RTGCPHYS off = GCPhys - pRam->GCPhys;
3100 if (off < pRam->cb)
3101 {
3102 /* does it match? */
3103 const unsigned iPage = off >> PAGE_SHIFT;
3104 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3105 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3106 Log(("pgmPoolTracDerefGCPhys %VHp vs %VHp\n", HCPhysPage, HCPhys));
3107 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3108 {
3109 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3110 return;
3111 }
3112 break;
3113 }
3114 pRam = pRam->CTX_SUFF(pNext);
3115 }
3116 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
3117}
3118
3119
3120/**
3121 * Clear references to guest physical memory.
3122 *
3123 * @param pPool The pool.
3124 * @param pPage The page.
3125 * @param HCPhys The host physical address corresponding to the guest page.
3126 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3127 */
3128static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3129{
3130 /*
3131 * Walk range list.
3132 */
3133 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3134 while (pRam)
3135 {
3136 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3137 if (off < pRam->cb)
3138 {
3139 /* does it match? */
3140 const unsigned iPage = off >> PAGE_SHIFT;
3141 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3142 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3143 {
3144 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3145 return;
3146 }
3147 break;
3148 }
3149 pRam = pRam->CTX_SUFF(pNext);
3150 }
3151
3152 /*
3153 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3154 */
3155 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3156 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3157 while (pRam)
3158 {
3159 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3160 while (iPage-- > 0)
3161 {
3162 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3163 {
3164 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
3165 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3166 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3167 return;
3168 }
3169 }
3170 pRam = pRam->CTX_SUFF(pNext);
3171 }
3172
3173 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
3174}
3175
3176
3177/**
3178 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3179 *
3180 * @param pPool The pool.
3181 * @param pPage The page.
3182 * @param pShwPT The shadow page table (mapping of the page).
3183 * @param pGstPT The guest page table.
3184 */
3185DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3186{
3187 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3188 if (pShwPT->a[i].n.u1Present)
3189 {
3190 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3191 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3192 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3193 if (!--pPage->cPresent)
3194 break;
3195 }
3196}
3197
3198
3199/**
3200 * Clear references to guest physical memory in a PAE / 32-bit page table.
3201 *
3202 * @param pPool The pool.
3203 * @param pPage The page.
3204 * @param pShwPT The shadow page table (mapping of the page).
3205 * @param pGstPT The guest page table (just a half one).
3206 */
3207DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3208{
3209 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3210 if (pShwPT->a[i].n.u1Present)
3211 {
3212 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
3213 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3214 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3215 }
3216}
3217
3218
3219/**
3220 * Clear references to guest physical memory in a PAE / PAE page table.
3221 *
3222 * @param pPool The pool.
3223 * @param pPage The page.
3224 * @param pShwPT The shadow page table (mapping of the page).
3225 * @param pGstPT The guest page table.
3226 */
3227DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3228{
3229 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3230 if (pShwPT->a[i].n.u1Present)
3231 {
3232 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
3233 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3234 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3235 }
3236}
3237
3238
3239/**
3240 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3241 *
3242 * @param pPool The pool.
3243 * @param pPage The page.
3244 * @param pShwPT The shadow page table (mapping of the page).
3245 */
3246DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3247{
3248 RTGCPHYS GCPhys = pPage->GCPhys;
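 /* The shadow PT of a big page maps a contiguous guest range, so the guest
    address of each entry is simply pPage->GCPhys plus the entry offset. */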
3249 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3250 if (pShwPT->a[i].n.u1Present)
3251 {
3252 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3253 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3254 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3255 }
3256}
3257
3258
3259/**
3260 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3261 *
3262 * @param pPool The pool.
3263 * @param pPage The page.
3264 * @param pShwPT The shadow page table (mapping of the page).
3265 */
3266DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3267{
3268 RTGCPHYS GCPhys = pPage->GCPhys;
3269 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3270 if (pShwPT->a[i].n.u1Present)
3271 {
3272 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%VGp\n",
3273 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3274 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3275 }
3276}
3277#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3278
3279
3280/**
3281 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
3282 *
3283 * @param pPool The pool.
3284 * @param pPage The page.
3285 * @param pShwPD The shadow page directory (mapping of the page).
3286 */
3287DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3288{
3289 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3290 {
3291 if (pShwPD->a[i].n.u1Present)
3292 {
3293 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3294 if (pSubPage)
3295 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3296 else
3297 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3298 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3299 }
3300 }
3301}
3302
3303
3304/**
3305 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3306 *
3307 * @param pPool The pool.
3308 * @param pPage The page.
3309 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3310 */
3311DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3312{
3313 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3314 {
3315 if (pShwPDPT->a[i].n.u1Present)
3316 {
3317 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3318 if (pSubPage)
3319 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3320 else
3321 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3322 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3323 }
3324 }
3325}
3326
3327/**
3328 * Clear references to shadowed pages in a 64-bit level 4 page table.
3329 *
3330 * @param pPool The pool.
3331 * @param pPage The page.
3332 * @param pShwPML4 The shadow level 4 page table (mapping of the page).
3333 */
3334DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3335{
3336 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3337 {
3338 if (pShwPML4->a[i].n.u1Present)
3339 {
3340 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3341 if (pSubPage)
3342 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3343 else
3344 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3345 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3346 }
3347 }
3348}
3349
3350/**
3351 * Clear references to guest physical memory in an EPT page table.
3352 *
3353 * @param pPool The pool.
3354 * @param pPage The page.
3355 * @param pShwPT The shadow page table (mapping of the page).
3356 */
3357DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
3358{
3359 RTGCPHYS GCPhys = pPage->GCPhys;
3360 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3361 if (pShwPT->a[i].n.u1Present)
3362 {
3363 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
3364 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys));
3365 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
3366 }
3367}
3368
3369/**
3370 * Clear references to shadowed pages in an EPT page directory.
3371 *
3372 * @param pPool The pool.
3373 * @param pPage The page.
3374 * @param pShwPD The shadow page directory (mapping of the page).
3375 */
3376DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
3377{
3378 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3379 {
3380 if (pShwPD->a[i].n.u1Present)
3381 {
3382 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
3383 if (pSubPage)
3384 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3385 else
3386 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
3387 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3388 }
3389 }
3390}
3391
3392/**
3393 * Clear references to shadowed pages in an EPT page directory pointer table.
3394 *
3395 * @param pPool The pool.
3396 * @param pPage The page.
3397 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3398 */
3399DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
3400{
3401 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3402 {
3403 if (pShwPDPT->a[i].n.u1Present)
3404 {
3405 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
3406 if (pSubPage)
3407 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3408 else
3409 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
3410 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3411 }
3412 }
3413}
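
/*
 * The three EPT helpers above mirror their x86/PAE counterparts entry for
 * entry; only the entry masks (EPT_PTE_PG_MASK, EPT_PDE_PG_MASK,
 * EPT_PDPTE_PG_MASK) differ, since EPT defines its own entry layout for
 * nested paging.
 */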
3414
3415/**
3416 * Clears all references made by this page.
3417 *
3418 * This includes other shadow pages and GC physical addresses.
3419 *
3420 * @param pPool The pool.
3421 * @param pPage The page.
3422 */
3423static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3424{
3425 /*
3426 * Map the shadow page and take action according to the page kind.
3427 */
3428 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3429 switch (pPage->enmKind)
3430 {
3431#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3432 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3433 {
3434 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3435 void *pvGst;
3436 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3437 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3438 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3439 break;
3440 }
3441
3442 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3443 {
3444 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3445 void *pvGst;
3446 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3447 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3448 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3449 break;
3450 }
3451
3452 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3453 {
3454 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3455 void *pvGst;
3456 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3457 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3458 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3459 break;
3460 }
3461
3462 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3463 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3464 {
3465 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3466 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3467 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3468 break;
3469 }
3470
3471 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
3472 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3473 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3474 {
3475 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3476 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3477 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3478 break;
3479 }
3480
3481#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3482 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3483 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3484 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3485 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3486 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3487 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3488 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3489 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3490 break;
3491#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3492
3493 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3494 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3495 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3496 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3497 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3498 break;
3499
3500 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3501 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3502 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3503 break;
3504
3505 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3506 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3507 break;
3508
3509 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3510 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
3511 break;
3512
3513 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3514 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
3515 break;
3516
3517 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3518 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
3519 break;
3520
3521 default:
3522 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3523 }
3524
3525 /* Paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3526 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3527 ASMMemZeroPage(pvShw);
3528 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3529 pPage->fZeroed = true;
3530}
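
/*
 * Note: the ASMMemZeroPage above pairs with the fZeroed flag checked in
 * pgmPoolAlloc; because a page is zeroed when its references are cleared,
 * the allocator can usually skip the zeroing pass.  As the comment above
 * says, this is belt-and-braces and meant to go away eventually.
 */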
3531#endif /* PGMPOOL_WITH_USER_TRACKING */
3532
3533
3534/**
3535 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3536 *
3537 * @param pPool The pool.
3538 */
3539static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3540{
3541 /*
3542 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST-1.
3543 */
3544 Assert(NIL_PGMPOOL_IDX == 0);
3545 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3546 {
3547 /*
3548 * Get the page address.
3549 */
3550 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3551 union
3552 {
3553 uint64_t *pau64;
3554 uint32_t *pau32;
3555 } u;
3556 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
3557
3558 /*
3559 * Mark the shadowed entries not present.
3560 */
3561 switch (pPage->enmKind)
3562 {
3563 case PGMPOOLKIND_ROOT_32BIT_PD:
3564 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3565 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3566 u.pau32[iPage] = 0;
3567 break;
3568
3569 case PGMPOOLKIND_ROOT_PAE_PD:
3570 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
3571 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3572 u.pau64[iPage] = 0;
3573 break;
3574
3575 case PGMPOOLKIND_ROOT_PDPT:
3576 /* Currently not the root of any shadowed pages; ignore it. */
3577 break;
3578
3579 case PGMPOOLKIND_ROOT_NESTED:
3580 ASMMemZero32(u.pau64, PAGE_SIZE);
3581 break;
3582 }
3583 }
3584
3585 /*
3586 * Paranoia (to be removed), flag a global CR3 sync.
3587 */
3588 VM_FF_SET(pPool->CTX_SUFF(pVM), VM_FF_PGM_SYNC_CR3);
3589}
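
/*
 * Pool index layout assumed by the loop above and by pgmPoolFlushAllInt
 * below (a sketch derived from the assertions in this file):
 *
 *      0                               NIL_PGMPOOL_IDX (never a real page)
 *      1 .. PGMPOOL_IDX_FIRST - 1      fixed special root pages
 *      PGMPOOL_IDX_FIRST .. cCurPages  dynamically allocated shadow pages
 */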
3590
3591
3592/**
3593 * Flushes the entire cache.
3594 *
3595 * It asserts a global CR3 flush (FF) and assumes the caller is aware of this
3596 * and will execute the CR3 flush.
3597 *
3598 * @param pPool The pool.
3599 */
3600static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3601{
3602 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3603 LogFlow(("pgmPoolFlushAllInt:\n"));
3604
3605 /*
3606 * If there are no pages in the pool, there is nothing to do.
3607 */
3608 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3609 {
3610 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3611 return;
3612 }
3613
3614 /*
3615 * Nuke the free list and reinsert all pages into it.
3616 */
3617 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3618 {
3619 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3620
3621#ifdef IN_RING3
3622 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMR3, pPage->pvPageR3));
3623#endif
3624#ifdef PGMPOOL_WITH_MONITORING
3625 if (pPage->fMonitored)
3626 pgmPoolMonitorFlush(pPool, pPage);
3627 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3628 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3629 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3630 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3631 pPage->cModifications = 0;
3632#endif
3633 pPage->GCPhys = NIL_RTGCPHYS;
3634 pPage->enmKind = PGMPOOLKIND_FREE;
3635 Assert(pPage->idx == i);
3636 pPage->iNext = i + 1;
3637 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3638 pPage->fSeenNonGlobal = false;
3639 pPage->fMonitored = false;
3640 pPage->fCached = false;
3641 pPage->fReusedFlushPending = false;
3642 pPage->fCR3Mix = false;
3643#ifdef PGMPOOL_WITH_USER_TRACKING
3644 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3645#endif
3646#ifdef PGMPOOL_WITH_CACHE
3647 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3648 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3649#endif
3650 }
3651 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3652 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3653 pPool->cUsedPages = 0;
3654
3655#ifdef PGMPOOL_WITH_USER_TRACKING
3656 /*
3657 * Zap and reinitialize the user records.
3658 */
3659 pPool->cPresent = 0;
3660 pPool->iUserFreeHead = 0;
3661 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3662 const unsigned cMaxUsers = pPool->cMaxUsers;
3663 for (unsigned i = 0; i < cMaxUsers; i++)
3664 {
3665 paUsers[i].iNext = i + 1;
3666 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3667 paUsers[i].iUserTable = 0xfffffffe;
3668 }
3669 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3670#endif
3671
3672#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3673 /*
3674 * Clear all the GCPhys links and rebuild the phys ext free list.
3675 */
3676 for (PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3677 pRam;
3678 pRam = pRam->CTX_SUFF(pNext))
3679 {
3680 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3681 while (iPage-- > 0)
3682 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3683 }
3684
3685 pPool->iPhysExtFreeHead = 0;
3686 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3687 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3688 for (unsigned i = 0; i < cMaxPhysExts; i++)
3689 {
3690 paPhysExts[i].iNext = i + 1;
3691 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3692 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3693 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3694 }
3695 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3696#endif
3697
3698#ifdef PGMPOOL_WITH_MONITORING
3699 /*
3700 * Just zap the modified list.
3701 */
3702 pPool->cModifiedPages = 0;
3703 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3704#endif
3705
3706#ifdef PGMPOOL_WITH_CACHE
3707 /*
3708 * Clear the GCPhys hash and the age list.
3709 */
3710 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3711 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3712 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3713 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3714#endif
3715
3716 /*
3717 * Flush all the special root pages.
3718 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3719 */
3720 pgmPoolFlushAllSpecialRoots(pPool);
3721 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3722 {
3723 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3724 pPage->iNext = NIL_PGMPOOL_IDX;
3725#ifdef PGMPOOL_WITH_MONITORING
3726 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3727 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3728 pPage->cModifications = 0;
3729 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3730 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3731 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3732 if (pPage->fMonitored)
3733 {
3734 PVM pVM = pPool->CTX_SUFF(pVM);
3735 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3736 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3737 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3738 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
3739 pPool->pszAccessHandler);
3740 AssertFatalRCSuccess(rc);
3741# ifdef PGMPOOL_WITH_CACHE
3742 pgmPoolHashInsert(pPool, pPage);
3743# endif
3744 }
3745#endif
3746#ifdef PGMPOOL_WITH_USER_TRACKING
3747 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3748#endif
3749#ifdef PGMPOOL_WITH_CACHE
3750 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3751 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3752#endif
3753 }
3754
3755 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3756}
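
/*
 * Illustrative caller sketch (hypothetical, not code from this file): a
 * full flush leaves the VM_FF_PGM_SYNC_CR3 force-action flag set, so the
 * caller is expected to let the next CR3 sync rebuild the shadow
 * structures, along the lines of:
 *
 *      pgmPoolFlushAll(pVM);
 *      Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
 *      // ... the actual rebuild happens in the following SyncCR3 ...
 */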
3757
3758
3759/**
3760 * Flushes a pool page.
3761 *
3762 * This moves the page to the free list after removing all user references to it.
3763 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3764 *
3765 * @returns VBox status code.
3766 * @retval VINF_SUCCESS on success.
3767 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3768 * @param pPool The pool.
3769 * @param pPage The shadow page to flush.
3770 */
3771int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3772{
3773 int rc = VINF_SUCCESS;
3774 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3775 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3776 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3777
3778 /*
3779 * Quietly reject any attempts at flushing any of the special root pages.
3780 */
3781 if (pPage->idx < PGMPOOL_IDX_FIRST)
3782 {
3783 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3784 return VINF_SUCCESS;
3785 }
3786
3787 /*
3788 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping.
3789 */
3790 if (PGMGetHyperCR3(pPool->CTX_SUFF(pVM)) == pPage->Core.Key)
3791 {
3792 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4, ("Can't free the shadow CR3! (%VGp vs %VGp kind=%d\n", PGMGetHyperCR3(pPool->CTX_SUFF(pVM)), pPage->Core.Key, pPage->enmKind));
3793 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3794 return VINF_SUCCESS;
3795 }
3796
3797 /*
3798 * Mark the page as being in need of an ASMMemZeroPage().
3799 */
3800 pPage->fZeroed = false;
3801
3802#ifdef PGMPOOL_WITH_USER_TRACKING
3803 /*
3804 * Clear the page.
3805 */
3806 pgmPoolTrackClearPageUsers(pPool, pPage);
3807 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3808 pgmPoolTrackDeref(pPool, pPage);
3809 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3810#endif
3811
3812#ifdef PGMPOOL_WITH_CACHE
3813 /*
3814 * Flush it from the cache.
3815 */
3816 pgmPoolCacheFlushPage(pPool, pPage);
3817#endif /* PGMPOOL_WITH_CACHE */
3818
3819#ifdef PGMPOOL_WITH_MONITORING
3820 /*
3821 * Deregister the monitoring.
3822 */
3823 if (pPage->fMonitored)
3824 rc = pgmPoolMonitorFlush(pPool, pPage);
3825#endif
3826
3827 /*
3828 * Free the page.
3829 */
3830 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3831 pPage->iNext = pPool->iFreeHead;
3832 pPool->iFreeHead = pPage->idx;
3833 pPage->enmKind = PGMPOOLKIND_FREE;
3834 pPage->GCPhys = NIL_RTGCPHYS;
3835 pPage->fReusedFlushPending = false;
3836
3837 pPool->cUsedPages--;
3838 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3839 return rc;
3840}
3841
3842
3843/**
3844 * Frees a usage of a pool page.
3845 *
3846 * The caller is responsible for updating the user table so that it no longer
3847 * references the shadow page.
3848 *
3849 * @param pPool The pool.
3850 * @param pPage The shadow page.
3851 * @param iUser The shadow page pool index of the user table.
3852 * @param iUserTable The index into the user table (shadowed).
3853 */
3854void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3855{
3856 STAM_PROFILE_START(&pPool->StatFree, a);
3857 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3858 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3859 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3860#ifdef PGMPOOL_WITH_USER_TRACKING
3861 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3862#endif
3863#ifdef PGMPOOL_WITH_CACHE
3864 if (!pPage->fCached)
3865#endif
3866 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3867 STAM_PROFILE_STOP(&pPool->StatFree, a);
3868}
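
/*
 * Note: cached pages are deliberately not flushed here; only the user link
 * is removed, presumably so the cache can hand the same shadow table out
 * again later without rebuilding it.
 */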
3869
3870
3871/**
3872 * Makes one or more free pages available.
3873 *
3874 * @returns VBox status code.
3875 * @retval VINF_SUCCESS on success.
3876 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3877 *
3878 * @param pPool The pool.
3879 * @param iUser The user of the page.
3880 */
3881static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3882{
3883 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3884
3885 /*
3886 * If the pool isn't fully grown yet, expand it.
3887 */
3888 if (pPool->cCurPages < pPool->cMaxPages)
3889 {
3890 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3891#ifdef IN_RING3
3892 int rc = PGMR3PoolGrow(pPool->pVMR3);
3893#else
3894 int rc = CTXALLMID(VMM, CallHost)(pPool->CTX_SUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3895#endif
3896 if (VBOX_FAILURE(rc))
3897 return rc;
3898 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3899 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3900 return VINF_SUCCESS;
3901 }
3902
3903#ifdef PGMPOOL_WITH_CACHE
3904 /*
3905 * Free one cached page.
3906 */
3907 return pgmPoolCacheFreeOne(pPool, iUser);
3908#else
3909 /*
3910 * Flush the pool.
3911 * If we have tracking enabled, it should be possible to come up with
3912 * a cheap replacement strategy...
3913 */
3914 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
3915 Assert(!CPUMIsGuestInLongMode(pPool->CTX_SUFF(pVM)));
3916 pgmPoolFlushAllInt(pPool);
3917 return VERR_PGM_POOL_FLUSHED;
3918#endif
3919}
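
/*
 * Note: on VINF_SUCCESS at least one page ends up on the free list
 * (pgmPoolAlloc release-asserts this).  Pool growth is a ring-3 operation,
 * so in GC/R0 the PGMR3PoolGrow call is routed through the VMMCALLHOST
 * interface above.
 */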
3920
3921
3922/**
3923 * Allocates a page from the pool.
3924 *
3925 * This page may actually be a cached page and not in need of any processing
3926 * on the caller's part.
3927 *
3928 * @returns VBox status code.
3929 * @retval VINF_SUCCESS if a NEW page was allocated.
3930 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3931 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3932 * @param pVM The VM handle.
3933 * @param GCPhys The GC physical address of the page we're going to shadow.
3934 * For 4MB and 2MB PD entries, it's the first address the
3935 * shadow PT is covering.
3936 * @param enmKind The kind of mapping.
3937 * @param iUser The shadow page pool index of the user table.
3938 * @param iUserTable The index into the user table (shadowed).
3939 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3940 */
3941int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
3942{
3943 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3944 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3945 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3946 *ppPage = NULL;
3947
3948#ifdef PGMPOOL_WITH_CACHE
3949 if (pPool->fCacheEnabled)
3950 {
3951 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3952 if (VBOX_SUCCESS(rc2))
3953 {
3954 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3955 LogFlow(("pgmPoolAlloc: cached returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3956 return rc2;
3957 }
3958 }
3959#endif
3960
3961 /*
3962 * Allocate a new one.
3963 */
3964 int rc = VINF_SUCCESS;
3965 uint16_t iNew = pPool->iFreeHead;
3966 if (iNew == NIL_PGMPOOL_IDX)
3967 {
3968 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3969 if (VBOX_FAILURE(rc))
3970 {
3971 if (rc != VERR_PGM_POOL_CLEARED)
3972 {
3973 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3974 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3975 return rc;
3976 }
3977 Log(("pgmPoolMakeMoreFreePages failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
3978 rc = VERR_PGM_POOL_FLUSHED;
3979 }
3980 iNew = pPool->iFreeHead;
3981 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3982 }
3983
3984 /* unlink the free head */
3985 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3986 pPool->iFreeHead = pPage->iNext;
3987 pPage->iNext = NIL_PGMPOOL_IDX;
3988
3989 /*
3990 * Initialize it.
3991 */
3992 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3993 pPage->enmKind = enmKind;
3994 pPage->GCPhys = GCPhys;
3995 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3996 pPage->fMonitored = false;
3997 pPage->fCached = false;
3998 pPage->fReusedFlushPending = false;
3999 pPage->fCR3Mix = false;
4000#ifdef PGMPOOL_WITH_MONITORING
4001 pPage->cModifications = 0;
4002 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4003 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4004#endif
4005#ifdef PGMPOOL_WITH_USER_TRACKING
4006 pPage->cPresent = 0;
4007 pPage->iFirstPresent = ~0;
4008
4009 /*
4010 * Insert into the tracking and cache. If this fails, free the page.
4011 */
4012 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4013 if (VBOX_FAILURE(rc3))
4014 {
4015 if (rc3 != VERR_PGM_POOL_CLEARED)
4016 {
4017 pPool->cUsedPages--;
4018 pPage->enmKind = PGMPOOLKIND_FREE;
4019 pPage->GCPhys = NIL_RTGCPHYS;
4020 pPage->iNext = pPool->iFreeHead;
4021 pPool->iFreeHead = pPage->idx;
4022 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4023 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
4024 return rc3;
4025 }
4026 Log(("pgmPoolTrackInsert failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
4027 rc = VERR_PGM_POOL_FLUSHED;
4028 }
4029#endif /* PGMPOOL_WITH_USER_TRACKING */
4030
4031 /*
4032 * Commit the allocation, clear the page and return.
4033 */
4034#ifdef VBOX_WITH_STATISTICS
4035 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4036 pPool->cUsedPagesHigh = pPool->cUsedPages;
4037#endif
4038
4039 if (!pPage->fZeroed)
4040 {
4041 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4042 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4043 ASMMemZeroPage(pv);
4044 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4045 }
4046
4047 *ppPage = pPage;
4048 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4049 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4050 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4051 return rc;
4052}
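
/*
 * Usage sketch (hypothetical caller, for illustration only; GCPhysPT,
 * iUser and iUserTable are made-up values): the status code tells the
 * caller whether the returned shadow table still needs to be filled in.
 *
 *      PPGMPOOLPAGE pShwPage;
 *      int rc = pgmPoolAlloc(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                            iUser, iUserTable, &pShwPage);
 *      if (rc == VINF_PGM_CACHED_PAGE)
 *          ;   // cached: the shadow entries are already valid
 *      else if (VBOX_SUCCESS(rc))
 *          ;   // new zeroed page: sync the entries from the guest PT
 *      else
 *          ;   // e.g. VERR_PGM_POOL_FLUSHED: restart after the CR3 sync
 */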
4053
4054
4055/**
4056 * Frees a usage of a pool page.
4057 *
4058 * @param pVM The VM handle.
4059 * @param HCPhys The HC physical address of the shadow page.
4060 * @param iUser The shadow page pool index of the user table.
4061 * @param iUserTable The index into the user table (shadowed).
4062 */
4063void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4064{
4065 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4066 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4067 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4068}
4069
4070
4071/**
4072 * Gets an in-use page in the pool by its physical address.
4073 *
4074 * @returns Pointer to the page.
4075 * @param pVM The VM handle.
4076 * @param HCPhys The HC physical address of the shadow page.
4077 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
4078 */
4079PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
4080{
4081 /** @todo profile this! */
4082 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4083 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
4084 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
4085 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
4086 return pPage;
4087}
4088
4089
4090/**
4091 * Flushes the entire cache.
4092 *
4093 * It asserts a global CR3 flush (FF) and assumes the caller is aware of this
4094 * and will execute the CR3 flush.
4095 *
4096 * @param pVM The VM handle.
4097 */
4098void pgmPoolFlushAll(PVM pVM)
4099{
4100 LogFlow(("pgmPoolFlushAll:\n"));
4101 pgmPoolFlushAllInt(pVM->pgm.s.CTX_SUFF(pPool));
4102}
4103