VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 11711

Last change on this file since 11711 was 11711, checked in by vboxsync, 16 years ago

Deal with PGMR3MappingsUnfix flushing the entire page pool. In long mode the cached CR3 page will be flushed as well.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 140.2 KB
1/* $Id: PGMAllPool.cpp 11711 2008-08-27 16:08:15Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37
38#include <VBox/log.h>
39#include <VBox/err.h>
40#include <iprt/asm.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46__BEGIN_DECLS
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48#ifdef PGMPOOL_WITH_USER_TRACKING
49DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
50DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
51static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#endif
53#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
54static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
55#endif
56#ifdef PGMPOOL_WITH_CACHE
57static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
58#endif
59#ifdef PGMPOOL_WITH_MONITORING
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#endif
62#ifndef IN_RING3
63DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
64#endif
65__END_DECLS
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
88#ifdef IN_GC
89/**
90 * Maps a pool page into the current context.
91 *
92 * @returns Pointer to the mapping.
93 * @param pVM The VM handle.
94 * @param pPage The page to map.
95 */
96void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
97{
98 /* general pages. */
99 if (pPage->idx >= PGMPOOL_IDX_FIRST)
100 {
101 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
102 void *pv;
103 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
104 AssertReleaseRC(rc);
105 return pv;
106 }
107
108 /* special pages. */
109 switch (pPage->idx)
110 {
111 case PGMPOOL_IDX_PD:
112 return pVM->pgm.s.pGC32BitPD;
113 case PGMPOOL_IDX_PAE_PD:
114 case PGMPOOL_IDX_PAE_PD_0:
115 return pVM->pgm.s.apGCPaePDs[0];
116 case PGMPOOL_IDX_PAE_PD_1:
117 return pVM->pgm.s.apGCPaePDs[1];
118 case PGMPOOL_IDX_PAE_PD_2:
119 return pVM->pgm.s.apGCPaePDs[2];
120 case PGMPOOL_IDX_PAE_PD_3:
121 return pVM->pgm.s.apGCPaePDs[3];
122 case PGMPOOL_IDX_PDPT:
123 return pVM->pgm.s.pGCPaePDPT;
124 default:
125 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
126 return NULL;
127 }
128}
129#endif /* IN_GC */
130
131
132#ifdef PGMPOOL_WITH_MONITORING
133/**
134 * Determine the size of a write instruction.
135 * @returns number of bytes written.
136 * @param pDis The disassembler state.
137 */
138static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
139{
140 /*
141 * This is very crude and possibly wrong for some opcodes,
142 * but since it's not really supposed to be called we can
143 * probably live with that.
144 */
145 return DISGetParamSize(pDis, &pDis->param1);
146}
147
148
149/**
150 * Flushes a chain of pages sharing the same access monitor.
151 *
152 * @returns VBox status code suitable for scheduling.
153 * @param pPool The pool.
154 * @param pPage A page in the chain.
155 */
156int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
157{
158 LogFlow(("pgmPoolMonitorChainFlush: Flush page %VGp type=%d\n", pPage->GCPhys, pPage->enmKind));
159
160 /*
161 * Find the list head.
162 */
163 uint16_t idx = pPage->idx;
164 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
165 {
166 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
167 {
168 idx = pPage->iMonitoredPrev;
169 Assert(idx != pPage->idx);
170 pPage = &pPool->aPages[idx];
171 }
172 }
173
174 /*
175 * Iterate the list flushing each shadow page.
176 */
177 int rc = VINF_SUCCESS;
178 for (;;)
179 {
180 idx = pPage->iMonitoredNext;
181 Assert(idx != pPage->idx);
182 if (pPage->idx >= PGMPOOL_IDX_FIRST)
183 {
184 int rc2 = pgmPoolFlushPage(pPool, pPage);
185 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
186 rc = VINF_PGM_SYNC_CR3;
187 }
188 /* next */
189 if (idx == NIL_PGMPOOL_IDX)
190 break;
191 pPage = &pPool->aPages[idx];
192 }
193 return rc;
194}
195
196
197/**
198 * Wrapper for getting the current context pointer to the entry being modified.
199 *
200 * @returns Pointer to the current context mapping of the entry.
201 * @param pPool The pool.
202 * @param pvFault The fault virtual address.
203 * @param GCPhysFault The fault physical address.
204 * @param cbEntry The entry size.
205 */
206#ifdef IN_RING3
207DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
208#else
209DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
210#endif
211{
212#ifdef IN_GC
213 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
214
215#elif defined(IN_RING0)
216 void *pvRet;
217 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
218 AssertFatalRCSuccess(rc);
219 return pvRet;
220
221#elif defined(IN_RING3)
222 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
223#else
224# error "huh?"
225#endif
226}
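/* Worked example (illustrative): with cbEntry == sizeof(X86PTEPAE) == 8, a
 * fault at guest address ...1234 is masked down to ...1230, i.e. the start of
 * the 8-byte PAE entry being written, so callers always get a pointer to a
 * whole, aligned guest entry rather than to the faulting byte itself. */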
227
228
229/**
230 * Process shadow entries before they are changed by the guest.
231 *
232 * For PT entries we will clear them. For PD entries, we'll simply check
233 * for mapping conflicts and set the SyncCR3 FF if found.
234 *
235 * @param pPool The pool.
236 * @param pPage The head page.
237 * @param GCPhysFault The guest physical fault address.
238 * @param uAddress In R0 and GC this is the guest context fault address (flat).
239 * In R3 this is the host context 'fault' address.
240 * @param pCpu The disassembler state for figuring out the write size.
241 * This need not be specified if the caller knows we won't do cross entry accesses.
242 */
243#ifdef IN_RING3
244void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
245#else
246void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
247#endif
248{
249 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
250 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
251 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
252
253 LogFlow(("pgmPoolMonitorChainChanging: %VGv phys=%VGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
254
255 for (;;)
256 {
257 union
258 {
259 void *pv;
260 PX86PT pPT;
261 PX86PTPAE pPTPae;
262 PX86PD pPD;
263 PX86PDPAE pPDPae;
264 PX86PDPT pPDPT;
265 PX86PML4 pPML4;
266 } uShw;
267 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
268
269 switch (pPage->enmKind)
270 {
271 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
272 {
273 const unsigned iShw = off / sizeof(X86PTE);
274 if (uShw.pPT->a[iShw].n.u1Present)
275 {
276# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
277 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
278 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
279 pgmPoolTracDerefGCPhysHint(pPool, pPage,
280 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
281 pGstPte->u & X86_PTE_PG_MASK);
282# endif
283 uShw.pPT->a[iShw].u = 0;
284 }
285 break;
286 }
287
288 /* page/2 sized */
289 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
290 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
291 {
292 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
293 if (uShw.pPTPae->a[iShw].n.u1Present)
294 {
295# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
296 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
297 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
298 pgmPoolTracDerefGCPhysHint(pPool, pPage,
299 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
300 pGstPte->u & X86_PTE_PG_MASK);
301# endif
302 uShw.pPTPae->a[iShw].u = 0;
303 }
304 }
305 break;
306
307 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
308 {
309 const unsigned iShw = off / sizeof(X86PTEPAE);
310 if (uShw.pPTPae->a[iShw].n.u1Present)
311 {
312# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
313 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
314 Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
315 pgmPoolTracDerefGCPhysHint(pPool, pPage,
316 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
317 pGstPte->u & X86_PTE_PAE_PG_MASK);
318# endif
319 uShw.pPTPae->a[iShw].u = 0;
320 }
321
322 /* paranoia / a bit assumptive. */
323 if ( pCpu
324 && (off & 7)
325 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
326 {
327 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
328 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
329
330 if (uShw.pPTPae->a[iShw2].n.u1Present)
331 {
332# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
333 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
334 Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
335 pgmPoolTracDerefGCPhysHint(pPool, pPage,
336 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
337 pGstPte->u & X86_PTE_PAE_PG_MASK);
338# endif
339 uShw.pPTPae->a[iShw2].u = 0;
340 }
341 }
342
343 break;
344 }
345
346 case PGMPOOLKIND_ROOT_32BIT_PD:
347 {
348 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
349 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
350 {
351 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
352 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
353 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
354 }
355 /* paranoia / a bit assumptive. */
356 else if ( pCpu
357 && (off & 3)
358 && (off & 3) + cbWrite > sizeof(X86PTE))
359 {
360 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
361 if ( iShw2 != iShw
362 && iShw2 < RT_ELEMENTS(uShw.pPD->a)
363 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
364 {
365 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
366 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
367 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
368 }
369 }
370#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
371 if ( uShw.pPD->a[iShw].n.u1Present
372 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
373 {
374 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
375# ifdef IN_GC /* TLB load - we're pushing things a bit... */
376 ASMProbeReadByte(pvAddress);
377# endif
378 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
379 uShw.pPD->a[iShw].u = 0;
380 }
381#endif
382 break;
383 }
384
385 case PGMPOOLKIND_ROOT_PAE_PD:
386 {
387 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
388 for (unsigned i = 0; i < 2; i++, iShw++)
389 {
390 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
391 {
392 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
393 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
394 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
395 }
396 /* paranoia / a bit assumptive. */
397 else if ( pCpu
398 && (off & 3)
399 && (off & 3) + cbWrite > 4)
400 {
401 const unsigned iShw2 = iShw + 2;
402 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a)
403 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
404 {
405 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
406 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
407 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
408 }
409 }
410#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
411 if ( uShw.pPDPae->a[iShw].n.u1Present
412 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
413 {
414 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
415# ifdef IN_GC /* TLB load - we're pushing things a bit... */
416 ASMProbeReadByte(pvAddress);
417# endif
418 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
419 uShw.pPDPae->a[iShw].u = 0;
420 }
421#endif
422 }
423 break;
424 }
425
426 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
427 {
428 const unsigned iShw = off / sizeof(X86PDEPAE);
429 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
430 {
431 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
432 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
433 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
434 }
435#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
436 /* Causes trouble when the guest uses a PDE to refer to the whole page table
437 * level structure (invalidate here; faults later on when it tries to change
438 * the page table entries).
439 * -> recheck; probably only applies to the GC case. */
440 else
441 {
442 if (uShw.pPDPae->a[iShw].n.u1Present)
443 {
444 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
445 pgmPoolFree(pPool->CTXSUFF(pVM),
446 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
447 /* Note: hardcoded PAE implementation dependency */
448 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
449 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
450 uShw.pPDPae->a[iShw].u = 0;
451 }
452 }
453#endif
454 /* paranoia / a bit assumptive. */
455 if ( pCpu
456 && (off & 7)
457 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
458 {
459 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
460 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
461
462 if ( iShw2 != iShw
463 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
464 {
465 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
466 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
467 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
468 }
469#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
470 else
471 if (uShw.pPDPae->a[iShw2].n.u1Present)
472 {
473 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
474 pgmPoolFree(pPool->CTXSUFF(pVM),
475 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
476 /* Note: hardcoded PAE implementation dependency */
477 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
478 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
479 uShw.pPDPae->a[iShw2].u = 0;
480 }
481#endif
482 }
483 break;
484 }
485
486 case PGMPOOLKIND_ROOT_PDPT:
487 {
488 /* Hopefully this doesn't happen very often:
489 * - touching unused parts of the page
490 * - messing with the bits of pd pointers without changing the physical address
491 */
492 const unsigned iShw = off / sizeof(X86PDPE);
493 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
494 {
495 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
496 {
497 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
498 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
499 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
500 }
501 /* paranoia / a bit assumptive. */
502 else if ( pCpu
503 && (off & 7)
504 && (off & 7) + cbWrite > sizeof(X86PDPE))
505 {
506 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
507 if ( iShw2 != iShw
508 && iShw2 < X86_PG_PAE_PDPE_ENTRIES
509 && uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
510 {
511 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
512 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
513 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
514 }
515 }
516 }
517 break;
518 }
519
520#ifndef IN_GC
521 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
522 {
523 Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
524
525 const unsigned iShw = off / sizeof(X86PDEPAE);
526 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
527 {
528 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
529 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
530 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
531 }
532 else
533 {
534 if (uShw.pPDPae->a[iShw].n.u1Present)
535 {
536 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
537 pgmPoolFree(pPool->CTXSUFF(pVM),
538 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
539 pPage->idx,
540 iShw);
541 uShw.pPDPae->a[iShw].u = 0;
542 }
543 }
544 /* paranoia / a bit assumptive. */
545 if ( pCpu
546 && (off & 7)
547 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
548 {
549 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
550 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
551
552 if ( iShw2 != iShw
553 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
554 {
555 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
556 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
557 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
558 }
559 else
560 if (uShw.pPDPae->a[iShw2].n.u1Present)
561 {
562 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
563 pgmPoolFree(pPool->CTXSUFF(pVM),
564 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
565 pPage->idx,
566 iShw2);
567 uShw.pPDPae->a[iShw2].u = 0;
568 }
569 }
570 break;
571 }
572
573 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
574 {
575 /* Hopefully this doesn't happen very often:
576 * - messing with the bits of pd pointers without changing the physical address
577 */
578 if (!VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
579 {
580 const unsigned iShw = off / sizeof(X86PDPE);
581 if (uShw.pPDPT->a[iShw].n.u1Present)
582 {
583 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
584 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
585 uShw.pPDPT->a[iShw].u = 0;
586 }
587 /* paranoia / a bit assumptive. */
588 if ( pCpu
589 && (off & 7)
590 && (off & 7) + cbWrite > sizeof(X86PDPE))
591 {
592 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
593 if (uShw.pPDPT->a[iShw2].n.u1Present)
594 {
595 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
596 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
597 uShw.pPDPT->a[iShw2].u = 0;
598 }
599 }
600 }
601 break;
602 }
603
604 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
605 {
606 /* Hopefully this doesn't happen very often:
607 * - messing with the bits of pd pointers without changing the physical address
608 */
609 if (!VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
610 {
611 const unsigned iShw = off / sizeof(X86PDPE);
612 if (uShw.pPML4->a[iShw].n.u1Present)
613 {
614 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
615 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
616 uShw.pPML4->a[iShw].u = 0;
617 }
618 /* paranoia / a bit assumptive. */
619 if ( pCpu
620 && (off & 7)
621 && (off & 7) + cbWrite > sizeof(X86PDPE))
622 {
623 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
624 if (uShw.pPML4->a[iShw2].n.u1Present)
625 {
626 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
627 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
628 uShw.pPML4->a[iShw2].u = 0;
629 }
630 }
631 }
632 break;
633 }
634#endif /* !IN_GC */
635
636 default:
637 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
638 }
639
640 /* next */
641 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
642 return;
643 pPage = &pPool->aPages[pPage->iMonitoredNext];
644 }
645}
646
647
648# ifndef IN_RING3
649/**
650 * Checks if an access could be a fork operation in progress.
651 *
652 * Meaning that the guest is setting up the parent process for Copy-On-Write.
653 *
654 * @returns true if it's likely that we're forking, otherwise false.
655 * @param pPool The pool.
656 * @param pCpu The disassembled instruction.
657 * @param offFault The access offset.
658 */
659DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
660{
661 /*
662 * i386 linux is using btr to clear X86_PTE_RW.
663 * The functions involved are (2.6.16 source inspection):
664 * clear_bit
665 * ptep_set_wrprotect
666 * copy_one_pte
667 * copy_pte_range
668 * copy_pmd_range
669 * copy_pud_range
670 * copy_page_range
671 * dup_mmap
672 * dup_mm
673 * copy_mm
674 * copy_process
675 * do_fork
676 */
677 if ( pCpu->pCurInstr->opcode == OP_BTR
678 && !(offFault & 4)
679 /** @todo Validate that the bit index is X86_PTE_RW. */
680 )
681 {
682 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
683 return true;
684 }
685 return false;
686}
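/* Illustration (an assumption about the guest code, not verified against the
 * Linux sources): the pattern this heuristic targets looks roughly like
 *     lock btr dword [ptep], 1   ; bit 1 == X86_PTE_RW
 * issued once per PTE while the parent's page tables are copied for the child,
 * which is why a BTR whose fault offset lands in the low dword of an entry is
 * treated as a probable fork. */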
687
688
689/**
690 * Determine whether the page is likely to have been reused.
691 *
692 * @returns true if we consider the page as being reused for a different purpose.
693 * @returns false if we consider it to still be a paging page.
694 * @param pVM VM Handle.
695 * @param pPage The page in question.
696 * @param pRegFrame Trap register frame.
697 * @param pCpu The disassembly info for the faulting instruction.
698 * @param pvFault The fault address.
699 *
700 * @remark The REP prefix check is left to the caller because of STOSD/W.
701 */
702DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
703{
704#ifndef IN_GC
705 if ( HWACCMHasPendingIrq(pVM)
706 && (pRegFrame->rsp - pvFault) < 32)
707 {
708 /* Fault caused by stack writes while trying to inject an interrupt event. */
709 Log(("pgmPoolMonitorIsReused: reused %VGv for interrupt stack (rsp=%VGv).\n", pvFault, pRegFrame->rsp));
710 return true;
711 }
712#else
713 NOREF(pVM);
714#endif
715
716 switch (pCpu->pCurInstr->opcode)
717 {
718 /* call implies the actual push of the return address faulted */
719 case OP_CALL:
720 Log4(("pgmPoolMonitorIsReused: CALL\n"));
721 return true;
722 case OP_PUSH:
723 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
724 return true;
725 case OP_PUSHF:
726 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
727 return true;
728 case OP_PUSHA:
729 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
730 return true;
731 case OP_FXSAVE:
732 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
733 return true;
734 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
735 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
736 return true;
737 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
738 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
739 return true;
740 case OP_MOVSWD:
741 case OP_STOSWD:
742 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
743 && pRegFrame->rcx >= 0x40
744 )
745 {
746 Assert(pCpu->mode == CPUMODE_64BIT);
747
748 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
749 return true;
750 }
751 return false;
752 }
753 if ( (pCpu->param1.flags & USE_REG_GEN32)
754 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
755 {
756 Log4(("pgmPoolMonitorIsReused: ESP\n"));
757 return true;
758 }
759
760 //if (pPage->fCR3Mix)
761 // return false;
762 return false;
763}
764
765
766/**
767 * Flushes the page being accessed.
768 *
769 * @returns VBox status code suitable for scheduling.
770 * @param pVM The VM handle.
771 * @param pPool The pool.
772 * @param pPage The pool page (head).
773 * @param pCpu The disassembly of the write instruction.
774 * @param pRegFrame The trap register frame.
775 * @param GCPhysFault The fault address as guest physical address.
776 * @param pvFault The fault address.
777 */
778static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
779 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
780{
781 /*
782 * First, do the flushing.
783 */
784 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
785
786 /*
787 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
788 */
789 uint32_t cbWritten;
790 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
791 if (VBOX_SUCCESS(rc2))
792 pRegFrame->rip += pCpu->opsize;
793 else if (rc2 == VERR_EM_INTERPRETER)
794 {
795#ifdef IN_GC
796 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
797 {
798 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
799 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
800 rc = VINF_SUCCESS;
801 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
802 }
803 else
804#endif
805 {
806 rc = VINF_EM_RAW_EMULATE_INSTR;
807 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
808 }
809 }
810 else
811 rc = rc2;
812
813 /* See use in pgmPoolAccessHandlerSimple(). */
814 PGM_INVL_GUEST_TLBS();
815
816 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
817 return rc;
818
819}
820
821
822/**
823 * Handles the STOSD write accesses.
824 *
825 * @returns VBox status code suitable for scheduling.
826 * @param pVM The VM handle.
827 * @param pPool The pool.
828 * @param pPage The pool page (head).
829 * @param pCpu The disassembly of the write instruction.
830 * @param pRegFrame The trap register frame.
831 * @param GCPhysFault The fault address as guest physical address.
832 * @param pvFault The fault address.
833 */
834DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
835 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
836{
837 Assert(pCpu->mode == CPUMODE_32BIT);
838
839 /*
840 * Increment the modification counter and insert it into the list
841 * of modified pages the first time.
842 */
843 if (!pPage->cModifications++)
844 pgmPoolMonitorModifiedInsert(pPool, pPage);
845
846 /*
847 * Execute REP STOSD.
848 *
849 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
850 * write situation, meaning that it's safe to write here.
851 */
852 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
853 while (pRegFrame->ecx)
854 {
855 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
856#ifdef IN_GC
857 *(uint32_t *)pu32 = pRegFrame->eax;
858#else
859 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
860#endif
861 pu32 += 4;
862 GCPhysFault += 4;
863 pRegFrame->edi += 4;
864 pRegFrame->ecx--;
865 }
866 pRegFrame->rip += pCpu->opsize;
867
868 /* See use in pgmPoolAccessHandlerSimple(). */
869 PGM_INVL_GUEST_TLBS();
870
871 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
872 return VINF_SUCCESS;
873}
874
875
876/**
877 * Handles the simple write accesses.
878 *
879 * @returns VBox status code suitable for scheduling.
880 * @param pVM The VM handle.
881 * @param pPool The pool.
882 * @param pPage The pool page (head).
883 * @param pCpu The disassembly of the write instruction.
884 * @param pRegFrame The trap register frame.
885 * @param GCPhysFault The fault address as guest physical address.
886 * @param pvFault The fault address.
887 */
888DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
889 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
890{
891 /*
892 * Increment the modification counter and insert it into the list
893 * of modified pages the first time.
894 */
895 if (!pPage->cModifications++)
896 pgmPoolMonitorModifiedInsert(pPool, pPage);
897
898 /*
899 * Clear all the pages. ASSUMES that pvFault is readable.
900 */
901 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
902
903 /*
904 * Interpret the instruction.
905 */
906 uint32_t cb;
907 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
908 if (VBOX_SUCCESS(rc))
909 pRegFrame->rip += pCpu->opsize;
910 else if (rc == VERR_EM_INTERPRETER)
911 {
912 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
913 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
914 rc = VINF_EM_RAW_EMULATE_INSTR;
915 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
916 }
917
918 /*
919 * Quick hack, with logging enabled we're getting stale
920 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
921 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
922 * have to be fixed to support this. But that'll have to wait till next week.
923 *
924 * An alternative is to keep track of the changed PTEs together with the
925 * GCPhys from the guest PT. This may prove expensive though.
926 *
927 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
928 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
929 */
930 PGM_INVL_GUEST_TLBS();
931
932 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
933 return rc;
934}
935
936
937/**
938 * \#PF Handler callback for PT write accesses.
939 *
940 * @returns VBox status code (appropriate for GC return).
941 * @param pVM VM Handle.
942 * @param uErrorCode CPU Error code.
943 * @param pRegFrame Trap register frame.
944 * NULL on DMA and other non CPU access.
945 * @param pvFault The fault address (cr2).
946 * @param GCPhysFault The GC physical address corresponding to pvFault.
947 * @param pvUser User argument.
948 */
949DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
950{
951 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
952 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
953 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
954 LogFlow(("pgmPoolAccessHandler: pvFault=%VGv pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
955
956 /*
957 * We should ALWAYS have the list head as user parameter. This
958 * is because we use that page to record the changes.
959 */
960 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
961
962 /*
963 * Disassemble the faulting instruction.
964 */
965 DISCPUSTATE Cpu;
966 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
967 AssertRCReturn(rc, rc);
968
969 /*
970 * Check if it's worth dealing with.
971 */
972 bool fReused = false;
973 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
974 || pPage->fCR3Mix)
975 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
976 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
977 {
978 /*
979 * Simple instructions, no REP prefix.
980 */
981 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
982 {
983 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
984 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
985 return rc;
986 }
987
988 /*
989 * Windows is frequently doing small memset() operations (netio test 4k+).
990 * We have to deal with these or we'll kill the cache and performance.
991 */
992 if ( Cpu.pCurInstr->opcode == OP_STOSWD
993 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
994 && pRegFrame->ecx <= 0x20
995 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
996 && !((uintptr_t)pvFault & 3)
997 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
998 && Cpu.mode == CPUMODE_32BIT
999 && Cpu.opmode == CPUMODE_32BIT
1000 && Cpu.addrmode == CPUMODE_32BIT
1001 && Cpu.prefix == PREFIX_REP
1002 && !pRegFrame->eflags.Bits.u1DF
1003 )
1004 {
1005 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1006 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
1007 return rc;
1008 }
1009
1010 /* REP prefix, don't bother. */
1011 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
1012 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%VGv opcode=%d prefix=%#x\n",
1013 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1014 }
1015
1016 /*
1017 * Not worth it, so flush it.
1018 *
1019 * If we considered it to be reused, don't go back to ring-3
1020 * to emulate failed instructions since we usually cannot
1021 * interpret them. This may be a bit risky, in which case
1022 * the reuse detection must be fixed.
1023 */
1024 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1025 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1026 rc = VINF_SUCCESS;
1027 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
1028 return rc;
1029}
1030
1031# endif /* !IN_RING3 */
1032#endif /* PGMPOOL_WITH_MONITORING */
1033
1034
1035
1036#ifdef PGMPOOL_WITH_CACHE
1037/**
1038 * Inserts a page into the GCPhys hash table.
1039 *
1040 * @param pPool The pool.
1041 * @param pPage The page.
1042 */
1043DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1044{
1045 Log3(("pgmPoolHashInsert: %VGp\n", pPage->GCPhys));
1046 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1047 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1048 pPage->iNext = pPool->aiHash[iHash];
1049 pPool->aiHash[iHash] = pPage->idx;
1050}
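/* Rough sketch of the bucket layout this relies on (PGMPOOL_HASH() and
 * aiHash[] are presumably defined in PGMInternal.h): each aiHash[] slot holds
 * the index of the most recently inserted page hashing to it, and pages chain
 * through iNext:
 *     aiHash[h] -> pageA.iNext -> pageB.iNext -> NIL_PGMPOOL_IDX
 * pgmPoolHashRemove() below unlinks a page by walking exactly this chain. */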
1051
1052
1053/**
1054 * Removes a page from the GCPhys hash table.
1055 *
1056 * @param pPool The pool.
1057 * @param pPage The page.
1058 */
1059DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1060{
1061 Log3(("pgmPoolHashRemove: %VGp\n", pPage->GCPhys));
1062 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1063 if (pPool->aiHash[iHash] == pPage->idx)
1064 pPool->aiHash[iHash] = pPage->iNext;
1065 else
1066 {
1067 uint16_t iPrev = pPool->aiHash[iHash];
1068 for (;;)
1069 {
1070 const int16_t i = pPool->aPages[iPrev].iNext;
1071 if (i == pPage->idx)
1072 {
1073 pPool->aPages[iPrev].iNext = pPage->iNext;
1074 break;
1075 }
1076 if (i == NIL_PGMPOOL_IDX)
1077 {
1078 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1079 break;
1080 }
1081 iPrev = i;
1082 }
1083 }
1084 pPage->iNext = NIL_PGMPOOL_IDX;
1085}
1086
1087
1088/**
1089 * Frees up one cache page.
1090 *
1091 * @returns VBox status code.
1092 * @retval VINF_SUCCESS on success.
1093 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1094 * @param pPool The pool.
1095 * @param iUser The user index.
1096 */
1097static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1098{
1099#ifndef IN_GC
1100 const PVM pVM = pPool->CTXSUFF(pVM);
1101#endif
1102 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
1103 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1104
1105 /*
1106 * Select one page from the tail of the age list.
1107 */
1108 uint16_t iToFree = pPool->iAgeTail;
1109 if (iToFree == iUser)
1110 iToFree = pPool->aPages[iToFree].iAgePrev;
1111/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1112 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1113 {
1114 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1115 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1116 {
1117 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1118 continue;
1119 iToFree = i;
1120 break;
1121 }
1122 }
1123*/
1124 Assert(iToFree != iUser);
1125 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1126
1127 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
1128 if (rc == VINF_SUCCESS)
1129 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1130 return rc;
1131}
1132
1133
1134/**
1135 * Checks if a kind mismatch really means the page is being reused
1136 * or if it's just a normal remapping.
1137 *
1138 * @returns true if reused and the cached page (enmKind1) should be flushed
1139 * @returns false if not reused.
1140 * @param enmKind1 The kind of the cached page.
1141 * @param enmKind2 The kind of the requested page.
1142 */
1143static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1144{
1145 switch (enmKind1)
1146 {
1147 /*
1148 * Never reuse them. There is no remapping in non-paging mode.
1149 */
1150 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1151 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1152 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1153 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1154 return true;
1155
1156 /*
1157 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1158 */
1159 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1160 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1161 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1162 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1163 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1164 switch (enmKind2)
1165 {
1166 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1167 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1168 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1169 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1170 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1171 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1172 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1173 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1174 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1175 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1176 return true;
1177 default:
1178 return false;
1179 }
1180
1181 /*
1182 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1183 */
1184 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1185 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1186 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1187 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1188 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1189 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1190 switch (enmKind2)
1191 {
1192 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1193 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1194 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1195 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1196 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1197 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1198 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1199 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1200 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1201 return true;
1202 default:
1203 return false;
1204 }
1205
1206 /*
1207 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1208 */
1209 case PGMPOOLKIND_ROOT_32BIT_PD:
1210 case PGMPOOLKIND_ROOT_PAE_PD:
1211 case PGMPOOLKIND_ROOT_PDPT:
1212 case PGMPOOLKIND_ROOT_NESTED:
1213 return false;
1214
1215 default:
1216 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1217 }
1218}
1219
1220
1221/**
1222 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1223 *
1224 * @returns VBox status code.
1225 * @retval VINF_PGM_CACHED_PAGE on success.
1226 * @retval VERR_FILE_NOT_FOUND if not found.
1227 * @param pPool The pool.
1228 * @param GCPhys The GC physical address of the page we're gonna shadow.
1229 * @param enmKind The kind of mapping.
1230 * @param iUser The shadow page pool index of the user table.
1231 * @param iUserTable The index into the user table (shadowed).
1232 * @param ppPage Where to store the pointer to the page.
1233 */
1234static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1235{
1236#ifndef IN_GC
1237 const PVM pVM = pPool->CTXSUFF(pVM);
1238#endif
1239 /*
1240 * Look up the GCPhys in the hash.
1241 */
1242 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1243 Log3(("pgmPoolCacheAlloc: %VGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
1244 if (i != NIL_PGMPOOL_IDX)
1245 {
1246 do
1247 {
1248 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1249 Log3(("pgmPoolCacheAlloc: slot %d found page %VGp\n", i, pPage->GCPhys));
1250 if (pPage->GCPhys == GCPhys)
1251 {
1252 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1253 {
1254 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1255 if (VBOX_SUCCESS(rc))
1256 {
1257 *ppPage = pPage;
1258 STAM_COUNTER_INC(&pPool->StatCacheHits);
1259 return VINF_PGM_CACHED_PAGE;
1260 }
1261 return rc;
1262 }
1263
1264 /*
1265 * The kind is different. In some cases we should now flush the page
1266 * as it has been reused, but in most cases this is normal remapping
1267 * of PDs as PT or big pages using the GCPhys field in a slightly
1268 * different way than the other kinds.
1269 */
1270 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1271 {
1272 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1273 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1274 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1275 break;
1276 }
1277 }
1278
1279 /* next */
1280 i = pPage->iNext;
1281 } while (i != NIL_PGMPOOL_IDX);
1282 }
1283
1284 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
1285 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1286 return VERR_FILE_NOT_FOUND;
1287}
1288
1289
1290/**
1291 * Inserts a page into the cache.
1292 *
1293 * @param pPool The pool.
1294 * @param pPage The cached page.
1295 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1296 */
1297static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1298{
1299 /*
1300 * Insert into the GCPhys hash if the page is fit for that.
1301 */
1302 Assert(!pPage->fCached);
1303 if (fCanBeCached)
1304 {
1305 pPage->fCached = true;
1306 pgmPoolHashInsert(pPool, pPage);
1307 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1308 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1309 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1310 }
1311 else
1312 {
1313 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1314 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1315 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1316 }
1317
1318 /*
1319 * Insert at the head of the age list.
1320 */
1321 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1322 pPage->iAgeNext = pPool->iAgeHead;
1323 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1324 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1325 else
1326 pPool->iAgeTail = pPage->idx;
1327 pPool->iAgeHead = pPage->idx;
1328}
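/* Informal note: the age list behaves like an LRU - fresh and recently used
 * pages are linked in at iAgeHead here, while pgmPoolCacheFreeOne() above
 * evicts from iAgeTail. */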
1329
1330
1331/**
1332 * Flushes a cached page.
1333 *
1334 * @param pPool The pool.
1335 * @param pPage The cached page.
1336 */
1337static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1338{
1339 Log3(("pgmPoolCacheFlushPage: %VGp\n", pPage->GCPhys));
1340
1341 /*
1342 * Remove the page from the hash.
1343 */
1344 if (pPage->fCached)
1345 {
1346 pPage->fCached = false;
1347 pgmPoolHashRemove(pPool, pPage);
1348 }
1349 else
1350 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1351
1352 /*
1353 * Remove it from the age list.
1354 */
1355 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1356 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1357 else
1358 pPool->iAgeTail = pPage->iAgePrev;
1359 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1360 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1361 else
1362 pPool->iAgeHead = pPage->iAgeNext;
1363 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1364 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1365}
1366#endif /* PGMPOOL_WITH_CACHE */
1367
1368
1369#ifdef PGMPOOL_WITH_MONITORING
1370/**
1371 * Looks for pages sharing the monitor.
1372 *
1373 * @returns Pointer to the head page.
1374 * @returns NULL if not found.
1375 * @param pPool The Pool
1376 * @param pNewPage The page which is going to be monitored.
1377 */
1378static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1379{
1380#ifdef PGMPOOL_WITH_CACHE
1381 /*
1382 * Look up the GCPhys in the hash.
1383 */
1384 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1385 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1386 if (i == NIL_PGMPOOL_IDX)
1387 return NULL;
1388 do
1389 {
1390 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1391 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1392 && pPage != pNewPage)
1393 {
1394 switch (pPage->enmKind)
1395 {
1396 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1397 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1398 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1399 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1400 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1401 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1402 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1403 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1404 case PGMPOOLKIND_ROOT_32BIT_PD:
1405 case PGMPOOLKIND_ROOT_PAE_PD:
1406 case PGMPOOLKIND_ROOT_PDPT:
1407 {
1408 /* find the head */
1409 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1410 {
1411 Assert(pPage->iMonitoredPrev != pPage->idx);
1412 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1413 }
1414 return pPage;
1415 }
1416
1417 /* ignore, no monitoring. */
1418 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1419 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1420 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1421 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1422 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1423 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1424 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1425 case PGMPOOLKIND_ROOT_NESTED:
1426 break;
1427 default:
1428 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1429 }
1430 }
1431
1432 /* next */
1433 i = pPage->iNext;
1434 } while (i != NIL_PGMPOOL_IDX);
1435#endif
1436 return NULL;
1437}
1438
1439/**
1440 * Enables write monitoring of a guest page.
1441 *
1442 * @returns VBox status code.
1443 * @retval VINF_SUCCESS on success.
1444 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1445 * @param pPool The pool.
1446 * @param pPage The cached page.
1447 */
1448static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1449{
1450 LogFlow(("pgmPoolMonitorInsert %VGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1451
1452 /*
1453 * Filter out the relevant kinds.
1454 */
1455 switch (pPage->enmKind)
1456 {
1457 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1458 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1459 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1460 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1461 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1462 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1463 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1464 case PGMPOOLKIND_ROOT_PDPT:
1465 break;
1466
1467 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1468 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1469 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1470 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1471 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1472 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1473 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1474 case PGMPOOLKIND_ROOT_NESTED:
1475 /* Nothing to monitor here. */
1476 return VINF_SUCCESS;
1477
1478 case PGMPOOLKIND_ROOT_32BIT_PD:
1479 case PGMPOOLKIND_ROOT_PAE_PD:
1480#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1481 break;
1482#endif
1483 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1484 default:
1485 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1486 }
1487
1488 /*
1489 * Install handler.
1490 */
1491 int rc;
1492 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1493 if (pPageHead)
1494 {
1495 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1496 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1497 pPage->iMonitoredPrev = pPageHead->idx;
1498 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1499 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1500 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1501 pPageHead->iMonitoredNext = pPage->idx;
1502 rc = VINF_SUCCESS;
1503 }
1504 else
1505 {
1506 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1507 PVM pVM = pPool->CTXSUFF(pVM);
1508 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1509 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1510 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1511 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1512 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1513 pPool->pfnAccessHandlerGC, MMHyperCCToRC(pVM, pPage),
1514 pPool->pszAccessHandler);
1515 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1516 * the heap size should suffice. */
1517 AssertFatalRC(rc);
1518 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1519 rc = VERR_PGM_POOL_CLEARED;
1520 }
1521 pPage->fMonitored = true;
1522 return rc;
1523}
1524
1525
1526/**
1527 * Disables write monitoring of a guest page.
1528 *
1529 * @returns VBox status code.
1530 * @retval VINF_SUCCESS on success.
1531 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1532 * @param pPool The pool.
1533 * @param pPage The cached page.
1534 */
1535static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1536{
1537 /*
1538 * Filter out the relevant kinds.
1539 */
1540 switch (pPage->enmKind)
1541 {
1542 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1543 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1544 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1545 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1546 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1547 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1548 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1549 case PGMPOOLKIND_ROOT_PDPT:
1550 break;
1551
1552 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1553 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1554 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1555 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1556 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1557 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1558 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1559 case PGMPOOLKIND_ROOT_NESTED:
1560 /* Nothing to monitor here. */
1561 return VINF_SUCCESS;
1562
1563 case PGMPOOLKIND_ROOT_32BIT_PD:
1564 case PGMPOOLKIND_ROOT_PAE_PD:
1565#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1566 break;
1567#endif
1568 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1569 default:
1570 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1571 }
1572
1573 /*
1574 * Remove the page from the monitored list or uninstall it if last.
1575 */
1576 const PVM pVM = pPool->CTXSUFF(pVM);
1577 int rc;
1578 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1579 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1580 {
1581 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1582 {
1583 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1584 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1585 pNewHead->fCR3Mix = pPage->fCR3Mix;
1586 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1587 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1588 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1589 pPool->pfnAccessHandlerGC, MMHyperCCToRC(pVM, pNewHead),
1590 pPool->pszAccessHandler);
1591 AssertFatalRCSuccess(rc);
1592 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1593 }
1594 else
1595 {
1596 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1597 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1598 {
1599 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1600 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1601 }
1602 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1603 rc = VINF_SUCCESS;
1604 }
1605 }
1606 else
1607 {
1608 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1609 AssertFatalRC(rc);
1610 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1611 rc = VERR_PGM_POOL_CLEARED;
1612 }
1613 pPage->fMonitored = false;
1614
1615 /*
1616 * Remove it from the list of modified pages (if in it).
1617 */
1618 pgmPoolMonitorModifiedRemove(pPool, pPage);
1619
1620 return rc;
1621}
1622
1623
1624#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1625/**
1626 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1627 *
1628 * @param pPool The Pool.
1629 * @param pPage A page in the chain.
1630 * @param fCR3Mix The new fCR3Mix value.
1631 */
1632static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1633{
1634 /* current */
1635 pPage->fCR3Mix = fCR3Mix;
1636
1637 /* before */
1638 int16_t idx = pPage->iMonitoredPrev;
1639 while (idx != NIL_PGMPOOL_IDX)
1640 {
1641 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1642 idx = pPool->aPages[idx].iMonitoredPrev;
1643 }
1644
1645 /* after */
1646 idx = pPage->iMonitoredNext;
1647 while (idx != NIL_PGMPOOL_IDX)
1648 {
1649 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1650 idx = pPool->aPages[idx].iMonitoredNext;
1651 }
1652}
1653
1654
1655/**
1656 * Installs or modifies monitoring of a CR3 page (special).
1657 *
1658 * We're pretending the CR3 page is shadowed by the pool so we can use the
1659 * generic mechanisms in detecting chained monitoring. (This also gives us a
1660 * taste of what code changes are required to really pool CR3 shadow pages.)
1661 *
1662 * @returns VBox status code.
1663 * @param pPool The pool.
1664 * @param idxRoot The CR3 (root) page index.
1665 * @param GCPhysCR3 The (new) CR3 value.
1666 */
1667int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1668{
1669 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1670 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1671 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1672 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1673
1674 /*
1675 * The unlikely case where it already matches.
1676 */
1677 if (pPage->GCPhys == GCPhysCR3)
1678 {
1679 Assert(pPage->fMonitored);
1680 return VINF_SUCCESS;
1681 }
1682
1683 /*
1684 * Flush the current monitoring and remove it from the hash.
1685 */
1686 int rc = VINF_SUCCESS;
1687 if (pPage->fMonitored)
1688 {
1689 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1690 rc = pgmPoolMonitorFlush(pPool, pPage);
1691 if (rc == VERR_PGM_POOL_CLEARED)
1692 rc = VINF_SUCCESS;
1693 else
1694 AssertFatalRC(rc);
1695 pgmPoolHashRemove(pPool, pPage);
1696 }
1697
1698 /*
1699 * Monitor the page at the new location and insert it into the hash.
1700 */
1701 pPage->GCPhys = GCPhysCR3;
1702 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1703 if (rc2 != VERR_PGM_POOL_CLEARED)
1704 {
1705 AssertFatalRC(rc2);
1706 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1707 rc = rc2;
1708 }
1709 pgmPoolHashInsert(pPool, pPage);
1710 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1711 return rc;
1712}
1713
1714
1715/**
1716 * Removes the monitoring of a CR3 page (special).
1717 *
1718 * @returns VBox status code.
1719 * @param pPool The pool.
1720 * @param idxRoot The CR3 (root) page index.
1721 */
1722int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1723{
1724 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1725 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1726 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1727 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1728
1729 if (!pPage->fMonitored)
1730 return VINF_SUCCESS;
1731
1732 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1733 int rc = pgmPoolMonitorFlush(pPool, pPage);
1734 if (rc != VERR_PGM_POOL_CLEARED)
1735 AssertFatalRC(rc);
1736 else
1737 rc = VINF_SUCCESS;
1738 pgmPoolHashRemove(pPool, pPage);
1739 Assert(!pPage->fMonitored);
1740 pPage->GCPhys = NIL_RTGCPHYS;
1741 return rc;
1742}
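
/*
 * Illustrative sketch (not part of the original code): how the two CR3 monitor
 * helpers above are expected to pair up. The wrapper name, the idxRoot value and
 * the calling context are assumptions for the example only. Note that
 * pgmPoolMonitorMonitorCR3 already handles re-pointing an already monitored root
 * to a new GCPhysCR3, so only mode teardown needs the unmonitor call.
 */
#if 0 /* example only, never compiled */
static int pgmPoolExampleCR3Switch(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysNewCR3, bool fLeavingShadowMode)
{
    if (fLeavingShadowMode)
        return pgmPoolMonitorUnmonitorCR3(pPool, idxRoot);          /* drop CR3 monitoring entirely */
    return pgmPoolMonitorMonitorCR3(pPool, idxRoot, GCPhysNewCR3);  /* flushes the old location and monitors the new one */
}
#endif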
1743#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1744
1745
1746/**
1747 * Inserts the page into the list of modified pages.
1748 *
1749 * @param pPool The pool.
1750 * @param pPage The page.
1751 */
1752void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1753{
1754 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1755 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1756 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1757 && pPool->iModifiedHead != pPage->idx,
1758 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1759 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1760 pPool->iModifiedHead, pPool->cModifiedPages));
1761
1762 pPage->iModifiedNext = pPool->iModifiedHead;
1763 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1764 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1765 pPool->iModifiedHead = pPage->idx;
1766 pPool->cModifiedPages++;
1767#ifdef VBOX_WITH_STATISTICS
1768 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1769 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1770#endif
1771}
1772
1773
1774/**
1775 * Removes the page from the list of modified pages and resets the
1776 * modification counter.
1777 *
1778 * @param pPool The pool.
1779 * @param pPage The page which is believed to be in the list of modified pages.
1780 */
1781static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1782{
1783 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1784 if (pPool->iModifiedHead == pPage->idx)
1785 {
1786 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1787 pPool->iModifiedHead = pPage->iModifiedNext;
1788 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1789 {
1790 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1791 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1792 }
1793 pPool->cModifiedPages--;
1794 }
1795 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1796 {
1797 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1798 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1799 {
1800 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1801 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1802 }
1803 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1804 pPool->cModifiedPages--;
1805 }
1806 else
1807 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1808 pPage->cModifications = 0;
1809}
1810
1811
1812/**
1813 * Zaps the list of modified pages, resetting their modification counters in the process.
1814 *
1815 * @param pVM The VM handle.
1816 */
1817void pgmPoolMonitorModifiedClearAll(PVM pVM)
1818{
1819 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1820 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1821
1822 unsigned cPages = 0; NOREF(cPages);
1823 uint16_t idx = pPool->iModifiedHead;
1824 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1825 while (idx != NIL_PGMPOOL_IDX)
1826 {
1827 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1828 idx = pPage->iModifiedNext;
1829 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1830 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1831 pPage->cModifications = 0;
1832 Assert(++cPages);
1833 }
1834 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1835 pPool->cModifiedPages = 0;
1836}
1837
1838
1839/**
1840 * Clear all shadow pages and clear all modification counters.
1841 *
1842 * @param pVM The VM handle.
1843 * @remark Should only be used when monitoring is available, thus placed in
1844 * the PGMPOOL_WITH_MONITORING #ifdef.
1845 */
1846void pgmPoolClearAll(PVM pVM)
1847{
1848 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1849 STAM_PROFILE_START(&pPool->StatClearAll, c);
1850 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1851
1852 /*
1853     * Iterate all the pages until we've encountered all that are in use.
1854     * This is a simple but not quite optimal solution.
1855 */
1856 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1857 unsigned cLeft = pPool->cUsedPages;
1858 unsigned iPage = pPool->cCurPages;
1859 while (--iPage >= PGMPOOL_IDX_FIRST)
1860 {
1861 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1862 if (pPage->GCPhys != NIL_RTGCPHYS)
1863 {
1864 switch (pPage->enmKind)
1865 {
1866 /*
1867 * We only care about shadow page tables.
1868 */
1869 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1870 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1871 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1872 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1873 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1874 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1875 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1876 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1877 {
1878#ifdef PGMPOOL_WITH_USER_TRACKING
1879 if (pPage->cPresent)
1880#endif
1881 {
1882 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1883 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1884 ASMMemZeroPage(pvShw);
1885 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1886#ifdef PGMPOOL_WITH_USER_TRACKING
1887 pPage->cPresent = 0;
1888 pPage->iFirstPresent = ~0;
1889#endif
1890 }
1891 }
1892 /* fall thru */
1893
1894 default:
1895 Assert(!pPage->cModifications || ++cModifiedPages);
1896 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1897 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1898 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1899 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1900 pPage->cModifications = 0;
1901 break;
1902
1903 }
1904 if (!--cLeft)
1905 break;
1906 }
1907 }
1908
1909     /* sweep the special pages too. */
1910 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1911 {
1912 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1913 if (pPage->GCPhys != NIL_RTGCPHYS)
1914 {
1915 Assert(!pPage->cModifications || ++cModifiedPages);
1916 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1917 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1918 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1919 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1920 pPage->cModifications = 0;
1921 }
1922 }
1923
1924#ifndef DEBUG_michael
1925 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1926#endif
1927 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1928 pPool->cModifiedPages = 0;
1929
1930#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1931 /*
1932 * Clear all the GCPhys links and rebuild the phys ext free list.
1933 */
1934 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
1935 pRam;
1936 pRam = CTXALLSUFF(pRam->pNext))
1937 {
1938 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1939 while (iPage-- > 0)
1940 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
1941 }
1942
1943 pPool->iPhysExtFreeHead = 0;
1944 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1945 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1946 for (unsigned i = 0; i < cMaxPhysExts; i++)
1947 {
1948 paPhysExts[i].iNext = i + 1;
1949 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1950 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1951 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1952 }
1953 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1954#endif
1955
1956
1957 pPool->cPresent = 0;
1958 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1959}
1960
1961/**
1962 * Handle SyncCR3 pool tasks
1963 *
1964 * @returns VBox status code.
1965 * @retval VINF_SUCCESS if the pool tasks were handled.
1966 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
1967 * @param pVM The VM handle.
1968 * @remark Should only be used when monitoring is available, thus placed in
1969 * the PGMPOOL_WITH_MONITORING #ifdef.
1970 */
1971int pgmPoolSyncCR3(PVM pVM)
1972{
1973 /*
1974 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
1975 * Occasionally we will have to clear all the shadow page tables because we wanted
1976 * to monitor a page which was mapped by too many shadowed page tables. This operation
1977 * is sometimes referred to as a 'lightweight flush'.
1978 */
1979 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
1980 pgmPoolMonitorModifiedClearAll(pVM);
1981 else
1982 {
1983# ifndef IN_GC
1984 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
1985 pgmPoolClearAll(pVM);
1986# else
1987 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
1988 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
1989 return VINF_PGM_SYNC_CR3;
1990# endif
1991 }
1992 return VINF_SUCCESS;
1993}
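
/*
 * Illustrative sketch (not part of the original code): how a CR3-sync caller is
 * expected to react to the statuses documented above. The wrapper name is made
 * up; the real callers live in the PGM CR3 sync paths.
 */
#if 0 /* example only, never compiled */
static int pgmPoolExampleSyncCR3Caller(PVM pVM)
{
    int rc = pgmPoolSyncCR3(pVM);
    if (rc == VINF_PGM_SYNC_CR3)
    {
        /* GC cannot clear the pool itself; VM_FF_PGM_SYNC_CR3 has already been
           set by pgmPoolSyncCR3, so pass the status up and let ring-3 redo the sync. */
        return rc;
    }
    AssertRC(rc);
    return rc;
}
#endif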
1994#endif /* PGMPOOL_WITH_MONITORING */
1995
1996#ifdef PGMPOOL_WITH_USER_TRACKING
1997/**
1998 * Frees up at least one user entry.
1999 *
2000 * @returns VBox status code.
2001 * @retval VINF_SUCCESS if one or more user entries were successfully freed.
2002 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2003 * @param pPool The pool.
2004 * @param iUser The user index.
2005 */
2006static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2007{
2008 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2009#ifdef PGMPOOL_WITH_CACHE
2010 /*
2011 * Just free cached pages in a braindead fashion.
2012 */
2013 /** @todo walk the age list backwards and free the first with usage. */
2014 int rc = VINF_SUCCESS;
2015 do
2016 {
2017 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2018 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
2019 rc = rc2;
2020 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2021 return rc;
2022#else
2023 /*
2024 * Lazy approach.
2025 */
2026     /** @todo incompatible with long mode paging (cr3 root will be flushed) */
2027     Assert(!CPUMIsGuestInLongMode(pPool->CTXSUFF(pVM)));
2028 pgmPoolFlushAllInt(pPool);
2029 return VERR_PGM_POOL_FLUSHED;
2030#endif
2031}
2032
2033
2034/**
2035 * Inserts a page into the cache.
2036 *
2037 * This will create a user node for the page, insert it into the GCPhys
2038 * hash, and insert it into the age list.
2039 *
2040 * @returns VBox status code.
2041 * @retval VINF_SUCCESS if successfully added.
2042 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2043 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2044 * @param pPool The pool.
2045 * @param pPage The cached page.
2046 * @param GCPhys The GC physical address of the page we're gonna shadow.
2047 * @param iUser The user index.
2048 * @param iUserTable The user table index.
2049 */
2050DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2051{
2052 int rc = VINF_SUCCESS;
2053 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
2054
2055 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2056
2057 /*
2058     * Find a free user node.
2059 */
2060 uint16_t i = pPool->iUserFreeHead;
2061 if (i == NIL_PGMPOOL_USER_INDEX)
2062 {
2063 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2064 if (VBOX_FAILURE(rc))
2065 return rc;
2066 i = pPool->iUserFreeHead;
2067 }
2068
2069 /*
2070 * Unlink the user node from the free list,
2071 * initialize and insert it into the user list.
2072 */
2073 pPool->iUserFreeHead = pUser[i].iNext;
2074 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2075 pUser[i].iUser = iUser;
2076 pUser[i].iUserTable = iUserTable;
2077 pPage->iUserHead = i;
2078
2079 /*
2080 * Insert into cache and enable monitoring of the guest page if enabled.
2081 *
2082 * Until we implement caching of all levels, including the CR3 one, we'll
2083     * have to make sure we don't try to monitor & cache any recursive reuse of
2084     * a monitored CR3 page. Because all Windows versions are doing this we'll
2085 * have to be able to do combined access monitoring, CR3 + PT and
2086 * PD + PT (guest PAE).
2087 *
2088 * Update:
2089 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2090 */
2091#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2092# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2093 const bool fCanBeMonitored = true;
2094# else
2095 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2096 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2097 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
2098# endif
2099# ifdef PGMPOOL_WITH_CACHE
2100 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2101# endif
2102 if (fCanBeMonitored)
2103 {
2104# ifdef PGMPOOL_WITH_MONITORING
2105 rc = pgmPoolMonitorInsert(pPool, pPage);
2106 if (rc == VERR_PGM_POOL_CLEARED)
2107 {
2108 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2109# ifndef PGMPOOL_WITH_CACHE
2110 pgmPoolMonitorFlush(pPool, pPage);
2111 rc = VERR_PGM_POOL_FLUSHED;
2112# endif
2113 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2114 pUser[i].iNext = pPool->iUserFreeHead;
2115 pUser[i].iUser = NIL_PGMPOOL_IDX;
2116 pPool->iUserFreeHead = i;
2117 }
2118 }
2119# endif
2120#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
2121 return rc;
2122}
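
/*
 * Note on user records (illustrative, the numbers are made up): a "user" is the
 * shadow page whose table entry points at this pool page. For instance, when
 * entry 123 of shadow PD page 5 is made to point at a newly allocated shadow PT,
 * the PT is inserted with iUser=5 and iUserTable=123; pgmPoolTrackClearPageUser()
 * later uses exactly this pair to zap the PDE again when the PT is flushed.
 */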
2123
2124
2125# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2126/**
2127 * Adds a user reference to a page.
2128 *
2129 * This will add a user record for the page and move the page to the head
2130 * of the cache's age list (updating its replacement statistics).
2131 *
2132 * @returns VBox status code.
2133 * @retval VINF_SUCCESS if successfully added.
2134 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2135 * @param pPool The pool.
2136 * @param pPage The cached page.
2137 * @param iUser The user index.
2138 * @param iUserTable The user table.
2139 */
2140static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2141{
2142 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2143
2144 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2145# ifdef VBOX_STRICT
2146 /*
2147     * Check that the entry doesn't already exist.
2148 */
2149 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2150 {
2151 uint16_t i = pPage->iUserHead;
2152 do
2153 {
2154 Assert(i < pPool->cMaxUsers);
2155 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2156 i = paUsers[i].iNext;
2157 } while (i != NIL_PGMPOOL_USER_INDEX);
2158 }
2159# endif
2160
2161 /*
2162 * Allocate a user node.
2163 */
2164 uint16_t i = pPool->iUserFreeHead;
2165 if (i == NIL_PGMPOOL_USER_INDEX)
2166 {
2167 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2168 if (VBOX_FAILURE(rc))
2169 return rc;
2170 i = pPool->iUserFreeHead;
2171 }
2172 pPool->iUserFreeHead = paUsers[i].iNext;
2173
2174 /*
2175 * Initialize the user node and insert it.
2176 */
2177 paUsers[i].iNext = pPage->iUserHead;
2178 paUsers[i].iUser = iUser;
2179 paUsers[i].iUserTable = iUserTable;
2180 pPage->iUserHead = i;
2181
2182# ifdef PGMPOOL_WITH_CACHE
2183 /*
2184 * Tell the cache to update its replacement stats for this page.
2185 */
2186 pgmPoolCacheUsed(pPool, pPage);
2187# endif
2188 return VINF_SUCCESS;
2189}
2190# endif /* PGMPOOL_WITH_CACHE */
2191
2192
2193/**
2194 * Frees a user record associated with a page.
2195 *
2196 * This does not clear the entry in the user table, it simply returns the
2197 * user record to the chain of free records.
2198 *
2199 * @param pPool The pool.
2200 * @param pPage The shadow page to remove the user record from.
2201 * @param iUser The shadow page pool index of the user table.
2202 * @param iUserTable The index into the user table (shadowed).
2203 */
2204static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2205{
2206 /*
2207 * Unlink and free the specified user entry.
2208 */
2209 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2210
2211 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2212 uint16_t i = pPage->iUserHead;
2213 if ( i != NIL_PGMPOOL_USER_INDEX
2214 && paUsers[i].iUser == iUser
2215 && paUsers[i].iUserTable == iUserTable)
2216 {
2217 pPage->iUserHead = paUsers[i].iNext;
2218
2219 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2220 paUsers[i].iNext = pPool->iUserFreeHead;
2221 pPool->iUserFreeHead = i;
2222 return;
2223 }
2224
2225 /* General: Linear search. */
2226 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2227 while (i != NIL_PGMPOOL_USER_INDEX)
2228 {
2229 if ( paUsers[i].iUser == iUser
2230 && paUsers[i].iUserTable == iUserTable)
2231 {
2232 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2233 paUsers[iPrev].iNext = paUsers[i].iNext;
2234 else
2235 pPage->iUserHead = paUsers[i].iNext;
2236
2237 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2238 paUsers[i].iNext = pPool->iUserFreeHead;
2239 pPool->iUserFreeHead = i;
2240 return;
2241 }
2242 iPrev = i;
2243 i = paUsers[i].iNext;
2244 }
2245
2246 /* Fatal: didn't find it */
2247 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
2248 iUser, iUserTable, pPage->GCPhys));
2249}
2250
2251
2252/**
2253 * Gets the entry size of a shadow table.
2254 *
2255 * @param enmKind The kind of page.
2256 *
2257 * @returns The size of the entry in bytes. That is, 4 or 8.
2258 * @returns If the kind is not for a table, an assertion is raised and 0 is
2259 * returned.
2260 */
2261DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2262{
2263 switch (enmKind)
2264 {
2265 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2266 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2267 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2268 case PGMPOOLKIND_ROOT_32BIT_PD:
2269 return 4;
2270
2271 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2272 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2273 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2274 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2275 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2276 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2277 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2278 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2279 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2280 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2281 case PGMPOOLKIND_ROOT_PAE_PD:
2282 case PGMPOOLKIND_ROOT_PDPT:
2283 case PGMPOOLKIND_ROOT_NESTED:
2284 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2285 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2286 return 8;
2287
2288 default:
2289 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2290 }
2291}
2292
2293
2294/**
2295 * Gets the entry size of a guest table.
2296 *
2297 * @param enmKind The kind of page.
2298 *
2299 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2300 * @returns If the kind is not for a table, an assertion is raised and 0 is
2301 * returned.
2302 */
2303DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2304{
2305 switch (enmKind)
2306 {
2307 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2308 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2309 case PGMPOOLKIND_ROOT_32BIT_PD:
2310 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2311 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2312 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2313 return 4;
2314
2315 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2316 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2317 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2318 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2319 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2320 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2321 case PGMPOOLKIND_ROOT_PAE_PD:
2322 case PGMPOOLKIND_ROOT_PDPT:
2323 return 8;
2324
2325 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2326 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2327 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2328 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2329 case PGMPOOLKIND_ROOT_NESTED:
2330 /** @todo can we return 0? (nobody is calling this...) */
2331 AssertFailed();
2332 return 0;
2333
2334 default:
2335 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2336 }
2337}
2338
2339
2340#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2341/**
2342 * Scans one shadow page table for mappings of a physical page.
2343 *
2344 * @param pVM The VM handle.
2345 * @param pPhysPage The guest page in question.
2346 * @param iShw The shadow page table.
2347 * @param cRefs The number of references made in that PT.
2348 */
2349static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2350{
2351 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2352 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2353
2354 /*
2355 * Assert sanity.
2356 */
2357 Assert(cRefs == 1);
2358 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2359 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2360
2361 /*
2362 * Then, clear the actual mappings to the page in the shadow PT.
2363 */
2364 switch (pPage->enmKind)
2365 {
2366 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2367 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2368 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2369 {
2370 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2371 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2372 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2373 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2374 {
2375 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2376 pPT->a[i].u = 0;
2377 cRefs--;
2378 if (!cRefs)
2379 return;
2380 }
2381#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2382 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2383 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2384 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2385 {
2386 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2387 pPT->a[i].u = 0;
2388 }
2389#endif
2390 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2391 break;
2392 }
2393
2394 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2395 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2396 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2397 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2398 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2399 {
2400 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2401 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2402 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2403 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2404 {
2405 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2406 pPT->a[i].u = 0;
2407 cRefs--;
2408 if (!cRefs)
2409 return;
2410 }
2411#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2412 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2413 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2414 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2415 {
2416 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2417 pPT->a[i].u = 0;
2418 }
2419#endif
2420 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2421 break;
2422 }
2423
2424 default:
2425 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2426 }
2427}
2428
2429
2430/**
2431 * Scans one shadow page table for mappings of a physical page.
2432 *
2433 * @param pVM The VM handle.
2434 * @param pPhysPage The guest page in question.
2435 * @param iShw The shadow page table.
2436 * @param cRefs The number of references made in that PT.
2437 */
2438void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2439{
2440 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2441 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2442 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2443 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2444 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2445 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2446}
2447
2448
2449/**
2450 * Flushes a list of shadow page tables mapping the same physical page.
2451 *
2452 * @param pVM The VM handle.
2453 * @param pPhysPage The guest page in question.
2454 * @param iPhysExt The physical cross reference extent list to flush.
2455 */
2456void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2457{
2458 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2459 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2460     LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2461
2462 const uint16_t iPhysExtStart = iPhysExt;
2463 PPGMPOOLPHYSEXT pPhysExt;
2464 do
2465 {
2466 Assert(iPhysExt < pPool->cMaxPhysExts);
2467 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2468 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2469 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2470 {
2471 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2472 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2473 }
2474
2475 /* next */
2476 iPhysExt = pPhysExt->iNext;
2477 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2478
2479 /* insert the list into the free list and clear the ram range entry. */
2480 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2481 pPool->iPhysExtFreeHead = iPhysExtStart;
2482 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2483
2484 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2485}
2486#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2487
2488
2489/**
2490 * Scans all shadow page tables for mappings of a physical page.
2491 *
2492 * This may be slow, but it's most likely more efficient than cleaning
2493 * out the entire page pool / cache.
2494 *
2495 * @returns VBox status code.
2496 * @retval VINF_SUCCESS if all references have been successfully cleared.
2497 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2498 * a page pool cleaning.
2499 *
2500 * @param pVM The VM handle.
2501 * @param pPhysPage The guest page in question.
2502 */
2503int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2504{
2505 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2506 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2507 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2508 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2509
2510#if 1
2511 /*
2512 * There is a limit to what makes sense.
2513 */
2514 if (pPool->cPresent > 1024)
2515 {
2516 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2517 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2518 return VINF_PGM_GCPHYS_ALIASED;
2519 }
2520#endif
2521
2522 /*
2523     * Iterate all the pages until we've encountered all that are in use.
2524     * This is a simple but not quite optimal solution.
2525 */
2526 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2527 const uint32_t u32 = u64;
2528 unsigned cLeft = pPool->cUsedPages;
2529 unsigned iPage = pPool->cCurPages;
2530 while (--iPage >= PGMPOOL_IDX_FIRST)
2531 {
2532 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2533 if (pPage->GCPhys != NIL_RTGCPHYS)
2534 {
2535 switch (pPage->enmKind)
2536 {
2537 /*
2538 * We only care about shadow page tables.
2539 */
2540 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2541 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2542 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2543 {
2544 unsigned cPresent = pPage->cPresent;
2545 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2546 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2547 if (pPT->a[i].n.u1Present)
2548 {
2549 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2550 {
2551 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2552 pPT->a[i].u = 0;
2553 }
2554 if (!--cPresent)
2555 break;
2556 }
2557 break;
2558 }
2559
2560 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2561 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2562 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2563 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2564 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2565 {
2566 unsigned cPresent = pPage->cPresent;
2567 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2568 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2569 if (pPT->a[i].n.u1Present)
2570 {
2571 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2572 {
2573 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2574 pPT->a[i].u = 0;
2575 }
2576 if (!--cPresent)
2577 break;
2578 }
2579 break;
2580 }
2581 }
2582 if (!--cLeft)
2583 break;
2584 }
2585 }
2586
2587 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2588 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2589 return VINF_SUCCESS;
2590}
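
/*
 * Note (illustrative, not from the original code): callers of the slow path above
 * are expected to treat VINF_PGM_GCPHYS_ALIASED as "give up and resync", e.g. by
 * scheduling a CR3 sync with the PGM_SYNC_CLEAR_PGM_POOL flag, rather than retrying.
 */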
2591
2592
2593/**
2594 * Clears the user entry in a user table.
2595 *
2596 * This is used to remove all references to a page when flushing it.
2597 */
2598static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2599{
2600 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2601 Assert(pUser->iUser < pPool->cCurPages);
2602
2603 /*
2604 * Map the user page.
2605 */
2606 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2607 union
2608 {
2609 uint64_t *pau64;
2610 uint32_t *pau32;
2611 } u;
2612 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2613
2614 /* Safety precaution in case we change the paging for other modes too in the future. */
2615 Assert(PGMGetHyperCR3(CTXSUFF(pPool->pVM)) != pPage->Core.Key);
2616
2617#ifdef VBOX_STRICT
2618 /*
2619 * Some sanity checks.
2620 */
2621 switch (pUserPage->enmKind)
2622 {
2623 case PGMPOOLKIND_ROOT_32BIT_PD:
2624 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2625 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2626 break;
2627 case PGMPOOLKIND_ROOT_PAE_PD:
2628 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2629 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2630 break;
2631 case PGMPOOLKIND_ROOT_PDPT:
2632 Assert(pUser->iUserTable < 4);
2633 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2634 break;
2635 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2636 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2637 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2638 break;
2639 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2640 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2641 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2642 break;
2643 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2644 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2645 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2646 break;
2647 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2648 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2649 /* GCPhys >> PAGE_SHIFT is the index here */
2650 break;
2651 case PGMPOOLKIND_ROOT_NESTED:
2652 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2653 break;
2654
2655 default:
2656 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2657 break;
2658 }
2659#endif /* VBOX_STRICT */
2660
2661 /*
2662 * Clear the entry in the user page.
2663 */
2664 switch (pUserPage->enmKind)
2665 {
2666 /* 32-bit entries */
2667 case PGMPOOLKIND_ROOT_32BIT_PD:
2668 u.pau32[pUser->iUserTable] = 0;
2669 break;
2670
2671 /* 64-bit entries */
2672 case PGMPOOLKIND_ROOT_PAE_PD:
2673 case PGMPOOLKIND_ROOT_PDPT:
2674 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2675 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2676 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2677 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2678 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2679 case PGMPOOLKIND_ROOT_NESTED:
2680 u.pau64[pUser->iUserTable] = 0;
2681 break;
2682
2683 default:
2684 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2685 }
2686}
2687
2688
2689/**
2690 * Clears all users of a page.
2691 */
2692static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2693{
2694 /*
2695 * Free all the user records.
2696 */
2697 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2698 uint16_t i = pPage->iUserHead;
2699 while (i != NIL_PGMPOOL_USER_INDEX)
2700 {
2701         /* Clear the entry in the user table. */
2702 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2703
2704 /* Free it. */
2705 const uint16_t iNext = paUsers[i].iNext;
2706 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2707 paUsers[i].iNext = pPool->iUserFreeHead;
2708 pPool->iUserFreeHead = i;
2709
2710 /* Next. */
2711 i = iNext;
2712 }
2713 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2714}
2715
2716
2717#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2718/**
2719 * Allocates a new physical cross reference extent.
2720 *
2721 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2722 * @param pVM The VM handle.
2723 * @param piPhysExt Where to store the phys ext index.
2724 */
2725PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2726{
2727 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2728 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2729 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2730 {
2731 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2732 return NULL;
2733 }
2734 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2735 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2736 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2737 *piPhysExt = iPhysExt;
2738 return pPhysExt;
2739}
2740
2741
2742/**
2743 * Frees a physical cross reference extent.
2744 *
2745 * @param pVM The VM handle.
2746 * @param iPhysExt The extent to free.
2747 */
2748void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2749{
2750 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2751 Assert(iPhysExt < pPool->cMaxPhysExts);
2752 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2753 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2754 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2755 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2756 pPool->iPhysExtFreeHead = iPhysExt;
2757}
2758
2759
2760/**
2761 * Frees a list of physical cross reference extents.
2762 *
2763 * @param pVM The VM handle.
2764 * @param iPhysExt The head of the extent list to free.
2765 */
2766void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2767{
2768 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2769
2770 const uint16_t iPhysExtStart = iPhysExt;
2771 PPGMPOOLPHYSEXT pPhysExt;
2772 do
2773 {
2774 Assert(iPhysExt < pPool->cMaxPhysExts);
2775 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2776 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2777 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2778
2779 /* next */
2780 iPhysExt = pPhysExt->iNext;
2781 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2782
2783 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2784 pPool->iPhysExtFreeHead = iPhysExtStart;
2785}
2786
2787/**
2788 * Insert a reference into a list of physical cross reference extents.
2789 *
2790 * @returns The new ram range flags (top 16-bits).
2791 *
2792 * @param pVM The VM handle.
2793 * @param iPhysExt The physical extent index of the list head.
2794 * @param iShwPT The shadow page table index.
2795 *
2796 */
2797static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2798{
2799 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2800 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2801
2802 /* special common case. */
2803 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2804 {
2805 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2806 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2807 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2808 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2809 }
2810
2811 /* general treatment. */
2812 const uint16_t iPhysExtStart = iPhysExt;
2813 unsigned cMax = 15;
2814 for (;;)
2815 {
2816 Assert(iPhysExt < pPool->cMaxPhysExts);
2817 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2818 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2819 {
2820 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2821 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2822 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2823 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2824 }
2825 if (!--cMax)
2826 {
2827 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2828 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2829 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2830 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2831 }

        /* advance */
        iPhysExt = paPhysExts[iPhysExt].iNext;
        if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
            break;
2832     }
2833
2834 /* add another extent to the list. */
2835 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2836 if (!pNew)
2837 {
2838 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2839 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2840 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2841 }
2842 pNew->iNext = iPhysExtStart;
2843 pNew->aidx[0] = iShwPT;
2844 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2845 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2846}
2847
2848
2849/**
2850 * Add a reference to guest physical page where extents are in use.
2851 *
2852 * @returns The new ram range flags (top 16-bits).
2853 *
2854 * @param pVM The VM handle.
2855 * @param u16 The ram range flags (top 16-bits).
2856 * @param iShwPT The shadow page table index.
2857 */
2858uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2859{
2860 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2861 {
2862 /*
2863 * Convert to extent list.
2864 */
2865 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2866 uint16_t iPhysExt;
2867 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2868 if (pPhysExt)
2869 {
2870 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2871 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2872 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2873 pPhysExt->aidx[1] = iShwPT;
2874 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2875 }
2876 else
2877 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2878 }
2879 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2880 {
2881 /*
2882 * Insert into the extent list.
2883 */
2884 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2885 }
2886 else
2887 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2888 return u16;
2889}
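
/*
 * Illustrative sketch (not part of the original code): decoding the packed 16-bit
 * tracking word produced above. The layout is assumed from this file's use of the
 * MM_RAM_FLAGS_* constants: the low bits hold an index, and the bits above
 * (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT) hold the cRefs field, where
 * cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT marks the index as a phys ext list head.
 */
#if 0 /* example only, never compiled */
static void pgmPoolExampleDecodeTrackWord(uint16_t u16)
{
    uint16_t const idx   = u16 & MM_RAM_FLAGS_IDX_MASK;
    uint16_t const cRefs = u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT);
    if (cRefs != MM_RAM_FLAGS_CREFS_PHYSEXT)
        Log(("single reference: shadow PT idx=%d cRefs=%d\n", idx, cRefs));
    else if (idx == MM_RAM_FLAGS_IDX_OVERFLOWED)
        Log(("overflowed: too many references, tracking given up\n"));
    else
        Log(("multiple references: phys ext list head %d\n", idx));
}
#endif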
2890
2891
2892/**
2893 * Clear references to guest physical memory.
2894 *
2895 * @param pPool The pool.
2896 * @param pPage The page.
2897 * @param pPhysPage Pointer to the aPages entry in the ram range.
2898 */
2899void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2900{
2901 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2902 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2903
2904 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2905 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2906 {
2907 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2908 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2909 do
2910 {
2911 Assert(iPhysExt < pPool->cMaxPhysExts);
2912
2913 /*
2914 * Look for the shadow page and check if it's all freed.
2915 */
2916 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2917 {
2918 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2919 {
2920 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2921
2922 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2923 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2924 {
2925 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2926 return;
2927 }
2928
2929 /* we can free the node. */
2930 PVM pVM = pPool->CTXSUFF(pVM);
2931 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2932 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2933 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2934 {
2935 /* lonely node */
2936 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2937 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2938 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2939 }
2940 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2941 {
2942 /* head */
2943 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2944 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2945 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2946 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2947 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2948 }
2949 else
2950 {
2951 /* in list */
2952 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2953 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2954 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2955 }
2956 iPhysExt = iPhysExtNext;
2957 return;
2958 }
2959 }
2960
2961 /* next */
2962 iPhysExtPrev = iPhysExt;
2963 iPhysExt = paPhysExts[iPhysExt].iNext;
2964 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2965
2966 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2967 }
2968 else /* nothing to do */
2969 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
2970}
2971
2972
2973
2974/**
2975 * Clear references to guest physical memory.
2976 *
2977 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
2978 * is assumed to be correct, so the linear search can be skipped and we can assert
2979 * at an earlier point.
2980 *
2981 * @param pPool The pool.
2982 * @param pPage The page.
2983 * @param HCPhys The host physical address corresponding to the guest page.
2984 * @param GCPhys The guest physical address corresponding to HCPhys.
2985 */
2986static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2987{
2988 /*
2989 * Walk range list.
2990 */
2991 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2992 while (pRam)
2993 {
2994 RTGCPHYS off = GCPhys - pRam->GCPhys;
2995 if (off < pRam->cb)
2996 {
2997 /* does it match? */
2998 const unsigned iPage = off >> PAGE_SHIFT;
2999 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3000            RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3001            Log(("pgmPoolTracDerefGCPhys %VHp vs %VHp\n", HCPhysPage, HCPhys));
3002 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3003 {
3004 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3005 return;
3006 }
3007 break;
3008 }
3009 pRam = CTXALLSUFF(pRam->pNext);
3010 }
3011 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
3012}
3013
3014
3015/**
3016 * Clear references to guest physical memory.
3017 *
3018 * @param pPool The pool.
3019 * @param pPage The page.
3020 * @param HCPhys The host physical address corresponding to the guest page.
3021 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3022 */
3023static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3024{
3025 /*
3026 * Walk range list.
3027 */
3028 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3029 while (pRam)
3030 {
3031 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3032 if (off < pRam->cb)
3033 {
3034 /* does it match? */
3035 const unsigned iPage = off >> PAGE_SHIFT;
3036 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3037 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3038 {
3039 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3040 return;
3041 }
3042 break;
3043 }
3044 pRam = CTXALLSUFF(pRam->pNext);
3045 }
3046
3047 /*
3048 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3049 */
3050 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3051 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3052 while (pRam)
3053 {
3054 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3055 while (iPage-- > 0)
3056 {
3057 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3058 {
3059 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
3060 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3061 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3062 return;
3063 }
3064 }
3065 pRam = CTXALLSUFF(pRam->pNext);
3066 }
3067
3068 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
3069}
3070
3071
3072/**
3073 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3074 *
3075 * @param pPool The pool.
3076 * @param pPage The page.
3077 * @param pShwPT The shadow page table (mapping of the page).
3078 * @param pGstPT The guest page table.
3079 */
3080DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3081{
3082 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3083 if (pShwPT->a[i].n.u1Present)
3084 {
3085 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3086 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3087 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3088 if (!--pPage->cPresent)
3089 break;
3090 }
3091}
3092
3093
3094/**
3095 * Clear references to guest physical memory in a PAE / 32-bit page table.
3096 *
3097 * @param pPool The pool.
3098 * @param pPage The page.
3099 * @param pShwPT The shadow page table (mapping of the page).
3100 * @param pGstPT The guest page table (just a half one).
3101 */
3102DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3103{
3104 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3105 if (pShwPT->a[i].n.u1Present)
3106 {
3107            Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3108                  i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3109 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3110 }
3111}
3112
3113
3114/**
3115 * Clear references to guest physical memory in a PAE / PAE page table.
3116 *
3117 * @param pPool The pool.
3118 * @param pPage The page.
3119 * @param pShwPT The shadow page table (mapping of the page).
3120 * @param pGstPT The guest page table.
3121 */
3122DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3123{
3124 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3125 if (pShwPT->a[i].n.u1Present)
3126 {
3127            Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3128                  i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3129 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3130 }
3131}
3132
3133
3134/**
3135 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3136 *
3137 * @param pPool The pool.
3138 * @param pPage The page.
3139 * @param pShwPT The shadow page table (mapping of the page).
3140 */
3141DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3142{
3143 RTGCPHYS GCPhys = pPage->GCPhys;
3144 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3145 if (pShwPT->a[i].n.u1Present)
3146 {
3147 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3148 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3149 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3150 }
3151}
3152
3153
3154/**
3155 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3156 *
3157 * @param pPool The pool.
3158 * @param pPage The page.
3159 * @param pShwPT The shadow page table (mapping of the page).
3160 */
3161DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3162{
3163 RTGCPHYS GCPhys = pPage->GCPhys;
3164 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3165 if (pShwPT->a[i].n.u1Present)
3166 {
3167 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%VGp\n",
3168 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3169 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3170 }
3171}
3172#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3173
3174
3175/**
3176 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3177 *
3178 * @param pPool The pool.
3179 * @param pPage The page.
3180 * @param pShwPD The shadow page directory (mapping of the page).
3181 */
3182DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3183{
3184 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3185 {
3186 if (pShwPD->a[i].n.u1Present)
3187 {
3188 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3189 if (pSubPage)
3190 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3191 else
3192 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3193 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3194 }
3195 }
3196}
3197
3198
3199/**
3200 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3201 *
3202 * @param pPool The pool.
3203 * @param pPage The page.
3204 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3205 */
3206DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3207{
3208 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3209 {
3210 if (pShwPDPT->a[i].n.u1Present)
3211 {
3212 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3213 if (pSubPage)
3214 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3215 else
3216 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3217 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3218 }
3219 }
3220}
3221
3222/**
3223 * Clear references to shadowed pages in a 64-bit level 4 page table.
3224 *
3225 * @param pPool The pool.
3226 * @param pPage The page.
3227 * @param pShwPML4 The shadow level 4 page table (mapping of the page).
3228 */
3229DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3230{
3231 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3232 {
3233 if (pShwPML4->a[i].n.u1Present)
3234 {
3235 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3236 if (pSubPage)
3237 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3238 else
3239 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3240 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3241 }
3242 }
3243}
3244
3245
3246/**
3247 * Clears all references made by this page.
3248 *
3249 * This includes other shadow pages and GC physical addresses.
3250 *
3251 * @param pPool The pool.
3252 * @param pPage The page.
3253 */
3254static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3255{
3256 /*
3257 * Map the shadow page and take action according to the page kind.
3258 */
3259 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3260 switch (pPage->enmKind)
3261 {
3262#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3263 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3264 {
3265 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3266 void *pvGst;
3267 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3268 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3269 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3270 break;
3271 }
3272
3273 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3274 {
3275 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3276 void *pvGst;
3277 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3278 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3279 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3280 break;
3281 }
3282
3283 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3284 {
3285 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3286 void *pvGst;
3287 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3288 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3289 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3290 break;
3291 }
3292
3293 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3294 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3295 {
3296 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3297 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3298 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3299 break;
3300 }
3301
3302 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
3303 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3304 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3305 {
3306 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3307 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3308 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3309 break;
3310 }
3311
3312#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3313 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3314 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3315 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3316 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3317 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3318 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3319 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3320 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3321 break;
3322#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3323
3324 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3325 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3326 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3327 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3328 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3329 break;
3330
3331 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3332 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3333 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3334 break;
3335
3336 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3337 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3338 break;
3339
3340 default:
3341 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3342 }
3343
3344    /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3345 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3346 ASMMemZeroPage(pvShw);
3347 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3348 pPage->fZeroed = true;
3349}
3350#endif /* PGMPOOL_WITH_USER_TRACKING */
3351
3352
3353/**
3354 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3355 *
3356 * @param pPool The pool.
3357 */
3358static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3359{
3360 /*
3361 * These special pages all live at the fixed indexes 1..PGMPOOL_IDX_FIRST-1.
3362 */
3363 Assert(NIL_PGMPOOL_IDX == 0);
3364 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3365 {
3366 /*
3367 * Get the page address.
3368 */
3369 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3370 union
3371 {
3372 uint64_t *pau64;
3373 uint32_t *pau32;
3374 } u;
3375 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3376
3377 /*
3378 * Mark stuff not present.
3379 */
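/* Only ordinary present entries get zapped in the PD cases below; entries carrying PGM_PDFLAGS_MAPPING (hypervisor mappings) are deliberately left alone. */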
3380 switch (pPage->enmKind)
3381 {
3382 case PGMPOOLKIND_ROOT_32BIT_PD:
3383 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3384 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3385 u.pau32[iPage] = 0;
3386 break;
3387
3388 case PGMPOOLKIND_ROOT_PAE_PD:
3389 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
3390 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3391 u.pau64[iPage] = 0;
3392 break;
3393
3394 case PGMPOOLKIND_ROOT_PDPT:
3395 /* Not root of shadowed pages currently, ignore it. */
3396 break;
3397
3398 case PGMPOOLKIND_ROOT_NESTED:
3399 ASMMemZero32(u.pau64, PAGE_SIZE);
3400 break;
3401 }
3402 }
3403
3404 /*
3405 * Paranoia (to be removed), flag a global CR3 sync.
3406 */
3407 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3408}
3409
3410
3411/**
3412 * Flushes the entire cache.
3413 *
3414 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3415 * and will execute this CR3 flush.
3416 *
3417 * @param pPool The pool.
3418 */
3419static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3420{
3421 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3422 LogFlow(("pgmPoolFlushAllInt:\n"));
3423
3424 /*
3425 * If there are no pages in the pool, there is nothing to do.
3426 */
3427 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3428 {
3429 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3430 return;
3431 }
3432
3433 /*
3434 * Nuke the free list and reinsert all pages into it.
3435 */
3436 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3437 {
3438 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3439
3440#ifdef IN_RING3
3441 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3442#endif
3443#ifdef PGMPOOL_WITH_MONITORING
3444 if (pPage->fMonitored)
3445 pgmPoolMonitorFlush(pPool, pPage);
3446 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3447 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3448 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3449 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3450 pPage->cModifications = 0;
3451#endif
3452 pPage->GCPhys = NIL_RTGCPHYS;
3453 pPage->enmKind = PGMPOOLKIND_FREE;
3454 Assert(pPage->idx == i);
3455 pPage->iNext = i + 1;
3456 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3457 pPage->fSeenNonGlobal = false;
3458 pPage->fMonitored = false;
3459 pPage->fCached = false;
3460 pPage->fReusedFlushPending = false;
3461 pPage->fCR3Mix = false;
3462#ifdef PGMPOOL_WITH_USER_TRACKING
3463 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3464#endif
3465#ifdef PGMPOOL_WITH_CACHE
3466 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3467 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3468#endif
3469 }
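/* The loop above linked each page's iNext to the following index; terminate that chain and point the free head at the first ordinary page. */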
3470 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3471 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3472 pPool->cUsedPages = 0;
3473
3474#ifdef PGMPOOL_WITH_USER_TRACKING
3475 /*
3476 * Zap and reinitialize the user records.
3477 */
3478 pPool->cPresent = 0;
3479 pPool->iUserFreeHead = 0;
3480 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3481 const unsigned cMaxUsers = pPool->cMaxUsers;
3482 for (unsigned i = 0; i < cMaxUsers; i++)
3483 {
3484 paUsers[i].iNext = i + 1;
3485 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3486 paUsers[i].iUserTable = 0xfffffffe;
3487 }
3488 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3489#endif
3490
3491#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3492 /*
3493 * Clear all the GCPhys links and rebuild the phys ext free list.
3494 */
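/* Walk every registered RAM range and strip the reference-tracking bits from each page's flags, dropping all shadow back-links in one go. */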
3495 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3496 pRam;
3497 pRam = CTXALLSUFF(pRam->pNext))
3498 {
3499 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3500 while (iPage-- > 0)
3501 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3502 }
3503
3504 pPool->iPhysExtFreeHead = 0;
3505 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3506 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3507 for (unsigned i = 0; i < cMaxPhysExts; i++)
3508 {
3509 paPhysExts[i].iNext = i + 1;
3510 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3511 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3512 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3513 }
3514 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3515#endif
3516
3517#ifdef PGMPOOL_WITH_MONITORING
3518 /*
3519 * Just zap the modified list.
3520 */
3521 pPool->cModifiedPages = 0;
3522 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3523#endif
3524
3525#ifdef PGMPOOL_WITH_CACHE
3526 /*
3527 * Clear the GCPhys hash and the age list.
3528 */
3529 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3530 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3531 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3532 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3533#endif
3534
3535 /*
3536 * Flush all the special root pages.
3537 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3538 */
3539 pgmPoolFlushAllSpecialRoots(pPool);
3540 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3541 {
3542 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3543 pPage->iNext = NIL_PGMPOOL_IDX;
3544#ifdef PGMPOOL_WITH_MONITORING
3545 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3546 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3547 pPage->cModifications = 0;
3548 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3549 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3550 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3551 if (pPage->fMonitored)
3552 {
3553 PVM pVM = pPool->CTXSUFF(pVM);
3554 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3555 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3556 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3557 pPool->pfnAccessHandlerGC, MMHyperCCToRC(pVM, pPage),
3558 pPool->pszAccessHandler);
3559 AssertFatalRCSuccess(rc);
3560# ifdef PGMPOOL_WITH_CACHE
3561 pgmPoolHashInsert(pPool, pPage);
3562# endif
3563 }
3564#endif
3565#ifdef PGMPOOL_WITH_USER_TRACKING
3566 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3567#endif
3568#ifdef PGMPOOL_WITH_CACHE
3569 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3570 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3571#endif
3572 }
3573
3574 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3575}
3576
3577
3578/**
3579 * Flushes a pool page.
3580 *
3581 * This moves the page to the free list after removing all user references to it.
3582 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3583 *
3584 * @returns VBox status code.
3585 * @retval VINF_SUCCESS on success.
3586 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3587 * @param pPool The pool.
3588 * @param pPage The shadow page to flush.
3589 */
3590int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3591{
3592 int rc = VINF_SUCCESS;
3593 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3594 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3595 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3596
3597 /*
3598 * Quietly reject any attempts at flushing any of the special root pages.
3599 */
3600 if (pPage->idx < PGMPOOL_IDX_FIRST)
3601 {
3602 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3603 return VINF_SUCCESS;
3604 }
3605
3606 /*
3607 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping.
3608 */
3609 if ( pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4
3610 && PGMGetHyperCR3(CTXSUFF(pPool->pVM)) == pPage->Core.Key)
3611 {
3612 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3613 return VINF_SUCCESS;
3614 }
3615 /* Safety precaution in case we change the paging for other modes too in the future. */
3616 AssertFatal(PGMGetHyperCR3(CTXSUFF(pPool->pVM)) != pPage->Core.Key);
3617
3618 /*
3619 * Mark the page as being in need of an ASMMemZeroPage().
3620 */
3621 pPage->fZeroed = false;
3622
3623#ifdef PGMPOOL_WITH_USER_TRACKING
3624 /*
3625 * Clear the page.
3626 */
3627 pgmPoolTrackClearPageUsers(pPool, pPage);
3628 STAM_PROFILE_START(&pPool->StatTrackDeref, a);
3629 pgmPoolTrackDeref(pPool, pPage);
3630 STAM_PROFILE_STOP(&pPool->StatTrackDeref, a);
3631#endif
3632
3633#ifdef PGMPOOL_WITH_CACHE
3634 /*
3635 * Flush it from the cache.
3636 */
3637 pgmPoolCacheFlushPage(pPool, pPage);
3638#endif /* PGMPOOL_WITH_CACHE */
3639
3640#ifdef PGMPOOL_WITH_MONITORING
3641 /*
3642 * Deregister the monitoring.
3643 */
3644 if (pPage->fMonitored)
3645 rc = pgmPoolMonitorFlush(pPool, pPage);
3646#endif
3647
3648 /*
3649 * Free the page.
3650 */
3651 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3652 pPage->iNext = pPool->iFreeHead;
3653 pPool->iFreeHead = pPage->idx;
3654 pPage->enmKind = PGMPOOLKIND_FREE;
3655 pPage->GCPhys = NIL_RTGCPHYS;
3656 pPage->fReusedFlushPending = false;
3657
3658 pPool->cUsedPages--;
3659 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3660 return rc;
3661}
3662
3663
3664/**
3665 * Frees a usage of a pool page.
3666 *
3667 * The caller is responsible for updating the user table so that it no longer
3668 * references the shadow page.
3669 *
3670 * @param pPool The pool.
3671 * @param pPage The shadow page.
3672 * @param iUser The shadow page pool index of the user table.
3673 * @param iUserTable The index into the user table (shadowed).
3674 */
3675void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3676{
3677 STAM_PROFILE_START(&pPool->StatFree, a);
3678 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3679 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3680 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3681#ifdef PGMPOOL_WITH_USER_TRACKING
3682 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3683#endif
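/* Cached pages are left in the pool so they can be reused later; only uncached pages are flushed back to the free list right away. */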
3684#ifdef PGMPOOL_WITH_CACHE
3685 if (!pPage->fCached)
3686#endif
3687 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3688 STAM_PROFILE_STOP(&pPool->StatFree, a);
3689}
3690
3691
3692/**
3693 * Makes more free pages available, either by growing the pool or by freeing up used ones.
3694 *
3695 * @returns VBox status code.
3696 * @retval VINF_SUCCESS on success.
3697 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3698 *
3699 * @param pPool The pool.
3700 * @param iUser The user of the page.
3701 */
3702static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3703{
3704 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3705
3706 /*
3707 * If the pool isn't fully grown yet, expand it.
3708 */
3709 if (pPool->cCurPages < pPool->cMaxPages)
3710 {
3711 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3712#ifdef IN_RING3
3713 int rc = PGMR3PoolGrow(pPool->pVMHC);
3714#else
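/* Growing the pool requires allocating host memory, which cannot be done in GC/R0, so punt to ring-3 via the call-host interface. */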
3715 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3716#endif
3717 if (VBOX_FAILURE(rc))
3718 return rc;
3719 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3720 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3721 return VINF_SUCCESS;
3722 }
3723
3724#ifdef PGMPOOL_WITH_CACHE
3725 /*
3726 * Free one cached page.
3727 */
3728 return pgmPoolCacheFreeOne(pPool, iUser);
3729#else
3730 /*
3731 * Flush the pool.
3732 * If we have tracking enabled, it should be possible to come up with
3733 * a cheap replacement strategy...
3734 */
3735 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
3736 Assert(!CPUMIsGuestInLongMode(pPool->CTXSUFF(pVM)));
3737 pgmPoolFlushAllInt(pPool);
3738 return VERR_PGM_POOL_FLUSHED;
3739#endif
3740}
3741
3742
3743/**
3744 * Allocates a page from the pool.
3745 *
3746 * This page may actually be a cached page and not in need of any processing
3747 * on the caller's part.
3748 *
3749 * @returns VBox status code.
3750 * @retval VINF_SUCCESS if a NEW page was allocated.
3751 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3752 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3753 * @param pVM The VM handle.
3754 * @param GCPhys The GC physical address of the page we're going to shadow.
3755 * For 4MB and 2MB PD entries, it's the first address the
3756 * shadow PT is covering.
3757 * @param enmKind The kind of mapping.
3758 * @param iUser The shadow page pool index of the user table.
3759 * @param iUserTable The index into the user table (shadowed).
3760 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3761 */
3762int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
3763{
3764 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3765 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3766 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3767 *ppPage = NULL;
3768
3769#ifdef PGMPOOL_WITH_CACHE
3770 if (pPool->fCacheEnabled)
3771 {
3772 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3773 if (VBOX_SUCCESS(rc2))
3774 {
3775 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3776 LogFlow(("pgmPoolAlloc: cached returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3777 return rc2;
3778 }
3779 }
3780#endif
3781
3782 /*
3783 * Allocate a new one.
3784 */
3785 int rc = VINF_SUCCESS;
3786 uint16_t iNew = pPool->iFreeHead;
3787 if (iNew == NIL_PGMPOOL_IDX)
3788 {
3789 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3790 if (VBOX_FAILURE(rc))
3791 {
3792 if (rc != VERR_PGM_POOL_CLEARED)
3793 {
3794 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3795 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3796 return rc;
3797 }
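/* VERR_PGM_POOL_CLEARED means the handler work flushed the whole pool; report that to the caller as a pool flush. */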
3798 Log(("pgmPoolMakeMoreFreePages failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
3799 rc = VERR_PGM_POOL_FLUSHED;
3800 }
3801 iNew = pPool->iFreeHead;
3802 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3803 }
3804
3805 /* unlink the free head */
3806 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3807 pPool->iFreeHead = pPage->iNext;
3808 pPage->iNext = NIL_PGMPOOL_IDX;
3809
3810 /*
3811 * Initialize it.
3812 */
3813 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3814 pPage->enmKind = enmKind;
3815 pPage->GCPhys = GCPhys;
3816 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3817 pPage->fMonitored = false;
3818 pPage->fCached = false;
3819 pPage->fReusedFlushPending = false;
3820 pPage->fCR3Mix = false;
3821#ifdef PGMPOOL_WITH_MONITORING
3822 pPage->cModifications = 0;
3823 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3824 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3825#endif
3826#ifdef PGMPOOL_WITH_USER_TRACKING
3827 pPage->cPresent = 0;
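/* No PTEs are present yet; ~0 marks that no first-present entry has been recorded. */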
3828 pPage->iFirstPresent = ~0;
3829
3830 /*
3831 * Insert into the tracking and cache. If this fails, free the page.
3832 */
3833 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3834 if (VBOX_FAILURE(rc3))
3835 {
3836 if (rc3 != VERR_PGM_POOL_CLEARED)
3837 {
3838 pPool->cUsedPages--;
3839 pPage->enmKind = PGMPOOLKIND_FREE;
3840 pPage->GCPhys = NIL_RTGCPHYS;
3841 pPage->iNext = pPool->iFreeHead;
3842 pPool->iFreeHead = pPage->idx;
3843 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3844 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3845 return rc3;
3846 }
3847 Log(("pgmPoolTrackInsert failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
3848 rc = VERR_PGM_POOL_FLUSHED;
3849 }
3850#endif /* PGMPOOL_WITH_USER_TRACKING */
3851
3852 /*
3853 * Commit the allocation, clear the page and return.
3854 */
3855#ifdef VBOX_WITH_STATISTICS
3856 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3857 pPool->cUsedPagesHigh = pPool->cUsedPages;
3858#endif
3859
3860 if (!pPage->fZeroed)
3861 {
3862 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3863 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3864 ASMMemZeroPage(pv);
3865 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3866 }
3867
3868 *ppPage = pPage;
3869 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3870 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3871 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3872 return rc;
3873}
3874
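/*
 * Illustrative sketch (not part of the original sources): how a caller would typically
 * pair pgmPoolAlloc with pgmPoolFree when shadowing a guest page table.  The names
 * GCPhysGuestPT, pPdPage, iPde and PdeDst below are placeholders, not real PGM symbols.
 *
 *     PPGMPOOLPAGE pShwPage;
 *     int rc = pgmPoolAlloc(pVM, GCPhysGuestPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT,
 *                           pPdPage->idx, iPde, &pShwPage);
 *     if (rc == VINF_SUCCESS || rc == VINF_PGM_CACHED_PAGE)
 *         PdeDst.u = pShwPage->Core.Key | X86_PDE_P;   // point the shadow PDE at the new PT
 *     ...
 *     // when the PDE is torn down again, drop this usage of the shadow page:
 *     pgmPoolFree(pVM, pShwPage->Core.Key, pPdPage->idx, iPde);
 */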
3875
3876/**
3877 * Frees a usage of a pool page.
3878 *
3879 * @param pVM The VM handle.
3880 * @param HCPhys The HC physical address of the shadow page.
3881 * @param iUser The shadow page pool index of the user table.
3882 * @param iUserTable The index into the user table (shadowed).
3883 */
3884void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
3885{
3886 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3887 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3888 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3889}
3890
3891
3892/**
3893 * Gets an in-use page in the pool by its physical address.
3894 *
3895 * @returns Pointer to the page.
3896 * @param pVM The VM handle.
3897 * @param HCPhys The HC physical address of the shadow page.
3898 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3899 */
3900PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3901{
3902 /** @todo profile this! */
3903 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3904 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3905 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3906 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3907 return pPage;
3908}
3909
3910
3911/**
3912 * Flushes the entire cache.
3913 *
3914 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3915 * and will execute this CR3 flush.
3916 *
3917 * @param pVM The VM handle.
3918 */
3919void pgmPoolFlushAll(PVM pVM)
3920{
3921 LogFlow(("pgmPoolFlushAll:\n"));
3922 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3923}
3924