VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 12932

Last change on this file since 12932 was 12932, checked in by vboxsync, 16 years ago

Paging updates in preparation for EPT.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 140.4 KB
1/* $Id: PGMAllPool.cpp 12932 2008-10-02 11:48:49Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_GC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37
38#include <VBox/log.h>
39#include <VBox/err.h>
40#include <iprt/asm.h>
41
42
43/*******************************************************************************
44* Internal Functions *
45*******************************************************************************/
46__BEGIN_DECLS
47static void pgmPoolFlushAllInt(PPGMPOOL pPool);
48#ifdef PGMPOOL_WITH_USER_TRACKING
49DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
50DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
51static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
52#endif
53#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
54static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
55#endif
56#ifdef PGMPOOL_WITH_CACHE
57static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
58#endif
59#ifdef PGMPOOL_WITH_MONITORING
60static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
61#endif
62#ifndef IN_RING3
63DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
64#endif
65__END_DECLS
66
67
68/**
69 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
70 *
71 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
72 * @param enmKind The page kind.
73 */
74DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
75{
76 switch (enmKind)
77 {
78 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
79 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
80 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
81 return true;
82 default:
83 return false;
84 }
85}
86
87
88#ifdef IN_GC
89/**
90 * Maps a pool page into the current context.
91 *
92 * @returns Pointer to the mapping.
93 * @param pVM The VM handle.
94 * @param pPage The page to map.
95 */
96void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
97{
98 /* general pages. */
99 if (pPage->idx >= PGMPOOL_IDX_FIRST)
100 {
101 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
102 void *pv;
103 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
104 AssertReleaseRC(rc);
105 return pv;
106 }
107
108 /* special pages. */
109 switch (pPage->idx)
110 {
111 case PGMPOOL_IDX_PD:
112 return pVM->pgm.s.pGC32BitPD;
113 case PGMPOOL_IDX_PAE_PD:
114 case PGMPOOL_IDX_PAE_PD_0:
115 return pVM->pgm.s.apGCPaePDs[0];
116 case PGMPOOL_IDX_PAE_PD_1:
117 return pVM->pgm.s.apGCPaePDs[1];
118 case PGMPOOL_IDX_PAE_PD_2:
119 return pVM->pgm.s.apGCPaePDs[2];
120 case PGMPOOL_IDX_PAE_PD_3:
121 return pVM->pgm.s.apGCPaePDs[3];
122 case PGMPOOL_IDX_PDPT:
123 return pVM->pgm.s.pGCPaePDPT;
124 default:
125 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
126 return NULL;
127 }
128}
129#endif /* IN_GC */
130
131
132#ifdef PGMPOOL_WITH_MONITORING
133/**
134 * Determine the size of a write instruction.
135 * @returns number of bytes written.
136 * @param pDis The disassembler state.
137 */
138static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
139{
140 /*
141 * This is very crude and possibly wrong for some opcodes,
142 * but since it's not really supposed to be called we can
143 * probably live with that.
144 */
145 return DISGetParamSize(pDis, &pDis->param1);
146}
147
148
149/**
150 * Flushes a chain of pages sharing the same access monitor.
151 *
152 * @returns VBox status code suitable for scheduling.
153 * @param pPool The pool.
154 * @param pPage A page in the chain.
155 */
156int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
157{
158 LogFlow(("pgmPoolMonitorChainFlush: Flush page %VGp type=%d\n", pPage->GCPhys, pPage->enmKind));
159
160 /*
161 * Find the list head.
162 */
163 uint16_t idx = pPage->idx;
164 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
165 {
166 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
167 {
168 idx = pPage->iMonitoredPrev;
169 Assert(idx != pPage->idx);
170 pPage = &pPool->aPages[idx];
171 }
172 }
173
174 /*
175 * Iterate the list flushing each shadow page.
176 */
177 int rc = VINF_SUCCESS;
178 for (;;)
179 {
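        /* Read the next index up front; flushing pPage below may unlink it from the monitor chain. */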
180 idx = pPage->iMonitoredNext;
181 Assert(idx != pPage->idx);
182 if (pPage->idx >= PGMPOOL_IDX_FIRST)
183 {
184 int rc2 = pgmPoolFlushPage(pPool, pPage);
185 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
186 rc = VINF_PGM_SYNC_CR3;
187 }
188 /* next */
189 if (idx == NIL_PGMPOOL_IDX)
190 break;
191 pPage = &pPool->aPages[idx];
192 }
193 return rc;
194}
195
196
197/**
198 * Wrapper for getting the current context pointer to the entry being modified.
199 *
200 * @returns Pointer to the current context mapping of the entry.
201 * @param pPool The pool.
202 * @param pvFault The fault virtual address.
203 * @param GCPhysFault The fault physical address.
204 * @param cbEntry The entry size.
205 */
206#ifdef IN_RING3
207DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
208#else
209DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
210#endif
211{
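    /* All three variants round the address down to the start of the entry being written; cbEntry is assumed to be a power of two. */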
212#ifdef IN_GC
213 return (const void *)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
214
215#elif defined(IN_RING0)
216 void *pvRet;
217 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
218 AssertFatalRCSuccess(rc);
219 return pvRet;
220
221#elif defined(IN_RING3)
222 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
223#else
224# error "huh?"
225#endif
226}
227
228
229/**
230 * Process shadow entries before they are changed by the guest.
231 *
232 * For PT entries we will clear them. For PD entries, we'll simply check
233 * for mapping conflicts and set the SyncCR3 FF if found.
234 *
235 * @param pPool The pool.
236 * @param pPage The head page.
237 * @param GCPhysFault The guest physical fault address.
238 * @param uAddress In R0 and GC this is the guest context fault address (flat).
239 * In R3 this is the host context 'fault' address.
240 * @param pCpu The disassembler state for figuring out the write size.
241 * This need not be specified if the caller knows we won't do cross entry accesses.
242 */
243#ifdef IN_RING3
244void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
245#else
246void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
247#endif
248{
249 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
250 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
251 const unsigned cbWrite = (pCpu) ? pgmPoolDisasWriteSize(pCpu) : 0;
252
253 LogFlow(("pgmPoolMonitorChainChanging: %VGv phys=%VGp kind=%d cbWrite=%d\n", pvAddress, GCPhysFault, pPage->enmKind, cbWrite));
254
255 for (;;)
256 {
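        /* One shadow page, many views: the union below reinterprets the mapping according to pPage->enmKind. */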
257 union
258 {
259 void *pv;
260 PX86PT pPT;
261 PX86PTPAE pPTPae;
262 PX86PD pPD;
263 PX86PDPAE pPDPae;
264 PX86PDPT pPDPT;
265 PX86PML4 pPML4;
266 } uShw;
267 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
268
269 switch (pPage->enmKind)
270 {
271 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
272 {
273 const unsigned iShw = off / sizeof(X86PTE);
274 if (uShw.pPT->a[iShw].n.u1Present)
275 {
276# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
277 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
278 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
279 pgmPoolTracDerefGCPhysHint(pPool, pPage,
280 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
281 pGstPte->u & X86_PTE_PG_MASK);
282# endif
283 uShw.pPT->a[iShw].u = 0;
284 }
285 break;
286 }
287
288 /* page/2 sized */
289 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
290 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
291 {
292 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
293 if (uShw.pPTPae->a[iShw].n.u1Present)
294 {
295# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
296 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
297 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
298 pgmPoolTracDerefGCPhysHint(pPool, pPage,
299 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
300 pGstPte->u & X86_PTE_PG_MASK);
301# endif
302 uShw.pPTPae->a[iShw].u = 0;
303 }
304 }
305 break;
306
307 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
308 {
309 const unsigned iShw = off / sizeof(X86PTEPAE);
310 if (uShw.pPTPae->a[iShw].n.u1Present)
311 {
312# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
313 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
314 Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
315 pgmPoolTracDerefGCPhysHint(pPool, pPage,
316 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
317 pGstPte->u & X86_PTE_PAE_PG_MASK);
318# endif
319 uShw.pPTPae->a[iShw].u = 0;
320 }
321
322 /* paranoia / a bit assumptive. */
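        /* An unaligned write of cbWrite bytes can straddle two PAE PTEs, so check the second entry as well. */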
323 if ( pCpu
324 && (off & 7)
325 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
326 {
327 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
328 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
329
330 if (uShw.pPTPae->a[iShw2].n.u1Present)
331 {
332# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
333 PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
334 Log4(("pgmPoolMonitorChainChanging pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
335 pgmPoolTracDerefGCPhysHint(pPool, pPage,
336 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
337 pGstPte->u & X86_PTE_PAE_PG_MASK);
338# endif
339 uShw.pPTPae->a[iShw2].u = 0;
340 }
341 }
342
343 break;
344 }
345
346 case PGMPOOLKIND_ROOT_32BIT_PD:
347 {
348 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
349 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
350 {
351 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
352 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
353 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
354 }
355 /* paranoia / a bit assumptive. */
356 else if ( pCpu
357 && (off & 3)
358 && (off & 3) + cbWrite > sizeof(X86PTE))
359 {
360 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
361 if ( iShw2 != iShw
362 && iShw2 < RT_ELEMENTS(uShw.pPD->a)
363 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
364 {
365 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
366 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
367 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
368 }
369 }
370#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
371 if ( uShw.pPD->a[iShw].n.u1Present
372 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
373 {
374 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
375# ifdef IN_GC /* TLB load - we're pushing things a bit... */
376 ASMProbeReadByte(pvAddress);
377# endif
378 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
379 uShw.pPD->a[iShw].u = 0;
380 }
381#endif
382 break;
383 }
384
385 case PGMPOOLKIND_ROOT_PAE_PD:
386 {
387 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
388 for (unsigned i = 0; i < 2; i++, iShw++)
389 {
390 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
391 {
392 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
393 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
394 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
395 }
396 /* paranoia / a bit assumptive. */
397 else if ( pCpu
398 && (off & 3)
399 && (off & 3) + cbWrite > 4)
400 {
401 const unsigned iShw2 = iShw + 2;
402 if ( iShw2 < RT_ELEMENTS(uShw.pPDPae->a)
403 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
404 {
405 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
406 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
407 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
408 }
409 }
410#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
411 if ( uShw.pPDPae->a[iShw].n.u1Present
412 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
413 {
414 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
415# ifdef IN_GC /* TLB load - we're pushing things a bit... */
416 ASMProbeReadByte(pvAddress);
417# endif
418 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
419 uShw.pPDPae->a[iShw].u = 0;
420 }
421#endif
422 }
423 break;
424 }
425
426 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
427 {
428 const unsigned iShw = off / sizeof(X86PDEPAE);
429 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
430 {
431 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
432 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
433 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
434 }
435#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
436 /* Causes trouble when the guest uses a PDE to refer to the whole page-table-level
437 * structure (invalidate here; faults occur later when it tries to change the page
438 * table entries).
439 * -> recheck; probably only applies to the GC case. */
440 else
441 {
442 if (uShw.pPDPae->a[iShw].n.u1Present)
443 {
444 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
445 pgmPoolFree(pPool->CTXSUFF(pVM),
446 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
447 /* Note: hardcoded PAE implementation dependency */
448 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
449 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw);
450 uShw.pPDPae->a[iShw].u = 0;
451 }
452 }
453#endif
454 /* paranoia / a bit assumptive. */
455 if ( pCpu
456 && (off & 7)
457 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
458 {
459 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
460 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
461
462 if ( iShw2 != iShw
463 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
464 {
465 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
466 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
467 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
468 }
469#ifdef PGMPOOL_INVALIDATE_UPPER_SHADOW_TABLE_ENTRIES
470 else
471 if (uShw.pPDPae->a[iShw2].n.u1Present)
472 {
473 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
474 pgmPoolFree(pPool->CTXSUFF(pVM),
475 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
476 /* Note: hardcoded PAE implementation dependency */
477 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? PGMPOOL_IDX_PAE_PD : pPage->idx,
478 (pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD) ? iShw2 + (pPage->idx - PGMPOOL_IDX_PAE_PD_0) * X86_PG_PAE_ENTRIES : iShw2);
479 uShw.pPDPae->a[iShw2].u = 0;
480 }
481#endif
482 }
483 break;
484 }
485
486 case PGMPOOLKIND_ROOT_PDPT:
487 {
488 /* Hopefully this doesn't happen very often:
489 * - touching unused parts of the page
490 * - messing with the bits of pd pointers without changing the physical address
491 */
492 const unsigned iShw = off / sizeof(X86PDPE);
493 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
494 {
495 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
496 {
497 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
498 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
499 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
500 }
501 /* paranoia / a bit assumptive. */
502 else if ( pCpu
503 && (off & 7)
504 && (off & 7) + cbWrite > sizeof(X86PDPE))
505 {
506 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
507 if ( iShw2 != iShw
508 && iShw2 < X86_PG_PAE_PDPE_ENTRIES
509 && uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
510 {
511 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
512 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
513 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
514 }
515 }
516 }
517 break;
518 }
519
520#ifndef IN_GC
521 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
522 {
523 Assert(pPage->enmKind == PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD);
524
525 const unsigned iShw = off / sizeof(X86PDEPAE);
526 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
527 {
528 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
529 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
530 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
531 }
532 else
533 {
534 if (uShw.pPDPae->a[iShw].n.u1Present)
535 {
536 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
537 pgmPoolFree(pPool->CTXSUFF(pVM),
538 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
539 pPage->idx,
540 iShw);
541 uShw.pPDPae->a[iShw].u = 0;
542 }
543 }
544 /* paranoia / a bit assumptive. */
545 if ( pCpu
546 && (off & 7)
547 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
548 {
549 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
550 AssertReturnVoid(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
551
552 if ( iShw2 != iShw
553 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
554 {
555 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
556 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
557 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
558 }
559 else
560 if (uShw.pPDPae->a[iShw2].n.u1Present)
561 {
562 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
563 pgmPoolFree(pPool->CTXSUFF(pVM),
564 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
565 pPage->idx,
566 iShw2);
567 uShw.pPDPae->a[iShw2].u = 0;
568 }
569 }
570 break;
571 }
572
573 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
574 {
575 /* Hopefully this doesn't happen very often:
576 * - messing with the bits of pd pointers without changing the physical address
577 */
578 if (!VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
579 {
580 const unsigned iShw = off / sizeof(X86PDPE);
581 if (uShw.pPDPT->a[iShw].n.u1Present)
582 {
583 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
584 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
585 uShw.pPDPT->a[iShw].u = 0;
586 }
587 /* paranoia / a bit assumptive. */
588 if ( pCpu
589 && (off & 7)
590 && (off & 7) + cbWrite > sizeof(X86PDPE))
591 {
592 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
593 if (uShw.pPDPT->a[iShw2].n.u1Present)
594 {
595 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
596 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
597 uShw.pPDPT->a[iShw2].u = 0;
598 }
599 }
600 }
601 break;
602 }
603
604 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
605 {
606 /* Hopefully this doesn't happen very often:
607 * - messing with the bits of pd pointers without changing the physical address
608 */
609 if (!VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
610 {
611 const unsigned iShw = off / sizeof(X86PDPE);
612 if (uShw.pPML4->a[iShw].n.u1Present)
613 {
614 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
615 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
616 uShw.pPML4->a[iShw].u = 0;
617 }
618 /* paranoia / a bit assumptive. */
619 if ( pCpu
620 && (off & 7)
621 && (off & 7) + cbWrite > sizeof(X86PDPE))
622 {
623 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
624 if (uShw.pPML4->a[iShw2].n.u1Present)
625 {
626 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
627 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
628 uShw.pPML4->a[iShw2].u = 0;
629 }
630 }
631 }
632 break;
633 }
634#endif /* !IN_GC */
635
636 default:
637 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
638 }
639
640 /* next */
641 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
642 return;
643 pPage = &pPool->aPages[pPage->iMonitoredNext];
644 }
645}
646
647
648# ifndef IN_RING3
649/**
650 * Checks if an access could be a fork operation in progress.
651 *
652 * Meaning that the guest is setting up the parent process for copy-on-write.
653 *
654 * @returns true if it's likely that we're forking, otherwise false.
655 * @param pPool The pool.
656 * @param pCpu The disassembled instruction.
657 * @param offFault The access offset.
658 */
659DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
660{
661 /*
662 * i386 linux is using btr to clear X86_PTE_RW.
663 * The functions involved are (2.6.16 source inspection):
664 * clear_bit
665 * ptep_set_wrprotect
666 * copy_one_pte
667 * copy_pte_range
668 * copy_pmd_range
669 * copy_pud_range
670 * copy_page_range
671 * dup_mmap
672 * dup_mm
673 * copy_mm
674 * copy_process
675 * do_fork
676 */
677 if ( pCpu->pCurInstr->opcode == OP_BTR
678 && !(offFault & 4)
679 /** @todo Validate that the bit index is X86_PTE_RW. */
680 )
681 {
682 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
683 return true;
684 }
685 return false;
686}
687
688
689/**
690 * Determine whether the page is likely to have been reused.
691 *
692 * @returns true if we consider the page as being reused for a different purpose.
693 * @returns false if we consider it to still be a paging page.
694 * @param pVM VM Handle.
695 * @param pPage The page in question.
696 * @param pRegFrame Trap register frame.
697 * @param pCpu The disassembly info for the faulting instruction.
698 * @param pvFault The fault address.
699 *
700 * @remark The REP prefix check is left to the caller because of STOSD/W.
701 */
702DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PPGMPOOLPAGE pPage, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pCpu, RTGCPTR pvFault)
703{
704#ifndef IN_GC
705 if ( HWACCMHasPendingIrq(pVM)
706 && (pRegFrame->rsp - pvFault) < 32)
707 {
708 /* Fault caused by stack writes while trying to inject an interrupt event. */
709 Log(("pgmPoolMonitorIsReused: reused %VGv for interrupt stack (rsp=%VGv).\n", pvFault, pRegFrame->rsp));
710 return true;
711 }
712#else
713 NOREF(pVM);
714#endif
715
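    /* Heuristic: instructions that normally target the stack or perform bulk stores
       suggest the guest has repurposed the page for something other than a page table. */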
716 switch (pCpu->pCurInstr->opcode)
717 {
718 /* call implies the actual push of the return address faulted */
719 case OP_CALL:
720 Log4(("pgmPoolMonitorIsReused: CALL\n"));
721 return true;
722 case OP_PUSH:
723 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
724 return true;
725 case OP_PUSHF:
726 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
727 return true;
728 case OP_PUSHA:
729 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
730 return true;
731 case OP_FXSAVE:
732 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
733 return true;
734 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
735 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
736 return true;
737 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
738 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
739 return true;
740 case OP_MOVSWD:
741 case OP_STOSWD:
742 if ( pCpu->prefix == (PREFIX_REP|PREFIX_REX)
743 && pRegFrame->rcx >= 0x40
744 )
745 {
746 Assert(pCpu->mode == CPUMODE_64BIT);
747
748 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
749 return true;
750 }
751 return false;
752 }
753 if ( (pCpu->param1.flags & USE_REG_GEN32)
754 && (pCpu->param1.base.reg_gen == USE_REG_ESP))
755 {
756 Log4(("pgmPoolMonitorIsReused: ESP\n"));
757 return true;
758 }
759
760 //if (pPage->fCR3Mix)
761 // return false;
762 return false;
763}
764
765
766/**
767 * Flushes the page being accessed.
768 *
769 * @returns VBox status code suitable for scheduling.
770 * @param pVM The VM handle.
771 * @param pPool The pool.
772 * @param pPage The pool page (head).
773 * @param pCpu The disassembly of the write instruction.
774 * @param pRegFrame The trap register frame.
775 * @param GCPhysFault The fault address as guest physical address.
776 * @param pvFault The fault address.
777 */
778static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
779 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
780{
781 /*
782 * First, do the flushing.
783 */
784 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
785
786 /*
787 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
788 */
789 uint32_t cbWritten;
790 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
791 if (VBOX_SUCCESS(rc2))
792 pRegFrame->rip += pCpu->opsize;
793 else if (rc2 == VERR_EM_INTERPRETER)
794 {
795#ifdef IN_GC
796 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
797 {
798 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
799 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
800 rc = VINF_SUCCESS;
801 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
802 }
803 else
804#endif
805 {
806 rc = VINF_EM_RAW_EMULATE_INSTR;
807 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
808 }
809 }
810 else
811 rc = rc2;
812
813 /* See use in pgmPoolAccessHandlerSimple(). */
814 PGM_INVL_GUEST_TLBS();
815
816 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
817 return rc;
818
819}
820
821
822/**
823 * Handles the STOSD write accesses.
824 *
825 * @returns VBox status code suitable for scheduling.
826 * @param pVM The VM handle.
827 * @param pPool The pool.
828 * @param pPage The pool page (head).
829 * @param pCpu The disassembly of the write instruction.
830 * @param pRegFrame The trap register frame.
831 * @param GCPhysFault The fault address as guest physical address.
832 * @param pvFault The fault address.
833 */
834DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
835 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
836{
837 Assert(pCpu->mode == CPUMODE_32BIT);
838
839 /*
840 * Increment the modification counter and insert it into the list
841 * of modified pages the first time.
842 */
843 if (!pPage->cModifications++)
844 pgmPoolMonitorModifiedInsert(pPool, pPage);
845
846 /*
847 * Execute REP STOSD.
848 *
849 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
850 * write situation, meaning that it's safe to write here.
851 */
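    /* Each 4-byte store is announced via pgmPoolMonitorChainChanging first, so the
       corresponding shadow entries are processed before the guest value is written. */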
852 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
853 while (pRegFrame->ecx)
854 {
855 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, (RTGCPTR)pu32, NULL);
856#ifdef IN_GC
857 *(uint32_t *)pu32 = pRegFrame->eax;
858#else
859 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
860#endif
861 pu32 += 4;
862 GCPhysFault += 4;
863 pRegFrame->edi += 4;
864 pRegFrame->ecx--;
865 }
866 pRegFrame->rip += pCpu->opsize;
867
868 /* See use in pgmPoolAccessHandlerSimple(). */
869 PGM_INVL_GUEST_TLBS();
870
871 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
872 return VINF_SUCCESS;
873}
874
875
876/**
877 * Handles the simple write accesses.
878 *
879 * @returns VBox status code suitable for scheduling.
880 * @param pVM The VM handle.
881 * @param pPool The pool.
882 * @param pPage The pool page (head).
883 * @param pCpu The disassembly of the write instruction.
884 * @param pRegFrame The trap register frame.
885 * @param GCPhysFault The fault address as guest physical address.
886 * @param pvFault The fault address.
887 */
888DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
889 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
890{
891 /*
892 * Increment the modification counter and insert it into the list
893 * of modified pages the first time.
894 */
895 if (!pPage->cModifications++)
896 pgmPoolMonitorModifiedInsert(pPool, pPage);
897
898 /*
899 * Clear all the pages. ASSUMES that pvFault is readable.
900 */
901 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
902
903 /*
904 * Interpret the instruction.
905 */
906 uint32_t cb;
907 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
908 if (VBOX_SUCCESS(rc))
909 pRegFrame->rip += pCpu->opsize;
910 else if (rc == VERR_EM_INTERPRETER)
911 {
912 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
913 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pCpu->pCurInstr->opcode));
914 rc = VINF_EM_RAW_EMULATE_INSTR;
915 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
916 }
917
918 /*
919 * Quick hack, with logging enabled we're getting stale
920 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
921 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
922 * have to be fixed to support this. But that'll have to wait till next week.
923 *
924 * An alternative is to keep track of the changed PTEs together with the
925 * GCPhys from the guest PT. This may prove expensive, though.
926 *
927 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
928 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
929 */
930 PGM_INVL_GUEST_TLBS();
931
932 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
933 return rc;
934}
935
936
937/**
938 * \#PF Handler callback for PT write accesses.
939 *
940 * @returns VBox status code (appropriate for GC return).
941 * @param pVM VM Handle.
942 * @param uErrorCode CPU Error code.
943 * @param pRegFrame Trap register frame.
944 * NULL on DMA and other non-CPU access.
945 * @param pvFault The fault address (cr2).
946 * @param GCPhysFault The GC physical address corresponding to pvFault.
947 * @param pvUser User argument.
948 */
949DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
950{
951 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
952 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
953 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
954 LogFlow(("pgmPoolAccessHandler: pvFault=%VGv pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
955
956 /*
957 * We should ALWAYS have the list head as user parameter. This
958 * is because we use that page to record the changes.
959 */
960 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
961
962 /*
963 * Disassemble the faulting instruction.
964 */
965 DISCPUSTATE Cpu;
966 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
967 AssertRCReturn(rc, rc);
968
969 /*
970 * Check if it's worth dealing with.
971 */
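    /* Only interpret the write when the page hasn't been modified too often and the access
       doesn't look like a reuse or a fork(); otherwise fall through and flush the chain. */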
972 bool fReused = false;
973 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
974 || pPage->fCR3Mix)
975 && !(fReused = pgmPoolMonitorIsReused(pVM, pPage, pRegFrame, &Cpu, pvFault))
976 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
977 {
978 /*
979 * Simple instructions, no REP prefix.
980 */
981 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
982 {
983 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
984 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
985 return rc;
986 }
987
988 /*
989 * Windows is frequently doing small memset() operations (netio test 4k+).
990 * We have to deal with these or we'll kill the cache and performance.
991 */
992 if ( Cpu.pCurInstr->opcode == OP_STOSWD
993 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
994 && pRegFrame->ecx <= 0x20
995 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
996 && !((uintptr_t)pvFault & 3)
997 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
998 && Cpu.mode == CPUMODE_32BIT
999 && Cpu.opmode == CPUMODE_32BIT
1000 && Cpu.addrmode == CPUMODE_32BIT
1001 && Cpu.prefix == PREFIX_REP
1002 && !pRegFrame->eflags.Bits.u1DF
1003 )
1004 {
1005 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1006 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
1007 return rc;
1008 }
1009
1010 /* REP prefix, don't bother. */
1011 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
1012 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%VGv opcode=%d prefix=%#x\n",
1013 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->rip, Cpu.pCurInstr->opcode, Cpu.prefix));
1014 }
1015
1016 /*
1017 * Not worth it, so flush it.
1018 *
1019 * If we considered it to be reused, don't go back to ring-3
1020 * to emulate failed instructions since we usually cannot
1021 * interpret them. This may be a bit risky, in which case
1022 * the reuse detection must be fixed.
1023 */
1024 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
1025 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
1026 rc = VINF_SUCCESS;
1027 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
1028 return rc;
1029}
1030
1031# endif /* !IN_RING3 */
1032#endif /* PGMPOOL_WITH_MONITORING */
1033
1034
1035
1036#ifdef PGMPOOL_WITH_CACHE
1037/**
1038 * Inserts a page into the GCPhys hash table.
1039 *
1040 * @param pPool The pool.
1041 * @param pPage The page.
1042 */
1043DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1044{
1045 Log3(("pgmPoolHashInsert: %VGp\n", pPage->GCPhys));
1046 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
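    /* Chained hashing: the page becomes the new head of its bucket's singly linked iNext list. */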
1047 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1048 pPage->iNext = pPool->aiHash[iHash];
1049 pPool->aiHash[iHash] = pPage->idx;
1050}
1051
1052
1053/**
1054 * Removes a page from the GCPhys hash table.
1055 *
1056 * @param pPool The pool.
1057 * @param pPage The page.
1058 */
1059DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1060{
1061 Log3(("pgmPoolHashRemove: %VGp\n", pPage->GCPhys));
1062 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1063 if (pPool->aiHash[iHash] == pPage->idx)
1064 pPool->aiHash[iHash] = pPage->iNext;
1065 else
1066 {
1067 uint16_t iPrev = pPool->aiHash[iHash];
1068 for (;;)
1069 {
1070 const int16_t i = pPool->aPages[iPrev].iNext;
1071 if (i == pPage->idx)
1072 {
1073 pPool->aPages[iPrev].iNext = pPage->iNext;
1074 break;
1075 }
1076 if (i == NIL_PGMPOOL_IDX)
1077 {
1078 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1079 break;
1080 }
1081 iPrev = i;
1082 }
1083 }
1084 pPage->iNext = NIL_PGMPOOL_IDX;
1085}
1086
1087
1088/**
1089 * Frees up one cache page.
1090 *
1091 * @returns VBox status code.
1092 * @retval VINF_SUCCESS on success.
1093 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
1094 * @param pPool The pool.
1095 * @param iUser The user index.
1096 */
1097static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1098{
1099#ifndef IN_GC
1100 const PVM pVM = pPool->CTXSUFF(pVM);
1101#endif
1102 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
1103 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1104
1105 /*
1106 * Select one page from the tail of the age list.
1107 */
1108 uint16_t iToFree = pPool->iAgeTail;
1109 if (iToFree == iUser)
1110 iToFree = pPool->aPages[iToFree].iAgePrev;
1111/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1112 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1113 {
1114 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1115 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1116 {
1117 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1118 continue;
1119 iToFree = i;
1120 break;
1121 }
1122 }
1123*/
1124 Assert(iToFree != iUser);
1125 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1126
1127 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
1128 if (rc == VINF_SUCCESS)
1129 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1130 return rc;
1131}
1132
1133
1134/**
1135 * Checks if a kind mismatch is really a page being reused
1136 * or if it's just normal remappings.
1137 *
1138 * @returns true if reused and the cached page (enmKind1) should be flushed
1139 * @returns false if not reused.
1140 * @param enmKind1 The kind of the cached page.
1141 * @param enmKind2 The kind of the requested page.
1142 */
1143static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1144{
1145 switch (enmKind1)
1146 {
1147 /*
1148 * Never reuse them. There is no remapping in non-paging mode.
1149 */
1150 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1151 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1152 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1153 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1154 return true;
1155
1156 /*
1157 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1158 */
1159 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1160 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1161 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1162 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1163 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1164 switch (enmKind2)
1165 {
1166 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1167 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1168 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1169 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1170 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1171 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1172 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1173 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1174 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1175 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1176 return true;
1177 default:
1178 return false;
1179 }
1180
1181 /*
1182 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1183 */
1184 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1185 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1186 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1187 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1188 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1189 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1190 switch (enmKind2)
1191 {
1192 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1193 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1194 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1195 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1196 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1197 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1198 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1199 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1200 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1201 return true;
1202 default:
1203 return false;
1204 }
1205
1206 /*
1207 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1208 */
1209 case PGMPOOLKIND_ROOT_32BIT_PD:
1210 case PGMPOOLKIND_ROOT_PAE_PD:
1211 case PGMPOOLKIND_ROOT_PDPT:
1212 case PGMPOOLKIND_ROOT_NESTED:
1213 return false;
1214
1215 default:
1216 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1217 }
1218}
1219
1220
1221/**
1222 * Attempts to satisfy a pgmPoolAlloc request from the cache.
1223 *
1224 * @returns VBox status code.
1225 * @retval VINF_PGM_CACHED_PAGE on success.
1226 * @retval VERR_FILE_NOT_FOUND if not found.
1227 * @param pPool The pool.
1228 * @param GCPhys The GC physical address of the page we're gonna shadow.
1229 * @param enmKind The kind of mapping.
1230 * @param iUser The shadow page pool index of the user table.
1231 * @param iUserTable The index into the user table (shadowed).
1232 * @param ppPage Where to store the pointer to the page.
1233 */
1234static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
1235{
1236#ifndef IN_GC
1237 const PVM pVM = pPool->CTXSUFF(pVM);
1238#endif
1239 /*
1240 * Look up the GCPhys in the hash.
1241 */
1242 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1243 Log3(("pgmPoolCacheAlloc: %VGp kind %d iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, enmKind, iUser, iUserTable, i));
1244 if (i != NIL_PGMPOOL_IDX)
1245 {
1246 do
1247 {
1248 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1249 Log3(("pgmPoolCacheAlloc: slot %d found page %VGp\n", i, pPage->GCPhys));
1250 if (pPage->GCPhys == GCPhys)
1251 {
1252 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
1253 {
1254 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
1255 if (VBOX_SUCCESS(rc))
1256 {
1257 *ppPage = pPage;
1258 STAM_COUNTER_INC(&pPool->StatCacheHits);
1259 return VINF_PGM_CACHED_PAGE;
1260 }
1261 return rc;
1262 }
1263
1264 /*
1265 * The kind is different. In some cases we should now flush the page
1266 * as it has been reused, but in most cases this is normal remapping
1267 * of PDs as PT or big pages using the GCPhys field in a slightly
1268 * different way than the other kinds.
1269 */
1270 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
1271 {
1272 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
1273 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
1274 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
1275 break;
1276 }
1277 }
1278
1279 /* next */
1280 i = pPage->iNext;
1281 } while (i != NIL_PGMPOOL_IDX);
1282 }
1283
1284 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
1285 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1286 return VERR_FILE_NOT_FOUND;
1287}
1288
1289
1290/**
1291 * Inserts a page into the cache.
1292 *
1293 * @param pPool The pool.
1294 * @param pPage The cached page.
1295 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1296 */
1297static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1298{
1299 /*
1300 * Insert into the GCPhys hash if the page is fit for that.
1301 */
1302 Assert(!pPage->fCached);
1303 if (fCanBeCached)
1304 {
1305 pPage->fCached = true;
1306 pgmPoolHashInsert(pPool, pPage);
1307 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1308 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1309 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1310 }
1311 else
1312 {
1313 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1314 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1315 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1316 }
1317
1318 /*
1319 * Insert at the head of the age list.
1320 */
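    /* Most recently used pages sit at the head; pgmPoolCacheFreeOne evicts from the tail. */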
1321 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1322 pPage->iAgeNext = pPool->iAgeHead;
1323 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1324 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1325 else
1326 pPool->iAgeTail = pPage->idx;
1327 pPool->iAgeHead = pPage->idx;
1328}
1329
1330
1331/**
1332 * Flushes a cached page.
1333 *
1334 * @param pPool The pool.
1335 * @param pPage The cached page.
1336 */
1337static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1338{
1339 Log3(("pgmPoolCacheFlushPage: %VGp\n", pPage->GCPhys));
1340
1341 /*
1342 * Remove the page from the hash.
1343 */
1344 if (pPage->fCached)
1345 {
1346 pPage->fCached = false;
1347 pgmPoolHashRemove(pPool, pPage);
1348 }
1349 else
1350 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1351
1352 /*
1353 * Remove it from the age list.
1354 */
1355 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1356 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1357 else
1358 pPool->iAgeTail = pPage->iAgePrev;
1359 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1360 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1361 else
1362 pPool->iAgeHead = pPage->iAgeNext;
1363 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1364 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1365}
1366#endif /* PGMPOOL_WITH_CACHE */
1367
1368
1369#ifdef PGMPOOL_WITH_MONITORING
1370/**
1371 * Looks for pages sharing the monitor.
1372 *
1373 * @returns Pointer to the head page.
1374 * @returns NULL if not found.
1375 * @param pPool The Pool
1376 * @param pNewPage The page which is going to be monitored.
1377 */
1378static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1379{
1380#ifdef PGMPOOL_WITH_CACHE
1381 /*
1382 * Look up the GCPhys in the hash.
1383 */
1384 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1385 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1386 if (i == NIL_PGMPOOL_IDX)
1387 return NULL;
1388 do
1389 {
1390 PPGMPOOLPAGE pPage = &pPool->aPages[i];
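            /* Unsigned compare: true for any entry whose GCPhys lies within the same 4K frame as GCPhys. */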
1391 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1392 && pPage != pNewPage)
1393 {
1394 switch (pPage->enmKind)
1395 {
1396 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1397 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1398 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1399 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1400 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1401 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1402 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1403 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1404 case PGMPOOLKIND_ROOT_32BIT_PD:
1405 case PGMPOOLKIND_ROOT_PAE_PD:
1406 case PGMPOOLKIND_ROOT_PDPT:
1407 {
1408 /* find the head */
1409 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1410 {
1411 Assert(pPage->iMonitoredPrev != pPage->idx);
1412 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1413 }
1414 return pPage;
1415 }
1416
1417 /* ignore, no monitoring. */
1418 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1419 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1420 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1421 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1422 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1423 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1424 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1425 case PGMPOOLKIND_ROOT_NESTED:
1426 break;
1427 default:
1428 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1429 }
1430 }
1431
1432 /* next */
1433 i = pPage->iNext;
1434 } while (i != NIL_PGMPOOL_IDX);
1435#endif
1436 return NULL;
1437}
1438
1439/**
1440 * Enables write monitoring of a guest page.
1441 *
1442 * @returns VBox status code.
1443 * @retval VINF_SUCCESS on success.
1444 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1445 * @param pPool The pool.
1446 * @param pPage The cached page.
1447 */
1448static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1449{
1450 LogFlow(("pgmPoolMonitorInsert %VGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
1451
1452 /*
1453 * Filter out the relevant kinds.
1454 */
1455 switch (pPage->enmKind)
1456 {
1457 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1458 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1459 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1460 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1461 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1462 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1463 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1464 case PGMPOOLKIND_ROOT_PDPT:
1465 break;
1466
1467 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1468 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1469 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1470 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1471 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1472 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1473 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1474 case PGMPOOLKIND_ROOT_NESTED:
1475 /* Nothing to monitor here. */
1476 return VINF_SUCCESS;
1477
1478 case PGMPOOLKIND_ROOT_32BIT_PD:
1479 case PGMPOOLKIND_ROOT_PAE_PD:
1480#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1481 break;
1482#endif
1483 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1484 default:
1485 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1486 }
1487
1488 /*
1489 * Install handler.
1490 */
1491 int rc;
1492 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1493 if (pPageHead)
1494 {
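        /* A physical handler is already registered for this guest page; just link pPage into the
           existing monitor chain right after the head instead of registering a second handler. */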
1495 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1496 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1497 pPage->iMonitoredPrev = pPageHead->idx;
1498 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1499 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1500 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1501 pPageHead->iMonitoredNext = pPage->idx;
1502 rc = VINF_SUCCESS;
1503 }
1504 else
1505 {
1506 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1507 PVM pVM = pPool->CTXSUFF(pVM);
1508 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1509 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1510 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1511 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1512 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1513 pPool->pfnAccessHandlerGC, MMHyperCCToRC(pVM, pPage),
1514 pPool->pszAccessHandler);
1515 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1516 * the heap size should suffice. */
1517 AssertFatalRC(rc);
1518 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1519 rc = VERR_PGM_POOL_CLEARED;
1520 }
1521 pPage->fMonitored = true;
1522 return rc;
1523}
1524
1525
1526/**
1527 * Disables write monitoring of a guest page.
1528 *
1529 * @returns VBox status code.
1530 * @retval VINF_SUCCESS on success.
1531 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1532 * @param pPool The pool.
1533 * @param pPage The cached page.
1534 */
1535static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1536{
1537 /*
1538 * Filter out the relevant kinds.
1539 */
1540 switch (pPage->enmKind)
1541 {
1542 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1543 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1544 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1545 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1546 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1547 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1548 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
1549 case PGMPOOLKIND_ROOT_PDPT:
1550 break;
1551
1552 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1553 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1554 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1555 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1556 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1557 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1558 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1559 case PGMPOOLKIND_ROOT_NESTED:
1560 /* Nothing to monitor here. */
1561 return VINF_SUCCESS;
1562
1563 case PGMPOOLKIND_ROOT_32BIT_PD:
1564 case PGMPOOLKIND_ROOT_PAE_PD:
1565#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1566 break;
1567#endif
1568 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1569 default:
1570 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1571 }
1572
1573 /*
1574 * Remove the page from the monitored list or uninstall it if last.
1575 */
1576 const PVM pVM = pPool->CTXSUFF(pVM);
1577 int rc;
1578 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1579 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1580 {
1581 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1582 {
1583 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1584 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1585 pNewHead->fCR3Mix = pPage->fCR3Mix;
1586 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1587 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1588 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1589 pPool->pfnAccessHandlerGC, MMHyperCCToRC(pVM, pNewHead),
1590 pPool->pszAccessHandler);
1591 AssertFatalRCSuccess(rc);
1592 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1593 }
1594 else
1595 {
1596 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1597 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1598 {
1599 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1600 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1601 }
1602 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1603 rc = VINF_SUCCESS;
1604 }
1605 }
1606 else
1607 {
1608 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1609 AssertFatalRC(rc);
1610 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1611 rc = VERR_PGM_POOL_CLEARED;
1612 }
1613 pPage->fMonitored = false;
1614
1615 /*
1616 * Remove it from the list of modified pages (if in it).
1617 */
1618 pgmPoolMonitorModifiedRemove(pPool, pPage);
1619
1620 return rc;
1621}
1622
1623
1624#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1625/**
1626 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1627 *
1628 * @param pPool The Pool.
1629 * @param pPage A page in the chain.
1630 * @param fCR3Mix The new fCR3Mix value.
1631 */
1632static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1633{
1634 /* current */
1635 pPage->fCR3Mix = fCR3Mix;
1636
1637 /* before */
1638 int16_t idx = pPage->iMonitoredPrev;
1639 while (idx != NIL_PGMPOOL_IDX)
1640 {
1641 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1642 idx = pPool->aPages[idx].iMonitoredPrev;
1643 }
1644
1645 /* after */
1646 idx = pPage->iMonitoredNext;
1647 while (idx != NIL_PGMPOOL_IDX)
1648 {
1649 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1650 idx = pPool->aPages[idx].iMonitoredNext;
1651 }
1652}
1653
1654
1655/**
1656 * Installs or modifies monitoring of a CR3 page (special).
1657 *
1658 * We're pretending the CR3 page is shadowed by the pool so we can use the
1659 * generic mechanisms for detecting chained monitoring. (This also gives us a
1660 * taste of what code changes are required to really pool CR3 shadow pages.)
1661 *
1662 * @returns VBox status code.
1663 * @param pPool The pool.
1664 * @param idxRoot The CR3 (root) page index.
1665 * @param GCPhysCR3 The (new) CR3 value.
1666 */
1667int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1668{
1669 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1670 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1671 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1672 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1673
1674 /*
1675 * The unlikely case where it already matches.
1676 */
1677 if (pPage->GCPhys == GCPhysCR3)
1678 {
1679 Assert(pPage->fMonitored);
1680 return VINF_SUCCESS;
1681 }
1682
1683 /*
1684 * Flush the current monitoring and remove it from the hash.
1685 */
1686 int rc = VINF_SUCCESS;
1687 if (pPage->fMonitored)
1688 {
1689 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1690 rc = pgmPoolMonitorFlush(pPool, pPage);
1691 if (rc == VERR_PGM_POOL_CLEARED)
1692 rc = VINF_SUCCESS;
1693 else
1694 AssertFatalRC(rc);
1695 pgmPoolHashRemove(pPool, pPage);
1696 }
1697
1698 /*
1699 * Monitor the page at the new location and insert it into the hash.
1700 */
1701 pPage->GCPhys = GCPhysCR3;
1702 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1703 if (rc2 != VERR_PGM_POOL_CLEARED)
1704 {
1705 AssertFatalRC(rc2);
1706 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1707 rc = rc2;
1708 }
1709 pgmPoolHashInsert(pPool, pPage);
1710 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1711 return rc;
1712}
1713
1714
1715/**
1716 * Removes the monitoring of a CR3 page (special).
1717 *
1718 * @returns VBox status code.
1719 * @param pPool The pool.
1720 * @param idxRoot The CR3 (root) page index.
1721 */
1722int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1723{
1724 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1725 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1726 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1727 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1728
1729 if (!pPage->fMonitored)
1730 return VINF_SUCCESS;
1731
1732 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1733 int rc = pgmPoolMonitorFlush(pPool, pPage);
1734 if (rc != VERR_PGM_POOL_CLEARED)
1735 AssertFatalRC(rc);
1736 else
1737 rc = VINF_SUCCESS;
1738 pgmPoolHashRemove(pPool, pPage);
1739 Assert(!pPage->fMonitored);
1740 pPage->GCPhys = NIL_RTGCPHYS;
1741 return rc;
1742}
1743#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1744
1745
1746/**
1747 * Inserts the page into the list of modified pages.
1748 *
1749 * @param pPool The pool.
1750 * @param pPage The page.
1751 */
1752void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1753{
1754 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1755 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1756 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1757 && pPool->iModifiedHead != pPage->idx,
1758 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1759 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1760 pPool->iModifiedHead, pPool->cModifiedPages));
1761
1762 pPage->iModifiedNext = pPool->iModifiedHead;
1763 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1764 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1765 pPool->iModifiedHead = pPage->idx;
1766 pPool->cModifiedPages++;
1767#ifdef VBOX_WITH_STATISTICS
1768 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1769 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1770#endif
1771}
1772
1773
1774/**
1775 * Removes the page from the list of modified pages and resets the
1776 * modification counter.
1777 *
1778 * @param pPool The pool.
1779 * @param pPage The page which is believed to be in the list of modified pages.
1780 */
1781static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1782{
1783 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1784 if (pPool->iModifiedHead == pPage->idx)
1785 {
1786 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1787 pPool->iModifiedHead = pPage->iModifiedNext;
1788 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1789 {
1790 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1791 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1792 }
1793 pPool->cModifiedPages--;
1794 }
1795 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1796 {
1797 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1798 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1799 {
1800 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1801 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1802 }
1803 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1804 pPool->cModifiedPages--;
1805 }
1806 else
1807 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1808 pPage->cModifications = 0;
1809}
1810
1811
1812/**
1813 * Zaps the list of modified pages, resetting their modification counters in the process.
1814 *
1815 * @param pVM The VM handle.
1816 */
1817void pgmPoolMonitorModifiedClearAll(PVM pVM)
1818{
1819 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1820 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1821
1822 unsigned cPages = 0; NOREF(cPages);
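 /* Note: cPages is only advanced inside Assert() and is therefore only maintained in strict builds, where the cross-check below is compiled in. */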
1823 uint16_t idx = pPool->iModifiedHead;
1824 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1825 while (idx != NIL_PGMPOOL_IDX)
1826 {
1827 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1828 idx = pPage->iModifiedNext;
1829 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1830 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1831 pPage->cModifications = 0;
1832 Assert(++cPages);
1833 }
1834 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1835 pPool->cModifiedPages = 0;
1836}
1837
1838
1839/**
1840 * Clear all shadow pages and clear all modification counters.
1841 *
1842 * @param pVM The VM handle.
1843 * @remark Should only be used when monitoring is available, thus placed in
1844 * the PGMPOOL_WITH_MONITORING #ifdef.
1845 */
1846void pgmPoolClearAll(PVM pVM)
1847{
1848 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1849 STAM_PROFILE_START(&pPool->StatClearAll, c);
1850 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1851
1852 /*
1853 * Iterate all the pages until we've encountered all that are in use.
1854 * This is a simple but not quite optimal solution.
1855 */
1856 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1857 unsigned cLeft = pPool->cUsedPages;
1858 unsigned iPage = pPool->cCurPages;
1859 while (--iPage >= PGMPOOL_IDX_FIRST)
1860 {
1861 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1862 if (pPage->GCPhys != NIL_RTGCPHYS)
1863 {
1864 switch (pPage->enmKind)
1865 {
1866 /*
1867 * We only care about shadow page tables.
1868 */
1869 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1870 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1871 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1872 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1873 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1874 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1875 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1876 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1877 {
1878#ifdef PGMPOOL_WITH_USER_TRACKING
1879 if (pPage->cPresent)
1880#endif
1881 {
1882 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1883 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1884 ASMMemZeroPage(pvShw);
1885 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1886#ifdef PGMPOOL_WITH_USER_TRACKING
1887 pPage->cPresent = 0;
1888 pPage->iFirstPresent = ~0;
1889#endif
1890 }
1891 }
1892 /* fall thru */
1893
1894 default:
1895 Assert(!pPage->cModifications || ++cModifiedPages);
1896 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1897 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1898 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1899 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1900 pPage->cModifications = 0;
1901 break;
1902
1903 }
1904 if (!--cLeft)
1905 break;
1906 }
1907 }
1908
1909 /* sweep the special pages too. */
1910 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1911 {
1912 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1913 if (pPage->GCPhys != NIL_RTGCPHYS)
1914 {
1915 Assert(!pPage->cModifications || ++cModifiedPages);
1916 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1917 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1918 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1919 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1920 pPage->cModifications = 0;
1921 }
1922 }
1923
1924#ifndef DEBUG_michael
1925 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1926#endif
1927 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1928 pPool->cModifiedPages = 0;
1929
1930#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1931 /*
1932 * Clear all the GCPhys links and rebuild the phys ext free list.
1933 */
1934 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
1935 pRam;
1936 pRam = CTXALLSUFF(pRam->pNext))
1937 {
1938 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1939 while (iPage-- > 0)
1940 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
1941 }
1942
1943 pPool->iPhysExtFreeHead = 0;
1944 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1945 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1946 for (unsigned i = 0; i < cMaxPhysExts; i++)
1947 {
1948 paPhysExts[i].iNext = i + 1;
1949 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1950 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1951 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1952 }
1953 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1954#endif
1955
1956
1957 pPool->cPresent = 0;
1958 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1959}
1960
1961/**
1962 * Handle SyncCR3 pool tasks
1963 *
1964 * @returns VBox status code.
1965 * @retval VINF_SUCCESS if successfully handled.
1966 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only)
1967 * @param pVM The VM handle.
1968 * @remark Should only be used when monitoring is available, thus placed in
1969 * the PGMPOOL_WITH_MONITORING #ifdef.
1970 */
1971int pgmPoolSyncCR3(PVM pVM)
1972{
1973 /*
1974 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
1975 * Occasionally we will have to clear all the shadow page tables because we wanted
1976 * to monitor a page which was mapped by too many shadowed page tables. This operation is
1977 * sometimes referred to as a 'lightweight flush'.
1978 */
1979 if (!(pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL))
1980 pgmPoolMonitorModifiedClearAll(pVM);
1981 else
1982 {
1983# ifndef IN_GC
1984 pVM->pgm.s.fSyncFlags &= ~PGM_SYNC_CLEAR_PGM_POOL;
1985 pgmPoolClearAll(pVM);
1986# else
1987 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
1988 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
1989 return VINF_PGM_SYNC_CR3;
1990# endif
1991 }
1992 return VINF_SUCCESS;
1993}
1994#endif /* PGMPOOL_WITH_MONITORING */
1995
1996#ifdef PGMPOOL_WITH_USER_TRACKING
1997/**
1998 * Frees up at least one user entry.
1999 *
2000 * @returns VBox status code.
2001 * @retval VINF_SUCCESS if at least one user entry was successfully freed.
2002 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2003 * @param pPool The pool.
2004 * @param iUser The user index.
2005 */
2006static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2007{
2008 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2009#ifdef PGMPOOL_WITH_CACHE
2010 /*
2011 * Just free cached pages in a braindead fashion.
2012 */
2013 /** @todo walk the age list backwards and free the first with usage. */
2014 int rc = VINF_SUCCESS;
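 /* Keep evicting cached pages until at least one user record has been returned to the free list. */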
2015 do
2016 {
2017 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2018 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
2019 rc = rc2;
2020 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2021 return rc;
2022#else
2023 /*
2024 * Lazy approach.
2025 */
2026 /** @todo This is incompatible with long mode paging (the cr3 root will be flushed). */
2027 Assert(!CPUMIsGuestInLongMode(pPool->CTXSUFF(pVM)));
2028 pgmPoolFlushAllInt(pPool);
2029 return VERR_PGM_POOL_FLUSHED;
2030#endif
2031}
2032
2033
2034/**
2035 * Inserts a page into the cache.
2036 *
2037 * This will create a user node for the page, insert it into the GCPhys
2038 * hash, and insert it into the age list.
2039 *
2040 * @returns VBox status code.
2041 * @retval VINF_SUCCESS if successfully added.
2042 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2043 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
2044 * @param pPool The pool.
2045 * @param pPage The cached page.
2046 * @param GCPhys The GC physical address of the page we're going to shadow.
2047 * @param iUser The user index.
2048 * @param iUserTable The user table index.
2049 */
2050DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2051{
2052 int rc = VINF_SUCCESS;
2053 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
2054
2055 LogFlow(("pgmPoolTrackInsert iUser %d iUserTable %d\n", iUser, iUserTable));
2056
2057 /*
2058 * Find a free user node.
2059 */
2060 uint16_t i = pPool->iUserFreeHead;
2061 if (i == NIL_PGMPOOL_USER_INDEX)
2062 {
2063 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2064 if (VBOX_FAILURE(rc))
2065 return rc;
2066 i = pPool->iUserFreeHead;
2067 }
2068
2069 /*
2070 * Unlink the user node from the free list,
2071 * initialize and insert it into the user list.
2072 */
2073 pPool->iUserFreeHead = pUser[i].iNext;
2074 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
2075 pUser[i].iUser = iUser;
2076 pUser[i].iUserTable = iUserTable;
2077 pPage->iUserHead = i;
2078
2079 /*
2080 * Insert into cache and enable monitoring of the guest page if enabled.
2081 *
2082 * Until we implement caching of all levels, including the CR3 one, we'll
2083 * have to make sure we don't try to monitor & cache any recursive reuse of
2084 * a monitored CR3 page. Because all Windows versions do this we'll
2085 * have to be able to do combined access monitoring, CR3 + PT and
2086 * PD + PT (guest PAE).
2087 *
2088 * Update:
2089 * We're now cooperating with the CR3 monitor if an uncacheable page is found.
2090 */
2091#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
2092# ifdef PGMPOOL_WITH_MIXED_PT_CR3
2093 const bool fCanBeMonitored = true;
2094# else
2095 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
2096 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
2097 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
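 /* I.e. the page may be monitored unless it shares the physical frame of the currently monitored guest CR3; shadows of big guest pages are exempt from that restriction. */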
2098# endif
2099# ifdef PGMPOOL_WITH_CACHE
2100 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2101# endif
2102 if (fCanBeMonitored)
2103 {
2104# ifdef PGMPOOL_WITH_MONITORING
2105 rc = pgmPoolMonitorInsert(pPool, pPage);
2106 if (rc == VERR_PGM_POOL_CLEARED)
2107 {
2108 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
2109# ifndef PGMPOOL_WITH_CACHE
2110 pgmPoolMonitorFlush(pPool, pPage);
2111 rc = VERR_PGM_POOL_FLUSHED;
2112# endif
2113 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2114 pUser[i].iNext = pPool->iUserFreeHead;
2115 pUser[i].iUser = NIL_PGMPOOL_IDX;
2116 pPool->iUserFreeHead = i;
2117 }
2118 }
2119# endif
2120#endif /* PGMPOOL_WITH_MONITORING */
2121 return rc;
2122}
2123
2124
2125# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
2126/**
2127 * Adds a user reference to a page.
2128 *
2129 * This will add a user record for the page and, when caching is enabled,
2130 * move the page to the head of the cache age list.
2131 *
2132 * @returns VBox status code.
2133 * @retval VINF_SUCCESS if successfully added.
2134 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2135 * @param pPool The pool.
2136 * @param pPage The cached page.
2137 * @param iUser The user index.
2138 * @param iUserTable The user table.
2139 */
2140static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2141{
2142 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2143
2144 LogFlow(("pgmPoolTrackAddUser iUser %d iUserTable %d\n", iUser, iUserTable));
2145# ifdef VBOX_STRICT
2146 /*
2147 * Check that the entry doesn't already exist.
2148 */
2149 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2150 {
2151 uint16_t i = pPage->iUserHead;
2152 do
2153 {
2154 Assert(i < pPool->cMaxUsers);
2155 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2156 i = paUsers[i].iNext;
2157 } while (i != NIL_PGMPOOL_USER_INDEX);
2158 }
2159# endif
2160
2161 /*
2162 * Allocate a user node.
2163 */
2164 uint16_t i = pPool->iUserFreeHead;
2165 if (i == NIL_PGMPOOL_USER_INDEX)
2166 {
2167 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2168 if (VBOX_FAILURE(rc))
2169 return rc;
2170 i = pPool->iUserFreeHead;
2171 }
2172 pPool->iUserFreeHead = paUsers[i].iNext;
2173
2174 /*
2175 * Initialize the user node and insert it.
2176 */
2177 paUsers[i].iNext = pPage->iUserHead;
2178 paUsers[i].iUser = iUser;
2179 paUsers[i].iUserTable = iUserTable;
2180 pPage->iUserHead = i;
2181
2182# ifdef PGMPOOL_WITH_CACHE
2183 /*
2184 * Tell the cache to update its replacement stats for this page.
2185 */
2186 pgmPoolCacheUsed(pPool, pPage);
2187# endif
2188 return VINF_SUCCESS;
2189}
2190# endif /* PGMPOOL_WITH_CACHE */
2191
2192
2193/**
2194 * Frees a user record associated with a page.
2195 *
2196 * This does not clear the entry in the user table, it simply returns the
2197 * user record to the chain of free records.
2198 *
2199 * @param pPool The pool.
2200 * @param pPage The shadow page.
2201 * @param iUser The shadow page pool index of the user table.
2202 * @param iUserTable The index into the user table (shadowed).
2203 */
2204static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2205{
2206 /*
2207 * Unlink and free the specified user entry.
2208 */
2209 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2210
2211 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2212 uint16_t i = pPage->iUserHead;
2213 if ( i != NIL_PGMPOOL_USER_INDEX
2214 && paUsers[i].iUser == iUser
2215 && paUsers[i].iUserTable == iUserTable)
2216 {
2217 pPage->iUserHead = paUsers[i].iNext;
2218
2219 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2220 paUsers[i].iNext = pPool->iUserFreeHead;
2221 pPool->iUserFreeHead = i;
2222 return;
2223 }
2224
2225 /* General: Linear search. */
2226 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2227 while (i != NIL_PGMPOOL_USER_INDEX)
2228 {
2229 if ( paUsers[i].iUser == iUser
2230 && paUsers[i].iUserTable == iUserTable)
2231 {
2232 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2233 paUsers[iPrev].iNext = paUsers[i].iNext;
2234 else
2235 pPage->iUserHead = paUsers[i].iNext;
2236
2237 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2238 paUsers[i].iNext = pPool->iUserFreeHead;
2239 pPool->iUserFreeHead = i;
2240 return;
2241 }
2242 iPrev = i;
2243 i = paUsers[i].iNext;
2244 }
2245
2246 /* Fatal: didn't find it */
2247 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
2248 iUser, iUserTable, pPage->GCPhys));
2249}
2250
2251
2252/**
2253 * Gets the entry size of a shadow table.
2254 *
2255 * @param enmKind The kind of page.
2256 *
2257 * @returns The size of the entry in bytes. That is, 4 or 8.
2258 * @returns If the kind is not for a table, an assertion is raised and 0 is
2259 * returned.
2260 */
2261DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2262{
2263 switch (enmKind)
2264 {
2265 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2266 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2267 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2268 case PGMPOOLKIND_ROOT_32BIT_PD:
2269 return 4;
2270
2271 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2272 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2273 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2274 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2275 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2276 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2277 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2278 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2279 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2280 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2281 case PGMPOOLKIND_ROOT_PAE_PD:
2282 case PGMPOOLKIND_ROOT_PDPT:
2283 case PGMPOOLKIND_ROOT_NESTED:
2284 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2285 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2286 return 8;
2287
2288 default:
2289 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2290 }
2291}
2292
2293
2294/**
2295 * Gets the entry size of a guest table.
2296 *
2297 * @param enmKind The kind of page.
2298 *
2299 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2300 * @returns If the kind is not for a table, an assertion is raised and 0 is
2301 * returned.
2302 */
2303DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2304{
2305 switch (enmKind)
2306 {
2307 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2308 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2309 case PGMPOOLKIND_ROOT_32BIT_PD:
2310 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2311 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2312 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2313 return 4;
2314
2315 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2316 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2317 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2318 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2319 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2320 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2321 case PGMPOOLKIND_ROOT_PAE_PD:
2322 case PGMPOOLKIND_ROOT_PDPT:
2323 return 8;
2324
2325 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2326 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2327 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2328 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2329 case PGMPOOLKIND_ROOT_NESTED:
2330 /** @todo can we return 0? (nobody is calling this...) */
2331 AssertFailed();
2332 return 0;
2333
2334 default:
2335 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2336 }
2337}
2338
2339
2340#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2341/**
2342 * Scans one shadow page table for mappings of a physical page.
2343 *
2344 * @param pVM The VM handle.
2345 * @param pPhysPage The guest page in question.
2346 * @param iShw The shadow page table.
2347 * @param cRefs The number of references made in that PT.
2348 */
2349static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2350{
2351 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2352 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2353
2354 /*
2355 * Assert sanity.
2356 */
2357 Assert(cRefs == 1);
2358 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2359 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2360
2361 /*
2362 * Then, clear the actual mappings to the page in the shadow PT.
2363 */
2364 switch (pPage->enmKind)
2365 {
2366 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2367 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2368 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2369 {
2370 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2371 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2372 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2373 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2374 {
2375 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2376 pPT->a[i].u = 0;
2377 cRefs--;
2378 if (!cRefs)
2379 return;
2380 }
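 /* Getting here means not all cRefs references were found in the shadow PT; dump any remaining matches (debug builds only) and die. */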
2381#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2382 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2383 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2384 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2385 {
2386 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2387 pPT->a[i].u = 0;
2388 }
2389#endif
2390 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2391 break;
2392 }
2393
2394 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2395 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2396 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2397 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2398 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2399 {
2400 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2401 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2402 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2403 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2404 {
2405 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2406 pPT->a[i].u = 0;
2407 cRefs--;
2408 if (!cRefs)
2409 return;
2410 }
2411#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2412 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2413 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
2414 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2415 {
2416 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2417 pPT->a[i].u = 0;
2418 }
2419#endif
2420 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2421 break;
2422 }
2423
2424 default:
2425 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2426 }
2427}
2428
2429
2430/**
2431 * Scans one shadow page table for mappings of a physical page.
2432 *
2433 * @param pVM The VM handle.
2434 * @param pPhysPage The guest page in question.
2435 * @param iShw The shadow page table.
2436 * @param cRefs The number of references made in that PT.
2437 */
2438void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2439{
2440 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2441 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2442 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2443 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2444 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2445 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2446}
2447
2448
2449/**
2450 * Flushes a list of shadow page tables mapping the same physical page.
2451 *
2452 * @param pVM The VM handle.
2453 * @param pPhysPage The guest page in question.
2454 * @param iPhysExt The physical cross reference extent list to flush.
2455 */
2456void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2457{
2458 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2459 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2460 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt\n", pPhysPage->HCPhys, iPhysExt));
2461
2462 const uint16_t iPhysExtStart = iPhysExt;
2463 PPGMPOOLPHYSEXT pPhysExt;
2464 do
2465 {
2466 Assert(iPhysExt < pPool->cMaxPhysExts);
2467 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2468 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2469 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2470 {
2471 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2472 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2473 }
2474
2475 /* next */
2476 iPhysExt = pPhysExt->iNext;
2477 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2478
2479 /* insert the list into the free list and clear the ram range entry. */
2480 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2481 pPool->iPhysExtFreeHead = iPhysExtStart;
2482 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2483
2484 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2485}
2486#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2487
2488
2489/**
2490 * Scans all shadow page tables for mappings of a physical page.
2491 *
2492 * This may be slow, but it's most likely more efficient than cleaning
2493 * out the entire page pool / cache.
2494 *
2495 * @returns VBox status code.
2496 * @retval VINF_SUCCESS if all references have been successfully cleared.
2497 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2498 * a page pool cleaning.
2499 *
2500 * @param pVM The VM handle.
2501 * @param pPhysPage The guest page in question.
2502 */
2503int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2504{
2505 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2506 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2507 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2508 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2509
2510#if 1
2511 /*
2512 * There is a limit to what makes sense.
2513 */
2514 if (pPool->cPresent > 1024)
2515 {
2516 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2517 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2518 return VINF_PGM_GCPHYS_ALIASED;
2519 }
2520#endif
2521
2522 /*
2523 * Iterate all the pages until we've encountered all that are in use.
2524 * This is a simple but not quite optimal solution.
2525 */
2526 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2527 const uint32_t u32 = u64;
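 /* Precompute the PTE value we are looking for: the host physical address with the present bit set, in both legacy 32-bit and PAE forms. */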
2528 unsigned cLeft = pPool->cUsedPages;
2529 unsigned iPage = pPool->cCurPages;
2530 while (--iPage >= PGMPOOL_IDX_FIRST)
2531 {
2532 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2533 if (pPage->GCPhys != NIL_RTGCPHYS)
2534 {
2535 switch (pPage->enmKind)
2536 {
2537 /*
2538 * We only care about shadow page tables.
2539 */
2540 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2541 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2542 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2543 {
2544 unsigned cPresent = pPage->cPresent;
2545 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2546 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2547 if (pPT->a[i].n.u1Present)
2548 {
2549 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2550 {
2551 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2552 pPT->a[i].u = 0;
2553 }
2554 if (!--cPresent)
2555 break;
2556 }
2557 break;
2558 }
2559
2560 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2561 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2562 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2563 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2564 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2565 {
2566 unsigned cPresent = pPage->cPresent;
2567 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2568 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2569 if (pPT->a[i].n.u1Present)
2570 {
2571 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2572 {
2573 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2574 pPT->a[i].u = 0;
2575 }
2576 if (!--cPresent)
2577 break;
2578 }
2579 break;
2580 }
2581 }
2582 if (!--cLeft)
2583 break;
2584 }
2585 }
2586
2587 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2588 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2589 return VINF_SUCCESS;
2590}
2591
2592
2593/**
2594 * Clears the user entry in a user table.
2595 *
2596 * This is used to remove all references to a page when flushing it.
2597 */
2598static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2599{
2600 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2601 Assert(pUser->iUser < pPool->cCurPages);
2602
2603 /*
2604 * Map the user page.
2605 */
2606 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2607 union
2608 {
2609 uint64_t *pau64;
2610 uint32_t *pau32;
2611 } u;
2612 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
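 /* The user table is accessed as either 32-bit or 64-bit entries depending on the kind of the user page (see the switches below). */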
2613
2614 /* Safety precaution in case we change the paging for other modes too in the future. */
2615 Assert(PGMGetHyperCR3(CTXSUFF(pPool->pVM)) != pPage->Core.Key);
2616
2617#ifdef VBOX_STRICT
2618 /*
2619 * Some sanity checks.
2620 */
2621 switch (pUserPage->enmKind)
2622 {
2623 case PGMPOOLKIND_ROOT_32BIT_PD:
2624 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2625 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2626 break;
2627 case PGMPOOLKIND_ROOT_PAE_PD:
2628 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2629 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2630 break;
2631 case PGMPOOLKIND_ROOT_PDPT:
2632 Assert(pUser->iUserTable < 4);
2633 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2634 break;
2635 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2636 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2637 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2638 break;
2639 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2640 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2641 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2642 break;
2643 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2644 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2645 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2646 break;
2647 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2648 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2649 /* GCPhys >> PAGE_SHIFT is the index here */
2650 break;
2651 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2652 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2653 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2654 break;
2655 case PGMPOOLKIND_ROOT_NESTED:
2656 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2657 break;
2658
2659 default:
2660 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2661 break;
2662 }
2663#endif /* VBOX_STRICT */
2664
2665 /*
2666 * Clear the entry in the user page.
2667 */
2668 switch (pUserPage->enmKind)
2669 {
2670 /* 32-bit entries */
2671 case PGMPOOLKIND_ROOT_32BIT_PD:
2672 u.pau32[pUser->iUserTable] = 0;
2673 break;
2674
2675 /* 64-bit entries */
2676 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2677 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2678 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2679 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2680 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
2681 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2682 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2683 case PGMPOOLKIND_ROOT_PAE_PD:
2684 case PGMPOOLKIND_ROOT_PDPT:
2685 case PGMPOOLKIND_ROOT_NESTED:
2686 u.pau64[pUser->iUserTable] = 0;
2687 break;
2688
2689 default:
2690 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2691 }
2692}
2693
2694
2695/**
2696 * Clears all users of a page.
2697 */
2698static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2699{
2700 /*
2701 * Free all the user records.
2702 */
2703 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2704 uint16_t i = pPage->iUserHead;
2705 while (i != NIL_PGMPOOL_USER_INDEX)
2706 {
2707 /* Clear the entry in the user table. */
2708 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2709
2710 /* Free it. */
2711 const uint16_t iNext = paUsers[i].iNext;
2712 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2713 paUsers[i].iNext = pPool->iUserFreeHead;
2714 pPool->iUserFreeHead = i;
2715
2716 /* Next. */
2717 i = iNext;
2718 }
2719 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2720}
2721
2722
2723#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2724/**
2725 * Allocates a new physical cross reference extent.
2726 *
2727 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2728 * @param pVM The VM handle.
2729 * @param piPhysExt Where to store the phys ext index.
2730 */
2731PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2732{
2733 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2734 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2735 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2736 {
2737 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2738 return NULL;
2739 }
2740 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2741 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2742 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2743 *piPhysExt = iPhysExt;
2744 return pPhysExt;
2745}
2746
2747
2748/**
2749 * Frees a physical cross reference extent.
2750 *
2751 * @param pVM The VM handle.
2752 * @param iPhysExt The extent to free.
2753 */
2754void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2755{
2756 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2757 Assert(iPhysExt < pPool->cMaxPhysExts);
2758 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2759 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2760 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2761 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2762 pPool->iPhysExtFreeHead = iPhysExt;
2763}
2764
2765
2766/**
2767 * Frees a list of physical cross reference extents.
2768 *
2769 * @param pVM The VM handle.
2770 * @param iPhysExt The extent to free.
2771 */
2772void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2773{
2774 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2775
2776 const uint16_t iPhysExtStart = iPhysExt;
2777 PPGMPOOLPHYSEXT pPhysExt;
2778 do
2779 {
2780 Assert(iPhysExt < pPool->cMaxPhysExts);
2781 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2782 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
2783 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2784
2785 /* next */
2786 iPhysExt = pPhysExt->iNext;
2787 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2788
2789 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2790 pPool->iPhysExtFreeHead = iPhysExtStart;
2791}
2792
2793/**
2794 * Insert a reference into a list of physical cross reference extents.
2795 *
2796 * @returns The new ram range flags (top 16-bits).
2797 *
2798 * @param pVM The VM handle.
2799 * @param iPhysExt The physical extent index of the list head.
2800 * @param iShwPT The shadow page table index.
2801 *
2802 */
2803static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2804{
2805 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2806 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
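 /* The value returned below forms the new top 16 bits of the ram range page flags: the extent list index goes into the IDX field and MM_RAM_FLAGS_CREFS_PHYSEXT into the CREFS field. */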
2807
2808 /* special common case. */
2809 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2810 {
2811 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2812 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2813 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2814 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2815 }
2816
2817 /* general treatment. */
2818 const uint16_t iPhysExtStart = iPhysExt;
2819 unsigned cMax = 15;
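 /* Give up after 15 iterations; a page referenced by that many shadow page tables is considered hopelessly aliased and is flagged as overflowed. */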
2820 for (;;)
2821 {
2822 Assert(iPhysExt < pPool->cMaxPhysExts);
2823 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2824 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2825 {
2826 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2827 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2828 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2829 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2830 }
2831 if (!--cMax)
2832 {
2833 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2834 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2835 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2836 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2837 }
2838 }
2839
2840 /* add another extent to the list. */
2841 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2842 if (!pNew)
2843 {
2844 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2845 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2846 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2847 }
2848 pNew->iNext = iPhysExtStart;
2849 pNew->aidx[0] = iShwPT;
2850 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2851 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2852}
2853
2854
2855/**
2856 * Add a reference to a guest physical page where extents are in use.
2857 *
2858 * @returns The new ram range flags (top 16-bits).
2859 *
2860 * @param pVM The VM handle.
2861 * @param u16 The ram range flags (top 16-bits).
2862 * @param iShwPT The shadow page table index.
2863 */
2864uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2865{
2866 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2867 {
2868 /*
2869 * Convert to extent list.
2870 */
2871 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
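 /* Up to now the page had a single tracked reference: CREFS is 1 and the IDX field holds the owning shadow PT index. */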
2872 uint16_t iPhysExt;
2873 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2874 if (pPhysExt)
2875 {
2876 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2877 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2878 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2879 pPhysExt->aidx[1] = iShwPT;
2880 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2881 }
2882 else
2883 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2884 }
2885 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2886 {
2887 /*
2888 * Insert into the extent list.
2889 */
2890 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2891 }
2892 else
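 /* The reference tracking for this page has already overflowed; just count the additional alias. */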
2893 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2894 return u16;
2895}
2896
2897
2898/**
2899 * Clear references to guest physical memory.
2900 *
2901 * @param pPool The pool.
2902 * @param pPage The page.
2903 * @param pPhysPage Pointer to the aPages entry in the ram range.
2904 */
2905void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2906{
2907 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2908 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2909
2910 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2911 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2912 {
2913 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2914 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2915 do
2916 {
2917 Assert(iPhysExt < pPool->cMaxPhysExts);
2918
2919 /*
2920 * Look for the shadow page and check if it's all freed.
2921 */
2922 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2923 {
2924 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2925 {
2926 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2927
2928 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2929 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2930 {
2931 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2932 return;
2933 }
2934
2935 /* we can free the node. */
2936 PVM pVM = pPool->CTXSUFF(pVM);
2937 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2938 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2939 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2940 {
2941 /* lonely node */
2942 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2943 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2944 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2945 }
2946 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2947 {
2948 /* head */
2949 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2950 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2951 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2952 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2953 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2954 }
2955 else
2956 {
2957 /* in list */
2958 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2959 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2960 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2961 }
2962 iPhysExt = iPhysExtNext;
2963 return;
2964 }
2965 }
2966
2967 /* next */
2968 iPhysExtPrev = iPhysExt;
2969 iPhysExt = paPhysExts[iPhysExt].iNext;
2970 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2971
2972 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2973 }
2974 else /* nothing to do */
2975 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
2976}
2977
2978
2979
2980/**
2981 * Clear references to guest physical memory.
2982 *
2983 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
2984 * is assumed to be correct, so the linear search can be skipped and we can assert
2985 * at an earlier point.
2986 *
2987 * @param pPool The pool.
2988 * @param pPage The page.
2989 * @param HCPhys The host physical address corresponding to the guest page.
2990 * @param GCPhys The guest physical address corresponding to HCPhys.
2991 */
2992static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2993{
2994 /*
2995 * Walk range list.
2996 */
2997 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
2998 while (pRam)
2999 {
3000 RTGCPHYS off = GCPhys - pRam->GCPhys;
3001 if (off < pRam->cb)
3002 {
3003 /* does it match? */
3004 const unsigned iPage = off >> PAGE_SHIFT;
3005 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3006 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3007 Log(("pgmPoolTracDerefGCPhys %VHp vs %VHp\n", HCPhysPage, HCPhys));
3008 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3009 {
3010 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3011 return;
3012 }
3013 break;
3014 }
3015 pRam = CTXALLSUFF(pRam->pNext);
3016 }
3017 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
3018}
3019
3020
3021/**
3022 * Clear references to guest physical memory.
3023 *
3024 * @param pPool The pool.
3025 * @param pPage The page.
3026 * @param HCPhys The host physical address corresponding to the guest page.
3027 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3028 */
3029static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3030{
3031 /*
3032 * Walk range list.
3033 */
3034 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3035 while (pRam)
3036 {
3037 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3038 if (off < pRam->cb)
3039 {
3040 /* does it match? */
3041 const unsigned iPage = off >> PAGE_SHIFT;
3042 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3043 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3044 {
3045 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3046 return;
3047 }
3048 break;
3049 }
3050 pRam = CTXALLSUFF(pRam->pNext);
3051 }
3052
3053 /*
3054 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3055 */
3056 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3057 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3058 while (pRam)
3059 {
3060 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3061 while (iPage-- > 0)
3062 {
3063 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3064 {
3065 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
3066 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3067 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3068 return;
3069 }
3070 }
3071 pRam = CTXALLSUFF(pRam->pNext);
3072 }
3073
3074 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
3075}
3076
3077
3078/**
3079 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3080 *
3081 * @param pPool The pool.
3082 * @param pPage The page.
3083 * @param pShwPT The shadow page table (mapping of the page).
3084 * @param pGstPT The guest page table.
3085 */
3086DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3087{
3088 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3089 if (pShwPT->a[i].n.u1Present)
3090 {
3091 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3092 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3093 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3094 if (!--pPage->cPresent)
3095 break;
3096 }
3097}
3098
3099
3100/**
3101 * Clear references to guest physical memory in a PAE / 32-bit page table.
3102 *
3103 * @param pPool The pool.
3104 * @param pPage The page.
3105 * @param pShwPT The shadow page table (mapping of the page).
3106 * @param pGstPT The guest page table (just a half one).
3107 */
3108DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3109{
3110 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3111 if (pShwPT->a[i].n.u1Present)
3112 {
3113 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
3114 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3115 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3116 }
3117}
3118
3119
3120/**
3121 * Clear references to guest physical memory in a PAE / PAE page table.
3122 *
3123 * @param pPool The pool.
3124 * @param pPage The page.
3125 * @param pShwPT The shadow page table (mapping of the page).
3126 * @param pGstPT The guest page table.
3127 */
3128DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3129{
3130 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
3131 if (pShwPT->a[i].n.u1Present)
3132 {
3133 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
3134 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3135 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3136 }
3137}
3138
3139
3140/**
3141 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3142 *
3143 * @param pPool The pool.
3144 * @param pPage The page.
3145 * @param pShwPT The shadow page table (mapping of the page).
3146 */
3147DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3148{
3149 RTGCPHYS GCPhys = pPage->GCPhys;
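 /* A big guest page maps a contiguous physical range, so entry i corresponds to GCPhys + i * PAGE_SIZE. */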
3150 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3151 if (pShwPT->a[i].n.u1Present)
3152 {
3153 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3154 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3155 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3156 }
3157}
3158
3159
3160/**
3161 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3162 *
3163 * @param pPool The pool.
3164 * @param pPage The page.
3165 * @param pShwPT The shadow page table (mapping of the page).
3166 */
3167DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3168{
3169 RTGCPHYS GCPhys = pPage->GCPhys;
3170 for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3171 if (pShwPT->a[i].n.u1Present)
3172 {
3173 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%VGp\n",
3174 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3175 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3176 }
3177}
3178#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
3179
3180
3181/**
3182 * Clear references to shadowed pages in a PAE (legacy or 64 bits) page directory.
3183 *
3184 * @param pPool The pool.
3185 * @param pPage The page.
3186 * @param pShwPD The shadow page directory (mapping of the page).
3187 */
3188DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
3189{
3190 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
3191 {
3192 if (pShwPD->a[i].n.u1Present)
3193 {
3194 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
3195 if (pSubPage)
3196 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3197 else
3198 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
3199 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3200 }
3201 }
3202}
3203
3204
3205/**
3206 * Clear references to shadowed pages in a 64-bit page directory pointer table.
3207 *
3208 * @param pPool The pool.
3209 * @param pPage The page.
3210 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
3211 */
3212DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
3213{
3214 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
3215 {
3216 if (pShwPDPT->a[i].n.u1Present)
3217 {
3218 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
3219 if (pSubPage)
3220 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3221 else
3222 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
3223 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3224 }
3225 }
3226}
3227
3228/**
3229 * Clear references to shadowed pages in a 64-bit level 4 page table.
3230 *
3231 * @param pPool The pool.
3232 * @param pPage The page.
3233 * @param pShwPML4 The shadow level-4 (PML4) page table (mapping of the page).
3234 */
3235DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
3236{
3237 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
3238 {
3239 if (pShwPML4->a[i].n.u1Present)
3240 {
3241 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
3242 if (pSubPage)
3243 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
3244 else
3245 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
3246 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
3247 }
3248 }
3249}
3250
3251
3252/**
3253 * Clears all references made by this page.
3254 *
3255 * This includes other shadow pages and GC physical addresses.
3256 *
3257 * @param pPool The pool.
3258 * @param pPage The page.
3259 */
3260static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3261{
3262 /*
3263 * Map the shadow page and take action according to the page kind.
3264 */
3265 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3266 switch (pPage->enmKind)
3267 {
3268#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3269 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3270 {
3271 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3272 void *pvGst;
3273 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3274 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
3275 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3276 break;
3277 }
3278
3279 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3280 {
3281 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3282 void *pvGst;
3283 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3284 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
3285 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3286 break;
3287 }
3288
3289 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3290 {
3291 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3292 void *pvGst;
3293 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
3294 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
3295 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3296 break;
3297 }
3298
3299 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
3300 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3301 {
3302 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3303 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
3304 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3305 break;
3306 }
3307
3308 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
3309 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3310 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3311 {
3312 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
3313 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
3314 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
3315 break;
3316 }
3317
3318#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3319 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3320 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3321 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3322 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3323 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3324 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3325 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3326 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3327 break;
3328#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
3329
3330 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
3331 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3332 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3333 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3334 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
3335 break;
3336
3337 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3338 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3339 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
3340 break;
3341
3342 case PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4:
3343 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
3344 break;
3345
3346 default:
3347 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
3348 }
3349
3350 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
3351 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3352 ASMMemZeroPage(pvShw);
3353 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3354 pPage->fZeroed = true;
3355}
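
/*
 * Illustrative sketch (comment only, not compiled): each of the
 * pgmPoolTrackDerefPT* helpers called above conceptually walks the shadow
 * page table and drops the reference of every present entry, passing the
 * matching guest entry as a physical address hint.  Assuming the usual
 * X86PT/X86PTE field layout, the 32-bit/32-bit variant is roughly:
 *
 *    for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
 *        if (pShwPT->a[i].n.u1Present)
 *        {
 *            pgmPoolTracDerefGCPhysHint(pPool, pPage,
 *                                       pShwPT->a[i].u & X86_PTE_PG_MASK,
 *                                       pGstPT->a[i].u & X86_PTE_PG_MASK);
 *            if (!--pPage->cPresent)
 *                break;
 *        }
 *
 * The cPresent early-out and the exact masks are assumptions; the helpers
 * earlier in this file are the authoritative versions.
 */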
3356#endif /* PGMPOOL_WITH_USER_TRACKING */
3357
3358
3359/**
3360 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
3361 *
3362 * @param pPool The pool.
3363 */
3364static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
3365{
3366 /*
3367 * These special pages all occupy the indexes 1..PGMPOOL_IDX_FIRST-1.
3368 */
3369 Assert(NIL_PGMPOOL_IDX == 0);
3370 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
3371 {
3372 /*
3373 * Get the page address.
3374 */
3375 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3376 union
3377 {
3378 uint64_t *pau64;
3379 uint32_t *pau32;
3380 } u;
3381 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
3382
3383 /*
3384 * Mark stuff not present.
3385 */
3386 switch (pPage->enmKind)
3387 {
3388 case PGMPOOLKIND_ROOT_32BIT_PD:
3389 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3390 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3391 u.pau32[iPage] = 0;
3392 break;
3393
3394 case PGMPOOLKIND_ROOT_PAE_PD:
3395 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * X86_PG_PAE_PDPE_ENTRIES; iPage++)
3396 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3397 u.pau64[iPage] = 0;
3398 break;
3399
3400 case PGMPOOLKIND_ROOT_PDPT:
3401 /* Not root of shadowed pages currently, ignore it. */
3402 break;
3403
3404 case PGMPOOLKIND_ROOT_NESTED:
3405 ASMMemZero32(u.pau64, PAGE_SIZE);
3406 break;
3407 }
3408 }
3409
3410 /*
3411 * Paranoia (to be removed), flag a global CR3 sync.
3412 */
3413 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3414}
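
/*
 * Note on the (PGM_PDFLAGS_MAPPING | X86_PDE_P) test above: only plain
 * present entries are zapped, while entries that also carry the
 * PGM_PDFLAGS_MAPPING marker belong to hypervisor mappings and must survive
 * the flush.  Assumed example values, for illustration only:
 *
 *    pde.u = X86_PDE_P | X86_PDE_RW;                          -> zeroed
 *    pde.u = X86_PDE_P | X86_PDE_RW | PGM_PDFLAGS_MAPPING;    -> kept
 *    pde.u = 0;                                               -> already clear
 */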
3415
3416
3417/**
3418 * Flushes the entire cache.
3419 *
3420 * It will assert the global CR3 sync flag (FF) and assumes the caller is aware
3421 * of this and will execute the CR3 flush.
3422 *
3423 * @param pPool The pool.
3424 */
3425static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3426{
3427 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3428 LogFlow(("pgmPoolFlushAllInt:\n"));
3429
3430 /*
3431 * If there are no pages in the pool, there is nothing to do.
3432 */
3433 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3434 {
3435 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3436 return;
3437 }
3438
3439 /*
3440 * Nuke the free list and reinsert all pages into it.
3441 */
3442 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3443 {
3444 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3445
3446#ifdef IN_RING3
3447 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3448#endif
3449#ifdef PGMPOOL_WITH_MONITORING
3450 if (pPage->fMonitored)
3451 pgmPoolMonitorFlush(pPool, pPage);
3452 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3453 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3454 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3455 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3456 pPage->cModifications = 0;
3457#endif
3458 pPage->GCPhys = NIL_RTGCPHYS;
3459 pPage->enmKind = PGMPOOLKIND_FREE;
3460 Assert(pPage->idx == i);
3461 pPage->iNext = i + 1;
3462 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3463 pPage->fSeenNonGlobal = false;
3464 pPage->fMonitored = false;
3465 pPage->fCached = false;
3466 pPage->fReusedFlushPending = false;
3467 pPage->fCR3Mix = false;
3468#ifdef PGMPOOL_WITH_USER_TRACKING
3469 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3470#endif
3471#ifdef PGMPOOL_WITH_CACHE
3472 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3473 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3474#endif
3475 }
3476 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3477 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3478 pPool->cUsedPages = 0;
3479
3480#ifdef PGMPOOL_WITH_USER_TRACKING
3481 /*
3482 * Zap and reinitialize the user records.
3483 */
3484 pPool->cPresent = 0;
3485 pPool->iUserFreeHead = 0;
3486 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3487 const unsigned cMaxUsers = pPool->cMaxUsers;
3488 for (unsigned i = 0; i < cMaxUsers; i++)
3489 {
3490 paUsers[i].iNext = i + 1;
3491 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3492 paUsers[i].iUserTable = 0xfffffffe;
3493 }
3494 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3495#endif
3496
3497#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3498 /*
3499 * Clear all the GCPhys links and rebuild the phys ext free list.
3500 */
3501 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXALLSUFF(pRamRanges);
3502 pRam;
3503 pRam = CTXALLSUFF(pRam->pNext))
3504 {
3505 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3506 while (iPage-- > 0)
3507 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3508 }
3509
3510 pPool->iPhysExtFreeHead = 0;
3511 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3512 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3513 for (unsigned i = 0; i < cMaxPhysExts; i++)
3514 {
3515 paPhysExts[i].iNext = i + 1;
3516 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3517 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3518 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3519 }
3520 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3521#endif
3522
3523#ifdef PGMPOOL_WITH_MONITORING
3524 /*
3525 * Just zap the modified list.
3526 */
3527 pPool->cModifiedPages = 0;
3528 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3529#endif
3530
3531#ifdef PGMPOOL_WITH_CACHE
3532 /*
3533 * Clear the GCPhys hash and the age list.
3534 */
3535 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
3536 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3537 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3538 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3539#endif
3540
3541 /*
3542 * Flush all the special root pages.
3543 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3544 */
3545 pgmPoolFlushAllSpecialRoots(pPool);
3546 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3547 {
3548 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3549 pPage->iNext = NIL_PGMPOOL_IDX;
3550#ifdef PGMPOOL_WITH_MONITORING
3551 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3552 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3553 pPage->cModifications = 0;
3554 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3555 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3556 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3557 if (pPage->fMonitored)
3558 {
3559 PVM pVM = pPool->CTXSUFF(pVM);
3560 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3561 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3562 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3563 pPool->pfnAccessHandlerGC, MMHyperCCToRC(pVM, pPage),
3564 pPool->pszAccessHandler);
3565 AssertFatalRCSuccess(rc);
3566# ifdef PGMPOOL_WITH_CACHE
3567 pgmPoolHashInsert(pPool, pPage);
3568# endif
3569 }
3570#endif
3571#ifdef PGMPOOL_WITH_USER_TRACKING
3572 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3573#endif
3574#ifdef PGMPOOL_WITH_CACHE
3575 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3576 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3577#endif
3578 }
3579
3580 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3581}
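
/*
 * Caller-side sketch (assumed usage, not part of this file): pgmPoolFlushAllInt
 * leaves VM_FF_PGM_SYNC_CR3 set, so a ring-3 caller is expected to let the
 * force-action processing resync the paging structures, roughly:
 *
 *    pgmPoolFlushAll(pVM);
 *    if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
 *        rc = PGMSyncCR3(pVM, CPUMGetGuestCR0(pVM), CPUMGetGuestCR3(pVM),
 *                        CPUMGetGuestCR4(pVM), true);
 *
 * The last argument is fGlobal; PGMSyncCR3 and the CPUMGetGuestCRx accessors
 * are assumed from the public VMM API, and in practice the generic FF handling
 * performs this step rather than the pool code itself.
 */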
3582
3583
3584/**
3585 * Flushes a pool page.
3586 *
3587 * This moves the page to the free list after removing all user references to it.
3588 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3589 *
3590 * @returns VBox status code.
3591 * @retval VINF_SUCCESS on success.
3592 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a lightweight pool flush.
3593 * @param pPool The pool.
3594 * @param pPage The shadow page to flush.
3595 */
3596int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3597{
3598 int rc = VINF_SUCCESS;
3599 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3600 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3601 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3602
3603 /*
3604 * Quietly reject any attempts at flushing any of the special root pages.
3605 */
3606 if (pPage->idx < PGMPOOL_IDX_FIRST)
3607 {
3608 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3609 return VINF_SUCCESS;
3610 }
3611
3612 /*
3613 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
3614 */
3615 if (PGMGetHyperCR3(CTXSUFF(pPool->pVM)) == pPage->Core.Key)
3616 {
3617 AssertMsg(pPage->enmKind == PGMPOOLKIND_64BIT_PML4_FOR_64BIT_PML4, ("Can't free the shadow CR3! (%VGp vs %VGp kind=%d)\n", PGMGetHyperCR3(CTXSUFF(pPool->pVM)), pPage->Core.Key, pPage->enmKind));
3618 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3619 return VINF_SUCCESS;
3620 }
3621
3622 /*
3623 * Mark the page as being in need of an ASMMemZeroPage().
3624 */
3625 pPage->fZeroed = false;
3626
3627#ifdef PGMPOOL_WITH_USER_TRACKING
3628 /*
3629 * Clear the page.
3630 */
3631 pgmPoolTrackClearPageUsers(pPool, pPage);
3632 STAM_PROFILE_START(&pPool->StatTrackDeref, a);
3633 pgmPoolTrackDeref(pPool, pPage);
3634 STAM_PROFILE_STOP(&pPool->StatTrackDeref, a);
3635#endif
3636
3637#ifdef PGMPOOL_WITH_CACHE
3638 /*
3639 * Flush it from the cache.
3640 */
3641 pgmPoolCacheFlushPage(pPool, pPage);
3642#endif /* PGMPOOL_WITH_CACHE */
3643
3644#ifdef PGMPOOL_WITH_MONITORING
3645 /*
3646 * Deregister the monitoring.
3647 */
3648 if (pPage->fMonitored)
3649 rc = pgmPoolMonitorFlush(pPool, pPage);
3650#endif
3651
3652 /*
3653 * Free the page.
3654 */
3655 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3656 pPage->iNext = pPool->iFreeHead;
3657 pPool->iFreeHead = pPage->idx;
3658 pPage->enmKind = PGMPOOLKIND_FREE;
3659 pPage->GCPhys = NIL_RTGCPHYS;
3660 pPage->fReusedFlushPending = false;
3661
3662 pPool->cUsedPages--;
3663 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3664 return rc;
3665}
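
/*
 * Note (assumed behaviour summary): the VERR_PGM_POOL_CLEARED status can only
 * originate from the pgmPoolMonitorFlush call above, i.e. when deregistering
 * the physical access handler forced a lightweight pool flush.  Callers such
 * as pgmPoolFreeByPage below deliberately ignore it, since the page ends up
 * on the free list either way.
 */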
3666
3667
3668/**
3669 * Frees a usage of a pool page.
3670 *
3671 * The caller is responsible for updating the user table so that it no longer
3672 * references the shadow page.
3673 *
3674 * @param pPool The pool.
3675 * @param pPage The shadow page whose usage is being freed.
3676 * @param iUser The shadow page pool index of the user table.
3677 * @param iUserTable The index into the user table (shadowed).
3678 */
3679void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
3680{
3681 STAM_PROFILE_START(&pPool->StatFree, a);
3682 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3683 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3684 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3685#ifdef PGMPOOL_WITH_USER_TRACKING
3686 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3687#endif
3688#ifdef PGMPOOL_WITH_CACHE
3689 if (!pPage->fCached)
3690#endif
3691 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3692 STAM_PROFILE_STOP(&pPool->StatFree, a);
3693}
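
/*
 * Usage sketch (hypothetical caller, names invented for illustration): the
 * user table entry should stop referencing the shadow page when its usage is
 * freed, e.g. when a shadow PD entry lets go of a shadow page table:
 *
 *    pShwPD->a[iPD].u = 0;
 *    pgmPoolFreeByPage(pPool, pSubPage, pShwPDPage->idx, iPD);
 *
 * pShwPD, pSubPage and pShwPDPage do not exist in this file; the point is
 * only that clearing the referencing entry is the caller's responsibility.
 */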
3694
3695
3696/**
3697 * Makes sure one or more free pages are available.
3698 *
3699 * @returns VBox status code.
3700 * @retval VINF_SUCCESS on success.
3701 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3702 *
3703 * @param pPool The pool.
3704 * @param iUser The user of the page.
3705 */
3706static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3707{
3708 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3709
3710 /*
3711 * If the pool isn't fully grown yet, expand it.
3712 */
3713 if (pPool->cCurPages < pPool->cMaxPages)
3714 {
3715 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3716#ifdef IN_RING3
3717 int rc = PGMR3PoolGrow(pPool->pVMHC);
3718#else
3719 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3720#endif
3721 if (VBOX_FAILURE(rc))
3722 return rc;
3723 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3724 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3725 return VINF_SUCCESS;
3726 }
3727
3728#ifdef PGMPOOL_WITH_CACHE
3729 /*
3730 * Free one cached page.
3731 */
3732 return pgmPoolCacheFreeOne(pPool, iUser);
3733#else
3734 /*
3735 * Flush the pool.
3736 * If we have tracking enabled, it should be possible to come up with
3737 * a cheap replacement strategy...
3738 */
3739 /** @todo incompatible with long mode paging (cr3 root will be flushed) */
3740 Assert(!CPUMIsGuestInLongMode(pPool->CTXSUFF(pVM)));
3741 pgmPoolFlushAllInt(pPool);
3742 return VERR_PGM_POOL_FLUSHED;
3743#endif
3744}
3745
3746
3747/**
3748 * Allocates a page from the pool.
3749 *
3750 * This page may actually be a cached page and not in need of any processing
3751 * on the caller's part.
3752 *
3753 * @returns VBox status code.
3754 * @retval VINF_SUCCESS if a NEW page was allocated.
3755 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3756 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3757 * @param pVM The VM handle.
3758 * @param GCPhys The GC physical address of the page we're going to shadow.
3759 * For 4MB and 2MB PD entries, it's the first address the
3760 * shadow PT is covering.
3761 * @param enmKind The kind of mapping.
3762 * @param iUser The shadow page pool index of the user table.
3763 * @param iUserTable The index into the user table (shadowed).
3764 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3765 */
3766int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
3767{
3768 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3769 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3770 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3771 *ppPage = NULL;
3772
3773#ifdef PGMPOOL_WITH_CACHE
3774 if (pPool->fCacheEnabled)
3775 {
3776 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3777 if (VBOX_SUCCESS(rc2))
3778 {
3779 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3780 LogFlow(("pgmPoolAlloc: cached returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3781 return rc2;
3782 }
3783 }
3784#endif
3785
3786 /*
3787 * Allocate a new one.
3788 */
3789 int rc = VINF_SUCCESS;
3790 uint16_t iNew = pPool->iFreeHead;
3791 if (iNew == NIL_PGMPOOL_IDX)
3792 {
3793 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3794 if (VBOX_FAILURE(rc))
3795 {
3796 if (rc != VERR_PGM_POOL_CLEARED)
3797 {
3798 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3799 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3800 return rc;
3801 }
3802 Log(("pgmPoolMakeMoreFreePages failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc));
3803 rc = VERR_PGM_POOL_FLUSHED;
3804 }
3805 iNew = pPool->iFreeHead;
3806 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3807 }
3808
3809 /* unlink the free head */
3810 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3811 pPool->iFreeHead = pPage->iNext;
3812 pPage->iNext = NIL_PGMPOOL_IDX;
3813
3814 /*
3815 * Initialize it.
3816 */
3817 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3818 pPage->enmKind = enmKind;
3819 pPage->GCPhys = GCPhys;
3820 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3821 pPage->fMonitored = false;
3822 pPage->fCached = false;
3823 pPage->fReusedFlushPending = false;
3824 pPage->fCR3Mix = false;
3825#ifdef PGMPOOL_WITH_MONITORING
3826 pPage->cModifications = 0;
3827 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3828 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3829#endif
3830#ifdef PGMPOOL_WITH_USER_TRACKING
3831 pPage->cPresent = 0;
3832 pPage->iFirstPresent = ~0;
3833
3834 /*
3835 * Insert into the tracking and cache. If this fails, free the page.
3836 */
3837 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3838 if (VBOX_FAILURE(rc3))
3839 {
3840 if (rc3 != VERR_PGM_POOL_CLEARED)
3841 {
3842 pPool->cUsedPages--;
3843 pPage->enmKind = PGMPOOLKIND_FREE;
3844 pPage->GCPhys = NIL_RTGCPHYS;
3845 pPage->iNext = pPool->iFreeHead;
3846 pPool->iFreeHead = pPage->idx;
3847 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3848 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3849 return rc3;
3850 }
3851 Log(("pgmPoolTrackInsert failed with %Vrc -> return VERR_PGM_POOL_FLUSHED\n", rc3));
3852 rc = VERR_PGM_POOL_FLUSHED;
3853 }
3854#endif /* PGMPOOL_WITH_USER_TRACKING */
3855
3856 /*
3857 * Commit the allocation, clear the page and return.
3858 */
3859#ifdef VBOX_WITH_STATISTICS
3860 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3861 pPool->cUsedPagesHigh = pPool->cUsedPages;
3862#endif
3863
3864 if (!pPage->fZeroed)
3865 {
3866 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3867 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3868 ASMMemZeroPage(pv);
3869 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3870 }
3871
3872 *ppPage = pPage;
3873 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3874 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3875 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3876 return rc;
3877}
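
/*
 * Caller sketch (assumed usage, not part of this file): a SyncPT-style caller
 * typically distinguishes a freshly zeroed page from a cache hit and treats a
 * pool flush as a request for a full resync, roughly:
 *
 *    PPGMPOOLPAGE pShwPage;
 *    int rc = pgmPoolAlloc(pVM, GCPhys, enmKind, iUser, iUserTable, &pShwPage);
 *    if (rc == VINF_SUCCESS)
 *        ;                                   (new page: fill in the entries)
 *    else if (rc == VINF_PGM_CACHED_PAGE)
 *        ;                                   (cached page: contents are valid)
 *    else if (rc == VERR_PGM_POOL_FLUSHED)
 *        return VINF_PGM_SYNC_CR3;           (pool flushed, force a resync)
 *    else
 *        AssertRC(rc);
 *
 * The recovery from VERR_PGM_POOL_FLUSHED shown here is an assumption; the
 * real callers in PGMAllBth.h are authoritative.
 */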
3878
3879
3880/**
3881 * Frees a usage of a pool page.
3882 *
3883 * @param pVM The VM handle.
3884 * @param HCPhys The HC physical address of the shadow page.
3885 * @param iUser The shadow page pool index of the user table.
3886 * @param iUserTable The index into the user table (shadowed).
3887 */
3888void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
3889{
3890 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3891 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3892 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3893}
3894
3895
3896/**
3897 * Gets an in-use page in the pool by its physical address.
3898 *
3899 * @returns Pointer to the page.
3900 * @param pVM The VM handle.
3901 * @param HCPhys The HC physical address of the shadow page.
3902 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3903 */
3904PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3905{
3906 /** @todo profile this! */
3907 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3908 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3909 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3910 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3911 return pPage;
3912}
3913
3914
3915/**
3916 * Flushes the entire cache.
3917 *
3918 * It will assert the global CR3 sync flag (FF) and assumes the caller is aware
3919 * of this and will execute the CR3 flush.
3920 *
3921 * @param pVM The VM handle.
3922 */
3923void pgmPoolFlushAll(PVM pVM)
3924{
3925 LogFlow(("pgmPoolFlushAll:\n"));
3926 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3927}
3928