VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@ 5662

Last change on this file since 5662 was 5393, checked in by vboxsync, 17 years ago

When flushing a page because it's been reused, don't emulate instructions that fail interpretation. This gets rid of the majority of the failures due to SSE2 instructions on solaris.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 120.5 KB
Line 
1/* $Id: PGMAllPool.cpp 5393 2007-10-19 17:28:17Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 innotek GmbH
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License as published by the Free Software Foundation,
13 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
14 * distribution. VirtualBox OSE is distributed in the hope that it will
15 * be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_POOL
23#include <VBox/pgm.h>
24#include <VBox/mm.h>
25#include <VBox/em.h>
26#include <VBox/cpum.h>
27#ifdef IN_GC
28# include <VBox/patm.h>
29#endif
30#include "PGMInternal.h"
31#include <VBox/vm.h>
32#include <VBox/disopcode.h>
33
34#include <VBox/log.h>
35#include <VBox/err.h>
36#include <iprt/asm.h>
37
38
39/*******************************************************************************
40* Internal Functions *
41*******************************************************************************/
42__BEGIN_DECLS
43static void pgmPoolFlushAllInt(PPGMPOOL pPool);
44#ifdef PGMPOOL_WITH_USER_TRACKING
45DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
46DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
47static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
48#endif
49#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
50static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
51#endif
52#ifdef PGMPOOL_WITH_CACHE
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
54#endif
55#ifdef PGMPOOL_WITH_MONITORING
56static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
57#endif
58#ifndef IN_RING3
59DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
60#endif
61__END_DECLS
62
63
64/**
65 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
66 *
67 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
68 * @param enmKind The page kind.
69 */
70DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
71{
72 switch (enmKind)
73 {
74 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
75 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
76 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
77 return true;
78 default:
79 return false;
80 }
81}
82
83
84#ifdef IN_GC
85/**
86 * Maps a pool page into the current context.
87 *
88 * @returns Pointer to the mapping.
89 * @param pVM The VM handle.
90 * @param pPage The page to map.
91 */
92void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
93{
94 /* general pages. */
95 if (pPage->idx >= PGMPOOL_IDX_FIRST)
96 {
97 Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
98 void *pv;
99 int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
100 AssertReleaseRC(rc);
101 return pv;
102 }
103
104 /* special pages. */
105 switch (pPage->idx)
106 {
107 case PGMPOOL_IDX_PD:
108 return pVM->pgm.s.pGC32BitPD;
109 case PGMPOOL_IDX_PAE_PD:
110 return pVM->pgm.s.apGCPaePDs[0];
111 case PGMPOOL_IDX_PDPTR:
112 return pVM->pgm.s.pGCPaePDPTR;
113 case PGMPOOL_IDX_PML4:
114 return pVM->pgm.s.pGCPaePML4;
115 default:
116 AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
117 return NULL;
118 }
119}
120#endif /* IN_GC */
121
122
123#ifdef PGMPOOL_WITH_MONITORING
124/**
125 * Determin the size of a write instruction.
126 * @returns number of bytes written.
127 * @param pDis The disassembler state.
128 */
129static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
130{
131 /*
132 * This is very crude and possibly wrong for some opcodes,
133 * but since it's not really supposed to be called we can
134 * probably live with that.
135 */
136 return DISGetParamSize(pDis, &pDis->param1);
137}
138
139
140/**
141 * Flushes a chain of pages sharing the same access monitor.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pPool The pool.
145 * @param pPage A page in the chain.
146 */
147int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
148{
149 /*
150 * Find the list head.
151 */
152 uint16_t idx = pPage->idx;
153 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
154 {
155 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
156 {
157 idx = pPage->iMonitoredPrev;
158 Assert(idx != pPage->idx);
159 pPage = &pPool->aPages[idx];
160 }
161 }
162
163 /*
164 * Itereate the list flushing each shadow page.
165 */
166 int rc = VINF_SUCCESS;
167 for (;;)
168 {
169 idx = pPage->iMonitoredNext;
170 Assert(idx != pPage->idx);
171 if (pPage->idx >= PGMPOOL_IDX_FIRST)
172 {
173 int rc2 = pgmPoolFlushPage(pPool, pPage);
174 if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
175 rc = VINF_PGM_SYNC_CR3;
176 }
177 /* next */
178 if (idx == NIL_PGMPOOL_IDX)
179 break;
180 pPage = &pPool->aPages[idx];
181 }
182 return rc;
183}
184
185
186/**
187 * Wrapper for getting the current context pointer to the entry being modified.
188 *
189 * @returns Pointer to the current context mapping of the entry.
190 * @param pPool The pool.
191 * @param pvFault The fault virtual address.
192 * @param GCPhysFault The fault physical address.
193 * @param cbEntry The entry size.
194 */
195#ifdef IN_RING3
196DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
197#else
198DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
199#endif
200{
201#ifdef IN_GC
202 return (RTGCPTR)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));
203
204#elif defined(IN_RING0)
205 void *pvRet;
206 int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
207 AssertFatalRCSuccess(rc);
208 return pvRet;
209
210#elif defined(IN_RING3)
211 return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
212#else
213# error "huh?"
214#endif
215}
216
217
218/**
219 * Process shadow entries before they are changed by the guest.
220 *
221 * For PT entries we will clear them. For PD entries, we'll simply check
222 * for mapping conflicts and set the SyncCR3 FF if found.
223 *
224 * @param pPool The pool.
225 * @param pPage The head page.
226 * @param GCPhysFault The guest physical fault address.
227 * @param uAddress In R0 and GC this is the guest context fault address (flat).
228 * In R3 this is the host context 'fault' address.
229 * @param pCpu The disassembler state for figuring out the write size.
230 * This need not be specified if the caller knows we won't do cross entry accesses.
231 */
232#ifdef IN_RING3
233void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
234#else
235void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
236#endif
237{
238 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
239 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
240 for (;;)
241 {
242 union
243 {
244 void *pv;
245 PX86PT pPT;
246 PX86PTPAE pPTPae;
247 PX86PD pPD;
248 PX86PDPAE pPDPae;
249 } uShw;
250 uShw.pv = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
251
252 switch (pPage->enmKind)
253 {
254 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
255 {
256 const unsigned iShw = off / sizeof(X86PTE);
257 if (uShw.pPT->a[iShw].n.u1Present)
258 {
259# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
260 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
261 Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
262 pgmPoolTracDerefGCPhysHint(pPool, pPage,
263 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
264 pGstPte->u & X86_PTE_PG_MASK);
265# endif
266 uShw.pPT->a[iShw].u = 0;
267 }
268 break;
269 }
270
271 /* page/2 sized */
272 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
273 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
274 {
275 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
276 if (uShw.pPTPae->a[iShw].n.u1Present)
277 {
278# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
279 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
280 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
281 pgmPoolTracDerefGCPhysHint(pPool, pPage,
282 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
283 pGstPte->u & X86_PTE_PG_MASK);
284# endif
285 uShw.pPTPae->a[iShw].u = 0;
286 }
287 }
288 break;
289
290 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
291 {
292 const unsigned iShw = off / sizeof(X86PTPAE);
293 if (uShw.pPTPae->a[iShw].n.u1Present)
294 {
295# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
296 PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
297 Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
298 pgmPoolTracDerefGCPhysHint(pPool, pPage,
299 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
300 pGstPte->u & X86_PTE_PAE_PG_MASK);
301# endif
302 uShw.pPTPae->a[iShw].u = 0;
303 }
304 break;
305 }
306
307 case PGMPOOLKIND_ROOT_32BIT_PD:
308 {
309 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
310 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
311 {
312 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
313 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
314 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
315 }
316 /* paranoia / a bit assumptive. */
317 else if ( pCpu
318 && (off & 4)
319 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
320 {
321 const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
322 if ( iShw2 != iShw
323 && iShw2 < ELEMENTS(uShw.pPD->a)
324 && uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
325 {
326 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
327 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
328 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
329 }
330 }
331#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
332 if ( uShw.pPD->a[iShw].n.u1Present
333 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
334 {
335 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
336# ifdef IN_GC /* TLB load - we're pushing things a bit... */
337 ASMProbeReadByte(pvAddress);
338# endif
339 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
340 uShw.pPD->a[iShw].u = 0;
341 }
342#endif
343 break;
344 }
345
346 case PGMPOOLKIND_ROOT_PAE_PD:
347 {
348 unsigned iShw = (off / sizeof(X86PTE)) * 2; // ASSUMING 32-bit guest paging!
349 for (unsigned i = 0; i < 2; i++, iShw++)
350 {
351 if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
352 {
353 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
354 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
355 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
356 }
357 /* paranoia / a bit assumptive. */
358 else if ( pCpu
359 && (off & 4)
360 && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
361 {
362 const unsigned iShw2 = iShw + 2;
363 if ( iShw2 < ELEMENTS(uShw.pPDPae->a)
364 && (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
365 {
366 Assert(pgmMapAreMappingsEnabled(&pPool->CTXSUFF(pVM)->pgm.s));
367 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
368 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
369 }
370 }
371#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
372 if ( uShw.pPDPae->a[iShw].n.u1Present
373 && !VM_FF_ISSET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3))
374 {
375 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
376# ifdef IN_GC /* TLB load - we're pushing things a bit... */
377 ASMProbeReadByte(pvAddress);
378# endif
379 pgmPoolFree(pPool->CTXSUFF(pVM), uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
380 uShw.pPDPae->a[iShw].u = 0;
381 }
382#endif
383 }
384 break;
385 }
386
387 default:
388 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
389 }
390
391 /* next */
392 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
393 return;
394 pPage = &pPool->aPages[pPage->iMonitoredNext];
395 }
396}
397
398
399# ifndef IN_RING3
400/**
401 * Checks if a access could be a fork operation in progress.
402 *
403 * Meaning, that the guest is setuping up the parent process for Copy-On-Write.
404 *
405 * @returns true if it's likly that we're forking, otherwise false.
406 * @param pPool The pool.
407 * @param pCpu The disassembled instruction.
408 * @param offFault The access offset.
409 */
410DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
411{
412 /*
413 * i386 linux is using btr to clear X86_PTE_RW.
414 * The functions involved are (2.6.16 source inspection):
415 * clear_bit
416 * ptep_set_wrprotect
417 * copy_one_pte
418 * copy_pte_range
419 * copy_pmd_range
420 * copy_pud_range
421 * copy_page_range
422 * dup_mmap
423 * dup_mm
424 * copy_mm
425 * copy_process
426 * do_fork
427 */
428 if ( pCpu->pCurInstr->opcode == OP_BTR
429 && !(offFault & 4)
430 /** @todo Validate that the bit index is X86_PTE_RW. */
431 )
432 {
433 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
434 return true;
435 }
436 return false;
437}
438
439
440/**
441 * Determin whether the page is likely to have been reused.
442 *
443 * @returns true if we consider the page as being reused for a different purpose.
444 * @returns false if we consider it to still be a paging page.
445 * @param pPage The page in question.
446 * @param pCpu The disassembly info for the faulting insturction.
447 * @param pvFault The fault address.
448 *
449 * @remark The REP prefix check is left to the caller because of STOSD/W.
450 */
451DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
452{
453 switch (pCpu->pCurInstr->opcode)
454 {
455 case OP_PUSH:
456 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
457 return true;
458 case OP_PUSHF:
459 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
460 return true;
461 case OP_PUSHA:
462 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
463 return true;
464 case OP_FXSAVE:
465 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
466 return true;
467 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
468 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
469 return true;
470 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
471 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
472 return true;
473 }
474 if ( (pCpu->param1.flags & USE_REG_GEN32)
475 && (pCpu->param1.base.reg_gen32 == USE_REG_ESP))
476 {
477 Log4(("pgmPoolMonitorIsReused: ESP\n"));
478 return true;
479 }
480
481 //if (pPage->fCR3Mix)
482 // return false;
483 return false;
484}
485
486
487/**
488 * Flushes the page being accessed.
489 *
490 * @returns VBox status code suitable for scheduling.
491 * @param pVM The VM handle.
492 * @param pPool The pool.
493 * @param pPage The pool page (head).
494 * @param pCpu The disassembly of the write instruction.
495 * @param pRegFrame The trap register frame.
496 * @param GCPhysFault The fault address as guest physical address.
497 * @param pvFault The fault address.
498 */
499static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
500 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
501{
502 /*
503 * First, do the flushing.
504 */
505 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
506
507 /*
508 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
509 */
510 uint32_t cbWritten;
511 int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
512 if (VBOX_SUCCESS(rc2))
513 pRegFrame->eip += pCpu->opsize;
514 else if (rc2 == VERR_EM_INTERPRETER)
515 {
516#ifdef IN_GC
517 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip))
518 {
519 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
520 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
521 rc = VINF_SUCCESS;
522 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
523 }
524 else
525#endif
526 {
527 rc = VINF_EM_RAW_EMULATE_INSTR;
528 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
529 }
530 }
531 else
532 rc = rc2;
533
534 /* See use in pgmPoolAccessHandlerSimple(). */
535 PGM_INVL_GUEST_TLBS();
536
537 LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
538 return rc;
539
540}
541
542
543/**
544 * Handles the STOSD write accesses.
545 *
546 * @returns VBox status code suitable for scheduling.
547 * @param pVM The VM handle.
548 * @param pPool The pool.
549 * @param pPage The pool page (head).
550 * @param pCpu The disassembly of the write instruction.
551 * @param pRegFrame The trap register frame.
552 * @param GCPhysFault The fault address as guest physical address.
553 * @param pvFault The fault address.
554 */
555DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
556 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
557{
558 /*
559 * Increment the modification counter and insert it into the list
560 * of modified pages the first time.
561 */
562 if (!pPage->cModifications++)
563 pgmPoolMonitorModifiedInsert(pPool, pPage);
564
565 /*
566 * Execute REP STOSD.
567 *
568 * This ASSUMES that we're not invoked by Trap0e on in a out-of-sync
569 * write situation, meaning that it's safe to write here.
570 */
571#ifdef IN_GC
572 uint32_t *pu32 = (uint32_t *)pvFault;
573#else
574 RTGCPTR pu32 = pvFault;
575#endif
576 while (pRegFrame->ecx)
577 {
578 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pu32, NULL);
579#ifdef IN_GC
580 *pu32++ = pRegFrame->eax;
581#else
582 PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
583 pu32 += 4;
584#endif
585 GCPhysFault += 4;
586 pRegFrame->edi += 4;
587 pRegFrame->ecx--;
588 }
589 pRegFrame->eip += pCpu->opsize;
590
591 /* See use in pgmPoolAccessHandlerSimple(). */
592 PGM_INVL_GUEST_TLBS();
593
594 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
595 return VINF_SUCCESS;
596}
597
598
599/**
600 * Handles the simple write accesses.
601 *
602 * @returns VBox status code suitable for scheduling.
603 * @param pVM The VM handle.
604 * @param pPool The pool.
605 * @param pPage The pool page (head).
606 * @param pCpu The disassembly of the write instruction.
607 * @param pRegFrame The trap register frame.
608 * @param GCPhysFault The fault address as guest physical address.
609 * @param pvFault The fault address.
610 */
611DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
612 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
613{
614 /*
615 * Increment the modification counter and insert it into the list
616 * of modified pages the first time.
617 */
618 if (!pPage->cModifications++)
619 pgmPoolMonitorModifiedInsert(pPool, pPage);
620
621 /*
622 * Clear all the pages. ASSUMES that pvFault is readable.
623 */
624 pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);
625
626 /*
627 * Interpret the instruction.
628 */
629 uint32_t cb;
630 int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
631 if (VBOX_SUCCESS(rc))
632 pRegFrame->eip += pCpu->opsize;
633 else if (rc == VERR_EM_INTERPRETER)
634 {
635# ifdef IN_GC
636 if (PATMIsPatchGCAddr(pVM, (RTGCPTR)(RTGCUINTPTR)pCpu->opaddr))
637 {
638 /* We're not able to handle this in ring-3, so fix the interpreter! */
639 /** @note Should be fine. There's no need to flush the whole thing. */
640#ifndef DEBUG_sandervl
641 AssertMsgFailed(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
642 pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
643#endif
644 STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch1);
645 rc = pgmPoolMonitorChainFlush(pPool, pPage);
646 }
647 else
648# endif
649 {
650 rc = VINF_EM_RAW_EMULATE_INSTR;
651 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
652 }
653 }
654
655 /*
656 * Quick hack, with logging enabled we're getting stale
657 * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
658 * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
659 * have to be fixed to support this. But that'll have to wait till next week.
660 *
661 * An alternative is to keep track of the changed PTEs together with the
662 * GCPhys from the guest PT. This may proove expensive though.
663 *
664 * At the moment, it's VITAL that it's done AFTER the instruction interpreting
665 * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
666 */
667 PGM_INVL_GUEST_TLBS();
668
669 LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
670 return rc;
671}
672
673
674/**
675 * \#PF Handler callback for PT write accesses.
676 *
677 * @returns VBox status code (appropriate for GC return).
678 * @param pVM VM Handle.
679 * @param uErrorCode CPU Error code.
680 * @param pRegFrame Trap register frame.
681 * NULL on DMA and other non CPU access.
682 * @param pvFault The fault address (cr2).
683 * @param GCPhysFault The GC physical address corresponding to pvFault.
684 * @param pvUser User argument.
685 */
686DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
687{
688 STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
689 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
690 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
691 LogFlow(("pgmPoolAccessHandler: pvFault=%p pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
692
693 /*
694 * We should ALWAYS have the list head as user parameter. This
695 * is because we use that page to record the changes.
696 */
697 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
698
699 /*
700 * Disassemble the faulting instruction.
701 */
702 DISCPUSTATE Cpu;
703 int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
704 AssertRCReturn(rc, rc);
705
706 /*
707 * Check if it's worth dealing with.
708 */
709 bool fReused = false;
710 if ( ( pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
711 || pPage->fCR3Mix)
712 && !(fReused = pgmPoolMonitorIsReused(pPage, &Cpu, pvFault))
713 && !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
714 {
715 /*
716 * Simple instructions, no REP prefix.
717 */
718 if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
719 {
720 rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
721 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
722 return rc;
723 }
724
725 /*
726 * Windows is frequently doing small memset() operations (netio test 4k+).
727 * We have to deal with these or we'll kill the cache and performance.
728 */
729 if ( Cpu.pCurInstr->opcode == OP_STOSWD
730 && CPUMGetGuestCPL(pVM, pRegFrame) == 0
731 && pRegFrame->ecx <= 0x20
732 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
733 && !((uintptr_t)pvFault & 3)
734 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
735 && Cpu.mode == CPUMODE_32BIT
736 && Cpu.opmode == CPUMODE_32BIT
737 && Cpu.addrmode == CPUMODE_32BIT
738 && Cpu.prefix == PREFIX_REP
739 && !pRegFrame->eflags.Bits.u1DF
740 )
741 {
742 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
743 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
744 return rc;
745 }
746
747 /* REP prefix, don't bother. */
748 STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
749 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
750 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
751 }
752
753 /*
754 * Not worth it, so flush it.
755 *
756 * If we considered it to be reused, don't to back to ring-3
757 * to emulate failed instructions since we usually cannot
758 * interpret then. This may be a bit risky, in which case
759 * the reuse detection must be fixed.
760 */
761 rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
762 if (rc == VINF_EM_RAW_EMULATE_INSTR && fReused)
763 rc = VINF_SUCCESS;
764 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
765 return rc;
766}
767
768# endif /* !IN_RING3 */
769#endif /* PGMPOOL_WITH_MONITORING */
770
771
772
773#ifdef PGMPOOL_WITH_CACHE
774/**
775 * Inserts a page into the GCPhys hash table.
776 *
777 * @param pPool The pool.
778 * @param pPage The page.
779 */
780DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
781{
782 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
783 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
784 pPage->iNext = pPool->aiHash[iHash];
785 pPool->aiHash[iHash] = pPage->idx;
786}
787
788
789/**
790 * Removes a page from the GCPhys hash table.
791 *
792 * @param pPool The pool.
793 * @param pPage The page.
794 */
795DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
796{
797 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
798 if (pPool->aiHash[iHash] == pPage->idx)
799 pPool->aiHash[iHash] = pPage->iNext;
800 else
801 {
802 uint16_t iPrev = pPool->aiHash[iHash];
803 for (;;)
804 {
805 const int16_t i = pPool->aPages[iPrev].iNext;
806 if (i == pPage->idx)
807 {
808 pPool->aPages[iPrev].iNext = pPage->iNext;
809 break;
810 }
811 if (i == NIL_PGMPOOL_IDX)
812 {
813 AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
814 break;
815 }
816 iPrev = i;
817 }
818 }
819 pPage->iNext = NIL_PGMPOOL_IDX;
820}
821
822
823/**
824 * Frees up one cache page.
825 *
826 * @returns VBox status code.
827 * @retval VINF_SUCCESS on success.
828 * @retval VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
829 * @param pPool The pool.
830 * @param iUser The user index.
831 */
832static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
833{
834 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there < 2 cached entries! */
835 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
836
837 /*
838 * Select one page from the tail of the age list.
839 */
840 uint16_t iToFree = pPool->iAgeTail;
841 if (iToFree == iUser)
842 iToFree = pPool->aPages[iToFree].iAgePrev;
843/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
844 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
845 {
846 uint16_t i = pPool->aPages[iToFree].iAgePrev;
847 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
848 {
849 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
850 continue;
851 iToFree = i;
852 break;
853 }
854 }
855*/
856 Assert(iToFree != iUser);
857 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
858
859 int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
860 if (rc == VINF_SUCCESS)
861 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
862 return rc;
863}
864
865
866/**
867 * Checks if a kind mismatch is really a page being reused
868 * or if it's just normal remappings.
869 *
870 * @returns true if reused and the cached page (enmKind1) should be flushed
871 * @returns false if not reused.
872 * @param enmKind1 The kind of the cached page.
873 * @param enmKind2 The kind of the requested page.
874 */
875static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
876{
877 switch (enmKind1)
878 {
879 /*
880 * Never reuse them. There is no remapping in non-paging mode.
881 */
882 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
883 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
884 return true;
885
886 /*
887 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
888 */
889 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
890 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
891 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
892 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
893 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
894 switch (enmKind2)
895 {
896 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
897 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
898 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
899 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
900 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
901 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
902 return true;
903 default:
904 return false;
905 }
906
907 /*
908 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
909 */
910 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
911 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
912 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
913 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
914 switch (enmKind2)
915 {
916 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
917 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
918 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
919 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
920 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
921 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
922 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
923 return true;
924 default:
925 return false;
926 }
927
928 /*
929 * These cannot be flushed, and it's common to reuse the PDs as PTs.
930 */
931 case PGMPOOLKIND_ROOT_32BIT_PD:
932 case PGMPOOLKIND_ROOT_PAE_PD:
933 case PGMPOOLKIND_ROOT_PDPTR:
934 case PGMPOOLKIND_ROOT_PML4:
935 return false;
936
937 default:
938 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
939 }
940}
941
942
943/**
944 * Attempts to satisfy a pgmPoolAlloc request from the cache.
945 *
946 * @returns VBox status code.
947 * @retval VINF_PGM_CACHED_PAGE on success.
948 * @retval VERR_FILE_NOT_FOUND if not found.
949 * @param pPool The pool.
950 * @param GCPhys The GC physical address of the page we're gonna shadow.
951 * @param enmKind The kind of mapping.
952 * @param iUser The shadow page pool index of the user table.
953 * @param iUserTable The index into the user table (shadowed).
954 * @param ppPage Where to store the pointer to the page.
955 */
956static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
957{
958 /*
959 * Look up the GCPhys in the hash.
960 */
961 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
962 if (i != NIL_PGMPOOL_IDX)
963 {
964 do
965 {
966 PPGMPOOLPAGE pPage = &pPool->aPages[i];
967 if (pPage->GCPhys == GCPhys)
968 {
969 if ((PGMPOOLKIND)pPage->enmKind == enmKind)
970 {
971 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
972 if (VBOX_SUCCESS(rc))
973 {
974 *ppPage = pPage;
975 STAM_COUNTER_INC(&pPool->StatCacheHits);
976 return VINF_PGM_CACHED_PAGE;
977 }
978 return rc;
979 }
980
981 /*
982 * The kind is different. In some cases we should now flush the page
983 * as it has been reused, but in most cases this is normal remapping
984 * of PDs as PT or big pages using the GCPhys field in a slightly
985 * different way than the other kinds.
986 */
987 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
988 {
989 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
990 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTracInsert. */
991 PGM_INVL_GUEST_TLBS(); /* see PT handler. */
992 break;
993 }
994 }
995
996 /* next */
997 i = pPage->iNext;
998 } while (i != NIL_PGMPOOL_IDX);
999 }
1000
1001 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
1002 STAM_COUNTER_INC(&pPool->StatCacheMisses);
1003 return VERR_FILE_NOT_FOUND;
1004}
1005
1006
1007/**
1008 * Inserts a page into the cache.
1009 *
1010 * @param pPool The pool.
1011 * @param pPage The cached page.
1012 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
1013 */
1014static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
1015{
1016 /*
1017 * Insert into the GCPhys hash if the page is fit for that.
1018 */
1019 Assert(!pPage->fCached);
1020 if (fCanBeCached)
1021 {
1022 pPage->fCached = true;
1023 pgmPoolHashInsert(pPool, pPage);
1024 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1025 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1026 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
1027 }
1028 else
1029 {
1030 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
1031 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
1032 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
1033 }
1034
1035 /*
1036 * Insert at the head of the age list.
1037 */
1038 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1039 pPage->iAgeNext = pPool->iAgeHead;
1040 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
1041 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
1042 else
1043 pPool->iAgeTail = pPage->idx;
1044 pPool->iAgeHead = pPage->idx;
1045}
1046
1047
1048/**
1049 * Flushes a cached page.
1050 *
1051 * @param pPool The pool.
1052 * @param pPage The cached page.
1053 */
1054static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1055{
1056 /*
1057 * Remove the page from the hash.
1058 */
1059 if (pPage->fCached)
1060 {
1061 pPage->fCached = false;
1062 pgmPoolHashRemove(pPool, pPage);
1063 }
1064 else
1065 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1066
1067 /*
1068 * Remove it from the age list.
1069 */
1070 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
1071 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
1072 else
1073 pPool->iAgeTail = pPage->iAgePrev;
1074 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
1075 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
1076 else
1077 pPool->iAgeHead = pPage->iAgeNext;
1078 pPage->iAgeNext = NIL_PGMPOOL_IDX;
1079 pPage->iAgePrev = NIL_PGMPOOL_IDX;
1080}
1081#endif /* PGMPOOL_WITH_CACHE */
1082
1083
1084#ifdef PGMPOOL_WITH_MONITORING
1085/**
1086 * Looks for pages sharing the monitor.
1087 *
1088 * @returns Pointer to the head page.
1089 * @returns NULL if not found.
1090 * @param pPool The Pool
1091 * @param pNewPage The page which is going to be monitored.
1092 */
1093static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
1094{
1095#ifdef PGMPOOL_WITH_CACHE
1096 /*
1097 * Look up the GCPhys in the hash.
1098 */
1099 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1100 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
1101 if (i == NIL_PGMPOOL_IDX)
1102 return NULL;
1103 do
1104 {
1105 PPGMPOOLPAGE pPage = &pPool->aPages[i];
1106 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
1107 && pPage != pNewPage)
1108 {
1109 switch (pPage->enmKind)
1110 {
1111 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1112 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1113 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1114 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1115 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1116 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1117 case PGMPOOLKIND_ROOT_32BIT_PD:
1118 case PGMPOOLKIND_ROOT_PAE_PD:
1119 case PGMPOOLKIND_ROOT_PDPTR:
1120 case PGMPOOLKIND_ROOT_PML4:
1121 {
1122 /* find the head */
1123 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1124 {
1125 Assert(pPage->iMonitoredPrev != pPage->idx);
1126 pPage = &pPool->aPages[pPage->iMonitoredPrev];
1127 }
1128 return pPage;
1129 }
1130
1131 /* ignore, no monitoring. */
1132 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1133 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1134 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1135 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1136 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1137 break;
1138 default:
1139 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
1140 }
1141 }
1142
1143 /* next */
1144 i = pPage->iNext;
1145 } while (i != NIL_PGMPOOL_IDX);
1146#endif
1147 return NULL;
1148}
1149
1150/**
1151 * Enabled write monitoring of a guest page.
1152 *
1153 * @returns VBox status code.
1154 * @retval VINF_SUCCESS on success.
1155 * @retval VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
1156 * @param pPool The pool.
1157 * @param pPage The cached page.
1158 */
1159static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1160{
1161 /*
1162 * Filter out the relevant kinds.
1163 */
1164 switch (pPage->enmKind)
1165 {
1166 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1167 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1168 break;
1169
1170 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1171 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1172 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1173 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1174 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1175 /* Nothing to monitor here. */
1176 return VINF_SUCCESS;
1177
1178 case PGMPOOLKIND_ROOT_32BIT_PD:
1179 case PGMPOOLKIND_ROOT_PAE_PD:
1180#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1181 break;
1182#endif
1183 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1184 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1185 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1186 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1187 case PGMPOOLKIND_ROOT_PDPTR:
1188 case PGMPOOLKIND_ROOT_PML4:
1189 default:
1190 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1191 }
1192
1193 /*
1194 * Install handler.
1195 */
1196 int rc;
1197 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
1198 if (pPageHead)
1199 {
1200 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
1201 Assert(pPageHead->iMonitoredPrev != pPage->idx);
1202 pPage->iMonitoredPrev = pPageHead->idx;
1203 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
1204 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
1205 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
1206 pPageHead->iMonitoredNext = pPage->idx;
1207 rc = VINF_SUCCESS;
1208 }
1209 else
1210 {
1211 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1212 PVM pVM = pPool->CTXSUFF(pVM);
1213 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1214 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
1215 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
1216 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
1217 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
1218 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
1219 pPool->pszAccessHandler);
1220 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
1221 * the heap size should suffice. */
1222 AssertFatalRC(rc);
1223 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1224 rc = VERR_PGM_POOL_CLEARED;
1225 }
1226 pPage->fMonitored = true;
1227 return rc;
1228}
1229
1230
1231/**
1232 * Disables write monitoring of a guest page.
1233 *
1234 * @returns VBox status code.
1235 * @retval VINF_SUCCESS on success.
1236 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1237 * @param pPool The pool.
1238 * @param pPage The cached page.
1239 */
1240static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1241{
1242 /*
1243 * Filter out the relevant kinds.
1244 */
1245 switch (pPage->enmKind)
1246 {
1247 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1248 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1249 break;
1250
1251 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1252 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1253 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1254 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1255 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1256 /* Nothing to monitor here. */
1257 return VINF_SUCCESS;
1258
1259 case PGMPOOLKIND_ROOT_32BIT_PD:
1260 case PGMPOOLKIND_ROOT_PAE_PD:
1261#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1262 break;
1263#endif
1264 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1265 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1266 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1267 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1268 case PGMPOOLKIND_ROOT_PDPTR:
1269 case PGMPOOLKIND_ROOT_PML4:
1270 default:
1271 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
1272 }
1273
1274 /*
1275 * Remove the page from the monitored list or uninstall it if last.
1276 */
1277 const PVM pVM = pPool->CTXSUFF(pVM);
1278 int rc;
1279 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1280 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1281 {
1282 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1283 {
1284 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
1285 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
1286 pNewHead->fCR3Mix = pPage->fCR3Mix;
1287 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
1288 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
1289 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
1290 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pNewHead),
1291 pPool->pszAccessHandler);
1292 AssertFatalRCSuccess(rc);
1293 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1294 }
1295 else
1296 {
1297 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
1298 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
1299 {
1300 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
1301 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
1302 }
1303 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
1304 rc = VINF_SUCCESS;
1305 }
1306 }
1307 else
1308 {
1309 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
1310 AssertFatalRC(rc);
1311 if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
1312 rc = VERR_PGM_POOL_CLEARED;
1313 }
1314 pPage->fMonitored = false;
1315
1316 /*
1317 * Remove it from the list of modified pages (if in it).
1318 */
1319 pgmPoolMonitorModifiedRemove(pPool, pPage);
1320
1321 return rc;
1322}
1323
1324
1325#ifdef PGMPOOL_WITH_MIXED_PT_CR3
1326/**
1327 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
1328 *
1329 * @param pPool The Pool.
1330 * @param pPage A page in the chain.
1331 * @param fCR3Mix The new fCR3Mix value.
1332 */
1333static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
1334{
1335 /* current */
1336 pPage->fCR3Mix = fCR3Mix;
1337
1338 /* before */
1339 int16_t idx = pPage->iMonitoredPrev;
1340 while (idx != NIL_PGMPOOL_IDX)
1341 {
1342 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1343 idx = pPool->aPages[idx].iMonitoredPrev;
1344 }
1345
1346 /* after */
1347 idx = pPage->iMonitoredNext;
1348 while (idx != NIL_PGMPOOL_IDX)
1349 {
1350 pPool->aPages[idx].fCR3Mix = fCR3Mix;
1351 idx = pPool->aPages[idx].iMonitoredNext;
1352 }
1353}
1354
1355
1356/**
1357 * Installs or modifies monitoring of a CR3 page (special).
1358 *
1359 * We're pretending the CR3 page is shadowed by the pool so we can use the
1360 * generic mechanisms in detecting chained monitoring. (This also gives us a
1361 * tast of what code changes are required to really pool CR3 shadow pages.)
1362 *
1363 * @returns VBox status code.
1364 * @param pPool The pool.
1365 * @param idxRoot The CR3 (root) page index.
1366 * @param GCPhysCR3 The (new) CR3 value.
1367 */
1368int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
1369{
1370 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1371 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1372 LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
1373 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));
1374
1375 /*
1376 * The unlikely case where it already matches.
1377 */
1378 if (pPage->GCPhys == GCPhysCR3)
1379 {
1380 Assert(pPage->fMonitored);
1381 return VINF_SUCCESS;
1382 }
1383
1384 /*
1385 * Flush the current monitoring and remove it from the hash.
1386 */
1387 int rc = VINF_SUCCESS;
1388 if (pPage->fMonitored)
1389 {
1390 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1391 rc = pgmPoolMonitorFlush(pPool, pPage);
1392 if (rc == VERR_PGM_POOL_CLEARED)
1393 rc = VINF_SUCCESS;
1394 else
1395 AssertFatalRC(rc);
1396 pgmPoolHashRemove(pPool, pPage);
1397 }
1398
1399 /*
1400 * Monitor the page at the new location and insert it into the hash.
1401 */
1402 pPage->GCPhys = GCPhysCR3;
1403 int rc2 = pgmPoolMonitorInsert(pPool, pPage);
1404 if (rc2 != VERR_PGM_POOL_CLEARED)
1405 {
1406 AssertFatalRC(rc2);
1407 if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
1408 rc = rc2;
1409 }
1410 pgmPoolHashInsert(pPool, pPage);
1411 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
1412 return rc;
1413}
1414
1415
1416/**
1417 * Removes the monitoring of a CR3 page (special).
1418 *
1419 * @returns VBox status code.
1420 * @param pPool The pool.
1421 * @param idxRoot The CR3 (root) page index.
1422 */
1423int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
1424{
1425 Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
1426 PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
1427 LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
1428 idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));
1429
1430 if (!pPage->fMonitored)
1431 return VINF_SUCCESS;
1432
1433 pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
1434 int rc = pgmPoolMonitorFlush(pPool, pPage);
1435 if (rc != VERR_PGM_POOL_CLEARED)
1436 AssertFatalRC(rc);
1437 else
1438 rc = VINF_SUCCESS;
1439 pgmPoolHashRemove(pPool, pPage);
1440 Assert(!pPage->fMonitored);
1441 pPage->GCPhys = NIL_RTGCPHYS;
1442 return rc;
1443}
1444#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */
1445
1446
1447/**
1448 * Inserts the page into the list of modified pages.
1449 *
1450 * @param pPool The pool.
1451 * @param pPage The page.
1452 */
1453void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1454{
1455 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
1456 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
1457 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
1458 && pPool->iModifiedHead != pPage->idx,
1459 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
1460 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
1461 pPool->iModifiedHead, pPool->cModifiedPages));
1462
1463 pPage->iModifiedNext = pPool->iModifiedHead;
1464 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
1465 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
1466 pPool->iModifiedHead = pPage->idx;
1467 pPool->cModifiedPages++;
1468#ifdef VBOX_WITH_STATISTICS
1469 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
1470 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
1471#endif
1472}
1473
1474
1475/**
1476 * Removes the page from the list of modified pages and resets the
1477 * moficiation counter.
1478 *
1479 * @param pPool The pool.
1480 * @param pPage The page which is believed to be in the list of modified pages.
1481 */
1482static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1483{
1484 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
1485 if (pPool->iModifiedHead == pPage->idx)
1486 {
1487 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1488 pPool->iModifiedHead = pPage->iModifiedNext;
1489 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1490 {
1491 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
1492 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1493 }
1494 pPool->cModifiedPages--;
1495 }
1496 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
1497 {
1498 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
1499 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
1500 {
1501 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
1502 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1503 }
1504 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1505 pPool->cModifiedPages--;
1506 }
1507 else
1508 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
1509 pPage->cModifications = 0;
1510}
1511
1512
1513/**
1514 * Zaps the list of modified pages, resetting their modification counters in the process.
1515 *
1516 * @param pVM The VM handle.
1517 */
1518void pgmPoolMonitorModifiedClearAll(PVM pVM)
1519{
1520 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1521 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
1522
1523 unsigned cPages = 0; NOREF(cPages);
1524 uint16_t idx = pPool->iModifiedHead;
1525 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1526 while (idx != NIL_PGMPOOL_IDX)
1527 {
1528 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
1529 idx = pPage->iModifiedNext;
1530 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1531 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1532 pPage->cModifications = 0;
1533 Assert(++cPages);
1534 }
1535 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
1536 pPool->cModifiedPages = 0;
1537}
1538
1539
1540/**
1541 * Clear all shadow pages and clear all modification counters.
1542 *
1543 * @param pVM The VM handle.
1544 * @remark Should only be used when monitoring is available, thus placed in
1545 * the PGMPOOL_WITH_MONITORING #ifdef.
1546 */
1547void pgmPoolClearAll(PVM pVM)
1548{
1549 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
1550 STAM_PROFILE_START(&pPool->StatClearAll, c);
1551 LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));
1552
1553 /*
1554 * Iterate all the pages until we've encountered all that in use.
1555 * This is simple but not quite optimal solution.
1556 */
1557 unsigned cModifiedPages = 0; NOREF(cModifiedPages);
1558 unsigned cLeft = pPool->cUsedPages;
1559 unsigned iPage = pPool->cCurPages;
1560 while (--iPage >= PGMPOOL_IDX_FIRST)
1561 {
1562 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1563 if (pPage->GCPhys != NIL_RTGCPHYS)
1564 {
1565 switch (pPage->enmKind)
1566 {
1567 /*
1568 * We only care about shadow page tables.
1569 */
1570 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1571 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1572 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1573 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1574 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1575 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1576 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1577 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1578 {
1579#ifdef PGMPOOL_WITH_USER_TRACKING
1580 if (pPage->cPresent)
1581#endif
1582 {
1583 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
1584 STAM_PROFILE_START(&pPool->StatZeroPage, z);
1585 ASMMemZeroPage(pvShw);
1586 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
1587#ifdef PGMPOOL_WITH_USER_TRACKING
1588 pPage->cPresent = 0;
1589 pPage->iFirstPresent = ~0;
1590#endif
1591 }
1592 }
1593 /* fall thru */
1594
1595 default:
1596 Assert(!pPage->cModifications || ++cModifiedPages);
1597 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1598 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1599 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1600 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1601 pPage->cModifications = 0;
1602 break;
1603
1604 }
1605 if (!--cLeft)
1606 break;
1607 }
1608 }
1609
1610 /* swipe the special pages too. */
1611 for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
1612 {
1613 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
1614 if (pPage->GCPhys != NIL_RTGCPHYS)
1615 {
1616 Assert(!pPage->cModifications || ++cModifiedPages);
1617 Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
1618 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
1619 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
1620 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
1621 pPage->cModifications = 0;
1622 }
1623 }
1624
1625 AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
1626 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
1627 pPool->cModifiedPages = 0;
1628
1629#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1630 /*
1631 * Clear all the GCPhys links and rebuild the phys ext free list.
1632 */
1633 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
1634 pRam;
1635 pRam = pRam->CTXSUFF(pNext))
1636 {
1637 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1638 while (iPage-- > 0)
1639 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
1640 }
1641
1642 pPool->iPhysExtFreeHead = 0;
1643 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
1644 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
1645 for (unsigned i = 0; i < cMaxPhysExts; i++)
1646 {
1647 paPhysExts[i].iNext = i + 1;
1648 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
1649 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
1650 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
1651 }
1652 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
1653#endif
1654
1655
1656 pPool->cPresent = 0;
1657 STAM_PROFILE_STOP(&pPool->StatClearAll, c);
1658}
1659#endif /* PGMPOOL_WITH_MONITORING */
1660
1661
1662#ifdef PGMPOOL_WITH_USER_TRACKING
1663/**
1664 * Frees up at least one user entry.
1665 *
1666 * @returns VBox status code.
1667 * @retval VINF_SUCCESS if successfully added.
1668 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1669 * @param pPool The pool.
1670 * @param iUser The user index.
1671 */
1672static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
1673{
1674 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
1675#ifdef PGMPOOL_WITH_CACHE
1676 /*
1677 * Just free cached pages in a braindead fashion.
1678 */
1679 /** @todo walk the age list backwards and free the first with usage. */
1680 int rc = VINF_SUCCESS;
1681 do
1682 {
1683 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
1684 if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
1685 rc = rc2;
1686 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
1687 return rc;
1688#else
1689 /*
1690 * Lazy approach.
1691 */
1692 pgmPoolFlushAllInt(pPool);
1693 return VERR_PGM_POOL_FLUSHED;
1694#endif
1695}
1696
1697
1698/**
1699 * Inserts a page into the cache.
1700 *
1701 * This will create user node for the page, insert it into the GCPhys
1702 * hash, and insert it into the age list.
1703 *
1704 * @returns VBox status code.
1705 * @retval VINF_SUCCESS if successfully added.
1706 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1707 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
1708 * @param pPool The pool.
1709 * @param pPage The cached page.
1710 * @param GCPhys The GC physical address of the page we're gonna shadow.
1711 * @param iUser The user index.
1712 * @param iUserTable The user table index.
1713 */
1714DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
1715{
1716 int rc = VINF_SUCCESS;
1717 PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);
1718
1719 /*
1720 * Find free a user node.
1721 */
1722 uint16_t i = pPool->iUserFreeHead;
1723 if (i == NIL_PGMPOOL_USER_INDEX)
1724 {
1725 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1726 if (VBOX_FAILURE(rc))
1727 return rc;
1728 i = pPool->iUserFreeHead;
1729 }
1730
1731 /*
1732 * Unlink the user node from the free list,
1733 * initialize and insert it into the user list.
1734 */
1735 pPool->iUserFreeHead = pUser[i].iNext;
1736 pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
1737 pUser[i].iUser = iUser;
1738 pUser[i].iUserTable = iUserTable;
1739 pPage->iUserHead = i;
1740
1741 /*
1742 * Insert into cache and enable monitoring of the guest page if enabled.
1743 *
1744 * Until we implement caching of all levels, including the CR3 one, we'll
1745 * have to make sure we don't try monitor & cache any recursive reuse of
1746 * a monitored CR3 page. Because all windows versions are doing this we'll
1747 * have to be able to do combined access monitoring, CR3 + PT and
1748 * PD + PT (guest PAE).
1749 *
1750 * Update:
1751 * We're now cooperating with the CR3 monitor if an uncachable page is found.
1752 */
1753#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
1754# ifdef PGMPOOL_WITH_MIXED_PT_CR3
1755 const bool fCanBeMonitored = true;
1756# else
1757 bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
1758 || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
1759 || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
1760# endif
1761# ifdef PGMPOOL_WITH_CACHE
1762 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1763# endif
1764 if (fCanBeMonitored)
1765 {
1766# ifdef PGMPOOL_WITH_MONITORING
1767 rc = pgmPoolMonitorInsert(pPool, pPage);
1768 if (rc == VERR_PGM_POOL_CLEARED)
1769 {
1770 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1771# ifndef PGMPOOL_WITH_CACHE
1772 pgmPoolMonitorFlush(pPool, pPage);
1773 rc = VERR_PGM_POOL_FLUSHED;
1774# endif
1775 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1776 pUser[i].iNext = pPool->iUserFreeHead;
1777 pUser[i].iUser = NIL_PGMPOOL_IDX;
1778 pPool->iUserFreeHead = i;
1779 }
1780 }
1781# endif
1782#endif /* PGMPOOL_WITH_MONITORING */
1783 return rc;
1784}
1785
1786
1787# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1788/**
1789 * Adds a user reference to a page.
1790 *
1791 * This will
1792 * This will move the page to the head of the
1793 *
1794 * @returns VBox status code.
1795 * @retval VINF_SUCCESS if successfully added.
1796 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1797 * @param pPool The pool.
1798 * @param pPage The cached page.
1799 * @param iUser The user index.
1800 * @param iUserTable The user table.
1801 */
1802static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1803{
1804 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1805
1806# ifdef VBOX_STRICT
1807 /*
1808 * Check that the entry doesn't already exists.
1809 */
1810 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1811 {
1812 uint16_t i = pPage->iUserHead;
1813 do
1814 {
1815 Assert(i < pPool->cMaxUsers);
1816 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%d %d\n", iUser, iUserTable));
1817 i = paUsers[i].iNext;
1818 } while (i != NIL_PGMPOOL_USER_INDEX);
1819 }
1820# endif
1821
1822 /*
1823 * Allocate a user node.
1824 */
1825 uint16_t i = pPool->iUserFreeHead;
1826 if (i == NIL_PGMPOOL_USER_INDEX)
1827 {
1828 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1829 if (VBOX_FAILURE(rc))
1830 return rc;
1831 i = pPool->iUserFreeHead;
1832 }
1833 pPool->iUserFreeHead = paUsers[i].iNext;
1834
1835 /*
1836 * Initialize the user node and insert it.
1837 */
1838 paUsers[i].iNext = pPage->iUserHead;
1839 paUsers[i].iUser = iUser;
1840 paUsers[i].iUserTable = iUserTable;
1841 pPage->iUserHead = i;
1842
1843# ifdef PGMPOOL_WITH_CACHE
1844 /*
1845 * Tell the cache to update its replacement stats for this page.
1846 */
1847 pgmPoolCacheUsed(pPool, pPage);
1848# endif
1849 return VINF_SUCCESS;
1850}
1851# endif /* PGMPOOL_WITH_CACHE */
1852
1853
1854/**
1855 * Frees a user record associated with a page.
1856 *
1857 * This does not clear the entry in the user table, it simply replaces the
1858 * user record to the chain of free records.
1859 *
1860 * @param pPool The pool.
1861 * @param HCPhys The HC physical address of the shadow page.
1862 * @param iUser The shadow page pool index of the user table.
1863 * @param iUserTable The index into the user table (shadowed).
1864 */
1865static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1866{
1867 /*
1868 * Unlink and free the specified user entry.
1869 */
1870 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1871
1872 /* Special: For PAE and 32-bit paging, there are usually no more than one user. */
1873 uint16_t i = pPage->iUserHead;
1874 if ( i != NIL_PGMPOOL_USER_INDEX
1875 && paUsers[i].iUser == iUser
1876 && paUsers[i].iUserTable == iUserTable)
1877 {
1878 pPage->iUserHead = paUsers[i].iNext;
1879
1880 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1881 paUsers[i].iNext = pPool->iUserFreeHead;
1882 pPool->iUserFreeHead = i;
1883 return;
1884 }
1885
1886 /* General: Linear search. */
1887 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1888 while (i != NIL_PGMPOOL_USER_INDEX)
1889 {
1890 if ( paUsers[i].iUser == iUser
1891 && paUsers[i].iUserTable == iUserTable)
1892 {
1893 if (iPrev != NIL_PGMPOOL_USER_INDEX)
1894 paUsers[iPrev].iNext = paUsers[i].iNext;
1895 else
1896 pPage->iUserHead = paUsers[i].iNext;
1897
1898 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1899 paUsers[i].iNext = pPool->iUserFreeHead;
1900 pPool->iUserFreeHead = i;
1901 return;
1902 }
1903 iPrev = i;
1904 i = paUsers[i].iNext;
1905 }
1906
1907 /* Fatal: didn't find it */
1908 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
1909 iUser, iUserTable, pPage->GCPhys));
1910}
1911
1912
1913/**
1914 * Gets the entry size of a shadow table.
1915 *
1916 * @param enmKind The kind of page.
1917 *
1918 * @returns The size of the entry in bytes. That is, 4 or 8.
1919 * @returns If the kind is not for a table, an assertion is raised and 0 is
1920 * returned.
1921 */
1922DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
1923{
1924 switch (enmKind)
1925 {
1926 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1927 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1928 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1929 case PGMPOOLKIND_ROOT_32BIT_PD:
1930 return 4;
1931
1932 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1933 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1934 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1935 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1936 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1937 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1938 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1939 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1940 case PGMPOOLKIND_ROOT_PAE_PD:
1941 case PGMPOOLKIND_ROOT_PDPTR:
1942 case PGMPOOLKIND_ROOT_PML4:
1943 return 8;
1944
1945 default:
1946 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1947 }
1948}
1949
1950
1951/**
1952 * Gets the entry size of a guest table.
1953 *
1954 * @param enmKind The kind of page.
1955 *
1956 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
1957 * @returns If the kind is not for a table, an assertion is raised and 0 is
1958 * returned.
1959 */
1960DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
1961{
1962 switch (enmKind)
1963 {
1964 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1965 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1966 case PGMPOOLKIND_ROOT_32BIT_PD:
1967 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1968 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1969 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1970 return 4;
1971
1972 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1973 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1974 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1975 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1976 case PGMPOOLKIND_ROOT_PAE_PD:
1977 case PGMPOOLKIND_ROOT_PDPTR:
1978 case PGMPOOLKIND_ROOT_PML4:
1979 return 8;
1980
1981 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1982 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1983 /** @todo can we return 0? (nobody is calling this...) */
1984 return 0;
1985
1986 default:
1987 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1988 }
1989}
1990
1991
1992#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1993/**
1994 * Scans one shadow page table for mappings of a physical page.
1995 *
1996 * @param pVM The VM handle.
1997 * @param pPhysPage The guest page in question.
1998 * @param iShw The shadow page table.
1999 * @param cRefs The number of references made in that PT.
2000 */
2001static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2002{
2003 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2004 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2005
2006 /*
2007 * Assert sanity.
2008 */
2009 Assert(cRefs == 1);
2010 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2011 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2012
2013 /*
2014 * Then, clear the actual mappings to the page in the shadow PT.
2015 */
2016 switch (pPage->enmKind)
2017 {
2018 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2019 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2020 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2021 {
2022 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2023 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2024 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2025 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2026 {
2027 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2028 pPT->a[i].u = 0;
2029 cRefs--;
2030 if (!cRefs)
2031 return;
2032 }
2033#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2034 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2035 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2036 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2037 {
2038 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2039 pPT->a[i].u = 0;
2040 }
2041#endif
2042 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2043 break;
2044 }
2045
2046 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2047 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2048 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2049 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2050 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2051 {
2052 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2053 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2054 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2055 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2056 {
2057 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2058 pPT->a[i].u = 0;
2059 cRefs--;
2060 if (!cRefs)
2061 return;
2062 }
2063#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2064 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2065 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2066 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2067 {
2068 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2069 pPT->a[i].u = 0;
2070 }
2071#endif
2072 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2073 break;
2074 }
2075
2076 default:
2077 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2078 }
2079}
2080
2081
2082/**
2083 * Scans one shadow page table for mappings of a physical page.
2084 *
2085 * @param pVM The VM handle.
2086 * @param pPhysPage The guest page in question.
2087 * @param iShw The shadow page table.
2088 * @param cRefs The number of references made in that PT.
2089 */
2090void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2091{
2092 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2093 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2094 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2095 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2096 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2097 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2098}
2099
2100
2101/**
2102 * Flushes a list of shadow page tables mapping the same physical page.
2103 *
2104 * @param pVM The VM handle.
2105 * @param pPhysPage The guest page in question.
2106 * @param iPhysExt The physical cross reference extent list to flush.
2107 */
2108void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2109{
2110 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2111 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2112 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt\n", pPhysPage->HCPhys, iPhysExt));
2113
2114 const uint16_t iPhysExtStart = iPhysExt;
2115 PPGMPOOLPHYSEXT pPhysExt;
2116 do
2117 {
2118 Assert(iPhysExt < pPool->cMaxPhysExts);
2119 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2120 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2121 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2122 {
2123 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2124 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2125 }
2126
2127 /* next */
2128 iPhysExt = pPhysExt->iNext;
2129 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2130
2131 /* insert the list into the free list and clear the ram range entry. */
2132 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2133 pPool->iPhysExtFreeHead = iPhysExtStart;
2134 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2135
2136 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2137}
2138#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2139
2140
2141/**
2142 * Scans all shadow page tables for mappings of a physical page.
2143 *
2144 * This may be slow, but it's most likely more efficient than cleaning
2145 * out the entire page pool / cache.
2146 *
2147 * @returns VBox status code.
2148 * @retval VINF_SUCCESS if all references has been successfully cleared.
2149 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2150 * a page pool cleaning.
2151 *
2152 * @param pVM The VM handle.
2153 * @param pPhysPage The guest page in question.
2154 */
2155int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2156{
2157 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2158 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2159 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2160 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2161
2162#if 1
2163 /*
2164 * There is a limit to what makes sense.
2165 */
2166 if (pPool->cPresent > 1024)
2167 {
2168 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2169 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2170 return VINF_PGM_GCPHYS_ALIASED;
2171 }
2172#endif
2173
2174 /*
2175 * Iterate all the pages until we've encountered all that in use.
2176 * This is simple but not quite optimal solution.
2177 */
2178 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2179 const uint32_t u32 = u64;
2180 unsigned cLeft = pPool->cUsedPages;
2181 unsigned iPage = pPool->cCurPages;
2182 while (--iPage >= PGMPOOL_IDX_FIRST)
2183 {
2184 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2185 if (pPage->GCPhys != NIL_RTGCPHYS)
2186 {
2187 switch (pPage->enmKind)
2188 {
2189 /*
2190 * We only care about shadow page tables.
2191 */
2192 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2193 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2194 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2195 {
2196 unsigned cPresent = pPage->cPresent;
2197 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2198 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2199 if (pPT->a[i].n.u1Present)
2200 {
2201 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2202 {
2203 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2204 pPT->a[i].u = 0;
2205 }
2206 if (!--cPresent)
2207 break;
2208 }
2209 break;
2210 }
2211
2212 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2213 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2214 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2215 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2216 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2217 {
2218 unsigned cPresent = pPage->cPresent;
2219 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2220 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2221 if (pPT->a[i].n.u1Present)
2222 {
2223 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2224 {
2225 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2226 pPT->a[i].u = 0;
2227 }
2228 if (!--cPresent)
2229 break;
2230 }
2231 break;
2232 }
2233 }
2234 if (!--cLeft)
2235 break;
2236 }
2237 }
2238
2239 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2240 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2241 return VINF_SUCCESS;
2242}
2243
2244
2245/**
2246 * Clears the user entry in a user table.
2247 *
2248 * This is used to remove all references to a page when flushing it.
2249 */
2250static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2251{
2252 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2253 Assert(pUser->iUser < pPool->cCurPages);
2254
2255 /*
2256 * Map the user page.
2257 */
2258 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2259 union
2260 {
2261 uint64_t *pau64;
2262 uint32_t *pau32;
2263 } u;
2264 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pUserPage);
2265
2266#ifdef VBOX_STRICT
2267 /*
2268 * Some sanity checks.
2269 */
2270 switch (pUserPage->enmKind)
2271 {
2272 case PGMPOOLKIND_ROOT_32BIT_PD:
2273 Assert(!(u.pau32[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2274 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2275 break;
2276 case PGMPOOLKIND_ROOT_PAE_PD:
2277 Assert(!(u.pau64[pUser->iUser] & PGM_PDFLAGS_MAPPING));
2278 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2279 break;
2280 case PGMPOOLKIND_ROOT_PDPTR:
2281 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2282 Assert(pUser->iUserTable < 4);
2283 break;
2284 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2285 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2286 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2287 break;
2288 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2289 case PGMPOOLKIND_ROOT_PML4:
2290 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2291 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2292 break;
2293 default:
2294 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2295 break;
2296 }
2297#endif /* VBOX_STRICT */
2298
2299 /*
2300 * Clear the entry in the user page.
2301 */
2302 switch (pUserPage->enmKind)
2303 {
2304 /* 32-bit entries */
2305 case PGMPOOLKIND_ROOT_32BIT_PD:
2306 u.pau32[pUser->iUserTable] = 0;
2307 break;
2308
2309 /* 64-bit entries */
2310 case PGMPOOLKIND_ROOT_PAE_PD:
2311 case PGMPOOLKIND_ROOT_PDPTR:
2312 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2313 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2314 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2315 case PGMPOOLKIND_ROOT_PML4:
2316 u.pau64[pUser->iUserTable] = 0;
2317 break;
2318
2319 default:
2320 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2321 }
2322}
2323
2324
2325/**
2326 * Clears all users of a page.
2327 */
2328static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2329{
2330 /*
2331 * Free all the user records.
2332 */
2333 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2334 uint16_t i = pPage->iUserHead;
2335 while (i != NIL_PGMPOOL_USER_INDEX)
2336 {
2337 /* Clear enter in user table. */
2338 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2339
2340 /* Free it. */
2341 const uint16_t iNext = paUsers[i].iNext;
2342 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2343 paUsers[i].iNext = pPool->iUserFreeHead;
2344 pPool->iUserFreeHead = i;
2345
2346 /* Next. */
2347 i = iNext;
2348 }
2349 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2350}
2351
2352
2353#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2354/**
2355 * Allocates a new physical cross reference extent.
2356 *
2357 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2358 * @param pVM The VM handle.
2359 * @param piPhysExt Where to store the phys ext index.
2360 */
2361PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2362{
2363 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2364 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2365 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2366 {
2367 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2368 return NULL;
2369 }
2370 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2371 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2372 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2373 *piPhysExt = iPhysExt;
2374 return pPhysExt;
2375}
2376
2377
2378/**
2379 * Frees a physical cross reference extent.
2380 *
2381 * @param pVM The VM handle.
2382 * @param iPhysExt The extent to free.
2383 */
2384void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2385{
2386 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2387 Assert(iPhysExt < pPool->cMaxPhysExts);
2388 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2389 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2390 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2391 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2392 pPool->iPhysExtFreeHead = iPhysExt;
2393}
2394
2395
2396/**
2397 * Frees a physical cross reference extent.
2398 *
2399 * @param pVM The VM handle.
2400 * @param iPhysExt The extent to free.
2401 */
2402void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2403{
2404 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2405
2406 const uint16_t iPhysExtStart = iPhysExt;
2407 PPGMPOOLPHYSEXT pPhysExt;
2408 do
2409 {
2410 Assert(iPhysExt < pPool->cMaxPhysExts);
2411 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2412 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2413 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2414
2415 /* next */
2416 iPhysExt = pPhysExt->iNext;
2417 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2418
2419 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2420 pPool->iPhysExtFreeHead = iPhysExtStart;
2421}
2422
2423/**
2424 * Insert a reference into a list of physical cross reference extents.
2425 *
2426 * @returns The new ram range flags (top 16-bits).
2427 *
2428 * @param pVM The VM handle.
2429 * @param iPhysExt The physical extent index of the list head.
2430 * @param iShwPT The shadow page table index.
2431 *
2432 */
2433static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2434{
2435 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2436 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2437
2438 /* special common case. */
2439 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2440 {
2441 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2442 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2443 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2444 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2445 }
2446
2447 /* general treatment. */
2448 const uint16_t iPhysExtStart = iPhysExt;
2449 unsigned cMax = 15;
2450 for (;;)
2451 {
2452 Assert(iPhysExt < pPool->cMaxPhysExts);
2453 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2454 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2455 {
2456 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2457 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2458 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2459 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2460 }
2461 if (!--cMax)
2462 {
2463 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2464 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2465 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2466 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2467 }
2468 }
2469
2470 /* add another extent to the list. */
2471 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2472 if (!pNew)
2473 {
2474 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2475 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2476 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2477 }
2478 pNew->iNext = iPhysExtStart;
2479 pNew->aidx[0] = iShwPT;
2480 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2481 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2482}
2483
2484
2485/**
2486 * Add a reference to guest physical page where extents are in use.
2487 *
2488 * @returns The new ram range flags (top 16-bits).
2489 *
2490 * @param pVM The VM handle.
2491 * @param u16 The ram range flags (top 16-bits).
2492 * @param iShwPT The shadow page table index.
2493 */
2494uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2495{
2496 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2497 {
2498 /*
2499 * Convert to extent list.
2500 */
2501 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2502 uint16_t iPhysExt;
2503 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2504 if (pPhysExt)
2505 {
2506 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2507 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2508 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2509 pPhysExt->aidx[1] = iShwPT;
2510 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2511 }
2512 else
2513 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2514 }
2515 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2516 {
2517 /*
2518 * Insert into the extent list.
2519 */
2520 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2521 }
2522 else
2523 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2524 return u16;
2525}
2526
2527
2528/**
2529 * Clear references to guest physical memory.
2530 *
2531 * @param pPool The pool.
2532 * @param pPage The page.
2533 * @param pPhysPage Pointer to the aPages entry in the ram range.
2534 */
2535void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2536{
2537 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2538 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2539
2540 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2541 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2542 {
2543 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2544 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2545 do
2546 {
2547 Assert(iPhysExt < pPool->cMaxPhysExts);
2548
2549 /*
2550 * Look for the shadow page and check if it's all freed.
2551 */
2552 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2553 {
2554 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2555 {
2556 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2557
2558 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2559 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2560 {
2561 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2562 return;
2563 }
2564
2565 /* we can free the node. */
2566 PVM pVM = pPool->CTXSUFF(pVM);
2567 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2568 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2569 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2570 {
2571 /* lonely node */
2572 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2573 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2574 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2575 }
2576 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2577 {
2578 /* head */
2579 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2580 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2581 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2582 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2583 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2584 }
2585 else
2586 {
2587 /* in list */
2588 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2589 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2590 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2591 }
2592 iPhysExt = iPhysExtNext;
2593 return;
2594 }
2595 }
2596
2597 /* next */
2598 iPhysExtPrev = iPhysExt;
2599 iPhysExt = paPhysExts[iPhysExt].iNext;
2600 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2601
2602 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2603 }
2604 else /* nothing to do */
2605 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
2606}
2607
2608
2609
2610/**
2611 * Clear references to guest physical memory.
2612 *
2613 * This is the same as pgmPoolTracDerefGCPhys except that the guest physical address
2614 * is assumed to be correct, so the linear search can be skipped and we can assert
2615 * at an earlier point.
2616 *
2617 * @param pPool The pool.
2618 * @param pPage The page.
2619 * @param HCPhys The host physical address corresponding to the guest page.
2620 * @param GCPhys The guest physical address corresponding to HCPhys.
2621 */
2622static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2623{
2624 /*
2625 * Walk range list.
2626 */
2627 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2628 while (pRam)
2629 {
2630 RTGCPHYS off = GCPhys - pRam->GCPhys;
2631 if (off < pRam->cb)
2632 {
2633 /* does it match? */
2634 const unsigned iPage = off >> PAGE_SHIFT;
2635 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2636 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2637 {
2638 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2639 return;
2640 }
2641 break;
2642 }
2643 pRam = CTXSUFF(pRam->pNext);
2644 }
2645 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2646}
2647
2648
2649/**
2650 * Clear references to guest physical memory.
2651 *
2652 * @param pPool The pool.
2653 * @param pPage The page.
2654 * @param HCPhys The host physical address corresponding to the guest page.
2655 * @param GCPhysHint The guest physical address which may corresponding to HCPhys.
2656 */
2657static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2658{
2659 /*
2660 * Walk range list.
2661 */
2662 PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2663 while (pRam)
2664 {
2665 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2666 if (off < pRam->cb)
2667 {
2668 /* does it match? */
2669 const unsigned iPage = off >> PAGE_SHIFT;
2670 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2671 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2672 {
2673 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2674 return;
2675 }
2676 break;
2677 }
2678 pRam = CTXSUFF(pRam->pNext);
2679 }
2680
2681 /*
2682 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2683 */
2684 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2685 pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
2686 while (pRam)
2687 {
2688 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2689 while (iPage-- > 0)
2690 {
2691 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2692 {
2693 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2694 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2695 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2696 return;
2697 }
2698 }
2699 pRam = CTXSUFF(pRam->pNext);
2700 }
2701
2702 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2703}
2704
2705
2706/**
2707 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2708 *
2709 * @param pPool The pool.
2710 * @param pPage The page.
2711 * @param pShwPT The shadow page table (mapping of the page).
2712 * @param pGstPT The guest page table.
2713 */
2714DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2715{
2716 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2717 if (pShwPT->a[i].n.u1Present)
2718 {
2719 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2720 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2721 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2722 if (!--pPage->cPresent)
2723 break;
2724 }
2725}
2726
2727
2728/**
2729 * Clear references to guest physical memory in a PAE / 32-bit page table.
2730 *
2731 * @param pPool The pool.
2732 * @param pPage The page.
2733 * @param pShwPT The shadow page table (mapping of the page).
2734 * @param pGstPT The guest page table (just a half one).
2735 */
2736DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2737{
2738 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2739 if (pShwPT->a[i].n.u1Present)
2740 {
2741 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
2742 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2743 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2744 }
2745}
2746
2747
2748/**
2749 * Clear references to guest physical memory in a PAE / PAE page table.
2750 *
2751 * @param pPool The pool.
2752 * @param pPage The page.
2753 * @param pShwPT The shadow page table (mapping of the page).
2754 * @param pGstPT The guest page table.
2755 */
2756DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2757{
2758 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2759 if (pShwPT->a[i].n.u1Present)
2760 {
2761 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX32 hint=%RX32\n",
2762 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
2763 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2764 }
2765}
2766
2767
2768/**
2769 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2770 *
2771 * @param pPool The pool.
2772 * @param pPage The page.
2773 * @param pShwPT The shadow page table (mapping of the page).
2774 */
2775DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2776{
2777 RTGCPHYS GCPhys = pPage->GCPhys;
2778 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2779 if (pShwPT->a[i].n.u1Present)
2780 {
2781 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2782 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2783 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2784 }
2785}
2786
2787
2788/**
2789 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2790 *
2791 * @param pPool The pool.
2792 * @param pPage The page.
2793 * @param pShwPT The shadow page table (mapping of the page).
2794 */
2795DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2796{
2797 RTGCPHYS GCPhys = pPage->GCPhys;
2798 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2799 if (pShwPT->a[i].n.u1Present)
2800 {
2801 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX32 hint=%RX32\n",
2802 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
2803 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2804 }
2805}
2806#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2807
2808
2809/**
2810 * Clear references to shadowed pages in a PAE page directory.
2811 *
2812 * @param pPool The pool.
2813 * @param pPage The page.
2814 * @param pShwPD The shadow page directory (mapping of the page).
2815 */
2816DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2817{
2818 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2819 {
2820 if (pShwPD->a[i].n.u1Present)
2821 {
2822 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2823 if (pSubPage)
2824 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2825 else
2826 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2827 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2828 }
2829 }
2830}
2831
2832
2833/**
2834 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2835 *
2836 * @param pPool The pool.
2837 * @param pPage The page.
2838 * @param pShwPdPtr The shadow page directory pointer table (mapping of the page).
2839 */
2840DECLINLINE(void) pgmPoolTrackDerefPDPTR64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPTR pShwPdPtr)
2841{
2842 for (unsigned i = 0; i < ELEMENTS(pShwPdPtr->a); i++)
2843 {
2844 if (pShwPdPtr->a[i].n.u1Present)
2845 {
2846 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPdPtr->a[i].u & X86_PDPE_PG_MASK);
2847 if (pSubPage)
2848 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2849 else
2850 AssertFatalMsgFailed(("%RX64\n", pShwPdPtr->a[i].u & X86_PDPE_PG_MASK));
2851 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2852 }
2853 }
2854}
2855
2856
2857/**
2858 * Clears all references made by this page.
2859 *
2860 * This includes other shadow pages and GC physical addresses.
2861 *
2862 * @param pPool The pool.
2863 * @param pPage The page.
2864 */
2865static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2866{
2867 /*
2868 * Map the shadow page and take action according to the page kind.
2869 */
2870 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2871 switch (pPage->enmKind)
2872 {
2873#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2874 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2875 {
2876 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2877 void *pvGst;
2878 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2879 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2880 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2881 break;
2882 }
2883
2884 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2885 {
2886 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2887 void *pvGst;
2888 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2889 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2890 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2891 break;
2892 }
2893
2894 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2895 {
2896 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2897 void *pvGst;
2898 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2899 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
2900 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2901 break;
2902 }
2903
2904 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
2905 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2906 {
2907 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2908 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
2909 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2910 break;
2911 }
2912
2913 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
2914 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2915 {
2916 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2917 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
2918 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2919 break;
2920 }
2921
2922#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2923 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2924 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2925 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2926 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2927 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2928 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2929 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2930 break;
2931#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2932
2933 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2934 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2935 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
2936 break;
2937
2938 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2939 pgmPoolTrackDerefPDPTR64Bit(pPool, pPage, (PX86PDPTR)pvShw);
2940 break;
2941
2942 default:
2943 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
2944 }
2945
2946 /* paranoia, clear the shadow page. Remove this laser (i.e. let Alloc and ClearAll do it). */
2947 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2948 ASMMemZeroPage(pvShw);
2949 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2950 pPage->fZeroed = true;
2951}
2952#endif /* PGMPOOL_WITH_USER_TRACKING */
2953
2954
2955/**
2956 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
2957 *
2958 * @param pPool The pool.
2959 */
2960static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
2961{
2962 /*
2963 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST.
2964 */
2965 Assert(NIL_PGMPOOL_IDX == 0);
2966 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
2967 {
2968 /*
2969 * Get the page address.
2970 */
2971 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2972 union
2973 {
2974 uint64_t *pau64;
2975 uint32_t *pau32;
2976 } u;
2977 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2978
2979 /*
2980 * Mark stuff not present.
2981 */
2982 switch (pPage->enmKind)
2983 {
2984 case PGMPOOLKIND_ROOT_32BIT_PD:
2985 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
2986 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2987 u.pau32[iPage] = 0;
2988 break;
2989
2990 case PGMPOOLKIND_ROOT_PAE_PD:
2991 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * 4; iPage++)
2992 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
2993 u.pau64[iPage] = 0;
2994 break;
2995
2996 case PGMPOOLKIND_ROOT_PML4:
2997 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
2998 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
2999 u.pau64[iPage] = 0;
3000 break;
3001
3002 case PGMPOOLKIND_ROOT_PDPTR:
3003 /* Not root of shadowed pages currently, ignore it. */
3004 break;
3005 }
3006 }
3007
3008 /*
3009 * Paranoia (to be removed), flag a global CR3 sync.
3010 */
3011 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3012}
3013
3014
3015/**
3016 * Flushes the entire cache.
3017 *
3018 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3019 * and execute this CR3 flush.
3020 *
3021 * @param pPool The pool.
3022 */
3023static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3024{
3025 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3026 LogFlow(("pgmPoolFlushAllInt:\n"));
3027
3028 /*
3029 * If there are no pages in the pool, there is nothing to do.
3030 */
3031 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3032 {
3033 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3034 return;
3035 }
3036
3037 /*
3038 * Nuke the free list and reinsert all pages into it.
3039 */
3040 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3041 {
3042 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3043
3044#ifdef IN_RING3
3045 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3046#endif
3047#ifdef PGMPOOL_WITH_MONITORING
3048 if (pPage->fMonitored)
3049 pgmPoolMonitorFlush(pPool, pPage);
3050 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3051 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3052 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3053 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3054 pPage->cModifications = 0;
3055#endif
3056 pPage->GCPhys = NIL_RTGCPHYS;
3057 pPage->enmKind = PGMPOOLKIND_FREE;
3058 Assert(pPage->idx == i);
3059 pPage->iNext = i + 1;
3060 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3061 pPage->fSeenNonGlobal = false;
3062 pPage->fMonitored= false;
3063 pPage->fCached = false;
3064 pPage->fReusedFlushPending = false;
3065 pPage->fCR3Mix = false;
3066#ifdef PGMPOOL_WITH_USER_TRACKING
3067 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3068#endif
3069#ifdef PGMPOOL_WITH_CACHE
3070 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3071 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3072#endif
3073 }
3074 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3075 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3076 pPool->cUsedPages = 0;
3077
3078#ifdef PGMPOOL_WITH_USER_TRACKING
3079 /*
3080 * Zap and reinitialize the user records.
3081 */
3082 pPool->cPresent = 0;
3083 pPool->iUserFreeHead = 0;
3084 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3085 const unsigned cMaxUsers = pPool->cMaxUsers;
3086 for (unsigned i = 0; i < cMaxUsers; i++)
3087 {
3088 paUsers[i].iNext = i + 1;
3089 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3090 paUsers[i].iUserTable = 0xfffe;
3091 }
3092 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3093#endif
3094
3095#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3096 /*
3097 * Clear all the GCPhys links and rebuild the phys ext free list.
3098 */
3099 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
3100 pRam;
3101 pRam = pRam->CTXSUFF(pNext))
3102 {
3103 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3104 while (iPage-- > 0)
3105 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3106 }
3107
3108 pPool->iPhysExtFreeHead = 0;
3109 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3110 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3111 for (unsigned i = 0; i < cMaxPhysExts; i++)
3112 {
3113 paPhysExts[i].iNext = i + 1;
3114 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3115 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3116 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3117 }
3118 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3119#endif
3120
3121#ifdef PGMPOOL_WITH_MONITORING
3122 /*
3123 * Just zap the modified list.
3124 */
3125 pPool->cModifiedPages = 0;
3126 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3127#endif
3128
3129#ifdef PGMPOOL_WITH_CACHE
3130 /*
3131 * Clear the GCPhys hash and the age list.
3132 */
3133 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3134 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3135 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3136 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3137#endif
3138
3139 /*
3140 * Flush all the special root pages.
3141 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3142 */
3143 pgmPoolFlushAllSpecialRoots(pPool);
3144 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3145 {
3146 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3147 pPage->iNext = NIL_PGMPOOL_IDX;
3148#ifdef PGMPOOL_WITH_MONITORING
3149 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3150 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3151 pPage->cModifications = 0;
3152 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3153 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3154 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3155 if (pPage->fMonitored)
3156 {
3157 PVM pVM = pPool->CTXSUFF(pVM);
3158 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3159 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3160 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3161 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
3162 pPool->pszAccessHandler);
3163 AssertFatalRCSuccess(rc);
3164# ifdef PGMPOOL_WITH_CACHE
3165 pgmPoolHashInsert(pPool, pPage);
3166# endif
3167 }
3168#endif
3169#ifdef PGMPOOL_WITH_USER_TRACKING
3170 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3171#endif
3172#ifdef PGMPOOL_WITH_CACHE
3173 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3174 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3175#endif
3176 }
3177
3178 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3179}
3180
3181
3182/**
3183 * Flushes a pool page.
3184 *
3185 * This moves the page to the free list after removing all user references to it.
3186 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3187 *
3188 * @returns VBox status code.
3189 * @retval VINF_SUCCESS on success.
3190 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3191 * @param pPool The pool.
3192 * @param HCPhys The HC physical address of the shadow page.
3193 */
3194int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3195{
3196 int rc = VINF_SUCCESS;
3197 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3198 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3199 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3200
3201 /*
3202 * Quietly reject any attempts at flushing any of the special root pages.
3203 */
3204 if (pPage->idx < PGMPOOL_IDX_FIRST)
3205 {
3206 Log(("pgmPoolFlushPage: specaial root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3207 return VINF_SUCCESS;
3208 }
3209
3210 /*
3211 * Mark the page as being in need of a ASMMemZeroPage().
3212 */
3213 pPage->fZeroed = false;
3214
3215#ifdef PGMPOOL_WITH_USER_TRACKING
3216 /*
3217 * Clear the page.
3218 */
3219 pgmPoolTrackClearPageUsers(pPool, pPage);
3220 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3221 pgmPoolTrackDeref(pPool, pPage);
3222 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3223#endif
3224
3225#ifdef PGMPOOL_WITH_CACHE
3226 /*
3227 * Flush it from the cache.
3228 */
3229 pgmPoolCacheFlushPage(pPool, pPage);
3230#endif /* PGMPOOL_WITH_CACHE */
3231
3232#ifdef PGMPOOL_WITH_MONITORING
3233 /*
3234 * Deregistering the monitoring.
3235 */
3236 if (pPage->fMonitored)
3237 rc = pgmPoolMonitorFlush(pPool, pPage);
3238#endif
3239
3240 /*
3241 * Free the page.
3242 */
3243 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3244 pPage->iNext = pPool->iFreeHead;
3245 pPool->iFreeHead = pPage->idx;
3246 pPage->enmKind = PGMPOOLKIND_FREE;
3247 pPage->GCPhys = NIL_RTGCPHYS;
3248 pPage->fReusedFlushPending = false;
3249
3250 pPool->cUsedPages--;
3251 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3252 return rc;
3253}
3254
3255
3256/**
3257 * Frees a usage of a pool page.
3258 *
3259 * The caller is responsible to updating the user table so that it no longer
3260 * references the shadow page.
3261 *
3262 * @param pPool The pool.
3263 * @param HCPhys The HC physical address of the shadow page.
3264 * @param iUser The shadow page pool index of the user table.
3265 * @param iUserTable The index into the user table (shadowed).
3266 */
3267void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
3268{
3269 STAM_PROFILE_START(&pPool->StatFree, a);
3270 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3271 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3272 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3273#ifdef PGMPOOL_WITH_USER_TRACKING
3274 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3275#endif
3276#ifdef PGMPOOL_WITH_CACHE
3277 if (!pPage->fCached)
3278#endif
3279 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3280 STAM_PROFILE_STOP(&pPool->StatFree, a);
3281}
3282
3283
3284/**
3285 * Makes one or more free page free.
3286 *
3287 * @returns VBox status code.
3288 * @retval VINF_SUCCESS on success.
3289 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3290 *
3291 * @param pPool The pool.
3292 * @param iUser The user of the page.
3293 */
3294static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3295{
3296 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3297
3298 /*
3299 * If the pool isn't full grown yet, expand it.
3300 */
3301 if (pPool->cCurPages < pPool->cMaxPages)
3302 {
3303 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3304#ifdef IN_RING3
3305 int rc = PGMR3PoolGrow(pPool->pVMHC);
3306#else
3307 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3308#endif
3309 if (VBOX_FAILURE(rc))
3310 return rc;
3311 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3312 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3313 return VINF_SUCCESS;
3314 }
3315
3316#ifdef PGMPOOL_WITH_CACHE
3317 /*
3318 * Free one cached page.
3319 */
3320 return pgmPoolCacheFreeOne(pPool, iUser);
3321#else
3322 /*
3323 * Flush the pool.
3324 * If we have tracking enabled, it should be possible to come up with
3325 * a cheap replacement strategy...
3326 */
3327 pgmPoolFlushAllInt(pPool);
3328 return VERR_PGM_POOL_FLUSHED;
3329#endif
3330}
3331
3332
3333/**
3334 * Allocates a page from the pool.
3335 *
3336 * This page may actually be a cached page and not in need of any processing
3337 * on the callers part.
3338 *
3339 * @returns VBox status code.
3340 * @retval VINF_SUCCESS if a NEW page was allocated.
3341 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3342 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3343 * @param pVM The VM handle.
3344 * @param GCPhys The GC physical address of the page we're gonna shadow.
3345 * For 4MB and 2MB PD entries, it's the first address the
3346 * shadow PT is covering.
3347 * @param enmKind The kind of mapping.
3348 * @param iUser The shadow page pool index of the user table.
3349 * @param iUserTable The index into the user table (shadowed).
3350 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3351 */
3352int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
3353{
3354 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3355 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3356 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3357
3358 *ppPage = NULL;
3359
3360#ifdef PGMPOOL_WITH_CACHE
3361 if (pPool->fCacheEnabled)
3362 {
3363 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3364 if (VBOX_SUCCESS(rc2))
3365 {
3366 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3367 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3368 return rc2;
3369 }
3370 }
3371#endif
3372
3373 /*
3374 * Allocate a new one.
3375 */
3376 int rc = VINF_SUCCESS;
3377 uint16_t iNew = pPool->iFreeHead;
3378 if (iNew == NIL_PGMPOOL_IDX)
3379 {
3380 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3381 if (VBOX_FAILURE(rc))
3382 {
3383 if (rc != VERR_PGM_POOL_CLEARED)
3384 {
3385 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3386 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3387 return rc;
3388 }
3389 rc = VERR_PGM_POOL_FLUSHED;
3390 }
3391 iNew = pPool->iFreeHead;
3392 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3393 }
3394
3395 /* unlink the free head */
3396 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3397 pPool->iFreeHead = pPage->iNext;
3398 pPage->iNext = NIL_PGMPOOL_IDX;
3399
3400 /*
3401 * Initialize it.
3402 */
3403 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3404 pPage->enmKind = enmKind;
3405 pPage->GCPhys = GCPhys;
3406 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3407 pPage->fMonitored = false;
3408 pPage->fCached = false;
3409 pPage->fReusedFlushPending = false;
3410 pPage->fCR3Mix = false;
3411#ifdef PGMPOOL_WITH_MONITORING
3412 pPage->cModifications = 0;
3413 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3414 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3415#endif
3416#ifdef PGMPOOL_WITH_USER_TRACKING
3417 pPage->cPresent = 0;
3418 pPage->iFirstPresent = ~0;
3419
3420 /*
3421 * Insert into the tracking and cache. If this fails, free the page.
3422 */
3423 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3424 if (VBOX_FAILURE(rc3))
3425 {
3426 if (rc3 != VERR_PGM_POOL_CLEARED)
3427 {
3428 pPool->cUsedPages--;
3429 pPage->enmKind = PGMPOOLKIND_FREE;
3430 pPage->GCPhys = NIL_RTGCPHYS;
3431 pPage->iNext = pPool->iFreeHead;
3432 pPool->iFreeHead = pPage->idx;
3433 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3434 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3435 return rc3;
3436 }
3437 rc = VERR_PGM_POOL_FLUSHED;
3438 }
3439#endif /* PGMPOOL_WITH_USER_TRACKING */
3440
3441 /*
3442 * Commit the allocation, clear the page and return.
3443 */
3444#ifdef VBOX_WITH_STATISTICS
3445 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3446 pPool->cUsedPagesHigh = pPool->cUsedPages;
3447#endif
3448
3449 if (!pPage->fZeroed)
3450 {
3451 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3452 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3453 ASMMemZeroPage(pv);
3454 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3455 }
3456
3457 *ppPage = pPage;
3458 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3459 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3460 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3461 return rc;
3462}
3463
3464
3465/**
3466 * Frees a usage of a pool page.
3467 *
3468 * @param pVM The VM handle.
3469 * @param HCPhys The HC physical address of the shadow page.
3470 * @param iUser The shadow page pool index of the user table.
3471 * @param iUserTable The index into the user table (shadowed).
3472 */
3473void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3474{
3475 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3476 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3477 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
3478}
3479
3480
3481/**
3482 * Gets a in-use page in the pool by it's physical address.
3483 *
3484 * @returns Pointer to the page.
3485 * @param pVM The VM handle.
3486 * @param HCPhys The HC physical address of the shadow page.
3487 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3488 */
3489PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3490{
3491 /** @todo profile this! */
3492 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3493 PPGMPOOLPAGE pPage = pgmPoolGetPage(pPool, HCPhys);
3494 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3495 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3496 return pPage;
3497}
3498
3499
3500/**
3501 * Flushes the entire cache.
3502 *
3503 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3504 * and execute this CR3 flush.
3505 *
3506 * @param pPool The pool.
3507 */
3508void pgmPoolFlushAll(PVM pVM)
3509{
3510 LogFlow(("pgmPoolFlushAll:\n"));
3511 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3512}
3513
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette