VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp@26112

Last change on this file since 26112 was 26066, checked in by vboxsync, 15 years ago

Guest SMP: force all VCPUs to go back to ring 3 when a pgm pool flush is pending. Not doing so might cause trouble on a loaded host.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 187.4 KB
 
1/* $Id: PGMAllPool.cpp 26066 2010-01-27 12:59:32Z vboxsync $ */
2/** @file
3 * PGM Shadow Page Pool.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PGM_POOL
27#include <VBox/pgm.h>
28#include <VBox/mm.h>
29#include <VBox/em.h>
30#include <VBox/cpum.h>
31#ifdef IN_RC
32# include <VBox/patm.h>
33#endif
34#include "PGMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/disopcode.h>
37#include <VBox/hwacc_vmx.h>
38
39#include <VBox/log.h>
40#include <VBox/err.h>
41#include <iprt/asm.h>
42#include <iprt/string.h>
43
44
45/*******************************************************************************
46* Internal Functions *
47*******************************************************************************/
48RT_C_DECLS_BEGIN
49static void pgmPoolFlushAllInt(PPGMPOOL pPool);
50DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
51DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
52static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
53static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
54static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
55#ifndef IN_RING3
56DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
57#endif
58#ifdef LOG_ENABLED
59static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
60#endif
61#if defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)
62static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT);
63#endif
64
65int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage);
66PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt);
67void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt);
68void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt);
69
70RT_C_DECLS_END
71
72
73/**
74 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
75 *
76 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
77 * @param enmKind The page kind.
78 */
79DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
80{
81 switch (enmKind)
82 {
83 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
84 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
85 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
86 return true;
87 default:
88 return false;
89 }
90}
91
92/** @def PGMPOOL_PAGE_2_LOCKED_PTR
93 * Maps a pool page into the current context and locks it (RC only).
94 *
95 * @returns VBox status code.
96 * @param pVM The VM handle.
97 * @param pPage The pool page.
98 *
99 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
100 * small page window employed by that function. Be careful.
101 * @remark There is no need to assert on the result.
102 */
103#if defined(IN_RC)
104DECLINLINE(void *) PGMPOOL_PAGE_2_LOCKED_PTR(PVM pVM, PPGMPOOLPAGE pPage)
105{
106 void *pv = pgmPoolMapPageInlined(&pVM->pgm.s, pPage);
107
108 /* Make sure the dynamic mapping will not be reused. */
109 if (pv)
110 PGMDynLockHCPage(pVM, (uint8_t *)pv);
111
112 return pv;
113}
114#else
115# define PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage) PGMPOOL_PAGE_2_PTR(pVM, pPage)
116#endif
117
118/** @def PGMPOOL_UNLOCK_PTR
119 * Unlocks a previously locked dynamic page mapping (RC only).
120 *
121 * @returns VBox status code.
122 * @param pVM The VM handle.
123 * @param pPage The pool page.
124 *
125 * @remark In RC this uses PGMGCDynMapHCPage(), so it will consume part of the
126 * small page window employed by that function. Be careful.
127 * @remark There is no need to assert on the result.
128 */
129#if defined(IN_RC)
130DECLINLINE(void) PGMPOOL_UNLOCK_PTR(PVM pVM, void *pvPage)
131{
132 if (pvPage)
133 PGMDynUnlockHCPage(pVM, (uint8_t *)pvPage);
134}
135#else
136# define PGMPOOL_UNLOCK_PTR(pVM, pPage) do {} while (0)
137#endif
138
139
140/**
141 * Flushes a chain of pages sharing the same access monitor.
142 *
143 * @returns VBox status code suitable for scheduling.
144 * @param pPool The pool.
145 * @param pPage A page in the chain.
146 */
147int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
148{
149 LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));
150
151 /*
152 * Find the list head.
153 */
154 uint16_t idx = pPage->idx;
155 if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
156 {
157 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
158 {
159 idx = pPage->iMonitoredPrev;
160 Assert(idx != pPage->idx);
161 pPage = &pPool->aPages[idx];
162 }
163 }
164
165 /*
166 * Iterate the list flushing each shadow page.
167 */
168 int rc = VINF_SUCCESS;
169 for (;;)
170 {
171 idx = pPage->iMonitoredNext;
172 Assert(idx != pPage->idx);
173 if (pPage->idx >= PGMPOOL_IDX_FIRST)
174 {
175 int rc2 = pgmPoolFlushPage(pPool, pPage);
176 AssertRC(rc2);
177 }
178 /* next */
179 if (idx == NIL_PGMPOOL_IDX)
180 break;
181 pPage = &pPool->aPages[idx];
182 }
183 return rc;
184}
185
186
187/**
188 * Wrapper for reading the guest entry being modified, using whichever source suits the current context.
189 *
190 * @returns VBox status code suitable for scheduling.
191 * @param pVM VM Handle.
192 * @param pvDst Destination address
193 * @param pvSrc Source guest virtual address.
194 * @param GCPhysSrc The source guest physical address.
195 * @param cb Size of data to read
196 */
197DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVM pVM, void *pvDst, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
198{
199#if defined(IN_RING3)
200 memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
201 return VINF_SUCCESS;
202#else
203 /* @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
204 return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
205#endif
206}
207
208/**
209 * Process shadow entries before they are changed by the guest.
210 *
211 * For PT entries we will clear them. For PD entries, we'll simply check
212 * for mapping conflicts and set the SyncCR3 FF if found.
213 *
214 * @param pVCpu VMCPU handle
215 * @param pPool The pool.
216 * @param pPage The head page.
217 * @param GCPhysFault The guest physical fault address.
218 * @param pvAddress In R0 and GC this is the guest context fault address (flat).
219 * In R3 this is the host context 'fault' address.
220 * @param cbWrite Write size; might be zero if the caller knows we're not crossing entry boundaries.
221 */
222void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, CTXTYPE(RTGCPTR, RTHCPTR, RTGCPTR) pvAddress, unsigned cbWrite)
223{
224 AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%#x (idx=%#x)\n", pPage->iMonitoredPrev, pPage->idx));
225 const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
226 PVM pVM = pPool->CTX_SUFF(pVM);
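    /* Note: CTX_SUFF(pVM) picks the VM pointer member that is valid in the current
     * compilation context (pVMR3, pVMR0 or pVMRC). */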
227
228 LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n", (RTGCPTR)pvAddress, GCPhysFault, cbWrite));
229
230 for (;;)
231 {
232 union
233 {
234 void *pv;
235 PX86PT pPT;
236 PX86PTPAE pPTPae;
237 PX86PD pPD;
238 PX86PDPAE pPDPae;
239 PX86PDPT pPDPT;
240 PX86PML4 pPML4;
241 } uShw;
242
243 LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s\n", pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));
244
245 uShw.pv = NULL;
246 switch (pPage->enmKind)
247 {
248 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
249 {
250 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
251 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
252 const unsigned iShw = off / sizeof(X86PTE);
253 LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
254 if (uShw.pPT->a[iShw].n.u1Present)
255 {
256 X86PTE GstPte;
257
258 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
259 AssertRC(rc);
260 Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
261 pgmPoolTracDerefGCPhysHint(pPool, pPage,
262 uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
263 GstPte.u & X86_PTE_PG_MASK);
264 ASMAtomicWriteSize(&uShw.pPT->a[iShw], 0);
265 }
266 break;
267 }
268
269 /* page/2 sized */
270 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
271 {
272 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
273 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
274 if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
275 {
276 const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
277 LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
278 if (uShw.pPTPae->a[iShw].n.u1Present)
279 {
280 X86PTE GstPte;
281 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
282 AssertRC(rc);
283
284 Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
285 pgmPoolTracDerefGCPhysHint(pPool, pPage,
286 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
287 GstPte.u & X86_PTE_PG_MASK);
288 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw], 0);
289 }
290 }
291 break;
292 }
293
294 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
295 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
296 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
297 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
298 {
299 unsigned iGst = off / sizeof(X86PDE);
300 unsigned iShwPdpt = iGst / 256;
301 unsigned iShw = (iGst % 256) * 2;
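                /* Note: a 32-bit guest PD has 1024 four-byte entries and is shadowed by four PAE PDs
                 * of 512 entries each; every 4 MB guest PDE maps to two 2 MB PAE PDEs. Hence the
                 * division by 256 to pick the shadow PD and the *2 to pick the first PAE entry. */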
302 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
303
304 LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
305 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
306 if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
307 {
308 for (unsigned i = 0; i < 2; i++)
309 {
310# ifndef IN_RING0
311 if ((uShw.pPDPae->a[iShw + i].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
312 {
313 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
314 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
315 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw=%#x!\n", iShwPdpt, iShw+i));
316 break;
317 }
318 else
319# endif /* !IN_RING0 */
320 if (uShw.pPDPae->a[iShw+i].n.u1Present)
321 {
322 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw+i, uShw.pPDPae->a[iShw+i].u));
323 pgmPoolFree(pVM,
324 uShw.pPDPae->a[iShw+i].u & X86_PDE_PAE_PG_MASK,
325 pPage->idx,
326 iShw + i);
327 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw+i], 0);
328 }
329
330 /* paranoia / a bit assumptive. */
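                        /* An unaligned guest write that crosses the 4-byte PDE boundary also touches
                         * the next guest PDE, i.e. shadow entries iShw + 2 and iShw + 3. */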
331 if ( (off & 3)
332 && (off & 3) + cbWrite > 4)
333 {
334 const unsigned iShw2 = iShw + 2 + i;
335 if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
336 {
337# ifndef IN_RING0
338 if ((uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
339 {
340 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
341 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
342 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShwPdpt=%#x iShw2=%#x!\n", iShwPdpt, iShw2));
343 break;
344 }
345 else
346# endif /* !IN_RING0 */
347 if (uShw.pPDPae->a[iShw2].n.u1Present)
348 {
349 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
350 pgmPoolFree(pVM,
351 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
352 pPage->idx,
353 iShw2);
354 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
355 }
356 }
357 }
358 }
359 }
360 break;
361 }
362
363 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
364 {
365 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
366 const unsigned iShw = off / sizeof(X86PTEPAE);
367 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
368 if (uShw.pPTPae->a[iShw].n.u1Present)
369 {
370 X86PTEPAE GstPte;
371 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
372 AssertRC(rc);
373
374 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
375 pgmPoolTracDerefGCPhysHint(pPool, pPage,
376 uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
377 GstPte.u & X86_PTE_PAE_PG_MASK);
378 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw].u, 0);
379 }
380
381 /* paranoia / a bit assumptive. */
382 if ( (off & 7)
383 && (off & 7) + cbWrite > sizeof(X86PTEPAE))
384 {
385 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
386 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));
387
388 if (uShw.pPTPae->a[iShw2].n.u1Present)
389 {
390 X86PTEPAE GstPte;
391# ifdef IN_RING3
392 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, (RTHCPTR)((RTHCUINTPTR)pvAddress + sizeof(GstPte)), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
393# else
394 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress + sizeof(GstPte), GCPhysFault + sizeof(GstPte), sizeof(GstPte));
395# endif
396 AssertRC(rc);
397 Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PAE_PG_MASK));
398 pgmPoolTracDerefGCPhysHint(pPool, pPage,
399 uShw.pPTPae->a[iShw2].u & X86_PTE_PAE_PG_MASK,
400 GstPte.u & X86_PTE_PAE_PG_MASK);
401 ASMAtomicWriteSize(&uShw.pPTPae->a[iShw2].u ,0);
402 }
403 }
404 break;
405 }
406
407 case PGMPOOLKIND_32BIT_PD:
408 {
409 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
410 const unsigned iShw = off / sizeof(X86PTE); // ASSUMING 32-bit guest paging!
411
412 LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
413 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
414# ifndef IN_RING0
415 if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
416 {
417 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
418 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
419 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
420 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
421 break;
422 }
423# endif /* !IN_RING0 */
424# ifndef IN_RING0
425 else
426# endif /* !IN_RING0 */
427 {
428 if (uShw.pPD->a[iShw].n.u1Present)
429 {
430 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
431 pgmPoolFree(pVM,
432 uShw.pPD->a[iShw].u & X86_PDE_PAE_PG_MASK,
433 pPage->idx,
434 iShw);
435 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
436 }
437 }
438 /* paranoia / a bit assumptive. */
439 if ( (off & 3)
440 && (off & 3) + cbWrite > sizeof(X86PTE))
441 {
442 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
443 if ( iShw2 != iShw
444 && iShw2 < RT_ELEMENTS(uShw.pPD->a))
445 {
446# ifndef IN_RING0
447 if (uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
448 {
449 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
450 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
451 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
452 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
453 break;
454 }
455# endif /* !IN_RING0 */
456# ifndef IN_RING0
457 else
458# endif /* !IN_RING0 */
459 {
460 if (uShw.pPD->a[iShw2].n.u1Present)
461 {
462 LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPD->a[iShw2].u));
463 pgmPoolFree(pVM,
464 uShw.pPD->a[iShw2].u & X86_PDE_PAE_PG_MASK,
465 pPage->idx,
466 iShw2);
467 ASMAtomicWriteSize(&uShw.pPD->a[iShw2].u, 0);
468 }
469 }
470 }
471 }
472#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
473 if ( uShw.pPD->a[iShw].n.u1Present
474 && !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
475 {
476 LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
477# ifdef IN_RC /* TLB load - we're pushing things a bit... */
478 ASMProbeReadByte(pvAddress);
479# endif
480 pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
481 ASMAtomicWriteSize(&uShw.pPD->a[iShw].u, 0);
482 }
483#endif
484 break;
485 }
486
487 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
488 {
489 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
490 const unsigned iShw = off / sizeof(X86PDEPAE);
491 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
492#ifndef IN_RING0
493 if (uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING)
494 {
495 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
496 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
497 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
498 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
499 break;
500 }
501#endif /* !IN_RING0 */
502 /*
503 * Causes trouble when the guest uses a PDE to refer to the whole page table level
504 * structure. (Invalidate here; faults later on when it tries to change the page
505 * table entries -> recheck; probably only applies to the RC case.)
506 */
507# ifndef IN_RING0
508 else
509# endif /* !IN_RING0 */
510 {
511 if (uShw.pPDPae->a[iShw].n.u1Present)
512 {
513 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
514 pgmPoolFree(pVM,
515 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
516 pPage->idx,
517 iShw);
518 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
519 }
520 }
521 /* paranoia / a bit assumptive. */
522 if ( (off & 7)
523 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
524 {
525 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
526 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
527
528#ifndef IN_RING0
529 if ( iShw2 != iShw
530 && uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING)
531 {
532 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
533 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
534 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
535 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
536 break;
537 }
538#endif /* !IN_RING0 */
539# ifndef IN_RING0
540 else
541# endif /* !IN_RING0 */
542 if (uShw.pPDPae->a[iShw2].n.u1Present)
543 {
544 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
545 pgmPoolFree(pVM,
546 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
547 pPage->idx,
548 iShw2);
549 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
550 }
551 }
552 break;
553 }
554
555 case PGMPOOLKIND_PAE_PDPT:
556 {
557 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
558 /*
559 * Hopefully this doesn't happen very often:
560 * - touching unused parts of the page
561 * - messing with the bits of pd pointers without changing the physical address
562 */
563 /* PDPT roots are not page aligned; 32 byte only! */
564 const unsigned offPdpt = GCPhysFault - pPage->GCPhys;
565
566 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
567 const unsigned iShw = offPdpt / sizeof(X86PDPE);
568 if (iShw < X86_PG_PAE_PDPE_ENTRIES) /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
569 {
570# ifndef IN_RING0
571 if (uShw.pPDPT->a[iShw].u & PGM_PLXFLAGS_MAPPING)
572 {
573 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
574 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
575 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
576 LogFlow(("pgmPoolMonitorChainChanging: Detected pdpt conflict at iShw=%#x!\n", iShw));
577 break;
578 }
579# endif /* !IN_RING0 */
580# ifndef IN_RING0
581 else
582# endif /* !IN_RING0 */
583 if (uShw.pPDPT->a[iShw].n.u1Present)
584 {
585 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
586 pgmPoolFree(pVM,
587 uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK,
588 pPage->idx,
589 iShw);
590 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
591 }
592
593 /* paranoia / a bit assumptive. */
594 if ( (offPdpt & 7)
595 && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
596 {
597 const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
598 if ( iShw2 != iShw
599 && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
600 {
601# ifndef IN_RING0
602 if (uShw.pPDPT->a[iShw2].u & PGM_PLXFLAGS_MAPPING)
603 {
604 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
605 STAM_COUNTER_INC(&(pVCpu->pgm.s.StatRZGuestCR3WriteConflict));
606 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
607 LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
608 break;
609 }
610# endif /* !IN_RING0 */
611# ifndef IN_RING0
612 else
613# endif /* !IN_RING0 */
614 if (uShw.pPDPT->a[iShw2].n.u1Present)
615 {
616 LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
617 pgmPoolFree(pVM,
618 uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK,
619 pPage->idx,
620 iShw2);
621 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
622 }
623 }
624 }
625 }
626 break;
627 }
628
629#ifndef IN_RC
630 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
631 {
632 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
633 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
634 const unsigned iShw = off / sizeof(X86PDEPAE);
635 Assert(!(uShw.pPDPae->a[iShw].u & PGM_PDFLAGS_MAPPING));
636 if (uShw.pPDPae->a[iShw].n.u1Present)
637 {
638 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
639 pgmPoolFree(pVM,
640 uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK,
641 pPage->idx,
642 iShw);
643 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw].u, 0);
644 }
645 /* paranoia / a bit assumptive. */
646 if ( (off & 7)
647 && (off & 7) + cbWrite > sizeof(X86PDEPAE))
648 {
649 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
650 AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
651
652 Assert(!(uShw.pPDPae->a[iShw2].u & PGM_PDFLAGS_MAPPING));
653 if (uShw.pPDPae->a[iShw2].n.u1Present)
654 {
655 LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPae->a[iShw2].u));
656 pgmPoolFree(pVM,
657 uShw.pPDPae->a[iShw2].u & X86_PDE_PAE_PG_MASK,
658 pPage->idx,
659 iShw2);
660 ASMAtomicWriteSize(&uShw.pPDPae->a[iShw2].u, 0);
661 }
662 }
663 break;
664 }
665
666 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
667 {
668 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
669 /*
670 * Hopefully this doesn't happen very often:
671 * - messing with the bits of pd pointers without changing the physical address
672 */
673 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
674 const unsigned iShw = off / sizeof(X86PDPE);
675 if (uShw.pPDPT->a[iShw].n.u1Present)
676 {
677 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
678 pgmPoolFree(pVM, uShw.pPDPT->a[iShw].u & X86_PDPE_PG_MASK, pPage->idx, iShw);
679 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw].u, 0);
680 }
681 /* paranoia / a bit assumptive. */
682 if ( (off & 7)
683 && (off & 7) + cbWrite > sizeof(X86PDPE))
684 {
685 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
686 if (uShw.pPDPT->a[iShw2].n.u1Present)
687 {
688 LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
689 pgmPoolFree(pVM, uShw.pPDPT->a[iShw2].u & X86_PDPE_PG_MASK, pPage->idx, iShw2);
690 ASMAtomicWriteSize(&uShw.pPDPT->a[iShw2].u, 0);
691 }
692 }
693 break;
694 }
695
696 case PGMPOOLKIND_64BIT_PML4:
697 {
698 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
699 /*
700 * Hopefully this doesn't happen very often:
701 * - messing with the bits of pd pointers without changing the physical address
702 */
703 uShw.pv = PGMPOOL_PAGE_2_LOCKED_PTR(pVM, pPage);
704 const unsigned iShw = off / sizeof(X86PDPE);
705 if (uShw.pPML4->a[iShw].n.u1Present)
706 {
707 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPML4->a[iShw].u));
708 pgmPoolFree(pVM, uShw.pPML4->a[iShw].u & X86_PML4E_PG_MASK, pPage->idx, iShw);
709 ASMAtomicWriteSize(&uShw.pPML4->a[iShw].u, 0);
710 }
711 /* paranoia / a bit assumptive. */
712 if ( (off & 7)
713 && (off & 7) + cbWrite > sizeof(X86PDPE))
714 {
715 const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
716 if (uShw.pPML4->a[iShw2].n.u1Present)
717 {
718 LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPML4->a[iShw2].u));
719 pgmPoolFree(pVM, uShw.pPML4->a[iShw2].u & X86_PML4E_PG_MASK, pPage->idx, iShw2);
720 ASMAtomicWriteSize(&uShw.pPML4->a[iShw2].u, 0);
721 }
722 }
723 break;
724 }
725#endif /* !IN_RC */
726
727 default:
728 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
729 }
730 PGMPOOL_UNLOCK_PTR(pVM, uShw.pv);
731
732 /* next */
733 if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
734 return;
735 pPage = &pPool->aPages[pPage->iMonitoredNext];
736 }
737}
738
739# ifndef IN_RING3
740/**
741 * Checks if an access could be a fork operation in progress.
742 *
743 * Meaning that the guest is setting up the parent process for Copy-On-Write.
744 *
745 * @returns true if it's likely that we're forking, otherwise false.
746 * @param pPool The pool.
747 * @param pDis The disassembled instruction.
748 * @param offFault The access offset.
749 */
750DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
751{
752 /*
753 * i386 linux is using btr to clear X86_PTE_RW.
754 * The functions involved are (2.6.16 source inspection):
755 * clear_bit
756 * ptep_set_wrprotect
757 * copy_one_pte
758 * copy_pte_range
759 * copy_pmd_range
760 * copy_pud_range
761 * copy_page_range
762 * dup_mmap
763 * dup_mm
764 * copy_mm
765 * copy_process
766 * do_fork
767 */
768 if ( pDis->pCurInstr->opcode == OP_BTR
769 && !(offFault & 4)
770 /** @todo Validate that the bit index is X86_PTE_RW. */
771 )
772 {
773 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,Fork));
774 return true;
775 }
776 return false;
777}
778
779
780/**
781 * Determine whether the page is likely to have been reused.
782 *
783 * @returns true if we consider the page as being reused for a different purpose.
784 * @returns false if we consider it to still be a paging page.
785 * @param pVM VM Handle.
786 * @param pVCpu VMCPU Handle.
787 * @param pRegFrame Trap register frame.
788 * @param pDis The disassembly info for the faulting instruction.
789 * @param pvFault The fault address.
790 *
791 * @remark The REP prefix check is left to the caller because of STOSD/W.
792 */
793DECLINLINE(bool) pgmPoolMonitorIsReused(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault)
794{
795#ifndef IN_RC
796 /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
797 if ( HWACCMHasPendingIrq(pVM)
798 && (pRegFrame->rsp - pvFault) < 32)
799 {
800 /* Fault caused by stack writes while trying to inject an interrupt event. */
801 Log(("pgmPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
802 return true;
803 }
804#else
805 NOREF(pVM); NOREF(pvFault);
806#endif
807
808 LogFlow(("Reused instr %RGv %d at %RGv param1.flags=%x param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->opcode, pvFault, pDis->param1.flags, pDis->param1.base.reg_gen));
809
810 /* Non-supervisor mode write means it's used for something else. */
811 if (CPUMGetGuestCPL(pVCpu, pRegFrame) != 0)
812 return true;
813
814 switch (pDis->pCurInstr->opcode)
815 {
816 /* call implies the actual push of the return address faulted */
817 case OP_CALL:
818 Log4(("pgmPoolMonitorIsReused: CALL\n"));
819 return true;
820 case OP_PUSH:
821 Log4(("pgmPoolMonitorIsReused: PUSH\n"));
822 return true;
823 case OP_PUSHF:
824 Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
825 return true;
826 case OP_PUSHA:
827 Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
828 return true;
829 case OP_FXSAVE:
830 Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
831 return true;
832 case OP_MOVNTI: /* solaris - block_zero_no_xmm */
833 Log4(("pgmPoolMonitorIsReused: MOVNTI\n"));
834 return true;
835 case OP_MOVNTDQ: /* solaris - hwblkclr & hwblkpagecopy */
836 Log4(("pgmPoolMonitorIsReused: MOVNTDQ\n"));
837 return true;
838 case OP_MOVSWD:
839 case OP_STOSWD:
840 if ( pDis->prefix == (PREFIX_REP|PREFIX_REX)
841 && pRegFrame->rcx >= 0x40
842 )
843 {
844 Assert(pDis->mode == CPUMODE_64BIT);
845
846 Log(("pgmPoolMonitorIsReused: OP_STOSQ\n"));
847 return true;
848 }
849 return false;
850 }
851 if ( ( (pDis->param1.flags & USE_REG_GEN32)
852 || (pDis->param1.flags & USE_REG_GEN64))
853 && (pDis->param1.base.reg_gen == USE_REG_ESP))
854 {
855 Log4(("pgmPoolMonitorIsReused: ESP\n"));
856 return true;
857 }
858
859 return false;
860}
861
862/**
863 * Flushes the page being accessed.
864 *
865 * @returns VBox status code suitable for scheduling.
866 * @param pVM The VM handle.
867 * @param pVCpu The VMCPU handle.
868 * @param pPool The pool.
869 * @param pPage The pool page (head).
870 * @param pDis The disassembly of the write instruction.
871 * @param pRegFrame The trap register frame.
872 * @param GCPhysFault The fault address as guest physical address.
873 * @param pvFault The fault address.
874 */
875static int pgmPoolAccessHandlerFlush(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
876 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
877{
878 /*
879 * First, do the flushing.
880 */
881 int rc = pgmPoolMonitorChainFlush(pPool, pPage);
882
883 /*
884 * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection). Must do this in raw mode (!); XP boot will fail otherwise
885 */
886 uint32_t cbWritten;
887 int rc2 = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cbWritten);
888 if (RT_SUCCESS(rc2))
889 pRegFrame->rip += pDis->opsize;
890 else if (rc2 == VERR_EM_INTERPRETER)
891 {
892#ifdef IN_RC
893 if (PATMIsPatchGCAddr(pVM, (RTRCPTR)pRegFrame->eip))
894 {
895 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
896 pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
897 rc = VINF_SUCCESS;
898 STAM_COUNTER_INC(&pPool->StatMonitorRZIntrFailPatch2);
899 }
900 else
901#endif
902 {
903 rc = VINF_EM_RAW_EMULATE_INSTR;
904 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
905 }
906 }
907 else
908 rc = rc2;
909
910 LogFlow(("pgmPoolAccessHandlerPT: returns %Rrc (flushed)\n", rc));
911 return rc;
912}
913
914/**
915 * Handles the STOSD write accesses.
916 *
917 * @returns VBox status code suitable for scheduling.
918 * @param pVM The VM handle.
919 * @param pPool The pool.
920 * @param pPage The pool page (head).
921 * @param pDis The disassembly of the write instruction.
922 * @param pRegFrame The trap register frame.
923 * @param GCPhysFault The fault address as guest physical address.
924 * @param pvFault The fault address.
925 */
926DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
927 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
928{
929 unsigned uIncrement = pDis->param1.size;
930
931 Assert(pDis->mode == CPUMODE_32BIT || pDis->mode == CPUMODE_64BIT);
932 Assert(pRegFrame->rcx <= 0x20);
933
934#ifdef VBOX_STRICT
935 if (pDis->opmode == CPUMODE_32BIT)
936 Assert(uIncrement == 4);
937 else
938 Assert(uIncrement == 8);
939#endif
940
941 Log3(("pgmPoolAccessHandlerSTOSD\n"));
942
943 /*
944 * Increment the modification counter and insert it into the list
945 * of modified pages the first time.
946 */
947 if (!pPage->cModifications++)
948 pgmPoolMonitorModifiedInsert(pPool, pPage);
949
950 /*
951 * Execute REP STOSD.
952 *
953 * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
954 * write situation, meaning that it's safe to write here.
955 */
956 PVMCPU pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
957 RTGCUINTPTR pu32 = (RTGCUINTPTR)pvFault;
958 while (pRegFrame->rcx)
959 {
960#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
961 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
962 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
963 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
964#else
965 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, (RTGCPTR)pu32, uIncrement);
966#endif
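        /* In RC the faulting guest linear address can be written directly; in R0/R3 the
         * write goes through the guest physical address via PGMPhysSimpleWriteGCPhys. */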
967#ifdef IN_RC
968 *(uint32_t *)pu32 = pRegFrame->eax;
969#else
970 PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
971#endif
972 pu32 += uIncrement;
973 GCPhysFault += uIncrement;
974 pRegFrame->rdi += uIncrement;
975 pRegFrame->rcx--;
976 }
977 pRegFrame->rip += pDis->opsize;
978
979 LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
980 return VINF_SUCCESS;
981}
982
983
984/**
985 * Handles the simple write accesses.
986 *
987 * @returns VBox status code suitable for scheduling.
988 * @param pVM The VM handle.
989 * @param pVCpu The VMCPU handle.
990 * @param pPool The pool.
991 * @param pPage The pool page (head).
992 * @param pDis The disassembly of the write instruction.
993 * @param pRegFrame The trap register frame.
994 * @param GCPhysFault The fault address as guest physical address.
995 * @param pvFault The fault address.
996 * @param pfReused Reused state (out)
997 */
998DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
999 PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
1000{
1001 Log3(("pgmPoolAccessHandlerSimple\n"));
1002 /*
1003 * Increment the modification counter and insert it into the list
1004 * of modified pages the first time.
1005 */
1006 if (!pPage->cModifications++)
1007 pgmPoolMonitorModifiedInsert(pPool, pPage);
1008
1009 /*
1010 * Clear all the pages. ASSUMES that pvFault is readable.
1011 */
1012#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
1013 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
1014 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1015 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
1016#else
1017 pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, pvFault, DISGetParamSize(pDis, &pDis->param1));
1018#endif
1019
1020 /*
1021 * Interpret the instruction.
1022 */
1023 uint32_t cb;
1024 int rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, pRegFrame, pvFault, &cb);
1025 if (RT_SUCCESS(rc))
1026 pRegFrame->rip += pDis->opsize;
1027 else if (rc == VERR_EM_INTERPRETER)
1028 {
1029 LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for %04x:%RGv - opcode=%d\n",
1030 pRegFrame->cs, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode));
1031 rc = VINF_EM_RAW_EMULATE_INSTR;
1032 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,EmulateInstr));
1033 }
1034
1035#if 0 /* experimental code */
1036 if (rc == VINF_SUCCESS)
1037 {
1038 switch (pPage->enmKind)
1039 {
1040 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1041 {
1042 X86PTEPAE GstPte;
1043 int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
1044 AssertRC(rc);
1045
1046 /* Check the new value written by the guest. If present and with a bogus physical address, then
1047 * it's fairly safe to assume the guest is reusing the PT.
1048 */
1049 if (GstPte.n.u1Present)
1050 {
1051 RTHCPHYS HCPhys = -1;
1052 int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
1053 if (rc != VINF_SUCCESS)
1054 {
1055 *pfReused = true;
1056 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1057 }
1058 }
1059 break;
1060 }
1061 }
1062 }
1063#endif
1064
1065 LogFlow(("pgmPoolAccessHandlerSimple: returns %Rrc cb=%d\n", rc, cb));
1066 return rc;
1067}
1068
1069/**
1070 * \#PF Handler callback for PT write accesses.
1071 *
1072 * @returns VBox status code (appropriate for GC return).
1073 * @param pVM VM Handle.
1074 * @param uErrorCode CPU Error code.
1075 * @param pRegFrame Trap register frame.
1076 * NULL on DMA and other non CPU access.
1077 * @param pvFault The fault address (cr2).
1078 * @param GCPhysFault The GC physical address corresponding to pvFault.
1079 * @param pvUser User argument.
1080 */
1081DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
1082{
1083 STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1084 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1085 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
1086 PVMCPU pVCpu = VMMGetCpu(pVM);
1087 unsigned cMaxModifications;
1088 bool fForcedFlush = false;
1089
1090 LogFlow(("pgmPoolAccessHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));
1091
1092 pgmLock(pVM);
1093 if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
1094 {
1095 /* Pool page changed while we were waiting for the lock; ignore. */
1096 Log(("CPU%d: pgmPoolAccessHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
1097 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1098 pgmUnlock(pVM);
1099 return VINF_SUCCESS;
1100 }
1101#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1102 if (pPage->fDirty)
1103 {
1104 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH));
1105 pgmUnlock(pVM);
1106 return VINF_SUCCESS; /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
1107 }
1108#endif
1109
1110#if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
1111 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1112 {
1113 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1114 void *pvGst;
1115 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1116 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1117 }
1118#endif
1119
1120 /*
1121 * Disassemble the faulting instruction.
1122 */
1123 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1124 int rc = EMInterpretDisasOne(pVM, pVCpu, pRegFrame, pDis, NULL);
1125 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1126 {
1127 AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
1128 pgmUnlock(pVM);
1129 return rc;
1130 }
1131
1132 Assert(pPage->enmKind != PGMPOOLKIND_FREE);
1133
1134 /*
1135 * We should ALWAYS have the list head as user parameter. This
1136 * is because we use that page to record the changes.
1137 */
1138 Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1139
1140#ifdef IN_RING0
1141 /* Maximum nr of modifications depends on the page type. */
1142 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1143 cMaxModifications = 4;
1144 else
1145 cMaxModifications = 24;
1146#else
1147 cMaxModifications = 48;
1148#endif
1149
1150 /*
1151 * Incremental page table updates should weigh more than random ones.
1152 * (Only applies when started from offset 0)
1153 */
1154 pVCpu->pgm.s.cPoolAccessHandler++;
1155 if ( pPage->pvLastAccessHandlerRip >= pRegFrame->rip - 0x40 /* observed loops in Windows 7 x64 */
1156 && pPage->pvLastAccessHandlerRip < pRegFrame->rip + 0x40
1157 && pvFault == (pPage->pvLastAccessHandlerFault + pDis->param1.size)
1158 && pVCpu->pgm.s.cPoolAccessHandler == (pPage->cLastAccessHandlerCount + 1))
1159 {
1160 Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1161 pPage->cModifications = pPage->cModifications * 2;
1162 pPage->pvLastAccessHandlerFault = pvFault;
1163 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1164 if (pPage->cModifications >= cMaxModifications)
1165 {
1166 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
1167 fForcedFlush = true;
1168 }
1169 }
1170
1171 if (pPage->cModifications >= cMaxModifications)
1172 Log(("Mod overflow %VGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(&pVM->pgm.s, pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
1173
1174 /*
1175 * Check if it's worth dealing with.
1176 */
1177 bool fReused = false;
1178 bool fNotReusedNotForking = false;
1179 if ( ( pPage->cModifications < cMaxModifications /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
1180 || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
1181 )
1182 && !(fReused = pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault))
1183 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1184 {
1185 /*
1186 * Simple instructions, no REP prefix.
1187 */
1188 if (!(pDis->prefix & (PREFIX_REP | PREFIX_REPNE)))
1189 {
1190 rc = pgmPoolAccessHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
1191 if (fReused)
1192 goto flushPage;
1193
1194 /* A mov instruction to change the first page table entry will be remembered so we can detect
1195 * full page table changes early on. This will reduce the amount of unnecessary traps we'll take.
1196 */
1197 if ( rc == VINF_SUCCESS
1198 && pDis->pCurInstr->opcode == OP_MOV
1199 && (pvFault & PAGE_OFFSET_MASK) == 0)
1200 {
1201 pPage->pvLastAccessHandlerFault = pvFault;
1202 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1203 pPage->pvLastAccessHandlerRip = pRegFrame->rip;
1204 /* Make sure we don't kick out a page too quickly. */
1205 if (pPage->cModifications > 8)
1206 pPage->cModifications = 2;
1207 }
1208 else
1209 if (pPage->pvLastAccessHandlerFault == pvFault)
1210 {
1211 /* ignore the 2nd write to this page table entry. */
1212 pPage->cLastAccessHandlerCount = pVCpu->pgm.s.cPoolAccessHandler;
1213 }
1214 else
1215 {
1216 pPage->pvLastAccessHandlerFault = 0;
1217 pPage->pvLastAccessHandlerRip = 0;
1218 }
1219
1220 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,Handled), a);
1221 pgmUnlock(pVM);
1222 return rc;
1223 }
1224
1225 /*
1226 * Windows frequently does small memset() operations (netio test 4k+).
1227 * We have to deal with these or we'll kill the cache and performance.
1228 */
1229 if ( pDis->pCurInstr->opcode == OP_STOSWD
1230 && !pRegFrame->eflags.Bits.u1DF
1231 && pDis->opmode == pDis->mode
1232 && pDis->addrmode == pDis->mode)
1233 {
1234 bool fValidStosd = false;
1235
1236 if ( pDis->mode == CPUMODE_32BIT
1237 && pDis->prefix == PREFIX_REP
1238 && pRegFrame->ecx <= 0x20
1239 && pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1240 && !((uintptr_t)pvFault & 3)
1241 && (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
1242 )
1243 {
1244 fValidStosd = true;
1245 pRegFrame->rcx &= 0xffffffff; /* paranoia */
1246 }
1247 else
1248 if ( pDis->mode == CPUMODE_64BIT
1249 && pDis->prefix == (PREFIX_REP | PREFIX_REX)
1250 && pRegFrame->rcx <= 0x20
1251 && pRegFrame->rcx * 8 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
1252 && !((uintptr_t)pvFault & 7)
1253 && (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
1254 )
1255 {
1256 fValidStosd = true;
1257 }
1258
1259 if (fValidStosd)
1260 {
1261 rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1262 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,RepStosd), a);
1263 pgmUnlock(pVM);
1264 return rc;
1265 }
1266 }
1267
1268 /* REP prefix, don't bother. */
1269 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,RepPrefix));
1270 Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
1271 pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
1272 fNotReusedNotForking = true;
1273 }
1274
1275#if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
1276 /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
1277 * leads to pgm pool thrashing and an excessive amount of write faults due to page monitoring.
1278 */
1279 if ( pPage->cModifications >= cMaxModifications
1280 && !fForcedFlush
1281 && pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
1282 && ( fNotReusedNotForking
1283 || ( !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
1284 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
1285 )
1286 )
1287 {
1288 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
1289 Assert(pPage->fDirty == false);
1290
1291 /* Flush any monitored duplicates as we will disable write protection. */
1292 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
1293 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
1294 {
1295 PPGMPOOLPAGE pPageHead = pPage;
1296
1297 /* Find the monitor head. */
1298 while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
1299 pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];
1300
1301 while (pPageHead)
1302 {
1303 unsigned idxNext = pPageHead->iMonitoredNext;
1304
1305 if (pPageHead != pPage)
1306 {
1307 STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
1308 Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
1309 int rc2 = pgmPoolFlushPage(pPool, pPageHead);
1310 AssertRC(rc2);
1311 }
1312
1313 if (idxNext == NIL_PGMPOOL_IDX)
1314 break;
1315
1316 pPageHead = &pPool->aPages[idxNext];
1317 }
1318 }
1319
1320 /* The flushing above might fail for locked pages, so double check. */
1321 if ( pPage->iMonitoredNext == NIL_PGMPOOL_IDX
1322 && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
1323 {
1324 pgmPoolAddDirtyPage(pVM, pPool, pPage);
1325
1326 /* Temporarily allow write access to the page table again. */
1327 rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
1328 if (rc == VINF_SUCCESS)
1329 {
1330 rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
1331 AssertMsg(rc == VINF_SUCCESS
1332 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1333 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1334 || rc == VERR_PAGE_NOT_PRESENT,
1335 ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
1336
1337 pPage->pvDirtyFault = pvFault;
1338
1339 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
1340 pgmUnlock(pVM);
1341 return rc;
1342 }
1343 }
1344 }
1345#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1346
1347 STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushModOverflow));
1348flushPage:
1349 /*
1350 * Not worth it, so flush it.
1351 *
1352 * If we considered it to be reused, don't go back to ring-3
1353 * to emulate failed instructions since we usually cannot
1354 * interpret them. This may be a bit risky, in which case
1355 * the reuse detection must be fixed.
1356 */
1357 rc = pgmPoolAccessHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
1358 if ( rc == VINF_EM_RAW_EMULATE_INSTR
1359 && fReused)
1360 {
1361 /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
1362 if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
1363 rc = VINF_SUCCESS; /* safe to restart the instruction. */
1364 }
1365 STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
1366 pgmUnlock(pVM);
1367 return rc;
1368}
1369
1370# endif /* !IN_RING3 */
1371
1372# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1373
1374# ifdef VBOX_STRICT
1375/**
1376 * Check references to guest physical memory in a PAE / PAE page table.
1377 *
1378 * @param pPool The pool.
1379 * @param pPage The page.
1380 * @param pShwPT The shadow page table (mapping of the page).
1381 * @param pGstPT The guest page table.
1382 */
1383static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
1384{
1385 unsigned cErrors = 0;
1386 int LastRc = -1; /* initialized to shut up gcc */
1387 unsigned LastPTE = ~0U; /* initialized to shut up gcc */
1388 RTHCPHYS LastHCPhys = NIL_RTHCPHYS; /* initialized to shut up gcc */
1389
1390#ifdef VBOX_STRICT
1391 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1392 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1393#endif
1394 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1395 {
1396 if (pShwPT->a[i].n.u1Present)
1397 {
1398 RTHCPHYS HCPhys = -1;
1399 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1400 if ( rc != VINF_SUCCESS
1401 || (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) != HCPhys)
1402 {
1403 RTHCPHYS HCPhysPT = -1;
1404 Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1405 LastPTE = i;
1406 LastRc = rc;
1407 LastHCPhys = HCPhys;
1408 cErrors++;
1409
1410 rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pPage->GCPhys, &HCPhysPT);
1411 AssertRC(rc);
1412
1413 for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
1414 {
1415 PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];
1416
1417 if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1418 {
1419 PX86PTPAE pShwPT2 = (PX86PTPAE)PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pTempPage);
1420
1421 for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
1422 {
1423 if ( pShwPT2->a[j].n.u1Present
1424 && pShwPT2->a[j].n.u1Write
1425 && ((pShwPT2->a[j].u & X86_PTE_PAE_PG_MASK) == HCPhysPT))
1426 {
1427 Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, pShwPT->a[j].u, pShwPT2->a[j].u));
1428 }
1429 }
1430 }
1431 }
1432 }
1433 }
1434 }
1435 AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, pShwPT->a[LastPTE].u, LastHCPhys));
1436}
1437# endif /* VBOX_STRICT */
1438
1439/**
1440 * Clear references to guest physical memory in a PAE / PAE page table.
1441 *
1442 * @returns nr of changed PTEs
1443 * @param pPool The pool.
1444 * @param pPage The page.
1445 * @param pShwPT The shadow page table (mapping of the page).
1446 * @param pGstPT The guest page table.
1447 * @param pOldGstPT The old cached guest page table.
1448 * @param fAllowRemoval Bail out as soon as we encounter an invalid PTE
1449 * @param pfFlush Flush reused page table (out)
1450 */
1451DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1452{
1453 unsigned cChanged = 0;
1454
1455#ifdef VBOX_STRICT
1456 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1457 AssertMsg(!pShwPT->a[i].n.u1Present, ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, pShwPT->a[i].u, pPage->iFirstPresent));
1458#endif
1459 *pfFlush = false;
1460
1461 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1462 {
1463 /* Check the new value written by the guest. If present and with a bogus physical address, then
1464 * it's fairly safe to assume the guest is reusing the PT.
1465 */
1466 if ( fAllowRemoval
1467 && pGstPT->a[i].n.u1Present)
1468 {
1469 if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1470 {
1471 *pfFlush = true;
1472 return ++cChanged;
1473 }
1474 }
1475 if (pShwPT->a[i].n.u1Present)
1476 {
1477 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1478 if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
1479 {
1480#ifdef VBOX_STRICT
1481 RTHCPHYS HCPhys = -1;
1482 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
1483 AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
1484#endif
1485 uint64_t uHostAttr = pShwPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1486 bool fHostRW = !!(pShwPT->a[i].u & X86_PTE_RW);
1487 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
1488 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1489
1490 if ( uHostAttr == uGuestAttr
1491 && fHostRW <= fGuestRW)
1492 continue;
1493 }
1494 cChanged++;
1495 /* Something was changed, so flush it. */
1496 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1497 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1498 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
1499 ASMAtomicWriteSize(&pShwPT->a[i].u, 0);
1500 }
1501 }
1502 return cChanged;
1503}
1504
1505
1506/**
1507 * Flush a dirty page
1508 *
1509 * @param pVM VM Handle.
1510 * @param pPool The pool.
1511 * @param idxSlot Dirty array slot index
1512 * @param fAllowRemoval Allow a reused page table to be removed
1513 */
1514static void pgmPoolFlushDirtyPage(PVM pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1515{
1516 PPGMPOOLPAGE pPage;
1517 unsigned idxPage;
1518
1519 Assert(idxSlot < RT_ELEMENTS(pPool->aIdxDirtyPages));
1520 if (pPool->aIdxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
1521 return;
1522
1523 idxPage = pPool->aIdxDirtyPages[idxSlot];
1524 AssertRelease(idxPage != NIL_PGMPOOL_IDX);
1525 pPage = &pPool->aPages[idxPage];
1526 Assert(pPage->idx == idxPage);
1527 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1528
1529 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1530 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1531
1532 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1533 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys);
1534 Assert(rc == VINF_SUCCESS);
1535 pPage->fDirty = false;
1536
1537#ifdef VBOX_STRICT
1538 uint64_t fFlags = 0;
1539 RTHCPHYS HCPhys;
1540 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, &HCPhys);
1541 AssertMsg( ( rc == VINF_SUCCESS
1542 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1543 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1544 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1545 || rc == VERR_PAGE_NOT_PRESENT,
1546 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
1547#endif
1548
1549 /* Flush those PTEs that have changed. */
1550 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1551 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1552 void *pvGst;
1553 bool fFlush;
1554 rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1555 unsigned cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst, (PCX86PTPAE)&pPool->aDirtyPages[idxSlot][0], fAllowRemoval, &fFlush);
1556 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1557 /** Note: we might want to consider keeping the dirty page active in case there were many changes. */
1558
1559 /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
1560 Assert(pPage->cModifications);
1561 if (cChanges < 4)
1562 pPage->cModifications = 1; /* must use > 0 here */
1563 else
1564 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1565
1566 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1567 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages))
1568 pPool->idxFreeDirtyPage = idxSlot;
1569
1570 pPool->cDirtyPages--;
1571 pPool->aIdxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1572 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1573 if (fFlush)
1574 {
1575 Assert(fAllowRemoval);
1576 Log(("Flush reused page table!\n"));
1577 pgmPoolFlushPage(pPool, pPage);
1578 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1579 }
1580 else
1581 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1582}
1583
1584# ifndef IN_RING3
1585/**
1586 * Add a new dirty page
1587 *
1588 * @param pVM VM Handle.
1589 * @param pPool The pool.
1590 * @param pPage The page.
1591 */
1592void pgmPoolAddDirtyPage(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1593{
1594 unsigned idxFree;
1595
1596 Assert(PGMIsLocked(pVM));
1597 AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
1598 Assert(!pPage->fDirty);
1599
1600 idxFree = pPool->idxFreeDirtyPage;
1601 Assert(idxFree < RT_ELEMENTS(pPool->aIdxDirtyPages));
1602 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1603
1604 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aIdxDirtyPages))
1605 {
1606 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1607 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1608 }
1609 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages));
1610 AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1611
1612 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1613
1614 /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
1615 * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
1616 */
1617 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
1618 void *pvGst;
1619 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1620 memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
1621#ifdef VBOX_STRICT
1622 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
1623#endif
1624
1625 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1626 pPage->fDirty = true;
1627 pPage->idxDirty = idxFree;
1628 pPool->aIdxDirtyPages[idxFree] = pPage->idx;
1629 pPool->cDirtyPages++;
1630
1631 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1632 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aIdxDirtyPages)
1633 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1634 {
1635 unsigned i;
1636 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1637 {
1638 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
1639 if (pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1640 {
1641 pPool->idxFreeDirtyPage = idxFree;
1642 break;
1643 }
1644 }
1645 Assert(i != RT_ELEMENTS(pPool->aIdxDirtyPages));
1646 }
1647
1648 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages) || pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1649 return;
1650}
1651# endif /* !IN_RING3 */
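/*
 * A minimal sketch of the dirty page ring used above: aIdxDirtyPages is a
 * small, power-of-two sized array of pool page indices (the AssertCompile
 * permits 8 or 16 entries). idxFreeDirtyPage advances with
 *     idxNext = (idxCur + 1) & (RT_ELEMENTS(pPool->aIdxDirtyPages) - 1);
 * which only wraps correctly because the element count is a power of two.
 * When the preferred slot is still occupied, the search loop above picks the
 * next free (NIL_PGMPOOL_IDX) slot; when the whole array is full, the oldest
 * slot is flushed first with removal of reused page tables allowed.
 */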
1652
1653/**
1654 * Check if the specified page is dirty (not write monitored)
1655 *
1656 * @returns true if the page is currently dirty (not write monitored), false otherwise.
1657 * @param pVM VM Handle.
1658 * @param GCPhys Guest physical address
1659 */
1660bool pgmPoolIsDirtyPage(PVM pVM, RTGCPHYS GCPhys)
1661{
1662 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1663 Assert(PGMIsLocked(pVM));
1664 if (!pPool->cDirtyPages)
1665 return false;
1666
1667 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
1668
1669 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1670 {
1671 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1672 {
1673 PPGMPOOLPAGE pPage;
1674 unsigned idxPage = pPool->aIdxDirtyPages[i];
1675
1676 pPage = &pPool->aPages[idxPage];
1677 if (pPage->GCPhys == GCPhys)
1678 return true;
1679 }
1680 }
1681 return false;
1682}
1683
1684/**
1685 * Reset all dirty pages by reinstating page monitoring.
1686 *
1687 * @param pVM VM Handle.
1688 */
1689void pgmPoolResetDirtyPages(PVM pVM)
1690{
1691 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1692 Assert(PGMIsLocked(pVM));
1693 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1694
1695 if (!pPool->cDirtyPages)
1696 return;
1697
1698 Log(("pgmPoolResetDirtyPages\n"));
1699 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1700 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1701
1702 pPool->idxFreeDirtyPage = 0;
1703 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1704 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1705 {
1706 unsigned i;
1707 for (i = 1; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1708 {
1709 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1710 {
1711 pPool->idxFreeDirtyPage = i;
1712 break;
1713 }
1714 }
1715 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1716 }
1717
1718 Assert(pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aIdxDirtyPages));
1719 return;
1720}
1721
1722/**
1723 * Invalidate the dirty state of the specified guest page table, flushing it and reinstating page monitoring.
1724 *
1725 * @param pVM VM Handle.
1726 * @param GCPhysPT Physical address of the page table
1727 */
1728void pgmPoolInvalidateDirtyPage(PVM pVM, RTGCPHYS GCPhysPT)
1729{
1730 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1731 Assert(PGMIsLocked(pVM));
1732 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aIdxDirtyPages));
1733 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aIdxDirtyPages);
1734
1735 if (!pPool->cDirtyPages)
1736 return;
1737
1738 GCPhysPT = GCPhysPT & ~(RTGCPHYS)(PAGE_SIZE - 1);
1739
1740 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1741 {
1742 if (pPool->aIdxDirtyPages[i] != NIL_PGMPOOL_IDX)
1743 {
1744 unsigned idxPage = pPool->aIdxDirtyPages[i];
1745
1746 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1747 if (pPage->GCPhys == GCPhysPT)
1748 {
1749 idxDirtyPage = i;
1750 break;
1751 }
1752 }
1753 }
1754
1755 if (idxDirtyPage != RT_ELEMENTS(pPool->aIdxDirtyPages))
1756 {
1757 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1758 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aIdxDirtyPages)
1759 && pPool->aIdxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1760 {
1761 unsigned i;
1762 for (i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
1763 {
1764 if (pPool->aIdxDirtyPages[i] == NIL_PGMPOOL_IDX)
1765 {
1766 pPool->idxFreeDirtyPage = i;
1767 break;
1768 }
1769 }
1770 AssertMsg(i != RT_ELEMENTS(pPool->aIdxDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1771 }
1772 }
1773}
1774
1775# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1776
1777/**
1778 * Inserts a page into the GCPhys hash table.
1779 *
1780 * @param pPool The pool.
1781 * @param pPage The page.
1782 */
1783DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1784{
1785 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1786 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1787 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1788 pPage->iNext = pPool->aiHash[iHash];
1789 pPool->aiHash[iHash] = pPage->idx;
1790}
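/*
 * The GCPhys hash is a chained hash table: each bucket in pPool->aiHash[]
 * holds the index of the first page in the bucket and further pages are
 * linked through pPage->iNext, terminated by NIL_PGMPOOL_IDX. PGMPOOL_HASH
 * itself is defined elsewhere (presumably in PGMInternal.h); conceptually it
 * is assumed to reduce the page-aligned GCPhys to a bucket index, e.g.
 *     iHash = (GCPhys >> PAGE_SHIFT) & (cHashBuckets - 1);
 * Insertion above is O(1) at the bucket head; removal below walks the chain
 * to unlink the page.
 */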
1791
1792
1793/**
1794 * Removes a page from the GCPhys hash table.
1795 *
1796 * @param pPool The pool.
1797 * @param pPage The page.
1798 */
1799DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1800{
1801 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1802 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1803 if (pPool->aiHash[iHash] == pPage->idx)
1804 pPool->aiHash[iHash] = pPage->iNext;
1805 else
1806 {
1807 uint16_t iPrev = pPool->aiHash[iHash];
1808 for (;;)
1809 {
1810 const int16_t i = pPool->aPages[iPrev].iNext;
1811 if (i == pPage->idx)
1812 {
1813 pPool->aPages[iPrev].iNext = pPage->iNext;
1814 break;
1815 }
1816 if (i == NIL_PGMPOOL_IDX)
1817 {
1818 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%#x\n", pPage->GCPhys, pPage->idx));
1819 break;
1820 }
1821 iPrev = i;
1822 }
1823 }
1824 pPage->iNext = NIL_PGMPOOL_IDX;
1825}
1826
1827
1828/**
1829 * Frees up one cache page.
1830 *
1831 * @returns VBox status code.
1832 * @retval VINF_SUCCESS on success.
1833 * @param pPool The pool.
1834 * @param iUser The user index.
1835 */
1836static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
1837{
1838#ifndef IN_RC
1839 const PVM pVM = pPool->CTX_SUFF(pVM);
1840#endif
1841 Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are fewer than 2 cached entries! */
1842 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
1843
1844 /*
1845 * Select one page from the tail of the age list.
1846 */
1847 PPGMPOOLPAGE pPage;
1848 for (unsigned iLoop = 0; ; iLoop++)
1849 {
1850 uint16_t iToFree = pPool->iAgeTail;
1851 if (iToFree == iUser)
1852 iToFree = pPool->aPages[iToFree].iAgePrev;
1853/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
1854 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
1855 {
1856 uint16_t i = pPool->aPages[iToFree].iAgePrev;
1857 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
1858 {
1859 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
1860 continue;
1861 iToFree = i;
1862 break;
1863 }
1864 }
1865*/
1866 Assert(iToFree != iUser);
1867 AssertRelease(iToFree != NIL_PGMPOOL_IDX);
1868 pPage = &pPool->aPages[iToFree];
1869
1870 /*
1871 * Reject any attempts at flushing the currently active shadow CR3 mapping.
1872 * Call pgmPoolCacheUsed to move the page to the head of the age list.
1873 */
1874 if (!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage))
1875 break;
1876 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
1877 pgmPoolCacheUsed(pPool, pPage);
1878 AssertLogRelReturn(iLoop < 8192, VERR_INTERNAL_ERROR);
1879 }
1880
1881 /*
1882 * Found a usable page, flush it and return.
1883 */
1884 int rc = pgmPoolFlushPage(pPool, pPage);
1885 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
1886 /* todo: find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
1887 if (rc == VINF_SUCCESS)
1888 PGM_INVL_ALL_VCPU_TLBS(pVM);
1889 return rc;
1890}
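/*
 * Cache replacement is a straightforward LRU: the age list is a doubly linked
 * list of page indices (iAgeNext/iAgePrev) with the most recently used page
 * at iAgeHead. Eviction above therefore starts at iAgeTail and walks towards
 * the head, skipping the caller's own user page and any page locked as the
 * active shadow CR3, until a page that may be flushed is found.
 */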
1891
1892
1893/**
1894 * Checks if a kind mismatch is really a page being reused
1895 * or just a normal remapping.
1896 *
1897 * @returns true if reused and the cached page (enmKind1) should be flushed
1898 * @returns false if not reused.
1899 * @param enmKind1 The kind of the cached page.
1900 * @param enmKind2 The kind of the requested page.
1901 */
1902static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
1903{
1904 switch (enmKind1)
1905 {
1906 /*
1907 * Never reuse them. There is no remapping in non-paging mode.
1908 */
1909 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1910 case PGMPOOLKIND_32BIT_PD_PHYS:
1911 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1912 case PGMPOOLKIND_PAE_PD_PHYS:
1913 case PGMPOOLKIND_PAE_PDPT_PHYS:
1914 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1915 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1916 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1917 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1918 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1919 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
1920 return false;
1921
1922 /*
1923 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1924 */
1925 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1926 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1927 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1928 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1929 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1930 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1931 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1932 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1933 case PGMPOOLKIND_32BIT_PD:
1934 case PGMPOOLKIND_PAE_PDPT:
1935 switch (enmKind2)
1936 {
1937 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1938 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1939 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1940 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1941 case PGMPOOLKIND_64BIT_PML4:
1942 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1943 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1944 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1945 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1946 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1947 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1948 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1949 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1950 return true;
1951 default:
1952 return false;
1953 }
1954
1955 /*
1956 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
1957 */
1958 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1959 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1960 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
1961 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
1962 case PGMPOOLKIND_64BIT_PML4:
1963 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1964 switch (enmKind2)
1965 {
1966 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1967 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1968 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1969 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1970 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
1971 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
1972 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
1973 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
1974 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1975 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1976 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
1977 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
1978 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
1979 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
1980 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
1981 return true;
1982 default:
1983 return false;
1984 }
1985
1986 /*
1987 * These cannot be flushed, and it's common to reuse the PDs as PTs.
1988 */
1989 case PGMPOOLKIND_ROOT_NESTED:
1990 return false;
1991
1992 default:
1993 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
1994 }
1995}
1996
1997
1998/**
1999 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2000 *
2001 * @returns VBox status code.
2002 * @retval VINF_PGM_CACHED_PAGE on success.
2003 * @retval VERR_FILE_NOT_FOUND if not found.
2004 * @param pPool The pool.
2005 * @param GCPhys The GC physical address of the page we're going to shadow.
2006 * @param enmKind The kind of mapping.
2007 * @param enmAccess Access type for the mapping (only relevant for big pages)
2008 * @param iUser The shadow page pool index of the user table.
2009 * @param iUserTable The index into the user table (shadowed).
2010 * @param ppPage Where to store the pointer to the page.
2011 */
2012static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2013{
2014#ifndef IN_RC
2015 const PVM pVM = pPool->CTX_SUFF(pVM);
2016#endif
2017 /*
2018 * Look up the GCPhys in the hash.
2019 */
2020 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2021 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%x iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2022 if (i != NIL_PGMPOOL_IDX)
2023 {
2024 do
2025 {
2026 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2027 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2028 if (pPage->GCPhys == GCPhys)
2029 {
2030 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2031 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess)
2032 {
2033 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2034 * doesn't flush it in case there are no more free use records.
2035 */
2036 pgmPoolCacheUsed(pPool, pPage);
2037
2038 int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2039 if (RT_SUCCESS(rc))
2040 {
2041 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2042 *ppPage = pPage;
2043 if (pPage->cModifications)
2044 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2045 STAM_COUNTER_INC(&pPool->StatCacheHits);
2046 return VINF_PGM_CACHED_PAGE;
2047 }
2048 return rc;
2049 }
2050
2051 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2052 {
2053 /*
2054 * The kind is different. In some cases we should now flush the page
2055 * as it has been reused, but in most cases this is normal remapping
2056 * of PDs as PT or big pages using the GCPhys field in a slightly
2057 * different way than the other kinds.
2058 */
2059 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2060 {
2061 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2062 pgmPoolFlushPage(pPool, pPage);
2063 break;
2064 }
2065 }
2066 }
2067
2068 /* next */
2069 i = pPage->iNext;
2070 } while (i != NIL_PGMPOOL_IDX);
2071 }
2072
2073 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2074 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2075 return VERR_FILE_NOT_FOUND;
2076}
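/*
 * A cache hit above requires GCPhys, enmKind and enmAccess to all match an
 * existing pool page. A GCPhys match with a different kind is usually just
 * the same guest table being shadowed in another mode (e.g. a 32-bit PD also
 * shadowed as PAE PDs); only when pgmPoolCacheReusedByKind says the page was
 * really reused is the cached page flushed. The caller (pgmPoolAlloc,
 * presumably) is expected to treat VERR_FILE_NOT_FOUND as "allocate and fill
 * a fresh shadow page" rather than as an error.
 */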
2077
2078
2079/**
2080 * Inserts a page into the cache.
2081 *
2082 * @param pPool The pool.
2083 * @param pPage The cached page.
2084 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2085 */
2086static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2087{
2088 /*
2089 * Insert into the GCPhys hash if the page is fit for that.
2090 */
2091 Assert(!pPage->fCached);
2092 if (fCanBeCached)
2093 {
2094 pPage->fCached = true;
2095 pgmPoolHashInsert(pPool, pPage);
2096 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2097 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2098 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2099 }
2100 else
2101 {
2102 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2103 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2104 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2105 }
2106
2107 /*
2108 * Insert at the head of the age list.
2109 */
2110 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2111 pPage->iAgeNext = pPool->iAgeHead;
2112 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2113 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2114 else
2115 pPool->iAgeTail = pPage->idx;
2116 pPool->iAgeHead = pPage->idx;
2117}
2118
2119
2120/**
2121 * Flushes a cached page.
2122 *
2123 * @param pPool The pool.
2124 * @param pPage The cached page.
2125 */
2126static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2127{
2128 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2129
2130 /*
2131 * Remove the page from the hash.
2132 */
2133 if (pPage->fCached)
2134 {
2135 pPage->fCached = false;
2136 pgmPoolHashRemove(pPool, pPage);
2137 }
2138 else
2139 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2140
2141 /*
2142 * Remove it from the age list.
2143 */
2144 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2145 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2146 else
2147 pPool->iAgeTail = pPage->iAgePrev;
2148 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2149 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2150 else
2151 pPool->iAgeHead = pPage->iAgeNext;
2152 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2153 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2154}
2155
2156
2157/**
2158 * Looks for pages sharing the monitor.
2159 *
2160 * @returns Pointer to the head page.
2161 * @returns NULL if not found.
2162 * @param pPool The Pool
2163 * @param pNewPage The page which is going to be monitored.
2164 */
2165static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2166{
2167 /*
2168 * Look up the GCPhys in the hash.
2169 */
2170 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2171 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2172 if (i == NIL_PGMPOOL_IDX)
2173 return NULL;
2174 do
2175 {
2176 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2177 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2178 && pPage != pNewPage)
2179 {
2180 switch (pPage->enmKind)
2181 {
2182 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2183 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2184 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2185 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2186 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2187 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2188 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2189 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2190 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2191 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2192 case PGMPOOLKIND_64BIT_PML4:
2193 case PGMPOOLKIND_32BIT_PD:
2194 case PGMPOOLKIND_PAE_PDPT:
2195 {
2196 /* find the head */
2197 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2198 {
2199 Assert(pPage->iMonitoredPrev != pPage->idx);
2200 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2201 }
2202 return pPage;
2203 }
2204
2205 /* ignore, no monitoring. */
2206 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2207 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2208 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2209 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2210 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2211 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2212 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2213 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2214 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2215 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2216 case PGMPOOLKIND_ROOT_NESTED:
2217 case PGMPOOLKIND_PAE_PD_PHYS:
2218 case PGMPOOLKIND_PAE_PDPT_PHYS:
2219 case PGMPOOLKIND_32BIT_PD_PHYS:
2220 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2221 break;
2222 default:
2223 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2224 }
2225 }
2226
2227 /* next */
2228 i = pPage->iNext;
2229 } while (i != NIL_PGMPOOL_IDX);
2230 return NULL;
2231}
2232
2233
2234/**
2235 * Enables write monitoring of a guest page.
2236 *
2237 * @returns VBox status code.
2238 * @retval VINF_SUCCESS on success.
2239 * @param pPool The pool.
2240 * @param pPage The cached page.
2241 */
2242static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2243{
2244 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1)));
2245
2246 /*
2247 * Filter out the relevant kinds.
2248 */
2249 switch (pPage->enmKind)
2250 {
2251 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2252 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2253 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2254 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2255 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2256 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2257 case PGMPOOLKIND_64BIT_PML4:
2258 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2259 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2260 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2261 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2262 case PGMPOOLKIND_32BIT_PD:
2263 case PGMPOOLKIND_PAE_PDPT:
2264 break;
2265
2266 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2267 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2268 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2269 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2270 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2271 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2272 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2273 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2274 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2275 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2276 case PGMPOOLKIND_ROOT_NESTED:
2277 /* Nothing to monitor here. */
2278 return VINF_SUCCESS;
2279
2280 case PGMPOOLKIND_32BIT_PD_PHYS:
2281 case PGMPOOLKIND_PAE_PDPT_PHYS:
2282 case PGMPOOLKIND_PAE_PD_PHYS:
2283 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2284 /* Nothing to monitor here. */
2285 return VINF_SUCCESS;
2286 default:
2287 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2288 }
2289
2290 /*
2291 * Install handler.
2292 */
2293 int rc;
2294 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2295 if (pPageHead)
2296 {
2297 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2298 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2299
2300#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2301 if (pPageHead->fDirty)
2302 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirty, false /* do not remove */);
2303#endif
2304
2305 pPage->iMonitoredPrev = pPageHead->idx;
2306 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2307 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2308 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2309 pPageHead->iMonitoredNext = pPage->idx;
2310 rc = VINF_SUCCESS;
2311 }
2312 else
2313 {
2314 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2315 PVM pVM = pPool->CTX_SUFF(pVM);
2316 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
2317 rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
2318 GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
2319 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
2320 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
2321 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
2322 pPool->pszAccessHandler);
2323 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2324 * the heap size should suffice. */
2325 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2326 Assert(!(VMMGetCpu(pVM)->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(VMMGetCpu(pVM), VMCPU_FF_PGM_SYNC_CR3));
2327 }
2328 pPage->fMonitored = true;
2329 return rc;
2330}
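/*
 * Several pool pages can shadow (parts of) the same guest page, e.g. a guest
 * 32-bit PD shadowed as four PAE PDs (PGMPOOLKIND_PAE_PD0..3_FOR_32BIT_PD).
 * Only one physical access handler is registered per guest page; additional
 * pool pages are chained onto the existing head through iMonitoredPrev and
 * iMonitoredNext above, and pgmPoolMonitorFlush below re-points the handler
 * to the new head (or deregisters it for the last page) when pages leave the
 * chain.
 */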
2331
2332
2333/**
2334 * Disables write monitoring of a guest page.
2335 *
2336 * @returns VBox status code.
2337 * @retval VINF_SUCCESS on success.
2338 * @param pPool The pool.
2339 * @param pPage The cached page.
2340 */
2341static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2342{
2343 /*
2344 * Filter out the relevant kinds.
2345 */
2346 switch (pPage->enmKind)
2347 {
2348 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2349 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2350 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2351 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2352 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2353 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2354 case PGMPOOLKIND_64BIT_PML4:
2355 case PGMPOOLKIND_32BIT_PD:
2356 case PGMPOOLKIND_PAE_PDPT:
2357 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2358 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2359 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2360 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2361 break;
2362
2363 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2364 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2365 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2366 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2367 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2368 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2369 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2370 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2371 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2372 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2373 case PGMPOOLKIND_ROOT_NESTED:
2374 case PGMPOOLKIND_PAE_PD_PHYS:
2375 case PGMPOOLKIND_PAE_PDPT_PHYS:
2376 case PGMPOOLKIND_32BIT_PD_PHYS:
2377 /* Nothing to monitor here. */
2378 return VINF_SUCCESS;
2379
2380 default:
2381 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2382 }
2383
2384 /*
2385 * Remove the page from the monitored list or uninstall it if last.
2386 */
2387 const PVM pVM = pPool->CTX_SUFF(pVM);
2388 int rc;
2389 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2390 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2391 {
2392 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2393 {
2394 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2395 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2396 rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
2397 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
2398 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
2399 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pNewHead),
2400 pPool->pszAccessHandler);
2401 AssertFatalRCSuccess(rc);
2402 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2403 }
2404 else
2405 {
2406 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2407 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2408 {
2409 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2410 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2411 }
2412 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2413 rc = VINF_SUCCESS;
2414 }
2415 }
2416 else
2417 {
2418 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
2419 AssertFatalRC(rc);
2420#ifdef VBOX_STRICT
2421 PVMCPU pVCpu = VMMGetCpu(pVM);
2422#endif
2423 AssertMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2424 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2425 }
2426 pPage->fMonitored = false;
2427
2428 /*
2429 * Remove it from the list of modified pages (if in it).
2430 */
2431 pgmPoolMonitorModifiedRemove(pPool, pPage);
2432
2433 return rc;
2434}
2435
2436
2437/**
2438 * Inserts the page into the list of modified pages.
2439 *
2440 * @param pPool The pool.
2441 * @param pPage The page.
2442 */
2443void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2444{
2445 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2446 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2447 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2448 && pPool->iModifiedHead != pPage->idx,
2449 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2450 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2451 pPool->iModifiedHead, pPool->cModifiedPages));
2452
2453 pPage->iModifiedNext = pPool->iModifiedHead;
2454 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2455 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2456 pPool->iModifiedHead = pPage->idx;
2457 pPool->cModifiedPages++;
2458#ifdef VBOX_WITH_STATISTICS
2459 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2460 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2461#endif
2462}
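/*
 * The modified page list (iModifiedHead with iModifiedNext/iModifiedPrev
 * links) tracks monitored pages that have seen guest writes since the last
 * CR3 sync; pPage->cModifications counts the individual writes. The list and
 * the counters are cleared wholesale by pgmPoolMonitorModifiedClearAll during
 * the 'lightweight flush' performed on CR3 sync.
 */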
2463
2464
2465/**
2466 * Removes the page from the list of modified pages and resets the
2467 * modification counter.
2468 *
2469 * @param pPool The pool.
2470 * @param pPage The page which is believed to be in the list of modified pages.
2471 */
2472static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2473{
2474 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2475 if (pPool->iModifiedHead == pPage->idx)
2476 {
2477 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2478 pPool->iModifiedHead = pPage->iModifiedNext;
2479 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2480 {
2481 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2482 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2483 }
2484 pPool->cModifiedPages--;
2485 }
2486 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2487 {
2488 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2489 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2490 {
2491 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2492 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2493 }
2494 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2495 pPool->cModifiedPages--;
2496 }
2497 else
2498 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2499 pPage->cModifications = 0;
2500}
2501
2502
2503/**
2504 * Zaps the list of modified pages, resetting their modification counters in the process.
2505 *
2506 * @param pVM The VM handle.
2507 */
2508static void pgmPoolMonitorModifiedClearAll(PVM pVM)
2509{
2510 pgmLock(pVM);
2511 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2512 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2513
2514 unsigned cPages = 0; NOREF(cPages);
2515
2516#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2517 pgmPoolResetDirtyPages(pVM);
2518#endif
2519
2520 uint16_t idx = pPool->iModifiedHead;
2521 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2522 while (idx != NIL_PGMPOOL_IDX)
2523 {
2524 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2525 idx = pPage->iModifiedNext;
2526 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2527 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2528 pPage->cModifications = 0;
2529 Assert(++cPages);
2530 }
2531 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2532 pPool->cModifiedPages = 0;
2533 pgmUnlock(pVM);
2534}
2535
2536
2537/**
2538 * Handle SyncCR3 pool tasks
2539 *
2540 * @returns VBox status code.
2541 * @retval VINF_SUCCESS on success.
2542 * @retval VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2543 * @param pVCpu The VMCPU handle.
2544 * @remark Should only be used when monitoring is available, thus placed in
2545 * the PGMPOOL_WITH_MONITORING #ifdef.
2546 */
2547int pgmPoolSyncCR3(PVMCPU pVCpu)
2548{
2549 PVM pVM = pVCpu->CTX_SUFF(pVM);
2550 LogFlow(("pgmPoolSyncCR3\n"));
2551
2552 /*
2553 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2554 * Occasionally we will have to clear all the shadow page tables because we wanted
2555 * to monitor a page which was mapped by too many shadowed page tables. This operation
2556 * sometimes refered to as a 'lightweight flush'.
2557 */
2558# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2559 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2560 pgmR3PoolClearAll(pVM);
2561# else /* !IN_RING3 */
2562 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2563 {
2564 LogFlow(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2565 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2566
2567 /* Make sure all other VCPUs return to ring 3. */
2568 if (pVM->cCpus > 1)
2569 {
2570 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2571 PGM_INVL_ALL_VCPU_TLBS(pVM);
2572 }
2573 return VINF_PGM_SYNC_CR3;
2574 }
2575# endif /* !IN_RING3 */
2576 else
2577 pgmPoolMonitorModifiedClearAll(pVM);
2578
2579 return VINF_SUCCESS;
2580}
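/*
 * In ring-0 and raw mode the full pool clear is considered too expensive, so
 * PGM_SYNC_CLEAR_PGM_POOL stays set, VINF_PGM_SYNC_CR3 is returned and the
 * clearing is deferred to ring-3. For SMP guests the code above additionally
 * raises VM_FF_PGM_POOL_FLUSH_PENDING and invalidates all VCPU TLBs so that
 * the other VCPUs also return to ring-3 before the pool is cleared.
 */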
2581
2582
2583/**
2584 * Frees up at least one user entry.
2585 *
2586 * @returns VBox status code.
2587 * @retval VINF_SUCCESS if a cached page was successfully freed.
2588 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2589 * @param pPool The pool.
2590 * @param iUser The user index.
2591 */
2592static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2593{
2594 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2595 /*
2596 * Just free cached pages in a braindead fashion.
2597 */
2598 /** @todo walk the age list backwards and free the first with usage. */
2599 int rc = VINF_SUCCESS;
2600 do
2601 {
2602 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2603 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2604 rc = rc2;
2605 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2606 return rc;
2607}
2608
2609
2610/**
2611 * Inserts a page into the cache.
2612 *
2613 * This will create a user node for the page, insert it into the GCPhys
2614 * hash, and insert it into the age list.
2615 *
2616 * @returns VBox status code.
2617 * @retval VINF_SUCCESS if successfully added.
2618 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2619 * @param pPool The pool.
2620 * @param pPage The cached page.
2621 * @param GCPhys The GC physical address of the page we're going to shadow.
2622 * @param iUser The user index.
2623 * @param iUserTable The user table index.
2624 */
2625DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2626{
2627 int rc = VINF_SUCCESS;
2628 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2629
2630 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser %x iUserTable %x\n", GCPhys, iUser, iUserTable));
2631
2632#ifdef VBOX_STRICT
2633 /*
2634 * Check that the entry doesn't already exist.
2635 */
2636 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2637 {
2638 uint16_t i = pPage->iUserHead;
2639 do
2640 {
2641 Assert(i < pPool->cMaxUsers);
2642 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2643 i = paUsers[i].iNext;
2644 } while (i != NIL_PGMPOOL_USER_INDEX);
2645 }
2646#endif
2647
2648 /*
2649 * Find a free user node.
2650 */
2651 uint16_t i = pPool->iUserFreeHead;
2652 if (i == NIL_PGMPOOL_USER_INDEX)
2653 {
2654 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2655 if (RT_FAILURE(rc))
2656 return rc;
2657 i = pPool->iUserFreeHead;
2658 }
2659
2660 /*
2661 * Unlink the user node from the free list,
2662 * initialize and insert it into the user list.
2663 */
2664 pPool->iUserFreeHead = paUsers[i].iNext;
2665 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2666 paUsers[i].iUser = iUser;
2667 paUsers[i].iUserTable = iUserTable;
2668 pPage->iUserHead = i;
2669
2670 /*
2671 * Insert into cache and enable monitoring of the guest page if enabled.
2672 *
2673 * Until we implement caching of all levels, including the CR3 one, we'll
2674 * have to make sure we don't try monitor & cache any recursive reuse of
2675 * a monitored CR3 page. Because all Windows versions do this, we'll
2676 * have to be able to do combined access monitoring, CR3 + PT and
2677 * PD + PT (guest PAE).
2678 *
2679 * Update:
2680 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2681 */
2682 const bool fCanBeMonitored = true;
2683 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2684 if (fCanBeMonitored)
2685 {
2686 rc = pgmPoolMonitorInsert(pPool, pPage);
2687 AssertRC(rc);
2688 }
2689 return rc;
2690}
2691
2692
2693/**
2694 * Adds a user reference to a page.
2695 *
2696 * This will move the page to the head of the
2697 *
2698 * @returns VBox status code.
2699 * @retval VINF_SUCCESS if successfully added.
2700 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
2701 * @param pPool The pool.
2702 * @param pPage The cached page.
2703 * @param iUser The user index.
2704 * @param iUserTable The user table.
2705 */
2706static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2707{
2708 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2709
2710 Log3(("pgmPoolTrackAddUser GCPhys = %RGp iUser %x iUserTable %x\n", pPage->GCPhys, iUser, iUserTable));
2711
2712# ifdef VBOX_STRICT
2713 /*
2714 * Check that the entry doesn't already exist. We only allow multiple users of top-level paging structures (SHW_POOL_ROOT_IDX).
2715 */
2716 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2717 {
2718 uint16_t i = pPage->iUserHead;
2719 do
2720 {
2721 Assert(i < pPool->cMaxUsers);
2722 AssertMsg(iUser != PGMPOOL_IDX_PD || iUser != PGMPOOL_IDX_PDPT || iUser != PGMPOOL_IDX_NESTED_ROOT || iUser != PGMPOOL_IDX_AMD64_CR3 ||
2723 paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2724 i = paUsers[i].iNext;
2725 } while (i != NIL_PGMPOOL_USER_INDEX);
2726 }
2727# endif
2728
2729 /*
2730 * Allocate a user node.
2731 */
2732 uint16_t i = pPool->iUserFreeHead;
2733 if (i == NIL_PGMPOOL_USER_INDEX)
2734 {
2735 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2736 if (RT_FAILURE(rc))
2737 return rc;
2738 i = pPool->iUserFreeHead;
2739 }
2740 pPool->iUserFreeHead = paUsers[i].iNext;
2741
2742 /*
2743 * Initialize the user node and insert it.
2744 */
2745 paUsers[i].iNext = pPage->iUserHead;
2746 paUsers[i].iUser = iUser;
2747 paUsers[i].iUserTable = iUserTable;
2748 pPage->iUserHead = i;
2749
2750# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2751 if (pPage->fDirty)
2752 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirty, false /* do not remove */);
2753# endif
2754
2755 /*
2756 * Tell the cache to update its replacement stats for this page.
2757 */
2758 pgmPoolCacheUsed(pPool, pPage);
2759 return VINF_SUCCESS;
2760}
2761
2762
2763/**
2764 * Frees a user record associated with a page.
2765 *
2766 * This does not clear the entry in the user table, it simply returns the
2767 * user record to the chain of free records.
2768 *
2769 * @param pPool The pool.
2770 * @param pPage The shadow page.
2771 * @param iUser The shadow page pool index of the user table.
2772 * @param iUserTable The index into the user table (shadowed).
2773 */
2774static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2775{
2776 /*
2777 * Unlink and free the specified user entry.
2778 */
2779 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2780
2781 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2782 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2783 uint16_t i = pPage->iUserHead;
2784 if ( i != NIL_PGMPOOL_USER_INDEX
2785 && paUsers[i].iUser == iUser
2786 && paUsers[i].iUserTable == iUserTable)
2787 {
2788 pPage->iUserHead = paUsers[i].iNext;
2789
2790 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2791 paUsers[i].iNext = pPool->iUserFreeHead;
2792 pPool->iUserFreeHead = i;
2793 return;
2794 }
2795
2796 /* General: Linear search. */
2797 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2798 while (i != NIL_PGMPOOL_USER_INDEX)
2799 {
2800 if ( paUsers[i].iUser == iUser
2801 && paUsers[i].iUserTable == iUserTable)
2802 {
2803 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2804 paUsers[iPrev].iNext = paUsers[i].iNext;
2805 else
2806 pPage->iUserHead = paUsers[i].iNext;
2807
2808 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2809 paUsers[i].iNext = pPool->iUserFreeHead;
2810 pPool->iUserFreeHead = i;
2811 return;
2812 }
2813 iPrev = i;
2814 i = paUsers[i].iNext;
2815 }
2816
2817 /* Fatal: didn't find it */
2818 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%RGp\n",
2819 iUser, iUserTable, pPage->GCPhys));
2820}
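/*
 * User records (PGMPOOLUSER) are the reverse mapping from a shadow page to
 * the table entries referencing it: each record stores the pool index of the
 * user table (iUser) and the entry index within it (iUserTable), and records
 * for the same page are chained through iNext off pPage->iUserHead. Free
 * records live on pPool->iUserFreeHead; when they run out,
 * pgmPoolTrackFreeOneUser evicts cached pages until a record is released.
 */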
2821
2822
2823/**
2824 * Gets the entry size of a shadow table.
2825 *
2826 * @param enmKind The kind of page.
2827 *
2828 * @returns The size of the entry in bytes. That is, 4 or 8.
2829 * @returns If the kind is not for a table, an assertion is raised and 0 is
2830 * returned.
2831 */
2832DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
2833{
2834 switch (enmKind)
2835 {
2836 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2837 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2838 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2839 case PGMPOOLKIND_32BIT_PD:
2840 case PGMPOOLKIND_32BIT_PD_PHYS:
2841 return 4;
2842
2843 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2844 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2845 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2846 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2847 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2848 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2849 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2850 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2851 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2852 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2853 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2854 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2855 case PGMPOOLKIND_64BIT_PML4:
2856 case PGMPOOLKIND_PAE_PDPT:
2857 case PGMPOOLKIND_ROOT_NESTED:
2858 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2859 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2860 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2861 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2862 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2863 case PGMPOOLKIND_PAE_PD_PHYS:
2864 case PGMPOOLKIND_PAE_PDPT_PHYS:
2865 return 8;
2866
2867 default:
2868 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2869 }
2870}
2871
2872
2873/**
2874 * Gets the entry size of a guest table.
2875 *
2876 * @param enmKind The kind of page.
2877 *
2878 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
2879 * @returns If the kind is not for a table, an assertion is raised and 0 is
2880 * returned.
2881 */
2882DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
2883{
2884 switch (enmKind)
2885 {
2886 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2887 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2888 case PGMPOOLKIND_32BIT_PD:
2889 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2890 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2891 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2892 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2893 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2894 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2895 return 4;
2896
2897 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2898 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2899 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2900 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2901 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2902 case PGMPOOLKIND_64BIT_PML4:
2903 case PGMPOOLKIND_PAE_PDPT:
2904 return 8;
2905
2906 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2907 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2908 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2909 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2910 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2911 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2912 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2913 case PGMPOOLKIND_ROOT_NESTED:
2914 case PGMPOOLKIND_PAE_PD_PHYS:
2915 case PGMPOOLKIND_PAE_PDPT_PHYS:
2916 case PGMPOOLKIND_32BIT_PD_PHYS:
2917 /** @todo can we return 0? (nobody is calling this...) */
2918 AssertFailed();
2919 return 0;
2920
2921 default:
2922 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
2923 }
2924}
2925
2926
2927/**
2928 * Scans one shadow page table for mappings of a physical page.
2929 *
2930 * @returns true/false indicating removal of all relevant PTEs
2931 * @param pVM The VM handle.
2932 * @param pPhysPage The guest page in question.
2933 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
2934 * @param iShw The shadow page table.
2935 * @param cRefs The number of references made in that PT.
2936 */
2937static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
2938{
2939 LogFlow(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
2940 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2941 bool bRet = false;
2942
2943 /*
2944 * Assert sanity.
2945 */
2946 Assert(cRefs == 1);
2947 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2948 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2949
2950 /*
2951 * Then, clear the actual mappings to the page in the shadow PT.
2952 */
2953 switch (pPage->enmKind)
2954 {
2955 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2956 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2957 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2958 {
2959 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2960 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2961 uint32_t u32AndMask, u32OrMask;
2962
2963 u32AndMask = 0;
2964 u32OrMask = 0;
2965
2966 if (!fFlushPTEs)
2967 {
2968 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
2969 {
2970 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
2971 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
2972 u32OrMask = X86_PTE_RW;
2973 u32AndMask = UINT32_MAX;
2974 bRet = true;
2975 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2976 break;
2977
2978 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
2979 u32OrMask = 0;
2980 u32AndMask = ~X86_PTE_RW;
2981 bRet = true;
2982 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
2983 break;
2984 default:
2985 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
2986 break;
2987 }
2988 }
2989 else
2990 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
2991
2992 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
2993 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2994 {
2995 X86PTE Pte;
2996
2997 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2998 Pte.u = (pPT->a[i].u & u32AndMask) | u32OrMask;
2999 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3000 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3001
3002 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3003 cRefs--;
3004 if (!cRefs)
3005 return bRet;
3006 }
3007#ifdef LOG_ENABLED
3008 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3009 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3010 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3011 {
3012 Log(("i=%d cRefs=%d\n", i, cRefs--));
3013 }
3014#endif
3015 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3016 break;
3017 }
3018
3019 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3020 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3021 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3022 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3023 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3024 {
3025 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3026 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3027 uint64_t u64AndMask, u64OrMask;
3028
3029 u64OrMask = 0;
3030 u64AndMask = 0;
3031 if (!fFlushPTEs)
3032 {
3033 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3034 {
3035 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /** No handler installed. */
3036 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /** Monitoring is temporarily disabled. */
3037 u64OrMask = X86_PTE_RW;
3038 u64AndMask = UINT64_MAX;
3039 bRet = true;
3040 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3041 break;
3042
3043 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /** Write access is monitored. */
3044 u64OrMask = 0;
3045 u64AndMask = ~((uint64_t)X86_PTE_RW);
3046 bRet = true;
3047 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3048 break;
3049
3050 default:
3051 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3052 break;
3053 }
3054 }
3055 else
3056 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3057
3058 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3059 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3060 {
3061 X86PTEPAE Pte;
3062
3063 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3064 Pte.u = (pPT->a[i].u & u64AndMask) | u64OrMask;
3065 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3066 Pte.n.u1Write = 0; /* need to disallow writes when dirty bit tracking is still active. */
3067
3068 ASMAtomicWriteSize(&pPT->a[i].u, Pte.u);
3069 cRefs--;
3070 if (!cRefs)
3071 return bRet;
3072 }
3073#ifdef LOG_ENABLED
3074 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3075 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3076 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3077 {
3078 Log(("i=%d cRefs=%d\n", i, cRefs--));
3079 }
3080#endif
3081 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d u64=%RX64\n", cRefs, pPage->iFirstPresent, pPage->cPresent, u64));
3082 break;
3083 }
3084
3085 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3086 {
3087 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3088 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3089 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3090 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3091 {
3092 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
3093 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3094 pPT->a[i].u = 0;
3095 cRefs--;
3096 if (!cRefs)
3097 return bRet;
3098 }
3099#ifdef LOG_ENABLED
3100 Log(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3101 for (unsigned i = 0; i < RT_ELEMENTS(pPT->a); i++)
3102 if ((pPT->a[i].u & (EPT_PTE_PG_MASK | X86_PTE_P)) == u64)
3103 {
3104 Log(("i=%d cRefs=%d\n", i, cRefs--));
3105 }
3106#endif
3107 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
3108 break;
3109 }
3110
3111 default:
3112 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3113 }
3114 return bRet;
3115}
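/*
 * When fFlushPTEs is false the matching PTE is kept but rewritten according
 * to the current handler state of the physical page: with no or a disabled
 * handler the RW bit is forced back on, with an active write handler it is
 * stripped; both cases return true so the caller knows live references
 * remain and must not clear the page's tracking data. An entry carrying
 * PGM_PTFLAGS_TRACK_DIRTY is kept read-only regardless, since guest dirty
 * bit emulation is still pending for it.
 */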
3116
3117
3118/**
3119 * Scans one shadow page table for mappings of a physical page.
3120 *
3121 * @param pVM The VM handle.
3122 * @param pPhysPage The guest page in question.
3123 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3124 * @param iShw The shadow page table.
3125 * @param cRefs The number of references made in that PT.
3126 */
3127static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t cRefs)
3128{
3129 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3130
3131 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d cRefs=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, cRefs));
3132 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3133 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, cRefs);
3134 if (!fKeptPTEs)
3135 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3136 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3137}
3138
3139
3140/**
3141 * Flushes a list of shadow page tables mapping the same physical page.
3142 *
3143 * @param pVM The VM handle.
3144 * @param pPhysPage The guest page in question.
3145 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3146 * @param iPhysExt The physical cross reference extent list to flush.
3147 */
3148static void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3149{
3150 Assert(PGMIsLockOwner(pVM));
3151 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3152 bool fKeepList = false;
3153
3154 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3155 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3156
3157 const uint16_t iPhysExtStart = iPhysExt;
3158 PPGMPOOLPHYSEXT pPhysExt;
3159 do
3160 {
3161 Assert(iPhysExt < pPool->cMaxPhysExts);
3162 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3163 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3164 {
3165 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3166 {
3167 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], 1);
3168 if (!fKeptPTEs)
3169 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3170 else
3171 fKeepList = true;
3172 }
3173 }
3174 /* next */
3175 iPhysExt = pPhysExt->iNext;
3176 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3177
3178 if (!fKeepList)
3179 {
3180 /* insert the list into the free list and clear the ram range entry. */
3181 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3182 pPool->iPhysExtFreeHead = iPhysExtStart;
3183 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3184 }
3185
3186 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3187}
3188
3189
3190/**
3191 * Flushes all shadow page table mappings of the given guest page.
3192 *
3193 * This is typically called when the host page backing the guest one has been
3194 * replaced or when the page protection was changed due to an access handler.
3195 *
3196 * @returns VBox status code.
3197 * @retval VINF_SUCCESS if all references have been successfully cleared.
3198 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3199 * pool cleaning. FF and sync flags are set.
3200 *
3201 * @param pVM The VM handle.
3202 * @param pPhysPage The guest page in question.
3203 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3204 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3205 * flushed, it is NOT touched if this isn't necessary.
3206 * The caller MUST initialize this to @a false.
3207 */
3208int pgmPoolTrackUpdateGCPhys(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3209{
3210 PVMCPU pVCpu = VMMGetCpu(pVM);
3211 pgmLock(pVM);
3212 int rc = VINF_SUCCESS;
3213 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3214 if (u16)
3215 {
3216 /*
3217 * The zero page is currently screwing up the tracking and we'll
3218 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3219 * is defined, zero pages won't normally be mapped. Some kind of solution
3220 * will be needed for this problem of course, but it will have to wait...
3221 */
3222 if (PGM_PAGE_IS_ZERO(pPhysPage))
3223 rc = VINF_PGM_GCPHYS_ALIASED;
3224 else
3225 {
3226# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3227 /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow and
3228 pgmPoolTrackFlushGCPhysPTs will/may kill the pool otherwise. */
3229 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
3230# endif
3231
3232 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3233 pgmPoolTrackFlushGCPhysPT(pVM,
3234 pPhysPage,
3235 fFlushPTEs,
3236 PGMPOOL_TD_GET_IDX(u16),
3237 PGMPOOL_TD_GET_CREFS(u16));
3238 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3239 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3240 else
3241 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3242 *pfFlushTLBs = true;
3243
3244# ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3245 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
3246# endif
3247 }
3248 }
3249
3250 if (rc == VINF_PGM_GCPHYS_ALIASED)
3251 {
3252 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3253 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3254 rc = VINF_PGM_SYNC_CR3;
3255 }
3256 pgmUnlock(pVM);
3257 return rc;
3258}
3259
3260
3261/**
3262 * Scans all shadow page tables for mappings of a physical page.
3263 *
3264 * This may be slow, but it's most likely more efficient than cleaning
3265 * out the entire page pool / cache.
3266 *
3267 * @returns VBox status code.
3268 * @retval VINF_SUCCESS if all references have been successfully cleared.
3269 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3270 * a page pool cleaning.
3271 *
3272 * @param pVM The VM handle.
3273 * @param pPhysPage The guest page in question.
3274 */
3275int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
3276{
3277 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3278 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3279 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3280 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3281
3282#if 1
3283 /*
3284 * There is a limit to what makes sense.
3285 */
3286 if (pPool->cPresent > 1024)
3287 {
3288 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3289 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3290 return VINF_PGM_GCPHYS_ALIASED;
3291 }
3292#endif
3293
3294 /*
3295 * Iterate all the pages until we've encountered all that are in use.
3296 * This is a simple but not quite optimal solution.
3297 */
3298 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3299 const uint32_t u32 = u64;
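/* u64 and u32 are the "present PTE pointing at this page" patterns matched
* against PAE and legacy 32-bit shadow PTEs respectively. */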
3300 unsigned cLeft = pPool->cUsedPages;
3301 unsigned iPage = pPool->cCurPages;
3302 while (--iPage >= PGMPOOL_IDX_FIRST)
3303 {
3304 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3305 if (pPage->GCPhys != NIL_RTGCPHYS)
3306 {
3307 switch (pPage->enmKind)
3308 {
3309 /*
3310 * We only care about shadow page tables.
3311 */
3312 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3313 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3314 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3315 {
3316 unsigned cPresent = pPage->cPresent;
3317 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3318 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3319 if (pPT->a[i].n.u1Present)
3320 {
3321 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3322 {
3323 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3324 pPT->a[i].u = 0;
3325 }
3326 if (!--cPresent)
3327 break;
3328 }
3329 break;
3330 }
3331
3332 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3333 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3334 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3335 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3336 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3337 {
3338 unsigned cPresent = pPage->cPresent;
3339 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3340 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3341 if (pPT->a[i].n.u1Present)
3342 {
3343 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
3344 {
3345 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3346 pPT->a[i].u = 0;
3347 }
3348 if (!--cPresent)
3349 break;
3350 }
3351 break;
3352 }
3353 }
3354 if (!--cLeft)
3355 break;
3356 }
3357 }
3358
3359 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3360 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3361 return VINF_SUCCESS;
3362}
3363
3364
3365/**
3366 * Clears the user entry in a user table.
3367 *
3368 * This is used to remove all references to a page when flushing it.
3369 */
3370static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3371{
3372 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3373 Assert(pUser->iUser < pPool->cCurPages);
3374 uint32_t iUserTable = pUser->iUserTable;
3375
3376 /*
3377 * Map the user page.
3378 */
3379 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3380 union
3381 {
3382 uint64_t *pau64;
3383 uint32_t *pau32;
3384 } u;
3385 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3386
3387 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3388
3389 /* Safety precaution in case we change the paging for other modes too in the future. */
3390 Assert(!pgmPoolIsPageLocked(&pPool->CTX_SUFF(pVM)->pgm.s, pPage));
3391
3392#ifdef VBOX_STRICT
3393 /*
3394 * Some sanity checks.
3395 */
3396 switch (pUserPage->enmKind)
3397 {
3398 case PGMPOOLKIND_32BIT_PD:
3399 case PGMPOOLKIND_32BIT_PD_PHYS:
3400 Assert(iUserTable < X86_PG_ENTRIES);
3401 break;
3402 case PGMPOOLKIND_PAE_PDPT:
3403 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3404 case PGMPOOLKIND_PAE_PDPT_PHYS:
3405 Assert(iUserTable < 4);
3406 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3407 break;
3408 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3409 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3410 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3411 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3412 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3413 case PGMPOOLKIND_PAE_PD_PHYS:
3414 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3415 break;
3416 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3417 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3418 Assert(!(u.pau64[iUserTable] & PGM_PDFLAGS_MAPPING));
3419 break;
3420 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3421 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3422 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3423 break;
3424 case PGMPOOLKIND_64BIT_PML4:
3425 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3426 /* GCPhys >> PAGE_SHIFT is the index here */
3427 break;
3428 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3429 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3430 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3431 break;
3432
3433 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3434 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3435 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3436 break;
3437
3438 case PGMPOOLKIND_ROOT_NESTED:
3439 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3440 break;
3441
3442 default:
3443 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3444 break;
3445 }
3446#endif /* VBOX_STRICT */
3447
3448 /*
3449 * Clear the entry in the user page.
3450 */
3451 switch (pUserPage->enmKind)
3452 {
3453 /* 32-bit entries */
3454 case PGMPOOLKIND_32BIT_PD:
3455 case PGMPOOLKIND_32BIT_PD_PHYS:
3456 ASMAtomicWriteSize(&u.pau32[iUserTable], 0);
3457 break;
3458
3459 /* 64-bit entries */
3460 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3461 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3462 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3463 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3464 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3465#if defined(IN_RC)
3466 /* In 32-bit PAE mode we *must* invalidate the TLB when changing a PDPT entry; the CPU fetches them only during CR3 load, so any
3467 * non-present PDPT entry will continue to cause page faults.
3468 */
3469 ASMReloadCR3();
3470#endif
3471 /* no break */
3472 case PGMPOOLKIND_PAE_PD_PHYS:
3473 case PGMPOOLKIND_PAE_PDPT_PHYS:
3474 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3475 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3476 case PGMPOOLKIND_64BIT_PML4:
3477 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3478 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3479 case PGMPOOLKIND_PAE_PDPT:
3480 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3481 case PGMPOOLKIND_ROOT_NESTED:
3482 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3483 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3484 ASMAtomicWriteSize(&u.pau64[iUserTable], 0);
3485 break;
3486
3487 default:
3488 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3489 }
3490}
3491
3492
3493/**
3494 * Clears all users of a page.
3495 */
3496static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3497{
3498 /*
3499 * Free all the user records.
3500 */
3501 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3502
3503 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3504 uint16_t i = pPage->iUserHead;
3505 while (i != NIL_PGMPOOL_USER_INDEX)
3506 {
3507 /* Clear the entry in the user table. */
3508 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3509
3510 /* Free it. */
3511 const uint16_t iNext = paUsers[i].iNext;
3512 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3513 paUsers[i].iNext = pPool->iUserFreeHead;
3514 pPool->iUserFreeHead = i;
3515
3516 /* Next. */
3517 i = iNext;
3518 }
3519 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3520}
3521
3522
3523/**
3524 * Allocates a new physical cross reference extent.
3525 *
3526 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3527 * @param pVM The VM handle.
3528 * @param piPhysExt Where to store the phys ext index.
3529 */
3530PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
3531{
3532 Assert(PGMIsLockOwner(pVM));
3533 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3534 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3535 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3536 {
3537 STAM_COUNTER_INC(&pPool->StatTrackPhysExtAllocFailures);
3538 return NULL;
3539 }
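/* Unlink the head of the free list; the caller takes ownership of this extent. */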
3540 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3541 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3542 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3543 *piPhysExt = iPhysExt;
3544 return pPhysExt;
3545}
3546
3547
3548/**
3549 * Frees a physical cross reference extent.
3550 *
3551 * @param pVM The VM handle.
3552 * @param iPhysExt The extent to free.
3553 */
3554void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
3555{
3556 Assert(PGMIsLockOwner(pVM));
3557 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3558 Assert(iPhysExt < pPool->cMaxPhysExts);
3559 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3560 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3561 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3562 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3563 pPool->iPhysExtFreeHead = iPhysExt;
3564}
3565
3566
3567/**
3568 * Frees a list of physical cross reference extents.
3569 *
3570 * @param pVM The VM handle.
3571 * @param iPhysExt The index of the first extent in the list to free.
3572 */
3573void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
3574{
3575 Assert(PGMIsLockOwner(pVM));
3576 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3577
3578 const uint16_t iPhysExtStart = iPhysExt;
3579 PPGMPOOLPHYSEXT pPhysExt;
3580 do
3581 {
3582 Assert(iPhysExt < pPool->cMaxPhysExts);
3583 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3584 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3585 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3586
3587 /* next */
3588 iPhysExt = pPhysExt->iNext;
3589 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3590
3591 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3592 pPool->iPhysExtFreeHead = iPhysExtStart;
3593}
3594
3595
3596/**
3597 * Insert a reference into a list of physical cross reference extents.
3598 *
3599 * @returns The new tracking data for PGMPAGE.
3600 *
3601 * @param pVM The VM handle.
3602 * @param iPhysExt The physical extent index of the list head.
3603 * @param iShwPT The shadow page table index.
3604 *
3605 */
3606static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
3607{
3608 Assert(PGMIsLockOwner(pVM));
3609 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3610 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3611
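/* Each extent stores up to RT_ELEMENTS(aidx) shadow page table indices and chains
* to further extents via iNext; the PGMPAGE tracking word points at the list head. */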
3612 /* special common case. */
3613 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3614 {
3615 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3616 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3617 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d}\n", iPhysExt, iShwPT));
3618 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3619 }
3620
3621 /* general treatment. */
3622 const uint16_t iPhysExtStart = iPhysExt;
3623 unsigned cMax = 15;
3624 for (;;)
3625 {
3626 Assert(iPhysExt < pPool->cMaxPhysExts);
3627 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3628 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3629 {
3630 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3631 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
3632 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
3633 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
3634 }
3635 if (!--cMax)
3636 {
3637 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3638 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3639 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
3640 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3641 }
/* Advance to the next extent in the chain; without this the fall-through
below that appends a new extent would be unreachable. */
iPhysExt = paPhysExts[iPhysExt].iNext;
if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
break;
3642 }
3643
3644 /* add another extent to the list. */
3645 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3646 if (!pNew)
3647 {
3648 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
3649 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
3650 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
3651 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3652 }
3653 pNew->iNext = iPhysExtStart;
3654 pNew->aidx[0] = iShwPT;
3655 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
3656 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3657}
3658
3659
3660/**
3661 * Adds a reference to a guest physical page where extents are in use.
3662 *
3663 * @returns The new tracking data for PGMPAGE.
3664 *
3665 * @param pVM The VM handle.
3666 * @param u16 The ram range flags (top 16-bits).
3667 * @param iShwPT The shadow page table index.
3668 */
3669uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
3670{
3671 pgmLock(pVM);
3672 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3673 {
3674 /*
3675 * Convert to extent list.
3676 */
3677 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3678 uint16_t iPhysExt;
3679 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
3680 if (pPhysExt)
3681 {
3682 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
3683 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
3684 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
3685 pPhysExt->aidx[1] = iShwPT;
3686 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3687 }
3688 else
3689 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
3690 }
3691 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3692 {
3693 /*
3694 * Insert into the extent list.
3695 */
3696 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT);
3697 }
3698 else
3699 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
3700 pgmUnlock(pVM);
3701 return u16;
3702}
3703
3704
3705/**
3706 * Clear references to guest physical memory.
3707 *
3708 * @param pPool The pool.
3709 * @param pPage The page.
3710 * @param pPhysPage Pointer to the aPages entry in the ram range.
3711 */
3712void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
3713{
3714 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
3715 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3716
3717 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
3718 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
3719 {
3720 PVM pVM = pPool->CTX_SUFF(pVM);
3721 pgmLock(pVM);
3722
3723 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
3724 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
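/* Walk the singly linked extent chain, remembering the previous node so the
* current extent can be unlinked and freed once it becomes empty. */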
3725 do
3726 {
3727 Assert(iPhysExt < pPool->cMaxPhysExts);
3728
3729 /*
3730 * Look for the shadow page and check if it's all freed.
3731 */
3732 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3733 {
3734 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
3735 {
3736 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
3737
3738 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3739 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
3740 {
3741 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3742 pgmUnlock(pVM);
3743 return;
3744 }
3745
3746 /* we can free the node. */
3747 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
3748 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
3749 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
3750 {
3751 /* lonely node */
3752 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3753 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
3754 PGM_PAGE_SET_TRACKING(pPhysPage, 0);
3755 }
3756 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
3757 {
3758 /* head */
3759 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
3760 PGM_PAGE_SET_TRACKING(pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
3761 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3762 }
3763 else
3764 {
3765 /* in list */
3766 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
3767 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
3768 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
3769 }
3770 iPhysExt = iPhysExtNext;
3771 pgmUnlock(pVM);
3772 return;
3773 }
3774 }
3775
3776 /* next */
3777 iPhysExtPrev = iPhysExt;
3778 iPhysExt = paPhysExts[iPhysExt].iNext;
3779 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3780
3781 pgmUnlock(pVM);
3782 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
3783 }
3784 else /* nothing to do */
3785 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
3786}
3787
3788
3789/**
3790 * Clear references to guest physical memory.
3791 *
3792 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
3793 * is assumed to be correct, so the linear search can be skipped and we can assert
3794 * at an earlier point.
3795 *
3796 * @param pPool The pool.
3797 * @param pPage The page.
3798 * @param HCPhys The host physical address corresponding to the guest page.
3799 * @param GCPhys The guest physical address corresponding to HCPhys.
3800 */
3801static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
3802{
3803 /*
3804 * Walk range list.
3805 */
3806 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3807 while (pRam)
3808 {
3809 RTGCPHYS off = GCPhys - pRam->GCPhys;
3810 if (off < pRam->cb)
3811 {
3812 /* does it match? */
3813 const unsigned iPage = off >> PAGE_SHIFT;
3814 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3815#ifdef LOG_ENABLED
3816 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]);
3817 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
3818#endif
3819 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3820 {
3821 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3822 return;
3823 }
3824 break;
3825 }
3826 pRam = pRam->CTX_SUFF(pNext);
3827 }
3828 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
3829}
3830
3831
3832/**
3833 * Clear references to guest physical memory.
3834 *
3835 * @param pPool The pool.
3836 * @param pPage The page.
3837 * @param HCPhys The host physical address corresponding to the guest page.
3838 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
3839 */
3840void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
3841{
3842 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
3843
3844 /*
3845 * Walk range list.
3846 */
3847 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3848 while (pRam)
3849 {
3850 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
3851 if (off < pRam->cb)
3852 {
3853 /* does it match? */
3854 const unsigned iPage = off >> PAGE_SHIFT;
3855 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
3856 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3857 {
3858 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3859 return;
3860 }
3861 break;
3862 }
3863 pRam = pRam->CTX_SUFF(pNext);
3864 }
3865
3866 /*
3867 * Damn, the hint didn't work. We'll have to do an expensive linear search.
3868 */
3869 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
3870 pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRanges);
3871 while (pRam)
3872 {
3873 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3874 while (iPage-- > 0)
3875 {
3876 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
3877 {
3878 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
3879 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
3880 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
3881 return;
3882 }
3883 }
3884 pRam = pRam->CTX_SUFF(pNext);
3885 }
3886
3887 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp\n", HCPhys, GCPhysHint));
3888}
3889
3890
3891/**
3892 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
3893 *
3894 * @param pPool The pool.
3895 * @param pPage The page.
3896 * @param pShwPT The shadow page table (mapping of the page).
3897 * @param pGstPT The guest page table.
3898 */
3899DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
3900{
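/* Only entries from iFirstPresent onwards can be present, and the loop can stop
* once the page's cPresent present entries have all been dereferenced. */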
3901 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3902 if (pShwPT->a[i].n.u1Present)
3903 {
3904 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
3905 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3906 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3907 if (!--pPage->cPresent)
3908 break;
3909 }
3910}
3911
3912
3913/**
3914 * Clear references to guest physical memory in a PAE / 32-bit page table.
3915 *
3916 * @param pPool The pool.
3917 * @param pPage The page.
3918 * @param pShwPT The shadow page table (mapping of the page).
3919 * @param pGstPT The guest page table (just a half one).
3920 */
3921DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
3922{
3923 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3924 if (pShwPT->a[i].n.u1Present)
3925 {
3926 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
3927 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
3928 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
3929 if (!--pPage->cPresent)
3930 break;
3931 }
3932}
3933
3934
3935/**
3936 * Clear references to guest physical memory in a PAE / PAE page table.
3937 *
3938 * @param pPool The pool.
3939 * @param pPage The page.
3940 * @param pShwPT The shadow page table (mapping of the page).
3941 * @param pGstPT The guest page table.
3942 */
3943DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
3944{
3945 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
3946 if (pShwPT->a[i].n.u1Present)
3947 {
3948 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
3949 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
3950 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
3951 if (!--pPage->cPresent)
3952 break;
3953 }
3954}
3955
3956
3957/**
3958 * Clear references to guest physical memory in a 32-bit / 4MB page table.
3959 *
3960 * @param pPool The pool.
3961 * @param pPage The page.
3962 * @param pShwPT The shadow page table (mapping of the page).
3963 */
3964DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
3965{
3966 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
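/* Big guest pages have no guest PT to consult, so the guest address of each entry
* is derived directly from pPage->GCPhys and the entry index. */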
3967 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3968 if (pShwPT->a[i].n.u1Present)
3969 {
3970 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
3971 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
3972 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
3973 if (!--pPage->cPresent)
3974 break;
3975 }
3976}
3977
3978
3979/**
3980 * Clear references to guest physical memory in a PAE / 2/4MB page table.
3981 *
3982 * @param pPool The pool.
3983 * @param pPage The page.
3984 * @param pShwPT The shadow page table (mapping of the page).
3985 */
3986DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
3987{
3988 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
3989 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
3990 if (pShwPT->a[i].n.u1Present)
3991 {
3992 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
3993 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
3994 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
3995 if (!--pPage->cPresent)
3996 break;
3997 }
3998}
3999
4000
4001/**
4002 * Clear references to shadowed pages in an EPT page table.
4003 *
4004 * @param pPool The pool.
4005 * @param pPage The page.
4006 * @param pShwPT The shadow page table (mapping of the page).
4007 */
4008DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4009{
4010 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4011 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4012 if (pShwPT->a[i].n.u1Present)
4013 {
4014 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4015 i, pShwPT->a[i].u & EPT_PTE_PG_MASK, pPage->GCPhys));
4016 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & EPT_PTE_PG_MASK, GCPhys);
4017 if (!--pPage->cPresent)
4018 break;
4019 }
4020}
4021
4022
4023
4024/**
4025 * Clear references to shadowed pages in a 32-bit page directory.
4026 *
4027 * @param pPool The pool.
4028 * @param pPage The page.
4029 * @param pShwPD The shadow page directory (mapping of the page).
4030 */
4031DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4032{
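/* Entries flagged PGM_PDFLAGS_MAPPING (guest mappings handled by the VMM) are
* skipped below; only the remaining present entries reference pool pages. */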
4033 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4034 {
4035 if ( pShwPD->a[i].n.u1Present
4036 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4037 )
4038 {
4039 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4040 if (pSubPage)
4041 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4042 else
4043 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4044 }
4045 }
4046}
4047
4048/**
4049 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4050 *
4051 * @param pPool The pool.
4052 * @param pPage The page.
4053 * @param pShwPD The shadow page directory (mapping of the page).
4054 */
4055DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4056{
4057 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4058 {
4059 if ( pShwPD->a[i].n.u1Present
4060 && !(pShwPD->a[i].u & PGM_PDFLAGS_MAPPING)
4061 )
4062 {
4063 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
4064 if (pSubPage)
4065 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4066 else
4067 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
4068 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4069 }
4070 }
4071}
4072
4073/**
4074 * Clear references to shadowed pages in a PAE page directory pointer table.
4075 *
4076 * @param pPool The pool.
4077 * @param pPage The page.
4078 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4079 */
4080DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4081{
4082 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4083 {
4084 if ( pShwPDPT->a[i].n.u1Present
4085 && !(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING)
4086 )
4087 {
4088 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4089 if (pSubPage)
4090 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4091 else
4092 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4093 }
4094 }
4095}
4096
4097
4098/**
4099 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4100 *
4101 * @param pPool The pool.
4102 * @param pPage The page.
4103 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4104 */
4105DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4106{
4107 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4108 {
4109 Assert(!(pShwPDPT->a[i].u & PGM_PLXFLAGS_MAPPING));
4110 if (pShwPDPT->a[i].n.u1Present)
4111 {
4112 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & X86_PDPE_PG_MASK);
4113 if (pSubPage)
4114 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4115 else
4116 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & X86_PDPE_PG_MASK));
4117 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4118 }
4119 }
4120}
4121
4122
4123/**
4124 * Clear references to shadowed pages in a 64-bit level 4 page table.
4125 *
4126 * @param pPool The pool.
4127 * @param pPage The page.
4128 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4129 */
4130DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4131{
4132 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4133 {
4134 if (pShwPML4->a[i].n.u1Present)
4135 {
4136 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPML4->a[i].u & X86_PDPE_PG_MASK);
4137 if (pSubPage)
4138 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4139 else
4140 AssertFatalMsgFailed(("%RX64\n", pShwPML4->a[i].u & X86_PML4E_PG_MASK));
4141 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4142 }
4143 }
4144}
4145
4146
4147/**
4148 * Clear references to shadowed pages in an EPT page directory.
4149 *
4150 * @param pPool The pool.
4151 * @param pPage The page.
4152 * @param pShwPD The shadow page directory (mapping of the page).
4153 */
4154DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4155{
4156 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4157 {
4158 if (pShwPD->a[i].n.u1Present)
4159 {
4160 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & EPT_PDE_PG_MASK);
4161 if (pSubPage)
4162 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4163 else
4164 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4165 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4166 }
4167 }
4168}
4169
4170
4171/**
4172 * Clear references to shadowed pages in an EPT page directory pointer table.
4173 *
4174 * @param pPool The pool.
4175 * @param pPage The page.
4176 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4177 */
4178DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4179{
4180 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4181 {
4182 if (pShwPDPT->a[i].n.u1Present)
4183 {
4184 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK);
4185 if (pSubPage)
4186 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4187 else
4188 AssertFatalMsgFailed(("%RX64\n", pShwPDPT->a[i].u & EPT_PDPTE_PG_MASK));
4189 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4190 }
4191 }
4192}
4193
4194
4195/**
4196 * Clears all references made by this page.
4197 *
4198 * This includes other shadow pages and GC physical addresses.
4199 *
4200 * @param pPool The pool.
4201 * @param pPage The page.
4202 */
4203static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4204{
4205 /*
4206 * Map the shadow page and take action according to the page kind.
4207 */
4208 void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
4209 switch (pPage->enmKind)
4210 {
4211 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4212 {
4213 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4214 void *pvGst;
4215 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4216 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4217 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4218 break;
4219 }
4220
4221 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4222 {
4223 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4224 void *pvGst;
4225 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4226 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
4227 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4228 break;
4229 }
4230
4231 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4232 {
4233 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4234 void *pvGst;
4235 int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4236 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
4237 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4238 break;
4239 }
4240
4241 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4242 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4243 {
4244 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4245 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4246 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4247 break;
4248 }
4249
4250 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4251 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4252 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4253 {
4254 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4255 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
4256 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4257 break;
4258 }
4259
4260 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4261 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4262 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4263 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4264 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4265 case PGMPOOLKIND_PAE_PD_PHYS:
4266 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4267 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4268 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4269 break;
4270
4271 case PGMPOOLKIND_32BIT_PD_PHYS:
4272 case PGMPOOLKIND_32BIT_PD:
4273 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4274 break;
4275
4276 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4277 case PGMPOOLKIND_PAE_PDPT:
4278 case PGMPOOLKIND_PAE_PDPT_PHYS:
4279 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4280 break;
4281
4282 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4283 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4284 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4285 break;
4286
4287 case PGMPOOLKIND_64BIT_PML4:
4288 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4289 break;
4290
4291 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4292 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4293 break;
4294
4295 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4296 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4297 break;
4298
4299 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4300 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4301 break;
4302
4303 default:
4304 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4305 }
4306
4307 /* Paranoia: clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4308 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4309 ASMMemZeroPage(pvShw);
4310 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4311 pPage->fZeroed = true;
4312 PGMPOOL_UNLOCK_PTR(pPool->CTX_SUFF(pVM), pvShw);
4313}
4314
4315/**
4316 * Flushes a pool page.
4317 *
4318 * This moves the page to the free list after removing all user references to it.
4319 *
4320 * @returns VBox status code.
4321 * @retval VINF_SUCCESS on success.
4322 * @param pPool The pool.
4323 * @param pPage The shadow page.
4324 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4325 */
4326int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4327{
4328 PVM pVM = pPool->CTX_SUFF(pVM);
4329 bool fFlushRequired = false;
4330
4331 int rc = VINF_SUCCESS;
4332 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4333 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4334 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4335
4336 /*
4337 * Quietly reject any attempts at flushing any of the special root pages.
4338 */
4339 if (pPage->idx < PGMPOOL_IDX_FIRST)
4340 {
4341 AssertFailed(); /* can no longer happen */
4342 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4343 return VINF_SUCCESS;
4344 }
4345
4346 pgmLock(pVM);
4347
4348 /*
4349 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4350 */
4351 if (pgmPoolIsPageLocked(&pVM->pgm.s, pPage))
4352 {
4353 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4354 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4355 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4356 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4357 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4358 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4359 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4360 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4361 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD,
4362 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4363 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4364 pgmUnlock(pVM);
4365 return VINF_SUCCESS;
4366 }
4367
4368#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4369 /* Start a subset so we won't run out of mapping space. */
4370 PVMCPU pVCpu = VMMGetCpu(pVM);
4371 uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
4372#endif
4373
4374 /*
4375 * Mark the page as being in need of an ASMMemZeroPage().
4376 */
4377 pPage->fZeroed = false;
4378
4379#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4380 if (pPage->fDirty)
4381 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirty, false /* do not remove */);
4382#endif
4383
4384 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4385 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4386 fFlushRequired = true;
4387
4388 /*
4389 * Clear the page.
4390 */
4391 pgmPoolTrackClearPageUsers(pPool, pPage);
4392 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4393 pgmPoolTrackDeref(pPool, pPage);
4394 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4395
4396 /*
4397 * Flush it from the cache.
4398 */
4399 pgmPoolCacheFlushPage(pPool, pPage);
4400
4401#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
4402 /* Heavy stuff done. */
4403 PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
4404#endif
4405
4406 /*
4407 * Deregister the monitoring.
4408 */
4409 if (pPage->fMonitored)
4410 rc = pgmPoolMonitorFlush(pPool, pPage);
4411
4412 /*
4413 * Free the page.
4414 */
4415 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4416 pPage->iNext = pPool->iFreeHead;
4417 pPool->iFreeHead = pPage->idx;
4418 pPage->enmKind = PGMPOOLKIND_FREE;
4419 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4420 pPage->GCPhys = NIL_RTGCPHYS;
4421 pPage->fReusedFlushPending = false;
4422
4423 pPool->cUsedPages--;
4424
4425 /* Flush the TLBs of all VCPUs if required. */
4426 if ( fFlushRequired
4427 && fFlush)
4428 {
4429 PGM_INVL_ALL_VCPU_TLBS(pVM);
4430 }
4431
4432 pgmUnlock(pVM);
4433 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4434 return rc;
4435}
4436
4437
4438/**
4439 * Frees a usage of a pool page.
4440 *
4441 * The caller is responsible for updating the user table so that it no longer
4442 * references the shadow page.
4443 *
4444 * @param pPool The pool.
4445 * @param pPage The shadow page.
4446 * @param iUser The shadow page pool index of the user table.
4447 * @param iUserTable The index into the user table (shadowed).
4448 */
4449void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4450{
4451 PVM pVM = pPool->CTX_SUFF(pVM);
4452
4453 STAM_PROFILE_START(&pPool->StatFree, a);
4454 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%#x iUserTable=%#x\n",
4455 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4456 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
4457 pgmLock(pVM);
4458 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4459 if (!pPage->fCached)
4460 pgmPoolFlushPage(pPool, pPage);
4461 pgmUnlock(pVM);
4462 STAM_PROFILE_STOP(&pPool->StatFree, a);
4463}
4464
4465
4466/**
4467 * Makes one or more free pages available.
4468 *
4469 * @returns VBox status code.
4470 * @retval VINF_SUCCESS on success.
4471 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4472 *
4473 * @param pPool The pool.
4474 * @param enmKind Page table kind
4475 * @param iUser The user of the page.
4476 */
4477static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4478{
4479 PVM pVM = pPool->CTX_SUFF(pVM);
4480
4481 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
4482
4483 /*
4484 * If the pool isn't full grown yet, expand it.
4485 */
4486 if ( pPool->cCurPages < pPool->cMaxPages
4487#if defined(IN_RC)
4488 /* Hack alert: we can't deal with jumps to ring 3 when called from MapCR3 and allocating pages for PAE PDs. */
4489 && enmKind != PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4490 && (enmKind < PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD || enmKind > PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD)
4491#endif
4492 )
4493 {
4494 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4495#ifdef IN_RING3
4496 int rc = PGMR3PoolGrow(pVM);
4497#else
4498 int rc = VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_PGM_POOL_GROW, 0);
4499#endif
4500 if (RT_FAILURE(rc))
4501 return rc;
4502 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4503 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4504 return VINF_SUCCESS;
4505 }
4506
4507 /*
4508 * Free one cached page.
4509 */
4510 return pgmPoolCacheFreeOne(pPool, iUser);
4511}
4512
4513/**
4514 * Allocates a page from the pool.
4515 *
4516 * This page may actually be a cached page and not in need of any processing
4517 * on the caller's part.
4518 *
4519 * @returns VBox status code.
4520 * @retval VINF_SUCCESS if a NEW page was allocated.
4521 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4522 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
4523 * @param pVM The VM handle.
4524 * @param GCPhys The GC physical address of the page we're gonna shadow.
4525 * For 4MB and 2MB PD entries, it's the first address the
4526 * shadow PT is covering.
4527 * @param enmKind The kind of mapping.
4528 * @param enmAccess Access type for the mapping (only relevant for big pages)
4529 * @param iUser The shadow page pool index of the user table.
4530 * @param iUserTable The index into the user table (shadowed).
4531 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4532 * @param fLockPage Lock the page
4533 */
4534int pgmPoolAllocEx(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage, bool fLockPage)
4535{
4536 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4537 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4538 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%#x iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4539 *ppPage = NULL;
4540 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4541 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4542 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4543
4544 pgmLock(pVM);
4545
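/* Try the cache first; a hit returns VINF_PGM_CACHED_PAGE and the page needs no
* further initialization here. */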
4546 if (pPool->fCacheEnabled)
4547 {
4548 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, iUser, iUserTable, ppPage);
4549 if (RT_SUCCESS(rc2))
4550 {
4551 if (fLockPage)
4552 pgmPoolLockPage(pPool, *ppPage);
4553 pgmUnlock(pVM);
4554 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4555 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
4556 return rc2;
4557 }
4558 }
4559
4560 /*
4561 * Allocate a new one.
4562 */
4563 int rc = VINF_SUCCESS;
4564 uint16_t iNew = pPool->iFreeHead;
4565 if (iNew == NIL_PGMPOOL_IDX)
4566 {
4567 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
4568 if (RT_FAILURE(rc))
4569 {
4570 pgmUnlock(pVM);
4571 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
4572 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4573 return rc;
4574 }
4575 iNew = pPool->iFreeHead;
4576 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
4577 }
4578
4579 /* unlink the free head */
4580 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
4581 pPool->iFreeHead = pPage->iNext;
4582 pPage->iNext = NIL_PGMPOOL_IDX;
4583
4584 /*
4585 * Initialize it.
4586 */
4587 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
4588 pPage->enmKind = enmKind;
4589 pPage->enmAccess = enmAccess;
4590 pPage->GCPhys = GCPhys;
4591 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
4592 pPage->fMonitored = false;
4593 pPage->fCached = false;
4594#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4595 pPage->fDirty = false;
4596#endif
4597 pPage->fReusedFlushPending = false;
4598 pPage->cModifications = 0;
4599 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4600 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4601 pPage->cPresent = 0;
4602 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
4603 pPage->pvLastAccessHandlerFault = 0;
4604 pPage->cLastAccessHandlerCount = 0;
4605 pPage->pvLastAccessHandlerRip = 0;
4606
4607 /*
4608 * Insert into the tracking and cache. If this fails, free the page.
4609 */
4610 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
4611 if (RT_FAILURE(rc3))
4612 {
4613 pPool->cUsedPages--;
4614 pPage->enmKind = PGMPOOLKIND_FREE;
4615 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4616 pPage->GCPhys = NIL_RTGCPHYS;
4617 pPage->iNext = pPool->iFreeHead;
4618 pPool->iFreeHead = pPage->idx;
4619 pgmUnlock(pVM);
4620 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4621 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
4622 return rc3;
4623 }
4624
4625 /*
4626 * Commit the allocation, clear the page and return.
4627 */
4628#ifdef VBOX_WITH_STATISTICS
4629 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
4630 pPool->cUsedPagesHigh = pPool->cUsedPages;
4631#endif
4632
4633 if (!pPage->fZeroed)
4634 {
4635 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4636 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4637 ASMMemZeroPage(pv);
4638 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4639 }
4640
4641 *ppPage = pPage;
4642 if (fLockPage)
4643 pgmPoolLockPage(pPool, pPage);
4644 pgmUnlock(pVM);
4645 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
4646 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
4647 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
4648 return rc;
4649}
4650
4651
4652/**
4653 * Frees a usage of a pool page.
4654 *
4655 * @param pVM The VM handle.
4656 * @param HCPhys The HC physical address of the shadow page.
4657 * @param iUser The shadow page pool index of the user table.
4658 * @param iUserTable The index into the user table (shadowed).
4659 */
4660void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
4661{
4662 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
4663 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4664 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
4665}
4666
4667/**
4668 * Internal worker for finding an 'in-use' shadow page given by its physical address.
4669 *
4670 * @returns Pointer to the shadow page structure.
4671 * @param pPool The pool.
4672 * @param HCPhys The HC physical address of the shadow page.
4673 */
4674PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
4675{
4676 PVM pVM = pPool->CTX_SUFF(pVM);
4677
4678 Assert(PGMIsLockOwner(pVM));
4679
4680 /*
4681 * Look up the page.
4682 */
4683 pgmLock(pVM);
4684 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
4685 pgmUnlock(pVM);
4686
4687 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
4688 return pPage;
4689}
4690
4691#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
4692/**
4693 * Flushes the specified page if present.
4694 *
4695 * @param pVM The VM handle.
4696 * @param GCPhys Guest physical address of the page to flush
4697 */
4698void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
4699{
4700 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4701
4702 VM_ASSERT_EMT(pVM);
4703
4704 /*
4705 * Look up the GCPhys in the hash.
4706 */
4707 GCPhys = GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
4708 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
4709 if (i == NIL_PGMPOOL_IDX)
4710 return;
4711
4712 do
4713 {
4714 PPGMPOOLPAGE pPage = &pPool->aPages[i];
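/* Unsigned wrap-around trick: the test below is true only when pPage->GCPhys lies
* within the page starting at GCPhys. */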
4715 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
4716 {
4717 switch (pPage->enmKind)
4718 {
4719 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4720 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4721 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4722 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4723 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4724 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4725 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4726 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4727 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4728 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4729 case PGMPOOLKIND_64BIT_PML4:
4730 case PGMPOOLKIND_32BIT_PD:
4731 case PGMPOOLKIND_PAE_PDPT:
4732 {
4733 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
4734#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4735 if (pPage->fDirty)
4736 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
4737 else
4738#endif
4739 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
4740 Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
4741 pgmPoolMonitorChainFlush(pPool, pPage);
4742 return;
4743 }
4744
4745 /* ignore, no monitoring. */
4746 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4747 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4748 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4749 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4750 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4751 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4752 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4753 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4754 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4755 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4756 case PGMPOOLKIND_ROOT_NESTED:
4757 case PGMPOOLKIND_PAE_PD_PHYS:
4758 case PGMPOOLKIND_PAE_PDPT_PHYS:
4759 case PGMPOOLKIND_32BIT_PD_PHYS:
4760 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4761 break;
4762
4763 default:
4764 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
4765 }
4766 }
4767
4768 /* next */
4769 i = pPage->iNext;
4770 } while (i != NIL_PGMPOOL_IDX);
4771 return;
4772}
4773#endif /* IN_RING3 */
4774
4775#ifdef IN_RING3
4776
4777
4778/**
4779 * Reset CPU on hot plugging.
4780 *
4781 * @param pVM The VM handle.
4782 * @param pVCpu The virtual CPU.
4783 */
4784void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
4785{
4786 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4787
4788 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
4789 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4790 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4791}
4792
4793
4794/**
4795 * Resets the pool, flushing the entire cache.
4796 *
4797 * It will assert a global CR3 flush (FF) and assumes the caller is aware of
4798 * this and will execute this CR3 flush.
4799 *
4800 * @param pVM The VM handle.
4801 */
4802void pgmR3PoolReset(PVM pVM)
4803{
4804 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4805
4806 Assert(PGMIsLockOwner(pVM));
4807 STAM_PROFILE_START(&pPool->StatR3Reset, a);
4808 LogFlow(("pgmR3PoolReset:\n"));
4809
4810 /*
4811 * If there are no pages in the pool, there is nothing to do.
4812 */
4813 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
4814 {
4815 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
4816 return;
4817 }
4818
4819 /*
4820 * Exit the shadow mode since we're going to clear everything,
4821 * including the root page.
4822 */
4823 for (VMCPUID i = 0; i < pVM->cCpus; i++)
4824 {
4825 PVMCPU pVCpu = &pVM->aCpus[i];
4826 pgmR3ExitShadowModeBeforePoolFlush(pVM, pVCpu);
4827 }
4828
4829 /*
4830 * Nuke the free list and reinsert all pages into it.
4831 */
4832 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
4833 {
4834 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4835
4836 Assert(pPage->Core.Key == MMPage2Phys(pVM, pPage->pvPageR3));
4837 if (pPage->fMonitored)
4838 pgmPoolMonitorFlush(pPool, pPage);
4839 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4840 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4841 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4842 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4843 pPage->cModifications = 0;
4844 pPage->GCPhys = NIL_RTGCPHYS;
4845 pPage->enmKind = PGMPOOLKIND_FREE;
4846 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4847 Assert(pPage->idx == i);
4848 pPage->iNext = i + 1;
4849 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
4850 pPage->fSeenNonGlobal = false;
4851 pPage->fMonitored = false;
4852#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4853 pPage->fDirty = false;
4854#endif
4855 pPage->fCached = false;
4856 pPage->fReusedFlushPending = false;
4857 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
4858 pPage->iAgeNext = NIL_PGMPOOL_IDX;
4859 pPage->iAgePrev = NIL_PGMPOOL_IDX;
4860 pPage->cLocked = 0;
4861 }
4862 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
4863 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
4864 pPool->cUsedPages = 0;
4865
4866 /*
4867 * Zap and reinitialize the user records.
4868 */
4869 pPool->cPresent = 0;
4870 pPool->iUserFreeHead = 0;
4871 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
4872 const unsigned cMaxUsers = pPool->cMaxUsers;
4873 for (unsigned i = 0; i < cMaxUsers; i++)
4874 {
4875 paUsers[i].iNext = i + 1;
4876 paUsers[i].iUser = NIL_PGMPOOL_IDX;
4877 paUsers[i].iUserTable = 0xfffffffe;
4878 }
4879 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
4880
4881 /*
4882 * Clear all the GCPhys links and rebuild the phys ext free list.
4883 */
4884 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
4885 pRam;
4886 pRam = pRam->CTX_SUFF(pNext))
4887 {
4888 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4889 while (iPage-- > 0)
4890 PGM_PAGE_SET_TRACKING(&pRam->aPages[iPage], 0);
4891 }
4892
4893 pPool->iPhysExtFreeHead = 0;
4894 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4895 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
4896 for (unsigned i = 0; i < cMaxPhysExts; i++)
4897 {
4898 paPhysExts[i].iNext = i + 1;
4899 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
4900 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
4901 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
4902 }
4903 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
4904
4905 /*
4906 * Just zap the modified list.
4907 */
4908 pPool->cModifiedPages = 0;
4909 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
4910
4911 /*
4912 * Clear the GCPhys hash and the age list.
4913 */
4914 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
4915 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
4916 pPool->iAgeHead = NIL_PGMPOOL_IDX;
4917 pPool->iAgeTail = NIL_PGMPOOL_IDX;
4918
4919#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4920 /* Clear all dirty pages. */
4921 pPool->idxFreeDirtyPage = 0;
4922 pPool->cDirtyPages = 0;
4923 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aIdxDirtyPages); i++)
4924 pPool->aIdxDirtyPages[i] = NIL_PGMPOOL_IDX;
4925#endif
4926
4927 /*
4928 * Reinsert active pages into the hash and ensure monitoring chains are correct.
4929 */
4930 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
4931 {
4932 PPGMPOOLPAGE pPage = &pPool->aPages[i];
4933 pPage->iNext = NIL_PGMPOOL_IDX;
4934 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
4935 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
4936 pPage->cModifications = 0;
4937 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
4938 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
4939 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
4940 if (pPage->fMonitored)
4941 {
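 /* Note (added): as the code below shows, this updates the registered physical access
    handler callbacks for all contexts (R3/R0/RC) and reinserts the page into the GCPhys hash. */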
4942 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
4943 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
4944 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
4945 pPool->pfnAccessHandlerRC, MMHyperCCToRC(pVM, pPage),
4946 pPool->pszAccessHandler);
4947 AssertFatalRCSuccess(rc);
4948 pgmPoolHashInsert(pPool, pPage);
4949 }
4950 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
4951 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
4952 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
4953 }
4954
4955 for (VMCPUID i = 0; i < pVM->cCpus; i++)
4956 {
4957 /*
4958 * Re-enter the shadowing mode and assert Sync CR3 FF.
4959 */
4960 PVMCPU pVCpu = &pVM->aCpus[i];
4961 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
4962 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
4963 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4964 }
4965
4966 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
4967}
4968#endif /* IN_RING3 */
4969
4970#ifdef LOG_ENABLED
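/**
 * Converts a pool page kind to a string (for logging purposes).
 *
 * @returns Read-only name of the kind.
 * @param   enmKind     The page kind.
 */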
4971static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
4972{
4973 switch (enmKind)
4974 {
4975 case PGMPOOLKIND_INVALID:
4976 return "PGMPOOLKIND_INVALID";
4977 case PGMPOOLKIND_FREE:
4978 return "PGMPOOLKIND_FREE";
4979 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
4980 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
4981 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4982 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
4983 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4984 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
4985 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
4986 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
4987 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4988 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
4989 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4990 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
4991 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4992 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
4993 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4994 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
4995 case PGMPOOLKIND_32BIT_PD:
4996 return "PGMPOOLKIND_32BIT_PD";
4997 case PGMPOOLKIND_32BIT_PD_PHYS:
4998 return "PGMPOOLKIND_32BIT_PD_PHYS";
4999 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5000 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5001 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5002 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5003 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5004 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5005 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5006 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5007 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5008 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5009 case PGMPOOLKIND_PAE_PD_PHYS:
5010 return "PGMPOOLKIND_PAE_PD_PHYS";
5011 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5012 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5013 case PGMPOOLKIND_PAE_PDPT:
5014 return "PGMPOOLKIND_PAE_PDPT";
5015 case PGMPOOLKIND_PAE_PDPT_PHYS:
5016 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5017 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5018 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5019 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5020 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5021 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5022 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5023 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5024 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5025 case PGMPOOLKIND_64BIT_PML4:
5026 return "PGMPOOLKIND_64BIT_PML4";
5027 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5028 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5029 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5030 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5031 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5032 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5033 case PGMPOOLKIND_ROOT_NESTED:
5034 return "PGMPOOLKIND_ROOT_NESTED";
5035 }
5036 return "Unknown kind!";
5037}
5038#endif /* LOG_ENABLED */