VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@20671

Last change on this file since 20671 was 20671, checked in by vboxsync, 16 years ago

Bigger lock for the pagefault handler.
Avoid deadlocks when syncing notification handlers with our recompiler.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 193.6 KB
 
1/* $Id: PGMAllBth.h 20671 2009-06-17 15:23:14Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27RT_C_DECLS_BEGIN
28PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
43PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
44RT_C_DECLS_END
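/*
 * Note: this header is a paging template rather than a normal translation unit: it is
 * included once for every supported (guest, shadow) paging-mode pair, with PGM_GST_TYPE
 * and PGM_SHW_TYPE defined by the includer. The #error checks below reject mode
 * combinations the code does not implement.
 */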
45
46
47/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
48#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
49# error "Invalid combination; PAE guest implies PAE shadow"
50#endif
51
52#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
53 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
54# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
55#endif
56
57#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
58 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
59# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
60#endif
61
62#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
63 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
64# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
65#endif
66
67#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
68# define PGM_WITHOUT_MAPPINGS
69#endif
70
71
72#ifndef IN_RING3
73/**
74 * #PF Handler for raw-mode guest execution.
75 *
76 * @returns VBox status code (appropriate for trap handling and GC return).
77 *
78 * @param pVCpu VMCPU Handle.
79 * @param uErr The trap error code.
80 * @param pRegFrame Trap register frame.
81 * @param pvFault The fault address.
82 */
83PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
84{
85 PVM pVM = pVCpu->CTX_SUFF(pVM);
86
87# if defined(IN_RC) && defined(VBOX_STRICT)
88 PGMDynCheckLocks(pVM);
89# endif
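 /*
  * Note on the error code bits tested throughout this handler: X86_TRAP_PF_P is clear for
  * a not-present page and set for a protection violation, X86_TRAP_PF_RW indicates a write
  * access, X86_TRAP_PF_US a user-mode access and X86_TRAP_PF_ID an instruction fetch.
  */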
90
91# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
92 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
93 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
94
95# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
96 /*
97 * Hide the instruction fetch trap indicator for now.
98 */
99 /** @todo NXE will change this and we must fix NXE in the switcher too! */
100 if (uErr & X86_TRAP_PF_ID)
101 {
102 uErr &= ~X86_TRAP_PF_ID;
103 TRPMSetErrorCode(pVCpu, uErr);
104 }
105# endif
106
107 /*
108 * Get PDs.
109 */
110 int rc;
111# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
112# if PGM_GST_TYPE == PGM_TYPE_32BIT
113 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
114 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
115
116# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
117
118# if PGM_GST_TYPE == PGM_TYPE_PAE
119 unsigned iPDSrc;
120 X86PDPE PdpeSrc;
121 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
122
123# elif PGM_GST_TYPE == PGM_TYPE_AMD64
124 unsigned iPDSrc;
125 PX86PML4E pPml4eSrc;
126 X86PDPE PdpeSrc;
127 PGSTPD pPDSrc;
128
129 pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
130 Assert(pPml4eSrc);
131# endif
132
133 /* Quick check for a valid guest trap. (PAE & AMD64) */
134 if (!pPDSrc)
135 {
136# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
137 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
138# else
139 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
140# endif
141 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
142 TRPMSetErrorCode(pVCpu, uErr);
143 return VINF_EM_RAW_GUEST_TRAP;
144 }
145# endif
146
147# else /* !PGM_WITH_PAGING */
148 PGSTPD pPDSrc = NULL;
149 const unsigned iPDSrc = 0;
150# endif /* !PGM_WITH_PAGING */
151
152 /* Fetch the guest PDE */
153# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
154 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
155# else
156 GSTPDE PdeSrc;
157 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
158 PdeSrc.n.u1Present = 1;
159 PdeSrc.n.u1Write = 1;
160 PdeSrc.n.u1Accessed = 1;
161 PdeSrc.n.u1User = 1;
162# endif
163
164# if PGM_SHW_TYPE == PGM_TYPE_32BIT
165 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
166 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
167
168# elif PGM_SHW_TYPE == PGM_TYPE_PAE
169 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
170
171 PX86PDPAE pPDDst;
172# if PGM_GST_TYPE != PGM_TYPE_PAE
173 X86PDPE PdpeSrc;
174
175 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
176 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
177# endif
178 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
179 if (rc != VINF_SUCCESS)
180 {
181 AssertRC(rc);
182 return rc;
183 }
184 Assert(pPDDst);
185
186# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
187 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
188 PX86PDPAE pPDDst;
189# if PGM_GST_TYPE == PGM_TYPE_PROT
190 /* AMD-V nested paging */
191 X86PML4E Pml4eSrc;
192 X86PDPE PdpeSrc;
193 PX86PML4E pPml4eSrc = &Pml4eSrc;
194
195 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
196 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
197 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
198# endif
199
200 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
201 if (rc != VINF_SUCCESS)
202 {
203 AssertRC(rc);
204 return rc;
205 }
206 Assert(pPDDst);
207
208# elif PGM_SHW_TYPE == PGM_TYPE_EPT
209 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
210 PEPTPD pPDDst;
211
212 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
213 if (rc != VINF_SUCCESS)
214 {
215 AssertRC(rc);
216 return rc;
217 }
218 Assert(pPDDst);
219# endif
220
221# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
222 /*
223 * If we successfully correct the write protection fault due to dirty bit
224 * tracking, or this page fault is a genuine one, then return immediately.
225 */
226 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
227 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
228 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
229 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
230 || rc == VINF_EM_RAW_GUEST_TRAP)
231 {
232 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
233 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
234 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
235 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
236 }
237
238 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
239# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
240
241 /*
242 * A common case is the not-present error caused by lazy page table syncing.
243 *
244 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
245 * so we can safely assume that the shadow PT is present when calling SyncPage later.
246 *
247 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
248 * of mapping conflict and defer to SyncCR3 in R3.
249 * (Again, we do NOT support access handlers for non-present guest pages.)
250 *
251 */
252 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
253 && !pPDDst->a[iPDDst].n.u1Present
254 && PdeSrc.n.u1Present
255 )
256 {
257 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
258 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
259 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
260 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
261 if (RT_SUCCESS(rc))
262 {
263 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
264 return rc;
265 }
266 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
267 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
268 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
269 return VINF_PGM_SYNC_CR3;
270 }
271
272# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
273 /*
274 * Check if this address is within any of our mappings.
275 *
276 * This is *very* fast and it's gonna save us a bit of effort below and prevent
277 * us from screwing ourself with MMIO2 pages which have a GC Mapping (VRam).
278 * (BTW, it's impossible to have physical access handlers in a mapping.)
279 */
280 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
281 {
282 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
283 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
284 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
285 {
286 if (pvFault < pMapping->GCPtr)
287 break;
288 if (pvFault - pMapping->GCPtr < pMapping->cb)
289 {
290 /*
291 * The first thing we check is if we've got an undetected conflict.
292 */
293 if (!pVM->pgm.s.fMappingsFixed)
294 {
295 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
296 while (iPT-- > 0)
297 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
298 {
299 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
300 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
301 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
302 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
303 return VINF_PGM_SYNC_CR3;
304 }
305 }
306
307 /*
308 * Check if the fault address is in a virtual page access handler range.
309 */
310 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
311 if ( pCur
312 && pvFault - pCur->Core.Key < pCur->cb
313 && uErr & X86_TRAP_PF_RW)
314 {
315# ifdef IN_RC
316 STAM_PROFILE_START(&pCur->Stat, h);
317 pgmUnlock(pVM);
318 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
319 pgmLock(pVM);
320 STAM_PROFILE_STOP(&pCur->Stat, h);
321# else
322 AssertFailed();
323 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
324# endif
325 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
326 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
327 return rc;
328 }
329
330 /*
331 * Pretend we're not here and let the guest handle the trap.
332 */
333 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
334 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
335 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
336 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
337 return VINF_EM_RAW_GUEST_TRAP;
338 }
339 }
340 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
341 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
342# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
343
344 /*
345 * Check if this fault address is flagged for special treatment,
346 * which means we'll have to figure out the physical address and
347 * check flags associated with it.
348 *
349 * ASSUME that we can limit any special access handling to pages
350 * in page tables which the guest believes to be present.
351 */
352 if (PdeSrc.n.u1Present)
353 {
354 RTGCPHYS GCPhys = NIL_RTGCPHYS;
355
356# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
357# if PGM_GST_TYPE == PGM_TYPE_AMD64
358 bool fBigPagesSupported = true;
359# else
360 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
361# endif
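 /*
  * Note: PdeSrc.b.u1Size only denotes a 2/4 MB page when the guest has big pages enabled;
  * in that case the physical address is the big-page base taken from the PDE plus the
  * fault address bits between the big-page size and the 4 KB page size.
  */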
362 if ( PdeSrc.b.u1Size
363 && fBigPagesSupported)
364 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
365 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
366 else
367 {
368 PGSTPT pPTSrc;
369 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
370 if (RT_SUCCESS(rc))
371 {
372 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
373 if (pPTSrc->a[iPTESrc].n.u1Present)
374 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
375 }
376 }
377# else
378 /* No paging so the fault address is the physical address */
379 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
380# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
381
382 /*
383 * If we have a GC address we'll check if it has any flags set.
384 */
385 if (GCPhys != NIL_RTGCPHYS)
386 {
387 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
388
389 PPGMPAGE pPage;
390 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
391 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
392 {
393 if ( PGM_PAGE_HAS_ACTIVE_PHYSICAL_HANDLERS(pPage)
394 || PGM_PAGE_HAS_ACTIVE_VIRTUAL_HANDLERS(pPage))
395 {
396 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
397 {
398 /*
399 * Physical page access handler.
400 */
401 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
402 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
403 if (pCur)
404 {
405# ifdef PGM_SYNC_N_PAGES
406 /*
407 * If the region is write protected and we got a page not present fault, then sync
408 * the pages. If the fault was caused by a read, then restart the instruction.
409 * In case of write access continue to the GC write handler.
410 *
411 * ASSUMES that there is only one handler per page or that they have similar write properties.
412 */
413 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
414 && !(uErr & X86_TRAP_PF_P))
415 {
416 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
417 if ( RT_FAILURE(rc)
418 || !(uErr & X86_TRAP_PF_RW)
419 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
420 {
421 AssertRC(rc);
422 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
423 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
424 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
425 return rc;
426 }
427 }
428# endif
429
430 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
431 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
432 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
433
434# if defined(IN_RC) || defined(IN_RING0)
435 if (pCur->CTX_SUFF(pfnHandler))
436 {
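 /*
  * Note: the PGM lock is dropped around the handler callback, presumably so the callback
  * cannot deadlock against other threads waiting on the PGM lock (cf. the changeset
  * message about avoiding deadlocks with the recompiler); see the @todo below.
  */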
437 STAM_PROFILE_START(&pCur->Stat, h);
438 pgmUnlock(pVM); /* @todo: Not entirely safe. */
439 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pCur->CTX_SUFF(pvUser));
440 pgmLock(pVM);
441 STAM_PROFILE_STOP(&pCur->Stat, h);
442 }
443 else
444# endif
445 rc = VINF_EM_RAW_EMULATE_INSTR;
446
447 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
448 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
449 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
450 return rc;
451 }
452 }
453# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
454 else
455 {
456# ifdef PGM_SYNC_N_PAGES
457 /*
458 * If the region is write protected and we got a page not present fault, then sync
459 * the pages. If the fault was caused by a read, then restart the instruction.
460 * In case of write access continue to the GC write handler.
461 */
462 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
463 && !(uErr & X86_TRAP_PF_P))
464 {
465 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
466 if ( RT_FAILURE(rc)
467 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
468 || !(uErr & X86_TRAP_PF_RW))
469 {
470 AssertRC(rc);
471 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
472 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
473 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
474 return rc;
475 }
476 }
477# endif
478 /*
479 * Ok, it's a virtual page access handler.
480 *
481 * Since it's faster to search by address, we'll do that first
482 * and then retry by GCPhys if that fails.
483 */
484 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
485 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
486 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
487 */
488 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
489 if (pCur)
490 {
491 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
492 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
493 || !(uErr & X86_TRAP_PF_P)
494 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
495 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
496
497 if ( pvFault - pCur->Core.Key < pCur->cb
498 && ( uErr & X86_TRAP_PF_RW
499 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
500 {
501# ifdef IN_RC
502 STAM_PROFILE_START(&pCur->Stat, h);
503 pgmUnlock(pVM);
504 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
505 pgmLock(pVM);
506 STAM_PROFILE_STOP(&pCur->Stat, h);
507# else
508 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
509# endif
510 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
511 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
512 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
513 return rc;
514 }
515 /* Unhandled part of a monitored page */
516 }
517 else
518 {
519 /* Check by physical address. */
520 PPGMVIRTHANDLER pCur;
521 unsigned iPage;
522 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
523 &pCur, &iPage);
524 Assert(RT_SUCCESS(rc) || !pCur);
525 if ( pCur
526 && ( uErr & X86_TRAP_PF_RW
527 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
528 {
529 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
530# ifdef IN_RC
531 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
532 Assert(off < pCur->cb);
533 STAM_PROFILE_START(&pCur->Stat, h);
534 pgmUnlock(pVM);
535 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
536 pgmLock(pVM);
537 STAM_PROFILE_STOP(&pCur->Stat, h);
538# else
539 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
540# endif
541 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
542 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
543 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
544 return rc;
545 }
546 }
547 }
548# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
549
550 /*
551 * There is a handled area of the page, but this fault doesn't belong to it.
552 * We must emulate the instruction.
553 *
554 * To avoid crashing (non-fatally) in the interpreter and going back to the recompiler,
555 * we first check if this was a page-not-present fault for a page with only
556 * write access handlers. Restart the instruction if it wasn't a write access.
557 */
558 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
559
560 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
561 && !(uErr & X86_TRAP_PF_P))
562 {
563 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
564 if ( RT_FAILURE(rc)
565 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
566 || !(uErr & X86_TRAP_PF_RW))
567 {
568 AssertRC(rc);
569 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
570 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
571 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
572 return rc;
573 }
574 }
575
576 /** @todo This particular case can cause quite a lot of overhead. E.g. the early stage of kernel booting in Ubuntu 6.06,
577 * which writes to an unhandled part of the LDT page several million times.
578 */
579 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
580 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
581 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
582 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
583 return rc;
584 } /* if any kind of handler */
585
586# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
587 if (uErr & X86_TRAP_PF_P)
588 {
589 /*
590 * The page isn't marked, but it might still be monitored by a virtual page access handler.
591 * (ASSUMES no temporary disabling of virtual handlers.)
592 */
593 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
594 * we should correct both the shadow page table and physical memory flags, and not only check for
595 * accesses within the handler region but for access to pages with virtual handlers. */
596 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
597 if (pCur)
598 {
599 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
600 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
601 || !(uErr & X86_TRAP_PF_P)
602 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
603 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
604
605 if ( pvFault - pCur->Core.Key < pCur->cb
606 && ( uErr & X86_TRAP_PF_RW
607 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
608 {
609# ifdef IN_RC
610 STAM_PROFILE_START(&pCur->Stat, h);
611 pgmUnlock(pVM);
612 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
613 pgmLock(pVM);
614 STAM_PROFILE_STOP(&pCur->Stat, h);
615# else
616 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
617# endif
618 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
619 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
620 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
621 return rc;
622 }
623 }
624 }
625# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
626 }
627 else
628 {
629 /*
630 * When the guest accesses invalid physical memory (e.g. probing
631 * of RAM or accessing a remapped MMIO range), then we'll fall
632 * back to the recompiler to emulate the instruction.
633 */
634 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
635 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
636 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
637 return VINF_EM_RAW_EMULATE_INSTR;
638 }
639
640 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
641
642# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
643 /*
644 * We are here only if page is present in Guest page tables and
645 * trap is not handled by our handlers.
646 *
647 * Check it for page out-of-sync situation.
648 */
649 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
650
651 if (!(uErr & X86_TRAP_PF_P))
652 {
653 /*
654 * Page is not present in our page tables.
655 * Try to sync it!
656 * BTW, fPageShw is invalid in this branch!
657 */
658 if (uErr & X86_TRAP_PF_US)
659 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
660 else /* supervisor */
661 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
662
663# if defined(LOG_ENABLED) && !defined(IN_RING0)
664 RTGCPHYS GCPhys;
665 uint64_t fPageGst;
666 PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys);
667 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%RGp scan=%d\n",
668 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)));
669# endif /* LOG_ENABLED */
670
671# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
672 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
673 {
674 uint64_t fPageGst;
675 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
676 if ( RT_SUCCESS(rc)
677 && !(fPageGst & X86_PTE_US))
678 {
679 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
680 if ( pvFault == (RTGCPTR)pRegFrame->eip
681 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
682# ifdef CSAM_DETECT_NEW_CODE_PAGES
683 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
684 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)) /* any new code we encounter here */
685# endif /* CSAM_DETECT_NEW_CODE_PAGES */
686 )
687 {
688 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
689 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
690 if (rc != VINF_SUCCESS)
691 {
692 /*
693 * CSAM needs to perform a job in ring 3.
694 *
695 * Sync the page before going to the host context; otherwise we'll end up in a loop if
696 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
697 */
698 LogFlow(("CSAM ring 3 job\n"));
699 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
700 AssertRC(rc2);
701
702 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
703 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
704 return rc;
705 }
706 }
707# ifdef CSAM_DETECT_NEW_CODE_PAGES
708 else if ( uErr == X86_TRAP_PF_RW
709 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
710 && pRegFrame->ecx < 0x10000)
711 {
712 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
713 * to detect loading of new code pages.
714 */
715
716 /*
717 * Decode the instruction.
718 */
719 RTGCPTR PC;
720 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
721 if (rc == VINF_SUCCESS)
722 {
723 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
724 uint32_t cbOp;
725 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
726
727 /* For now we'll restrict this to rep movsw/d instructions */
728 if ( rc == VINF_SUCCESS
729 && pDis->pCurInstr->opcode == OP_MOVSWD
730 && (pDis->prefix & PREFIX_REP))
731 {
732 CSAMMarkPossibleCodePage(pVM, pvFault);
733 }
734 }
735 }
736# endif /* CSAM_DETECT_NEW_CODE_PAGES */
737
738 /*
739 * Mark this page as safe.
740 */
741 /** @todo not correct for pages that contain both code and data!! */
742 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
743 CSAMMarkPage(pVM, (RTRCPTR)pvFault, true);
744 }
745 }
746# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
747 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
748 if (RT_SUCCESS(rc))
749 {
750 /* The page was successfully synced, return to the guest. */
751 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
752 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
753 return VINF_SUCCESS;
754 }
755 }
756 else /* uErr & X86_TRAP_PF_P: */
757 {
758 /*
759 * Write protected pages are made writable when the guest makes the first
760 * write to them. This happens for pages that are shared, write monitored
761 * and not yet allocated.
762 *
763 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
764 * to physically monitored regions that are no longer valid.
765 * Assume for now this only applies to the read/write flag.
766 */
767 if (RT_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
768 {
769 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
770 {
771 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n",
772 GCPhys, pPage, pvFault, uErr));
773 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
774 if (rc != VINF_SUCCESS)
775 {
776 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
777 return rc;
778 }
779 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
780 return VINF_EM_NO_MEMORY;
781 }
782 /// @todo count the above case; else
783 if (uErr & X86_TRAP_PF_US)
784 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
785 else /* supervisor */
786 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
787
788 /*
789 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
790 * page is not present, which is not true in this case.
791 */
792 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
793 if (RT_SUCCESS(rc))
794 {
795 /*
796 * Page was successfully synced, return to guest.
797 */
798# ifdef VBOX_STRICT
799 RTGCPHYS GCPhys;
800 uint64_t fPageGst;
801 if (!HWACCMIsNestedPagingActive(pVM))
802 {
803 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys);
804 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%d fPageGst=%RX64\n", rc, fPageGst));
805 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
806 }
807 uint64_t fPageShw;
808 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
809 AssertMsg((RT_SUCCESS(rc) && ((fPageShw & X86_PTE_RW) || pVM->cCPUs > 1 /* new monitor can be installed during trap e execution */)), ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
810# endif /* VBOX_STRICT */
811 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
812 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
813 return VINF_SUCCESS;
814 }
815
816 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
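 /* With CR0.WP clear, ring-0 guest code may legally write to a page whose guest PTE is
  read-only; the shadow PTE remains read-only, so such writes must be emulated here
  instead of being reflected to the guest. */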
817 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
818 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG)
819 && (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P))
820 {
821 uint64_t fPageGst;
822 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
823 if ( RT_SUCCESS(rc)
824 && !(fPageGst & X86_PTE_RW))
825 {
826 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
827 if (RT_SUCCESS(rc))
828 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
829 else
830 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
831 return rc;
832 }
833 AssertMsgFailed(("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
834 }
835 }
836
837# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
838# ifdef VBOX_STRICT
839 /*
840 * Check for VMM page flags vs. Guest page flags consistency.
841 * Currently only for debug purposes.
842 */
843 if (RT_SUCCESS(rc))
844 {
845 /* Get guest page flags. */
846 uint64_t fPageGst;
847 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
848 if (RT_SUCCESS(rc))
849 {
850 uint64_t fPageShw;
851 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
852
853 /*
854 * Compare page flags.
855 * Note: we have AVL, A, D bits desynched.
856 */
857 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
858 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
859 }
860 else
861 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
862 }
863 else
864 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
865# endif /* VBOX_STRICT */
866# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
867 }
868 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
869# endif /* PGM_OUT_OF_SYNC_IN_GC */
870 }
871 else /* GCPhys == NIL_RTGCPHYS */
872 {
873 /*
874 * Page not present in Guest OS or invalid page table address.
875 * This is potential virtual page access handler food.
876 *
877 * For the present we'll say that our access handlers don't
878 * work for this case - we've already discarded the page table
879 * not present case which is identical to this.
880 *
881 * When we perchance find we need this, we will probably have AVL
882 * trees (offset based) to operate on and we can measure their speed
883 * against mapping a page table and probably rearrange this handling
884 * a bit. (Like, searching virtual ranges before checking the
885 * physical address.)
886 */
887 }
888 }
889 /* else: !present (guest) */
890
891
892# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
893 /*
894 * Conclusion, this is a guest trap.
895 */
896 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
897 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
898 return VINF_EM_RAW_GUEST_TRAP;
899# else
900 /* present, but not a monitored page; perhaps the guest is probing physical memory */
901 return VINF_EM_RAW_EMULATE_INSTR;
902# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
903
904
905# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
906
907 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
908 return VERR_INTERNAL_ERROR;
909# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
910}
911#endif /* !IN_RING3 */
912
913
914/**
915 * Emulation of the invlpg instruction.
916 *
917 *
918 * @returns VBox status code.
919 *
920 * @param pVCpu The VMCPU handle.
921 * @param GCPtrPage Page to invalidate.
922 *
923 * @remark ASSUMES that the guest is updating before invalidating. This order
924 * isn't required by the CPU, so this is speculative and could cause
925 * trouble.
926 * @remark No TLB shootdown is done on any other VCPU as we assume that
927 * invlpg emulation is the *only* reason for calling this function.
928 * (The guest has to shoot down TLB entries on other CPUs itself)
929 * Currently true, but keep in mind!
930 *
931 * @todo Flush page or page directory only if necessary!
932 * @todo Add a #define for simply invalidating the page.
933 */
934PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
935{
936#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
937 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
938 && PGM_SHW_TYPE != PGM_TYPE_EPT
939 int rc;
940 PVM pVM = pVCpu->CTX_SUFF(pVM);
941 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
942
943 Assert(PGMIsLockOwner(pVM));
944
945 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
946 /*
947 * Get the shadow PD entry and skip out if this PD isn't present.
948 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
949 */
950# if PGM_SHW_TYPE == PGM_TYPE_32BIT
951 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
952 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
953
954 /* Fetch the pgm pool shadow descriptor. */
955 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
956 Assert(pShwPde);
957
958# elif PGM_SHW_TYPE == PGM_TYPE_PAE
959 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
960 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
961
962 /* If the shadow PDPE isn't present, then skip the invalidate. */
963 if (!pPdptDst->a[iPdpt].n.u1Present)
964 {
965 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
966 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
967 return VINF_SUCCESS;
968 }
969
970 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
971 PPGMPOOLPAGE pShwPde = NULL;
972 PX86PDPAE pPDDst;
973
974 /* Fetch the pgm pool shadow descriptor. */
975 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
976 AssertRCSuccessReturn(rc, rc);
977 Assert(pShwPde);
978
979 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
980 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
981
982# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
983 /* PML4 */
984 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
985 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
986 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
987 PX86PDPAE pPDDst;
988 PX86PDPT pPdptDst;
989 PX86PML4E pPml4eDst;
990 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
991 if (rc != VINF_SUCCESS)
992 {
993 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
994 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
995 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
996 PGM_INVL_VCPU_TLBS(pVCpu);
997 return VINF_SUCCESS;
998 }
999 Assert(pPDDst);
1000
1001 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1002 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1003
1004 if (!pPdpeDst->n.u1Present)
1005 {
1006 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1007 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
1008 PGM_INVL_VCPU_TLBS(pVCpu);
1009 return VINF_SUCCESS;
1010 }
1011
1012# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1013
1014 const SHWPDE PdeDst = *pPdeDst;
1015 if (!PdeDst.n.u1Present)
1016 {
1017 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1018 return VINF_SUCCESS;
1019 }
1020
1021# if defined(IN_RC)
1022 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1023 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1024# endif
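 /*
  * Note: the general strategy below is lazy invalidation: whenever the shadow entry turns
  * out to be out of sync with the guest entry, the shadow page is freed and the shadow
  * entry cleared, so the next access re-creates it via SyncPT/SyncPage.
  */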
1025
1026 /*
1027 * Get the guest PD entry and calc big page.
1028 */
1029# if PGM_GST_TYPE == PGM_TYPE_32BIT
1030 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
1031 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1032 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1033# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1034 unsigned iPDSrc = 0;
1035# if PGM_GST_TYPE == PGM_TYPE_PAE
1036 X86PDPE PdpeSrc;
1037 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1038# else /* AMD64 */
1039 PX86PML4E pPml4eSrc;
1040 X86PDPE PdpeSrc;
1041 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1042# endif
1043 GSTPDE PdeSrc;
1044
1045 if (pPDSrc)
1046 PdeSrc = pPDSrc->a[iPDSrc];
1047 else
1048 PdeSrc.u = 0;
1049# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1050
1051# if PGM_GST_TYPE == PGM_TYPE_AMD64
1052 const bool fIsBigPage = PdeSrc.b.u1Size;
1053# else
1054 const bool fIsBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
1055# endif
1056
1057# ifdef IN_RING3
1058 /*
1059 * If a CR3 Sync is pending we may ignore the invalidate page operation
1060 * depending on the kind of sync and if it's a global page or not.
1061 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1062 */
1063# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1064 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1065 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1066 && fIsBigPage
1067 && PdeSrc.b.u1Global
1068 )
1069 )
1070# else
1071 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1072# endif
1073 {
1074 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1075 return VINF_SUCCESS;
1076 }
1077# endif /* IN_RING3 */
1078
1079# if PGM_GST_TYPE == PGM_TYPE_AMD64
1080 /* Fetch the pgm pool shadow descriptor. */
1081 PPGMPOOLPAGE pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
1082 Assert(pShwPdpt);
1083
1084 /* Fetch the pgm pool shadow descriptor. */
1085 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1086 Assert(pShwPde);
1087
1088 Assert(pPml4eDst->n.u1Present && (pPml4eDst->u & SHW_PDPT_MASK));
1089 RTGCPHYS GCPhysPdpt = pPml4eSrc->u & X86_PML4E_PG_MASK;
1090
1091 if ( !pPml4eSrc->n.u1Present
1092 || pShwPdpt->GCPhys != GCPhysPdpt)
1093 {
1094 LogFlow(("InvalidatePage: Out-of-sync PML4E (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1095 GCPtrPage, pShwPdpt->GCPhys, GCPhysPdpt, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1096 pgmPoolFreeByPage(pPool, pShwPdpt, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1097 ASMAtomicWriteSize(pPml4eDst, 0);
1098 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1099 PGM_INVL_VCPU_TLBS(pVCpu);
1100 return VINF_SUCCESS;
1101 }
1102 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
1103 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
1104 {
1105 /*
1106 * Mark not present so we can resync the PML4E when it's used.
1107 */
1108 LogFlow(("InvalidatePage: Out-of-sync PML4E at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1109 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1110 pgmPoolFreeByPage(pPool, pShwPdpt, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1111 ASMAtomicWriteSize(pPml4eDst, 0);
1112 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1113 PGM_INVL_VCPU_TLBS(pVCpu);
1114 }
1115 else if (!pPml4eSrc->n.u1Accessed)
1116 {
1117 /*
1118 * Mark not present so we can set the accessed bit.
1119 */
1120 LogFlow(("InvalidatePage: Out-of-sync PML4E (A) at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1121 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1122 pgmPoolFreeByPage(pPool, pShwPdpt, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1123 ASMAtomicWriteSize(pPml4eDst, 0);
1124 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1125 PGM_INVL_VCPU_TLBS(pVCpu);
1126 }
1127
1128 /* Check if the PDPT entry has changed. */
1129 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
1130 RTGCPHYS GCPhysPd = PdpeSrc.u & GST_PDPE_PG_MASK;
1131 if ( !PdpeSrc.n.u1Present
1132 || pShwPde->GCPhys != GCPhysPd)
1133 {
1134 LogFlow(("InvalidatePage: Out-of-sync PDPE (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1135 GCPtrPage, pShwPde->GCPhys, GCPhysPd, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1136 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1137 ASMAtomicWriteSize(pPdpeDst, 0);
1138 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1139 PGM_INVL_VCPU_TLBS(pVCpu);
1140 return VINF_SUCCESS;
1141 }
1142 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
1143 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
1144 {
1145 /*
1146 * Mark not present so we can resync the PDPTE when it's used.
1147 */
1148 LogFlow(("InvalidatePage: Out-of-sync PDPE at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1149 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1150 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1151 ASMAtomicWriteSize(pPdpeDst, 0);
1152 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1153 PGM_INVL_VCPU_TLBS(pVCpu);
1154 }
1155 else if (!PdpeSrc.lm.u1Accessed)
1156 {
1157 /*
1158 * Mark not present so we can set the accessed bit.
1159 */
1160 LogFlow(("InvalidatePage: Out-of-sync PDPE (A) at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1161 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1162 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1163 ASMAtomicWriteSize(pPdpeDst, 0);
1164 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1165 PGM_INVL_VCPU_TLBS(pVCpu);
1166 }
1167# endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1168
1169 /*
1170 * Deal with the Guest PDE.
1171 */
1172 rc = VINF_SUCCESS;
1173 if (PdeSrc.n.u1Present)
1174 {
1175# ifndef PGM_WITHOUT_MAPPING
1176 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1177 {
1178 /*
1179 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1180 */
1181 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1182 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1183 pgmLock(pVM);
1184 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1185 pgmUnlock(pVM);
1186 }
1187 else
1188# endif /* !PGM_WITHOUT_MAPPING */
1189 if ( PdeSrc.n.u1User != PdeDst.n.u1User
1190 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1191 {
1192 /*
1193 * Mark not present so we can resync the PDE when it's used.
1194 */
1195 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1196 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1197 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1198 ASMAtomicWriteSize(pPdeDst, 0);
1199 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1200 PGM_INVL_VCPU_TLBS(pVCpu);
1201 }
1202 else if (!PdeSrc.n.u1Accessed)
1203 {
1204 /*
1205 * Mark not present so we can set the accessed bit.
1206 */
1207 LogFlow(("InvalidatePage: Out-of-sync (A) at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1208 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1209 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1210 ASMAtomicWriteSize(pPdeDst, 0);
1211 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1212 PGM_INVL_VCPU_TLBS(pVCpu);
1213 }
1214 else if (!fIsBigPage)
1215 {
1216 /*
1217 * 4KB - page.
1218 */
1219 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1220 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1221# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1222 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1223 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1224# endif
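 /* Note: a 32-bit guest page table covers 4 MB while a PAE shadow page table covers only
  2 MB, so one guest PT is backed by two shadow PTs; bit 0 of iPDDst selects the half and
  is folded into GCPhys so the comparison below matches the correct shadow page. */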
1225 if (pShwPage->GCPhys == GCPhys)
1226 {
1227# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1228 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1229 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1230 if (pPT->a[iPTEDst].n.u1Present)
1231 {
1232# ifdef PGMPOOL_WITH_USER_TRACKING
1233 /* This is very unlikely with caching/monitoring enabled. */
1234 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1235# endif
1236 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1237 }
1238# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1239 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1240 if (RT_SUCCESS(rc))
1241 rc = VINF_SUCCESS;
1242# endif
1243 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1244 PGM_INVL_PG(pVCpu, GCPtrPage);
1245 }
1246 else
1247 {
1248 /*
1249 * The page table address changed.
1250 */
1251 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1252 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1253 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1254 ASMAtomicWriteSize(pPdeDst, 0);
1255 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1256 PGM_INVL_VCPU_TLBS(pVCpu);
1257 }
1258 }
1259 else
1260 {
1261 /*
1262 * 2/4MB - page.
1263 */
1264 /* Before freeing the page, check if anything really changed. */
1265 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1266 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1267# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1268 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1269 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1270# endif
1271 if ( pShwPage->GCPhys == GCPhys
1272 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1273 {
1274 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1275 /** @todo PAT */
1276 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1277 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1278 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1279 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1280 {
1281 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1282 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1283# if defined(IN_RC)
1284 /* Release the lock on the dynamic pPdeDst mapping again. */
1285 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1286# endif
1287 return VINF_SUCCESS;
1288 }
1289 }
1290
1291 /*
1292 * Ok, the page table is present and it's been changed in the guest.
1293 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1294 * We could do this for some flushes in GC too, but we need an algorithm for
1295 * deciding which 4MB pages containing code likely to be executed very soon.
1296 */
1297 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1298 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1299 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1300 ASMAtomicWriteSize(pPdeDst, 0);
1301 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1302 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1303 }
1304 }
1305 else
1306 {
1307 /*
1308 * Page directory is not present, mark shadow PDE not present.
1309 */
1310 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1311 {
1312 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1313 ASMAtomicWriteSize(pPdeDst, 0);
1314 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1315 PGM_INVL_PG(pVCpu, GCPtrPage);
1316 }
1317 else
1318 {
1319 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1320 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1321 }
1322 }
1323# if defined(IN_RC)
1324 /* Release the lock on the dynamic pPdeDst mapping again. */
1325 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1326# endif
1327 return rc;
1328
1329#else /* guest real and protected mode */
1330 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1331 return VINF_SUCCESS;
1332#endif
1333}
1334
1335
1336#ifdef PGMPOOL_WITH_USER_TRACKING
1337/**
1338 * Update the tracking of shadowed pages.
1339 *
1340 * @param pVCpu The VMCPU handle.
1341 * @param pShwPage The shadow page.
1342 * @param HCPhys The physical page that is being dereferenced.
1343 */
1344DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1345{
1346# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1347 PVM pVM = pVCpu->CTX_SUFF(pVM);
1348
1349 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1350 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1351
1352 /** @todo If this turns out to be a bottle neck (*very* likely) two things can be done:
1353 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1354 * 2. write protect all shadowed pages. I.e. implement caching.
1355 */
1356 /*
1357 * Find the guest address.
1358 */
1359 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1360 pRam;
1361 pRam = pRam->CTX_SUFF(pNext))
1362 {
1363 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1364 while (iPage-- > 0)
1365 {
1366 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1367 {
1368 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1369 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1370 pShwPage->cPresent--;
1371 pPool->cPresent--;
1372 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1373 return;
1374 }
1375 }
1376 }
1377
1378 for (;;)
1379 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1380# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1381 pShwPage->cPresent--;
1382 pVM->pgm.s.CTX_SUFF(pPool)->cPresent--;
1383# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1384}
1385
1386
1387/**
1388 * Update the tracking of shadowed pages.
1389 *
1390 * @param pVCpu The VMCPU handle.
1391 * @param pShwPage The shadow page.
1392 * @param u16 The top 16 bits of pPage->HCPhys.
1393 * @param pPage Pointer to the guest page. This will be modified.
1394 * @param iPTDst The index into the shadow table.
1395 */
1396DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1397{
1398 PVM pVM = pVCpu->CTX_SUFF(pVM);
1399# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1400 /*
1401 * Just deal with the simple first time here.
1402 */
1403 if (!u16)
1404 {
1405 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1406 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1407 }
1408 else
1409 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1410
1411 /* write back */
1412 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1413 PGM_PAGE_SET_TRACKING(pPage, u16);
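 /* The tracking word written back into the PGMPAGE is what later allows the pool to find
  the shadow page table(s) referencing this physical page, e.g. when the page has to be
  write-monitored or freed. */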
1414
1415# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1416
1417 /* update statistics. */
1418 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1419 pShwPage->cPresent++;
1420 if (pShwPage->iFirstPresent > iPTDst)
1421 pShwPage->iFirstPresent = iPTDst;
1422}
1423#endif /* PGMPOOL_WITH_USER_TRACKING */
1424
1425
1426/**
1427 * Creates a 4K shadow page for a guest page.
1428 *
1429 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1430 * physical address. Only the flags of the PdeSrc argument are used. No page structures
1431 * will be mapped in this function.
1432 *
1433 * @param pVCpu The VMCPU handle.
1434 * @param pPteDst Destination page table entry.
1435 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1436 * Can safely assume that only the flags are being used.
1437 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1438 * @param pShwPage Pointer to the shadow page.
1439 * @param iPTDst The index into the shadow table.
1440 *
1441 * @remark Not used for 2/4MB pages!
1442 */
1443DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1444{
1445 if (PteSrc.n.u1Present)
1446 {
1447 PVM pVM = pVCpu->CTX_SUFF(pVM);
1448
1449 /*
1450 * Find the ram range.
1451 */
1452 PPGMPAGE pPage;
1453 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1454 if (RT_SUCCESS(rc))
1455 {
1456#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1457 /* Try make the page writable if necessary. */
1458 if ( PteSrc.n.u1Write
1459 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1460 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1461 {
1462 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1463 AssertRC(rc);
1464 }
1465#endif
1466
1467 /** @todo investigate PWT, PCD and PAT. */
1468 /*
1469 * Make page table entry.
1470 */
1471 SHWPTE PteDst;
1472 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1473 {
1474 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1475 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1476 {
1477#if PGM_SHW_TYPE == PGM_TYPE_EPT
1478 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1479 PteDst.n.u1Present = 1;
1480 PteDst.n.u1Execute = 1;
1481 PteDst.n.u1IgnorePAT = 1;
1482 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1483 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1484#else
1485 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1486 | PGM_PAGE_GET_HCPHYS(pPage);
1487#endif
1488 }
1489 else
1490 {
1491 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1492 PteDst.u = 0;
1493 }
1494 /** @todo count these two kinds. */
1495 }
1496 else
1497 {
1498#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1499 /*
1500 * If the page or page directory entry is not marked accessed,
1501 * we mark the page not present.
1502 */
1503 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1504 {
1505 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1506 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1507 PteDst.u = 0;
1508 }
1509 else
1510 /*
1511 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1512 * when the page is modified.
1513 */
1514 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1515 {
1516 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1517 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1518 | PGM_PAGE_GET_HCPHYS(pPage)
1519 | PGM_PTFLAGS_TRACK_DIRTY;
1520 }
1521 else
1522#endif
1523 {
1524 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1525#if PGM_SHW_TYPE == PGM_TYPE_EPT
1526 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1527 PteDst.n.u1Present = 1;
1528 PteDst.n.u1Write = 1;
1529 PteDst.n.u1Execute = 1;
1530 PteDst.n.u1IgnorePAT = 1;
1531 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1532 /* PteDst.n.u1Size = 0 */
1533#else
1534 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1535 | PGM_PAGE_GET_HCPHYS(pPage);
1536#endif
1537 }
1538 }
1539
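 /* Worked example (illustrative values, 32-bit guest assumed): a guest PTE of
  * 0x00123025 (P|US|A, GCPhys 0x00123000) backed by a host page at HCPhys
  * 0x00456000 yields a shadow PTE of 0x00456025 - guest flags kept, guest
  * physical address replaced by the host physical address. Had the guest PTE
  * been writable but not yet dirty, the R/W bit would also have been cleared
  * and PGM_PTFLAGS_TRACK_DIRTY set so the first write faults and lets us set
  * the guest dirty bit (see the branch above). */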
1540 /*
1541 * Make sure only allocated pages are mapped writable.
1542 */
1543 if ( PteDst.n.u1Write
1544 && PteDst.n.u1Present
1545 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1546 {
1547 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1548 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1549 }
1550
1551#ifdef PGMPOOL_WITH_USER_TRACKING
1552 /*
1553 * Keep user track up to date.
1554 */
1555 if (PteDst.n.u1Present)
1556 {
1557 if (!pPteDst->n.u1Present)
1558 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1559 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1560 {
1561 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1562 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1563 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1564 }
1565 }
1566 else if (pPteDst->n.u1Present)
1567 {
1568 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1569 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1570 }
1571#endif /* PGMPOOL_WITH_USER_TRACKING */
1572
1573 /*
1574 * Update statistics and commit the entry.
1575 */
1576#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1577 if (!PteSrc.n.u1Global)
1578 pShwPage->fSeenNonGlobal = true;
1579#endif
1580 ASMAtomicWriteSize(pPteDst, PteDst.u);
1581 }
1582 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1583 /** @todo count these. */
1584 }
1585 else
1586 {
1587 /*
1588 * Page not-present.
1589 */
1590 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1591#ifdef PGMPOOL_WITH_USER_TRACKING
1592 /* Keep user track up to date. */
1593 if (pPteDst->n.u1Present)
1594 {
1595 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1596 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1597 }
1598#endif /* PGMPOOL_WITH_USER_TRACKING */
1599 ASMAtomicWriteSize(pPteDst, 0);
1600 /** @todo count these. */
1601 }
1602}
1603
1604
1605/**
1606 * Syncs a guest OS page.
1607 *
1608 * There are no conflicts at this point, neither is there any need for
1609 * page table allocations.
1610 *
1611 * @returns VBox status code.
1612 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1613 * @param pVCpu The VMCPU handle.
1614 * @param PdeSrc Page directory entry of the guest.
1615 * @param GCPtrPage Guest context page address.
1616 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1617 * @param uErr Fault error (X86_TRAP_PF_*).
1618 */
1619PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1620{
1621 PVM pVM = pVCpu->CTX_SUFF(pVM);
1622 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1623 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1624
1625 Assert(PGMIsLockOwner(pVM));
1626
1627#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1628 || PGM_GST_TYPE == PGM_TYPE_PAE \
1629 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1630 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1631 && PGM_SHW_TYPE != PGM_TYPE_EPT
1632
1633# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1634 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVCpu) & MSR_K6_EFER_NXE);
1635# endif
1636
1637 /*
1638 * Assert preconditions.
1639 */
1640 Assert(PdeSrc.n.u1Present);
1641 Assert(cPages);
1642 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1643
1644 /*
1645 * Get the shadow PDE, find the shadow page table in the pool.
1646 */
1647# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1648 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1649 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1650
1651 /* Fetch the pgm pool shadow descriptor. */
1652 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1653 Assert(pShwPde);
1654
1655# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1656 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1657 PPGMPOOLPAGE pShwPde = NULL;
1658 PX86PDPAE pPDDst;
1659
1660 /* Fetch the pgm pool shadow descriptor. */
1661 int rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1662 AssertRCSuccessReturn(rc, rc);
1663 Assert(pShwPde);
1664
1665 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1666 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1667
1668# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1669 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1670 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1671 PX86PDPAE pPDDst;
1672 PX86PDPT pPdptDst;
1673
1674 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1675 AssertRCSuccessReturn(rc, rc);
1676 Assert(pPDDst && pPdptDst);
1677 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1678# endif
1679 SHWPDE PdeDst = *pPdeDst;
1680 if (!PdeDst.n.u1Present)
1681 {
1682 AssertMsg(pVM->cCPUs > 1, ("Unexpected missing PDE %p=%llx\n", pPdeDst, (uint64_t)PdeDst.u));
1683 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1684 return VINF_SUCCESS; /* force the instruction to be executed again. */
1685 }
1686
1687 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1688
1689# if PGM_GST_TYPE == PGM_TYPE_AMD64
1690 /* Fetch the pgm pool shadow descriptor. */
1691 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1692 Assert(pShwPde);
1693# endif
1694
1695# if defined(IN_RC)
1696 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1697 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1698# endif
1699
1700 /*
1701 * Check that the page is present and that the shadow PDE isn't out of sync.
1702 */
1703# if PGM_GST_TYPE == PGM_TYPE_AMD64
1704 const bool fBigPage = PdeSrc.b.u1Size;
1705# else
1706 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
1707# endif
1708 RTGCPHYS GCPhys;
1709 if (!fBigPage)
1710 {
1711 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1712# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1713 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1714 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1715# endif
1716 }
1717 else
1718 {
1719 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1720# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1721 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1722 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1723# endif
1724 }
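 /* Background: with a 32-bit guest on a PAE shadow, one guest page table spans
  * 4 MB (1024 entries) while a PAE shadow page table only spans 2 MB (512
  * entries), so each guest PT is shadowed by two shadow PTs. The low bit of
  * iPDDst selects the half: for 4KB tables the second shadow PT starts at the
  * middle of the 4KB guest PT (the PAGE_SIZE/2 offset above), for big pages it
  * is the second 2 MB of the 4 MB region (the X86_PD_PAE_SHIFT bit). */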
1725 if ( pShwPage->GCPhys == GCPhys
1726 && PdeSrc.n.u1Present
1727 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1728 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1729# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1730 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1731# endif
1732 )
1733 {
1734 /*
1735 * Check that the PDE is marked accessed already.
1736 * Since we set the accessed bit *before* getting here on a #PF, this
1737 * check is only meant for dealing with non-#PF'ing paths.
1738 */
1739 if (PdeSrc.n.u1Accessed)
1740 {
1741 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1742 if (!fBigPage)
1743 {
1744 /*
1745 * 4KB Page - Map the guest page table.
1746 */
1747 PGSTPT pPTSrc;
1748 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1749 if (RT_SUCCESS(rc))
1750 {
1751# ifdef PGM_SYNC_N_PAGES
1752 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1753 if ( cPages > 1
1754 && !(uErr & X86_TRAP_PF_P)
1755 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1756 {
1757 /*
1758 * This code path is currently only taken when the caller is PGMTrap0eHandler
1759 * for non-present pages!
1760 *
1761 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1762 * deal with locality.
1763 */
1764 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1765# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1766 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1767 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1768# else
1769 const unsigned offPTSrc = 0;
1770# endif
1771 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1772 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1773 iPTDst = 0;
1774 else
1775 iPTDst -= PGM_SYNC_NR_PAGES / 2;
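 /* Example of the windowing above, assuming PGM_SYNC_NR_PAGES is 8: a fault at
  * iPTDst=3 yields the window [0..6] (iPTDstEnd is computed before the lower
  * clamp, so the window is truncated at the start of the table rather than
  * shifted), while a fault at iPTDst=200 yields [196..203]. */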
1776 for (; iPTDst < iPTDstEnd; iPTDst++)
1777 {
1778 if (!pPTDst->a[iPTDst].n.u1Present)
1779 {
1780 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1781 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1782 NOREF(GCPtrCurPage);
1783#ifndef IN_RING0
1784 /*
1785 * Assuming kernel code will be marked as supervisor - and not as user level
1786 * and executed using a conforming code selector - and marked as read-only.
1787 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1788 */
1789 PPGMPAGE pPage;
1790 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1791 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1792 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)GCPtrCurPage)
1793 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1794 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1795 )
1796#endif /* else: CSAM not active */
1797 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1798 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1799 GCPtrCurPage, PteSrc.n.u1Present,
1800 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1801 PteSrc.n.u1User & PdeSrc.n.u1User,
1802 (uint64_t)PteSrc.u,
1803 (uint64_t)pPTDst->a[iPTDst].u,
1804 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1805 }
1806 }
1807 }
1808 else
1809# endif /* PGM_SYNC_N_PAGES */
1810 {
1811 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1812 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1813 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1814 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1815 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1816 GCPtrPage, PteSrc.n.u1Present,
1817 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1818 PteSrc.n.u1User & PdeSrc.n.u1User,
1819 (uint64_t)PteSrc.u,
1820 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1821 }
1822 }
1823 else /* MMIO or invalid page: emulated in #PF handler. */
1824 {
1825 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1826 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1827 }
1828 }
1829 else
1830 {
1831 /*
1832 * 4/2MB page - lazy syncing shadow 4K pages.
1833 * (There are many causes of getting here, it's no longer only CSAM.)
1834 */
1835 /* Calculate the GC physical address of this 4KB shadow page. */
1836 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1837 /* Find ram range. */
1838 PPGMPAGE pPage;
1839 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1840 if (RT_SUCCESS(rc))
1841 {
1842# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1843 /* Try make the page writable if necessary. */
1844 if ( PdeSrc.n.u1Write
1845 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1846 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1847 {
1848 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
1849 AssertRC(rc);
1850 }
1851# endif
1852
1853 /*
1854 * Make shadow PTE entry.
1855 */
1856 SHWPTE PteDst;
1857 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1858 | PGM_PAGE_GET_HCPHYS(pPage);
1859 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1860 {
1861 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1862 PteDst.n.u1Write = 0;
1863 else
1864 PteDst.u = 0;
1865 }
1866 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1867# ifdef PGMPOOL_WITH_USER_TRACKING
1868 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1869 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1870# endif
1871 /* Make sure only allocated pages are mapped writable. */
1872 if ( PteDst.n.u1Write
1873 && PteDst.n.u1Present
1874 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1875 {
1876 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1877 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1878 }
1879
1880 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1881
1882 /*
1883 * If the page is not flagged as dirty and is writable, then make it read-only
1884 * at PD level, so we can set the dirty bit when the page is modified.
1885 *
1886 * ASSUMES that page access handlers are implemented on page table entry level.
1887 * Thus we will first catch the dirty access and set PDE.D and restart. If
1888 * there is an access handler, we'll trap again and let it work on the problem.
1889 */
1890 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1891 * As for invlpg, it simply frees the whole shadow PT.
1892 * ...It's possibly because the guest clears it and doesn't really tell us... */
1893 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1894 {
1895 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1896 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1897 PdeDst.n.u1Write = 0;
1898 }
1899 else
1900 {
1901 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1902 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1903 }
1904 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1905 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1906 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1907 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1908 }
1909 else
1910 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1911 }
1912# if defined(IN_RC)
1913 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1914 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1915# endif
1916 return VINF_SUCCESS;
1917 }
1918 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1919 }
1920 else
1921 {
1922 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1923 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1924 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1925 }
1926
1927 /*
1928 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1929 * Yea, I'm lazy.
1930 */
1931 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1932 ASMAtomicWriteSize(pPdeDst, 0);
1933
1934# if defined(IN_RC)
1935 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1936 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1937# endif
1938 PGM_INVL_VCPU_TLBS(pVCpu);
1939 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1940
1941#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1942 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1943 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1944 && !defined(IN_RC)
1945
1946# ifdef PGM_SYNC_N_PAGES
1947 /*
1948 * Get the shadow PDE, find the shadow page table in the pool.
1949 */
1950# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1951 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
1952
1953# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1954 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
1955
1956# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1957 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1958 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1959 PX86PDPAE pPDDst;
1960 X86PDEPAE PdeDst;
1961 PX86PDPT pPdptDst;
1962
1963 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1964 AssertRCSuccessReturn(rc, rc);
1965 Assert(pPDDst && pPdptDst);
1966 PdeDst = pPDDst->a[iPDDst];
1967# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1968 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1969 PEPTPD pPDDst;
1970 EPTPDE PdeDst;
1971
1972 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
1973 if (rc != VINF_SUCCESS)
1974 {
1975 AssertRC(rc);
1976 return rc;
1977 }
1978 Assert(pPDDst);
1979 PdeDst = pPDDst->a[iPDDst];
1980# endif
1981 AssertMsg(PdeDst.n.u1Present, ("%#llx\n", (uint64_t)PdeDst.u));
1982 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1983 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1984
1985 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1986 if ( cPages > 1
1987 && !(uErr & X86_TRAP_PF_P)
1988 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1989 {
1990 /*
1991 * This code path is currently only taken when the caller is PGMTrap0eHandler
1992 * for non-present pages!
1993 *
1994 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1995 * deal with locality.
1996 */
1997 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1998 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1999 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2000 iPTDst = 0;
2001 else
2002 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2003 for (; iPTDst < iPTDstEnd; iPTDst++)
2004 {
2005 if (!pPTDst->a[iPTDst].n.u1Present)
2006 {
2007 GSTPTE PteSrc;
2008
2009 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2010
2011 /* Fake the page table entry */
2012 PteSrc.u = GCPtrCurPage;
2013 PteSrc.n.u1Present = 1;
2014 PteSrc.n.u1Dirty = 1;
2015 PteSrc.n.u1Accessed = 1;
2016 PteSrc.n.u1Write = 1;
2017 PteSrc.n.u1User = 1;
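 /* Note: in this build variant there is no guest paging to consult (real or
  * protected mode guest, or nested paging where the guest's own tables are
  * walked by the CPU), so the guest PTE is synthesized: it identity-maps the
  * linear address (PteSrc.u = GCPtrCurPage) and grants full access, leaving
  * actual protection to whatever mechanism applies outside these tables. */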
2018
2019 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2020
2021 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2022 GCPtrCurPage, PteSrc.n.u1Present,
2023 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2024 PteSrc.n.u1User & PdeSrc.n.u1User,
2025 (uint64_t)PteSrc.u,
2026 (uint64_t)pPTDst->a[iPTDst].u,
2027 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2028
2029 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2030 break;
2031 }
2032 else
2033 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2034 }
2035 }
2036 else
2037# endif /* PGM_SYNC_N_PAGES */
2038 {
2039 GSTPTE PteSrc;
2040 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2041 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2042
2043 /* Fake the page table entry */
2044 PteSrc.u = GCPtrCurPage;
2045 PteSrc.n.u1Present = 1;
2046 PteSrc.n.u1Dirty = 1;
2047 PteSrc.n.u1Accessed = 1;
2048 PteSrc.n.u1Write = 1;
2049 PteSrc.n.u1User = 1;
2050 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2051
2052 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
2053 GCPtrPage, PteSrc.n.u1Present,
2054 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2055 PteSrc.n.u1User & PdeSrc.n.u1User,
2056 (uint64_t)PteSrc.u,
2057 (uint64_t)pPTDst->a[iPTDst].u,
2058 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2059 }
2060 return VINF_SUCCESS;
2061
2062#else
2063 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2064 return VERR_INTERNAL_ERROR;
2065#endif
2066}
2067
2068
2069#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2070/**
2071 * Investigate page fault and handle write protection page faults caused by
2072 * dirty bit tracking.
2073 *
2074 * @returns VBox status code.
2075 * @param pVCpu The VMCPU handle.
2076 * @param uErr Page fault error code.
2077 * @param pPdeDst Shadow page directory entry.
2078 * @param pPdeSrc Guest page directory entry.
2079 * @param GCPtrPage Guest context page address.
2080 */
2081PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2082{
2083 bool fWriteProtect = !!(CPUMGetGuestCR0(pVCpu) & X86_CR0_WP);
2084 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2085 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2086# if PGM_GST_TYPE == PGM_TYPE_AMD64
2087 bool fBigPagesSupported = true;
2088# else
2089 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
2090# endif
2091# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2092 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVCpu) & MSR_K6_EFER_NXE);
2093# endif
2094 unsigned uPageFaultLevel;
2095 int rc;
2096 PVM pVM = pVCpu->CTX_SUFF(pVM);
2097 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2098
2099 Assert(PGMIsLockOwner(pVM));
2100
2101 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2102 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2103
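 /* Dirty/accessed-bit emulation in a nutshell (orientation only, summarising
  * the code below): SyncPT/SyncPage install writable-but-not-yet-dirty guest
  * pages as read-only shadow entries tagged with PGM_PTFLAGS_TRACK_DIRTY (or
  * PGM_PDFLAGS_TRACK_DIRTY for big pages). The first guest write faults and
  * ends up here: the guest A and D bits are set, the shadow entry is made
  * writable again, the TLB entry is flushed, and VINF_PGM_HANDLED_DIRTY_BIT_FAULT
  * tells the caller to restart the instruction. Genuine protection violations
  * are filtered out first and reported as VINF_EM_RAW_GUEST_TRAP. */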
2104# if PGM_GST_TYPE == PGM_TYPE_PAE \
2105 || PGM_GST_TYPE == PGM_TYPE_AMD64
2106
2107# if PGM_GST_TYPE == PGM_TYPE_AMD64
2108 PX86PML4E pPml4eSrc;
2109 PX86PDPE pPdpeSrc;
2110
2111 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc);
2112 Assert(pPml4eSrc);
2113
2114 /*
2115 * Real page fault? (PML4E level)
2116 */
2117 if ( (uErr & X86_TRAP_PF_RSVD)
2118 || !pPml4eSrc->n.u1Present
2119 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
2120 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2121 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2122 )
2123 {
2124 uPageFaultLevel = 0;
2125 goto l_UpperLevelPageFault;
2126 }
2127 Assert(pPdpeSrc);
2128
2129# else /* PAE */
2130 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVCpu->pgm.s, GCPtrPage);
2131# endif /* PAE */
2132
2133 /*
2134 * Real page fault? (PDPE level)
2135 */
2136 if ( (uErr & X86_TRAP_PF_RSVD)
2137 || !pPdpeSrc->n.u1Present
2138# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2139 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
2140 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
2141 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2142# endif
2143 )
2144 {
2145 uPageFaultLevel = 1;
2146 goto l_UpperLevelPageFault;
2147 }
2148# endif
2149
2150 /*
2151 * Real page fault? (PDE level)
2152 */
2153 if ( (uErr & X86_TRAP_PF_RSVD)
2154 || !pPdeSrc->n.u1Present
2155# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2156 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
2157# endif
2158 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2159 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2160 {
2161 uPageFaultLevel = 2;
2162 goto l_UpperLevelPageFault;
2163 }
2164
2165 /*
2166 * First check the easy case where the page directory has been marked read-only to track
2167 * the dirty bit of an emulated BIG page
2168 */
2169 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2170 {
2171 /* Mark guest page directory as accessed */
2172# if PGM_GST_TYPE == PGM_TYPE_AMD64
2173 pPml4eSrc->n.u1Accessed = 1;
2174 pPdpeSrc->lm.u1Accessed = 1;
2175# endif
2176 pPdeSrc->b.u1Accessed = 1;
2177
2178 /*
2179 * Only write protection page faults are relevant here.
2180 */
2181 if (fWriteFault)
2182 {
2183 /* Mark guest page directory as dirty (BIG page only). */
2184 pPdeSrc->b.u1Dirty = 1;
2185
2186 if (pPdeDst->n.u1Present)
2187 {
2188 if (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY)
2189 {
2190 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2191 Assert(pPdeSrc->b.u1Write);
2192
2193 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2194 * fault again and take this path to only invalidate the entry.
2195 */
2196 pPdeDst->n.u1Write = 1;
2197 pPdeDst->n.u1Accessed = 1;
2198 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2199 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2200 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2201 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2202 }
2203# ifdef IN_RING0
2204 else
2205 /* Check for stale TLB entry; only applies to the SMP guest case. */
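 /* (Scenario: another VCPU already took the dirty-bit fault and made this
  * entry writable and accessed; we merely hold a stale read-only TLB entry,
  * so flushing it is all that is needed.) */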
2206 if ( pVM->cCPUs > 1
2207 && pPdeDst->n.u1Write
2208 && pPdeDst->n.u1Accessed)
2209 {
2210 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2211 if (pShwPage)
2212 {
2213 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2214 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2215 if ( pPteDst->n.u1Present
2216 && pPteDst->n.u1Write)
2217 {
2218 /* Stale TLB entry. */
2219 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2220 PGM_INVL_PG(pVCpu, GCPtrPage);
2221
2222 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2223 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2224 }
2225 }
2226 }
2227# endif /* IN_RING0 */
2228 }
2229 }
2230 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2231 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2232 }
2233 /* else: 4KB page table */
2234
2235 /*
2236 * Map the guest page table.
2237 */
2238 PGSTPT pPTSrc;
2239 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2240 if (RT_SUCCESS(rc))
2241 {
2242 /*
2243 * Real page fault?
2244 */
2245 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2246 const GSTPTE PteSrc = *pPteSrc;
2247 if ( !PteSrc.n.u1Present
2248# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2249 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
2250# endif
2251 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
2252 || (fUserLevelFault && !PteSrc.n.u1User)
2253 )
2254 {
2255 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2256 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2257 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2258
2259 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2260 * See the 2nd case above as well.
2261 */
2262 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2263 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2264
2265 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2266 return VINF_EM_RAW_GUEST_TRAP;
2267 }
2268 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2269
2270 /*
2271 * Set the accessed bits in the page directory and the page table.
2272 */
2273# if PGM_GST_TYPE == PGM_TYPE_AMD64
2274 pPml4eSrc->n.u1Accessed = 1;
2275 pPdpeSrc->lm.u1Accessed = 1;
2276# endif
2277 pPdeSrc->n.u1Accessed = 1;
2278 pPteSrc->n.u1Accessed = 1;
2279
2280 /*
2281 * Only write protection page faults are relevant here.
2282 */
2283 if (fWriteFault)
2284 {
2285 /* Write access, so mark guest entry as dirty. */
2286# ifdef VBOX_WITH_STATISTICS
2287 if (!pPteSrc->n.u1Dirty)
2288 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2289 else
2290 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2291# endif
2292
2293 pPteSrc->n.u1Dirty = 1;
2294
2295 if (pPdeDst->n.u1Present)
2296 {
2297#ifndef IN_RING0
2298 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2299 * Our individual shadow handlers will provide more information and force a fatal exit.
2300 */
2301 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2302 {
2303 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2304 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2305 return VINF_SUCCESS;
2306 }
2307#endif
2308 /*
2309 * Map shadow page table.
2310 */
2311 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2312 if (pShwPage)
2313 {
2314 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2315 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2316 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2317 {
2318 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2319 {
2320 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2321# ifdef VBOX_STRICT
2322 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2323 if (pPage)
2324 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
2325 ("Unexpected dirty bit tracking on monitored page %RGv (phys %RGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
2326# endif
2327 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2328
2329 Assert(pPteSrc->n.u1Write);
2330
2331 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2332 * fault again and take this path to only invalidate the entry.
2333 */
2334 pPteDst->n.u1Write = 1;
2335 pPteDst->n.u1Dirty = 1;
2336 pPteDst->n.u1Accessed = 1;
2337 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2338 PGM_INVL_PG(pVCpu, GCPtrPage);
2339
2340 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2341 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2342 }
2343# ifdef IN_RING0
2344 else
2345 /* Check for stale TLB entry; only applies to the SMP guest case. */
2346 if ( pVM->cCPUs > 1
2347 && pPteDst->n.u1Write == 1
2348 && pPteDst->n.u1Accessed == 1)
2349 {
2350 /* Stale TLB entry. */
2351 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2352 PGM_INVL_PG(pVCpu, GCPtrPage);
2353
2354 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2355 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2356 }
2357# endif
2358 }
2359 }
2360 else
2361 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2362 }
2363 }
2364/** @todo Optimize accessed bit emulation? */
2365# ifdef VBOX_STRICT
2366 /*
2367 * Sanity check.
2368 */
2369 else if ( !pPteSrc->n.u1Dirty
2370 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2371 && pPdeDst->n.u1Present)
2372 {
2373 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2374 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2375 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2376 if ( pPteDst->n.u1Present
2377 && pPteDst->n.u1Write)
2378 LogFlow(("Writable present page %RGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2379 }
2380# endif /* VBOX_STRICT */
2381 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2382 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2383 }
2384 AssertRC(rc);
2385 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2386 return rc;
2387
2388
2389l_UpperLevelPageFault:
2390 /*
2391 * Pagefault detected while checking the PML4E, PDPE or PDE.
2392 * Single exit handler to get rid of duplicate code paths.
2393 */
2394 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2395 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2396 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2397
2398 if (
2399# if PGM_GST_TYPE == PGM_TYPE_AMD64
2400 pPml4eSrc->n.u1Present &&
2401# endif
2402# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2403 pPdpeSrc->n.u1Present &&
2404# endif
2405 pPdeSrc->n.u1Present)
2406 {
2407 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2408 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2409 {
2410 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2411 }
2412 else
2413 {
2414 /*
2415 * Map the guest page table.
2416 */
2417 PGSTPT pPTSrc;
2418 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2419 if (RT_SUCCESS(rc))
2420 {
2421 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2422 const GSTPTE PteSrc = *pPteSrc;
2423 if (pPteSrc->n.u1Present)
2424 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2425 }
2426 AssertRC(rc);
2427 }
2428 }
2429 return VINF_EM_RAW_GUEST_TRAP;
2430}
2431#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2432
2433
2434/**
2435 * Sync a shadow page table.
2436 *
2437 * The shadow page table is not present. This includes the case where
2438 * there is a conflict with a mapping.
2439 *
2440 * @returns VBox status code.
2441 * @param pVCpu The VMCPU handle.
2442 * @param iPDSrc Page directory index.
2443 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2444 * Assume this is a temporary mapping.
2445 * @param GCPtrPage GC Pointer of the page that caused the fault
2446 */
2447PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2448{
2449 PVM pVM = pVCpu->CTX_SUFF(pVM);
2450 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2451
2452 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2453 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2454 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2455
2456 Assert(PGMIsLocked(pVM));
2457
2458#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2459 || PGM_GST_TYPE == PGM_TYPE_PAE \
2460 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2461 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2462 && PGM_SHW_TYPE != PGM_TYPE_EPT
2463
2464 int rc = VINF_SUCCESS;
2465
2466 /*
2467 * Validate input a little bit.
2468 */
2469 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2470# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2471 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2472 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2473
2474 /* Fetch the pgm pool shadow descriptor. */
2475 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2476 Assert(pShwPde);
2477
2478# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2479 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2480 PPGMPOOLPAGE pShwPde = NULL;
2481 PX86PDPAE pPDDst;
2482 PSHWPDE pPdeDst;
2483
2484 /* Fetch the pgm pool shadow descriptor. */
2485 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2486 AssertRCSuccessReturn(rc, rc);
2487 Assert(pShwPde);
2488
2489 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2490 pPdeDst = &pPDDst->a[iPDDst];
2491
2492# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2493 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2494 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2495 PX86PDPAE pPDDst;
2496 PX86PDPT pPdptDst;
2497 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2498 AssertRCSuccessReturn(rc, rc);
2499 Assert(pPDDst);
2500 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2501# endif
2502 SHWPDE PdeDst = *pPdeDst;
2503
2504# if PGM_GST_TYPE == PGM_TYPE_AMD64
2505 /* Fetch the pgm pool shadow descriptor. */
2506 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2507 Assert(pShwPde);
2508# endif
2509
2510# ifndef PGM_WITHOUT_MAPPINGS
2511 /*
2512 * Check for conflicts.
2513 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2514 * HC: Simply resolve the conflict.
2515 */
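 /* (A "conflict" means the guest wants a page table in a PDE slot currently
  * occupied by one of our hypervisor mappings, marked with PGM_PDFLAGS_MAPPING.
  * In ring-3 it is resolved on the spot via pgmR3SyncPTResolveConflict*, in
  * GC/R0 we bail out with VERR_ADDRESS_CONFLICT and let ring-3 deal with it.) */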
2516 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2517 {
2518 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2519# ifndef IN_RING3
2520 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2521 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2522 return VERR_ADDRESS_CONFLICT;
2523# else
2524 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2525 Assert(pMapping);
2526# if PGM_GST_TYPE == PGM_TYPE_32BIT
2527 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2528# elif PGM_GST_TYPE == PGM_TYPE_PAE
2529 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2530# else
2531 AssertFailed(); /* can't happen for amd64 */
2532# endif
2533 if (RT_FAILURE(rc))
2534 {
2535 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2536 return rc;
2537 }
2538 PdeDst = *pPdeDst;
2539# endif
2540 }
2541# else /* PGM_WITHOUT_MAPPINGS */
2542 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2543# endif /* PGM_WITHOUT_MAPPINGS */
2544 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2545
2546# if defined(IN_RC)
2547 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2548 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2549# endif
2550
2551 /*
2552 * Sync page directory entry.
2553 */
2554 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2555 if (PdeSrc.n.u1Present)
2556 {
2557 /*
2558 * Allocate & map the page table.
2559 */
2560 PSHWPT pPTDst;
2561# if PGM_GST_TYPE == PGM_TYPE_AMD64
2562 const bool fPageTable = !PdeSrc.b.u1Size;
2563# else
2564 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
2565# endif
2566 PPGMPOOLPAGE pShwPage;
2567 RTGCPHYS GCPhys;
2568 if (fPageTable)
2569 {
2570 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2571# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2572 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2573 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2574# endif
2575 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2576 }
2577 else
2578 {
2579 PGMPOOLACCESS enmAccess;
2580
2581 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2582# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2583 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2584 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2585# endif
2586 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2587 if (PdeSrc.n.u1User)
2588 {
2589 if (PdeSrc.n.u1Write)
2590 enmAccess = PGMPOOLACCESS_USER_RW;
2591 else
2592 enmAccess = PGMPOOLACCESS_USER_R;
2593 }
2594 else
2595 {
2596 if (PdeSrc.n.u1Write)
2597 enmAccess = PGMPOOLACCESS_SUPERVISOR_RW;
2598 else
2599 enmAccess = PGMPOOLACCESS_SUPERVISOR_R;
2600 }
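 /* (Why the access type matters: pgmPoolAllocEx may return a cached shadow PT
  * for this GCPhys, and for big pages the PDE's R/W and U/S bits get baked into
  * every shadow PTE via PteDstBase further down. Including enmAccess in the
  * lookup prevents reusing a table that was built with different permissions.) */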
2601 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2602 }
2603 if (rc == VINF_SUCCESS)
2604 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2605 else if (rc == VINF_PGM_CACHED_PAGE)
2606 {
2607 /*
2608 * The PT was cached, just hook it up.
2609 */
2610 if (fPageTable)
2611 PdeDst.u = pShwPage->Core.Key
2612 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2613 else
2614 {
2615 PdeDst.u = pShwPage->Core.Key
2616 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2617 /* (see explanation and assumptions further down.) */
2618 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2619 {
2620 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2621 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2622 PdeDst.b.u1Write = 0;
2623 }
2624 }
2625 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2626# if defined(IN_RC)
2627 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2628# endif
2629 return VINF_SUCCESS;
2630 }
2631 else if (rc == VERR_PGM_POOL_FLUSHED)
2632 {
2633 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2634# if defined(IN_RC)
2635 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2636# endif
2637 return VINF_PGM_SYNC_CR3;
2638 }
2639 else
2640 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2641 PdeDst.u &= X86_PDE_AVL_MASK;
2642 PdeDst.u |= pShwPage->Core.Key;
2643
2644 /*
2645 * Page directory has been accessed (this is a fault situation, remember).
2646 */
2647 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2648 if (fPageTable)
2649 {
2650 /*
2651 * Page table - 4KB.
2652 *
2653 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2654 */
2655 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2656 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2657 PGSTPT pPTSrc;
2658 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2659 if (RT_SUCCESS(rc))
2660 {
2661 /*
2662 * Start by syncing the page directory entry so CSAM's TLB trick works.
2663 */
2664 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2665 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2666 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2667# if defined(IN_RC)
2668 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2669# endif
2670
2671 /*
2672 * Directory/page user or supervisor privilege: (same goes for read/write)
2673 *
2674 * Directory Page Combined
2675 * U/S U/S U/S
2676 * 0 0 0
2677 * 0 1 0
2678 * 1 0 0
2679 * 1 1 1
2680 *
2681 * Simple AND operation. Table listed for completeness.
2682 *
2683 */
2684 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2685# ifdef PGM_SYNC_N_PAGES
2686 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2687 unsigned iPTDst = iPTBase;
2688 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2689 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2690 iPTDst = 0;
2691 else
2692 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2693# else /* !PGM_SYNC_N_PAGES */
2694 unsigned iPTDst = 0;
2695 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2696# endif /* !PGM_SYNC_N_PAGES */
2697# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2698 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2699 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2700# else
2701 const unsigned offPTSrc = 0;
2702# endif
2703 for (; iPTDst < iPTDstEnd; iPTDst++)
2704 {
2705 const unsigned iPTSrc = iPTDst + offPTSrc;
2706 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2707
2708 if (PteSrc.n.u1Present) /* we've already cleared it above */
2709 {
2710# ifndef IN_RING0
2711 /*
2712 * Assuming kernel code will be marked as supervisor - and not as user level
2713 * and executed using a conforming code selector - and marked as read-only.
2714 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2715 */
2716 PPGMPAGE pPage;
2717 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2718 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2719 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2720 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2721 )
2722# endif
2723 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2724 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2725 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2726 PteSrc.n.u1Present,
2727 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2728 PteSrc.n.u1User & PdeSrc.n.u1User,
2729 (uint64_t)PteSrc.u,
2730 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2731 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2732 }
2733 } /* for PTEs */
2734 }
2735 }
2736 else
2737 {
2738 /*
2739 * Big page - 2/4MB.
2740 *
2741 * We'll walk the ram range list in parallel and optimize lookups.
2742 * We will only sync one shadow page table at a time.
2743 */
2744 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2745
2746 /**
2747 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2748 */
2749
2750 /*
2751 * Start by syncing the page directory entry.
2752 */
2753 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2754 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2755
2756 /*
2757 * If the page is not flagged as dirty and is writable, then make it read-only
2758 * at PD level, so we can set the dirty bit when the page is modified.
2759 *
2760 * ASSUMES that page access handlers are implemented on page table entry level.
2761 * Thus we will first catch the dirty access and set PDE.D and restart. If
2762 * there is an access handler, we'll trap again and let it work on the problem.
2763 */
2764 /** @todo move the above stuff to a section in the PGM documentation. */
2765 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2766 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2767 {
2768 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2769 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2770 PdeDst.b.u1Write = 0;
2771 }
2772 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2773# if defined(IN_RC)
2774 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2775# endif
2776
2777 /*
2778 * Fill the shadow page table.
2779 */
2780 /* Get address and flags from the source PDE. */
2781 SHWPTE PteDstBase;
2782 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
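 /* Illustrative example (32-bit guest): a 4MB PDE of 0x014000e7 (P|RW|US|A|D|PS,
  * base 0x01400000) gives PteDstBase.u = 0x67 (P|RW|US|A|D) - the PS bit sits in
  * the PTE's PAT position and is masked off together with the address and the
  * caching bits; each shadow PTE below then becomes the page's HCPhys | 0x67. */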
2783
2784 /* Loop thru the entries in the shadow PT. */
2785 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2786 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2787 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2788 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2789 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2790 unsigned iPTDst = 0;
2791 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2792 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2793 {
2794 /* Advance ram range list. */
2795 while (pRam && GCPhys > pRam->GCPhysLast)
2796 pRam = pRam->CTX_SUFF(pNext);
2797 if (pRam && GCPhys >= pRam->GCPhys)
2798 {
2799 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2800 do
2801 {
2802 /* Make shadow PTE. */
2803 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2804 SHWPTE PteDst;
2805
2806# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2807 /* Try make the page writable if necessary. */
2808 if ( PteDstBase.n.u1Write
2809 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2810 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2811 {
2812 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
2813 AssertRCReturn(rc, rc);
2814 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2815 break;
2816 }
2817# endif
2818
2819 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2820 {
2821 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2822 {
2823 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2824 PteDst.n.u1Write = 0;
2825 }
2826 else
2827 PteDst.u = 0;
2828 }
2829# ifndef IN_RING0
2830 /*
2831 * Assuming kernel code will be marked as supervisor and not as user level and executed
2832 * using a conforming code selector. Don't check for readonly, as that implies the whole
2833 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2834 */
2835 else if ( !PdeSrc.n.u1User
2836 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2837 PteDst.u = 0;
2838# endif
2839 else
2840 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2841
2842 /* Only map writable pages writable. */
2843 if ( PteDst.n.u1Write
2844 && PteDst.n.u1Present
2845 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2846 {
2847 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2848 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2849 }
2850
2851# ifdef PGMPOOL_WITH_USER_TRACKING
2852 if (PteDst.n.u1Present)
2853 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2854# endif
2855 /* commit it */
2856 pPTDst->a[iPTDst] = PteDst;
2857 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2858 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2859 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2860
2861 /* advance */
2862 GCPhys += PAGE_SIZE;
2863 iHCPage++;
2864 iPTDst++;
2865 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2866 && GCPhys <= pRam->GCPhysLast);
2867 }
2868 else if (pRam)
2869 {
2870 Log(("Invalid pages at %RGp\n", GCPhys));
2871 do
2872 {
2873 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2874 GCPhys += PAGE_SIZE;
2875 iPTDst++;
2876 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2877 && GCPhys < pRam->GCPhys);
2878 }
2879 else
2880 {
2881 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2882 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2883 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2884 }
2885 } /* while more PTEs */
2886 } /* 4KB / 4MB */
2887 }
2888 else
2889 AssertRelease(!PdeDst.n.u1Present);
2890
2891 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2892 if (RT_FAILURE(rc))
2893 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2894 return rc;
2895
2896#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2897 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2898 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2899 && !defined(IN_RC)
2900
2901 /*
2902 * Validate input a little bit.
2903 */
2904 int rc = VINF_SUCCESS;
2905# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2906 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2907 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2908
2909 /* Fetch the pgm pool shadow descriptor. */
2910 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2911 Assert(pShwPde);
2912
2913# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2914 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2915 PPGMPOOLPAGE pShwPde;
2916 PX86PDPAE pPDDst;
2917 PSHWPDE pPdeDst;
2918
2919 /* Fetch the pgm pool shadow descriptor. */
2920 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2921 AssertRCSuccessReturn(rc, rc);
2922 Assert(pShwPde);
2923
2924 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2925 pPdeDst = &pPDDst->a[iPDDst];
2926
2927# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2928 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2929 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2930 PX86PDPAE pPDDst;
2931 PX86PDPT pPdptDst;
2932 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2933 AssertRCSuccessReturn(rc, rc);
2934 Assert(pPDDst);
2935 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2936
2937 /* Fetch the pgm pool shadow descriptor. */
2938 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2939 Assert(pShwPde);
2940
2941# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2942 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2943 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2944 PEPTPD pPDDst;
2945 PEPTPDPT pPdptDst;
2946
2947 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
2948 if (rc != VINF_SUCCESS)
2949 {
2950 AssertRC(rc);
2951 return rc;
2952 }
2953 Assert(pPDDst);
2954 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2955
2956 /* Fetch the pgm pool shadow descriptor. */
2957 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2958 Assert(pShwPde);
2959# endif
2960 SHWPDE PdeDst = *pPdeDst;
2961
2962 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2963 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2964
2965 GSTPDE PdeSrc;
2966 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2967 PdeSrc.n.u1Present = 1;
2968 PdeSrc.n.u1Write = 1;
2969 PdeSrc.n.u1Accessed = 1;
2970 PdeSrc.n.u1User = 1;
2971
2972 /*
2973 * Allocate & map the page table.
2974 */
2975 PSHWPT pPTDst;
2976 PPGMPOOLPAGE pShwPage;
2977 RTGCPHYS GCPhys;
2978
2979 /* Virtual address = physical address */
2980 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
2981 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2982
2983 if ( rc == VINF_SUCCESS
2984 || rc == VINF_PGM_CACHED_PAGE)
2985 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2986 else
2987 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2988
2989 PdeDst.u &= X86_PDE_AVL_MASK;
2990 PdeDst.u |= pShwPage->Core.Key;
2991 PdeDst.n.u1Present = 1;
2992 PdeDst.n.u1Write = 1;
2993# if PGM_SHW_TYPE == PGM_TYPE_EPT
2994 PdeDst.n.u1Execute = 1;
2995# else
2996 PdeDst.n.u1User = 1;
2997 PdeDst.n.u1Accessed = 1;
2998# endif
2999 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3000
3001 pgmLock(pVM);
3002 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3003 pgmUnlock(pVM);
3004 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3005 return rc;
3006
3007#else
3008 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3009 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3010 return VERR_INTERNAL_ERROR;
3011#endif
3012}
3013
3014
3015
3016/**
3017 * Prefetch a page/set of pages.
3018 *
3019 * Typically used to sync commonly used pages before entering raw mode
3020 * after a CR3 reload.
3021 *
3022 * @returns VBox status code.
3023 * @param pVCpu The VMCPU handle.
3024 * @param GCPtrPage Page to prefetch.
3025 */
3026PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3027{
3028#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3029 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3030 /*
3031 * Check that all Guest levels through the PDE are present, getting the
3032 * PD and PDE in the process.
3033 */
3034 int rc = VINF_SUCCESS;
3035# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3036# if PGM_GST_TYPE == PGM_TYPE_32BIT
3037 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3038 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3039# elif PGM_GST_TYPE == PGM_TYPE_PAE
3040 unsigned iPDSrc;
3041 X86PDPE PdpeSrc;
3042 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3043 if (!pPDSrc)
3044 return VINF_SUCCESS; /* not present */
3045# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3046 unsigned iPDSrc;
3047 PX86PML4E pPml4eSrc;
3048 X86PDPE PdpeSrc;
3049 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3050 if (!pPDSrc)
3051 return VINF_SUCCESS; /* not present */
3052# endif
3053 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3054# else
3055 PGSTPD pPDSrc = NULL;
3056 const unsigned iPDSrc = 0;
3057 GSTPDE PdeSrc;
3058
3059 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3060 PdeSrc.n.u1Present = 1;
3061 PdeSrc.n.u1Write = 1;
3062 PdeSrc.n.u1Accessed = 1;
3063 PdeSrc.n.u1User = 1;
3064# endif
3065
3066 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3067 {
3068 PVM pVM = pVCpu->CTX_SUFF(pVM);
3069 pgmLock(pVM);
3070
3071# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3072 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3073# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3074 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3075 PX86PDPAE pPDDst;
3076 X86PDEPAE PdeDst;
3077# if PGM_GST_TYPE != PGM_TYPE_PAE
3078 X86PDPE PdpeSrc;
3079
3080 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3081 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3082# endif
3083 int rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3084 if (rc != VINF_SUCCESS)
3085 {
3086 pgmUnlock(pVM);
3087 AssertRC(rc);
3088 return rc;
3089 }
3090 Assert(pPDDst);
3091 PdeDst = pPDDst->a[iPDDst];
3092
3093# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3094 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3095 PX86PDPAE pPDDst;
3096 X86PDEPAE PdeDst;
3097
3098# if PGM_GST_TYPE == PGM_TYPE_PROT
3099 /* AMD-V nested paging */
3100 X86PML4E Pml4eSrc;
3101 X86PDPE PdpeSrc;
3102 PX86PML4E pPml4eSrc = &Pml4eSrc;
3103
3104 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3105 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3106 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3107# endif
3108
3109 int rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3110 if (rc != VINF_SUCCESS)
3111 {
3112 pgmUnlock(pVM);
3113 AssertRC(rc);
3114 return rc;
3115 }
3116 Assert(pPDDst);
3117 PdeDst = pPDDst->a[iPDDst];
3118# endif
3119 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3120 {
3121 if (!PdeDst.n.u1Present)
3122 {
3123 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
3124 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3125 }
3126 else
3127 {
3128 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3129 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3130 * makes no sense to prefetch more than one page.
3131 */
3132 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3133 if (RT_SUCCESS(rc))
3134 rc = VINF_SUCCESS;
3135 }
3136 }
3137 pgmUnlock(pVM);
3138 }
3139 return rc;
3140
3141#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3142 return VINF_SUCCESS; /* ignore */
3143#endif
3144}
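/*
 * Editor's sketch (not part of the VirtualBox sources): the virtual-address
 * decomposition behind the iPDSrc/iPDDst calculations in PrefetchPage above,
 * spelled out with the architectural shift/mask values instead of the GST_
 * and SHW_ template macros.  The demo helpers are invented for this sketch.
 */
#include <stdint.h>

/* 32-bit paging: 1024 PDEs, each covering 4 MB of virtual address space. */
static inline unsigned demoPdIndex32Bit(uint32_t uVa)
{
    return uVa >> 22;
}

/* PAE and long mode: 512 PDEs per page directory, each covering 2 MB. */
static inline unsigned demoPdIndexPae(uint64_t uVa)
{
    return (unsigned)((uVa >> 21) & 0x1ff);
}

/* Long mode only: the PDPT and PML4 indexes (512 entries each). */
static inline unsigned demoPdptIndexLongMode(uint64_t uVa)
{
    return (unsigned)((uVa >> 30) & 0x1ff);
}

static inline unsigned demoPml4Index(uint64_t uVa)
{
    return (unsigned)((uVa >> 39) & 0x1ff);
}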
3145
3146
3147
3148
3149/**
3150 * Syncs a page during a PGMVerifyAccess() call.
3151 *
3152 * @returns VBox status code (informational status codes included).
3153 * @param pVCpu The VMCPU handle.
3154 * @param GCPtrPage The address of the page to sync.
3155 * @param fPage The effective guest page flags.
3156 * @param uErr The trap error code.
3157 */
3158PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3159{
3160 PVM pVM = pVCpu->CTX_SUFF(pVM);
3161
3162 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3163
3164 Assert(!HWACCMIsNestedPagingActive(pVM));
3165#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
3166 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3167
3168# ifndef IN_RING0
3169 if (!(fPage & X86_PTE_US))
3170 {
3171 /*
3172 * Mark this page as safe.
3173 */
3174 /** @todo not correct for pages that contain both code and data!! */
3175 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3176 CSAMMarkPage(pVM, (RTRCPTR)GCPtrPage, true);
3177 }
3178# endif
3179
3180 /*
3181 * Get guest PD and index.
3182 */
3183# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3184# if PGM_GST_TYPE == PGM_TYPE_32BIT
3185 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3186 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3187# elif PGM_GST_TYPE == PGM_TYPE_PAE
3188 unsigned iPDSrc = 0;
3189 X86PDPE PdpeSrc;
3190 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3191
3192    if (!pPDSrc)
3193 {
3194 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3195 return VINF_EM_RAW_GUEST_TRAP;
3196 }
3197# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3198 unsigned iPDSrc;
3199 PX86PML4E pPml4eSrc;
3200 X86PDPE PdpeSrc;
3201 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3202 if (!pPDSrc)
3203 {
3204 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3205 return VINF_EM_RAW_GUEST_TRAP;
3206 }
3207# endif
3208# else
3209 PGSTPD pPDSrc = NULL;
3210 const unsigned iPDSrc = 0;
3211# endif
3212 int rc = VINF_SUCCESS;
3213
3214 pgmLock(pVM);
3215
3216 /*
3217 * First check if the shadow pd is present.
3218 */
3219# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3220 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3221# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3222 PX86PDEPAE pPdeDst;
3223 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3224 PX86PDPAE pPDDst;
3225# if PGM_GST_TYPE != PGM_TYPE_PAE
3226 X86PDPE PdpeSrc;
3227
3228 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3229 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3230# endif
3231 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3232 if (rc != VINF_SUCCESS)
3233 {
3234 pgmUnlock(pVM);
3235 AssertRC(rc);
3236 return rc;
3237 }
3238 Assert(pPDDst);
3239 pPdeDst = &pPDDst->a[iPDDst];
3240
3241# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3242 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3243 PX86PDPAE pPDDst;
3244 PX86PDEPAE pPdeDst;
3245
3246# if PGM_GST_TYPE == PGM_TYPE_PROT
3247 /* AMD-V nested paging */
3248 X86PML4E Pml4eSrc;
3249 X86PDPE PdpeSrc;
3250 PX86PML4E pPml4eSrc = &Pml4eSrc;
3251
3252 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3253 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3254 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3255# endif
3256
3257 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3258 if (rc != VINF_SUCCESS)
3259 {
3260 pgmUnlock(pVM);
3261 AssertRC(rc);
3262 return rc;
3263 }
3264 Assert(pPDDst);
3265 pPdeDst = &pPDDst->a[iPDDst];
3266# endif
3267
3268# if defined(IN_RC)
3269 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3270 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3271# endif
3272
3273 if (!pPdeDst->n.u1Present)
3274 {
3275 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3276 if (rc != VINF_SUCCESS)
3277 {
3278# if defined(IN_RC)
3279        /* Release the lock we took on the dynamic pPdeDst mapping above. */
3280 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3281# endif
3282 pgmUnlock(pVM);
3283 AssertRC(rc);
3284 return rc;
3285 }
3286 }
3287
3288# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3289 /* Check for dirty bit fault */
3290 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3291 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3292 Log(("PGMVerifyAccess: success (dirty)\n"));
3293 else
3294 {
3295 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3296# else
3297 {
3298 GSTPDE PdeSrc;
3299 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3300 PdeSrc.n.u1Present = 1;
3301 PdeSrc.n.u1Write = 1;
3302 PdeSrc.n.u1Accessed = 1;
3303 PdeSrc.n.u1User = 1;
3304
3305# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3306 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3307 if (uErr & X86_TRAP_PF_US)
3308 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3309 else /* supervisor */
3310 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3311
3312 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3313 if (RT_SUCCESS(rc))
3314 {
3315 /* Page was successfully synced */
3316 Log2(("PGMVerifyAccess: success (sync)\n"));
3317 rc = VINF_SUCCESS;
3318 }
3319 else
3320 {
3321 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3322 rc = VINF_EM_RAW_GUEST_TRAP;
3323 }
3324 }
3325# if defined(IN_RC)
3326    /* Release the lock we took on the dynamic pPdeDst mapping above. */
3327 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3328# endif
3329 pgmUnlock(pVM);
3330 return rc;
3331
3332#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3333
3334    AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3335 return VERR_INTERNAL_ERROR;
3336#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3337}
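/*
 * Editor's sketch (not part of the VirtualBox sources): the architectural
 * page-fault error code bits that VerifyAccessSyncPage consults above (for
 * instance the user/supervisor test 'uErr & X86_TRAP_PF_US').  The DEMO_
 * names and demo helpers are invented for this sketch.
 */
#include <stdint.h>
#include <stdbool.h>

#define DEMO_PF_P    UINT32_C(0x01) /* 0 = not-present fault, 1 = protection violation */
#define DEMO_PF_RW   UINT32_C(0x02) /* set for write accesses */
#define DEMO_PF_US   UINT32_C(0x04) /* set for user-mode accesses */
#define DEMO_PF_RSVD UINT32_C(0x08) /* set when a reserved bit was found in a paging entry */
#define DEMO_PF_ID   UINT32_C(0x10) /* set for instruction fetches (when NX is in use) */

static inline bool demoIsUserFault(uint32_t uErr)       { return (uErr & DEMO_PF_US) != 0; }
static inline bool demoIsWriteFault(uint32_t uErr)      { return (uErr & DEMO_PF_RW) != 0; }
static inline bool demoIsNotPresentFault(uint32_t uErr) { return (uErr & DEMO_PF_P) == 0; }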
3338
3339#undef MY_STAM_COUNTER_INC
3340#define MY_STAM_COUNTER_INC(a) do { } while (0)
3341
3342
3343/**
3344 * Syncs the paging hierarchy starting at CR3.
3345 *
3346 * @returns VBox status code, no specials.
3347 * @param pVCpu The VMCPU handle.
3348 * @param cr0 Guest context CR0 register
3349 * @param cr3 Guest context CR3 register
3350 * @param cr4 Guest context CR4 register
3351 * @param fGlobal Including global page directories or not
3352 */
3353PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3354{
3355 PVM pVM = pVCpu->CTX_SUFF(pVM);
3356
3357 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
3358 fGlobal = true; /* Change this CR3 reload to be a global one. */
3359
3360 LogFlow(("SyncCR3 %d\n", fGlobal));
3361
3362#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3363 /*
3364 * Update page access handlers.
3365 * Virtual handlers are always flushed, while physical handlers are only flushed on demand.
3366 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3367 * have to look into that later because it will have a bad influence on performance.
3368 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3369 * bird: Yes, but that won't work for aliases.
3370 */
3371 /** @todo this MUST go away. See #1557. */
3372 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3373 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3374 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3375#endif
3376
3377#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3378 /*
3379 * Nested / EPT - almost no work.
3380 */
3381 /** @todo check if this is really necessary; the call does it as well... */
3382 HWACCMFlushTLB(pVCpu);
3383 return VINF_SUCCESS;
3384
3385#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3386 /*
3387 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3388 * out the shadow parts when the guest modifies its tables.
3389 */
3390 return VINF_SUCCESS;
3391
3392#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3393
3394# ifdef PGM_WITHOUT_MAPPINGS
3395 Assert(pVM->pgm.s.fMappingsFixed);
3396 return VINF_SUCCESS;
3397# else
3398 /* Nothing to do when mappings are fixed. */
3399 if (pVM->pgm.s.fMappingsFixed)
3400 return VINF_SUCCESS;
3401
3402 int rc = PGMMapResolveConflicts(pVM);
3403 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3404 if (rc == VINF_PGM_SYNC_CR3)
3405 {
3406 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3407 return VINF_PGM_SYNC_CR3;
3408 }
3409# endif
3410 return VINF_SUCCESS;
3411#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3412}
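/*
 * Editor's sketch (not part of the VirtualBox sources): the SyncCR3 decision
 * above reduced to its three shadow-mode outcomes.  The DEMOSHWMODE enum and
 * demoSyncCr3Action() are invented for this sketch and only paraphrase the
 * #if branches; they are not a replacement for them.
 */
typedef enum DEMOSHWMODE
{
    DEMO_SHW_LEGACY,        /* 32-bit or PAE shadow paging */
    DEMO_SHW_AMD64,         /* AMD64 shadow paging */
    DEMO_SHW_NESTED_OR_EPT  /* nested paging / EPT */
} DEMOSHWMODE;

/* 0 = nothing to do, 1 = flush the nested/EPT TLB, 2 = resolve hypervisor
   mapping conflicts in the legacy shadow root. */
static inline int demoSyncCr3Action(DEMOSHWMODE enmMode)
{
    switch (enmMode)
    {
        case DEMO_SHW_NESTED_OR_EPT: return 1;
        case DEMO_SHW_AMD64:         return 0; /* shadow parts are zapped when the guest writes its tables */
        default:                     return 2;
    }
}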
3413
3414
3415
3416
3417#ifdef VBOX_STRICT
3418#ifdef IN_RC
3419# undef AssertMsgFailed
3420# define AssertMsgFailed Log
3421#endif
3422#ifdef IN_RING3
3423# include <VBox/dbgf.h>
3424
3425/**
3426 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3427 *
3428 * @returns VBox status code (VINF_SUCCESS).
3429 * @param cr3 The root of the hierarchy.
3430 * @param   cr4         The cr4 register value; only the PAE and PSE flags are currently used.
3431 * @param fLongMode Set if long mode, false if not long mode.
3432 * @param cMaxDepth Number of levels to dump.
3433 * @param pHlp Pointer to the output functions.
3434 */
3435RT_C_DECLS_BEGIN
3436VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3437RT_C_DECLS_END
3438
3439#endif
3440
3441/**
3442 * Checks that the shadow page table is in sync with the guest one.
3443 *
3444 * @returns The number of errors.
3446 * @param pVCpu The VMCPU handle.
3447 * @param cr3 Guest context CR3 register
3448 * @param cr4 Guest context CR4 register
3449 * @param GCPtr Where to start. Defaults to 0.
3450 * @param cb How much to check. Defaults to everything.
3451 */
3452PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3453{
3454#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3455 return 0;
3456#else
3457 unsigned cErrors = 0;
3458 PVM pVM = pVCpu->CTX_SUFF(pVM);
3459 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3460
3461#if PGM_GST_TYPE == PGM_TYPE_PAE
3462 /** @todo currently broken; crashes below somewhere */
3463 AssertFailed();
3464#endif
3465
3466#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3467 || PGM_GST_TYPE == PGM_TYPE_PAE \
3468 || PGM_GST_TYPE == PGM_TYPE_AMD64
3469
3470# if PGM_GST_TYPE == PGM_TYPE_AMD64
3471 bool fBigPagesSupported = true;
3472# else
3473 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVCpu) & X86_CR4_PSE);
3474# endif
3475 PPGMCPU pPGM = &pVCpu->pgm.s;
3476 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3477 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3478# ifndef IN_RING0
3479 RTHCPHYS HCPhys; /* general usage. */
3480# endif
3481 int rc;
3482
3483 /*
3484 * Check that the Guest CR3 and all its mappings are correct.
3485 */
3486 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3487 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3488 false);
3489# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3490# if PGM_GST_TYPE == PGM_TYPE_32BIT
3491 rc = PGMShwGetPage(pVCpu, (RTGCPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3492# else
3493 rc = PGMShwGetPage(pVCpu, (RTGCPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3494# endif
3495 AssertRCReturn(rc, 1);
3496 HCPhys = NIL_RTHCPHYS;
3497 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3498    AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3499# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3500 pgmGstGet32bitPDPtr(pPGM);
3501 RTGCPHYS GCPhys;
3502 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3503 AssertRCReturn(rc, 1);
3504 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3505# endif
3506# endif /* !IN_RING0 */
3507
3508 /*
3509 * Get and check the Shadow CR3.
3510 */
3511# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3512 unsigned cPDEs = X86_PG_ENTRIES;
3513 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3514# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3515# if PGM_GST_TYPE == PGM_TYPE_32BIT
3516 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3517# else
3518 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3519# endif
3520 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3521# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3522 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3523 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3524# endif
3525 if (cb != ~(RTGCPTR)0)
3526 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3527
3528/** @todo call the other two PGMAssert*() functions. */
3529
3530# if PGM_GST_TYPE == PGM_TYPE_AMD64
3531 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3532
3533 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3534 {
3535 PPGMPOOLPAGE pShwPdpt = NULL;
3536 PX86PML4E pPml4eSrc;
3537 PX86PML4E pPml4eDst;
3538 RTGCPHYS GCPhysPdptSrc;
3539
3540 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3541 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3542
3543 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3544 if (!pPml4eDst->n.u1Present)
3545 {
3546 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3547 continue;
3548 }
3549
3550 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3551 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3552
3553 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3554 {
3555 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3556 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3557 cErrors++;
3558 continue;
3559 }
3560
3561 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3562 {
3563 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3564 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3565 cErrors++;
3566 continue;
3567 }
3568
3569 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3570 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3571 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3572 {
3573 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3574 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3575 cErrors++;
3576 continue;
3577 }
3578# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3579 {
3580# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3581
3582# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3583 /*
3584 * Check the PDPTEs too.
3585 */
3586 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3587
3588    for (; iPdpt <= SHW_PDPT_MASK; iPdpt++)
3589 {
3590 unsigned iPDSrc;
3591 PPGMPOOLPAGE pShwPde = NULL;
3592 PX86PDPE pPdpeDst;
3593 RTGCPHYS GCPhysPdeSrc;
3594# if PGM_GST_TYPE == PGM_TYPE_PAE
3595 X86PDPE PdpeSrc;
3596 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVCpu->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3597 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3598# else
3599 PX86PML4E pPml4eSrc;
3600 X86PDPE PdpeSrc;
3601 PX86PDPT pPdptDst;
3602 PX86PDPAE pPDDst;
3603 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVCpu->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3604
3605 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3606 if (rc != VINF_SUCCESS)
3607 {
3608 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3609 GCPtr += 512 * _2M;
3610 continue; /* next PDPTE */
3611 }
3612 Assert(pPDDst);
3613# endif
3614 Assert(iPDSrc == 0);
3615
3616 pPdpeDst = &pPdptDst->a[iPdpt];
3617
3618 if (!pPdpeDst->n.u1Present)
3619 {
3620 GCPtr += 512 * _2M;
3621 continue; /* next PDPTE */
3622 }
3623
3624 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3625 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3626
3627 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3628 {
3629 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3630 GCPtr += 512 * _2M;
3631 cErrors++;
3632 continue;
3633 }
3634
3635 if (GCPhysPdeSrc != pShwPde->GCPhys)
3636 {
3637# if PGM_GST_TYPE == PGM_TYPE_AMD64
3638 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3639# else
3640 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3641# endif
3642 GCPtr += 512 * _2M;
3643 cErrors++;
3644 continue;
3645 }
3646
3647# if PGM_GST_TYPE == PGM_TYPE_AMD64
3648 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3649 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3650 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3651 {
3652 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3653 GCPtr += 512 * _2M;
3654 cErrors++;
3655 continue;
3656 }
3657# endif
3658
3659# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3660 {
3661# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3662# if PGM_GST_TYPE == PGM_TYPE_32BIT
3663 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVCpu->pgm.s);
3664# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3665 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3666# endif
3667# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3668 /*
3669 * Iterate the shadow page directory.
3670 */
3671 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3672 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3673
3674 for (;
3675 iPDDst < cPDEs;
3676 iPDDst++, GCPtr += cIncrement)
3677 {
3678# if PGM_SHW_TYPE == PGM_TYPE_PAE
3679 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3680# else
3681 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3682# endif
3683 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3684 {
3685 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3686 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3687 {
3688 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3689 cErrors++;
3690 continue;
3691 }
3692 }
3693 else if ( (PdeDst.u & X86_PDE_P)
3694 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3695 )
3696 {
3697 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3698 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3699 if (!pPoolPage)
3700 {
3701 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3702 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3703 cErrors++;
3704 continue;
3705 }
3706 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3707
3708 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3709 {
3710 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3711 GCPtr, (uint64_t)PdeDst.u));
3712 cErrors++;
3713 }
3714
3715 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3716 {
3717 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3718 GCPtr, (uint64_t)PdeDst.u));
3719 cErrors++;
3720 }
3721
3722 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3723 if (!PdeSrc.n.u1Present)
3724 {
3725 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3726 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3727 cErrors++;
3728 continue;
3729 }
3730
3731 if ( !PdeSrc.b.u1Size
3732 || !fBigPagesSupported)
3733 {
3734 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3735# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3736 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3737# endif
3738 }
3739 else
3740 {
3741# if PGM_GST_TYPE == PGM_TYPE_32BIT
3742 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3743 {
3744 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3745 GCPtr, (uint64_t)PdeSrc.u));
3746 cErrors++;
3747 continue;
3748 }
3749# endif
3750 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3751# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3752 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3753# endif
3754 }
3755
3756 if ( pPoolPage->enmKind
3757 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3758 {
3759 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3760 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3761 cErrors++;
3762 }
3763
3764 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3765 if (!pPhysPage)
3766 {
3767 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3768 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3769 cErrors++;
3770 continue;
3771 }
3772
3773 if (GCPhysGst != pPoolPage->GCPhys)
3774 {
3775 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3776 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3777 cErrors++;
3778 continue;
3779 }
3780
3781 if ( !PdeSrc.b.u1Size
3782 || !fBigPagesSupported)
3783 {
3784 /*
3785 * Page Table.
3786 */
3787 const GSTPT *pPTSrc;
3788 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3789 if (RT_FAILURE(rc))
3790 {
3791 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3792 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3793 cErrors++;
3794 continue;
3795 }
3796 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3797 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3798 {
3799 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3800 // (This problem will go away when/if we shadow multiple CR3s.)
3801 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3802 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3803 cErrors++;
3804 continue;
3805 }
3806 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3807 {
3808 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3809 GCPtr, (uint64_t)PdeDst.u));
3810 cErrors++;
3811 continue;
3812 }
3813
3814 /* iterate the page table. */
3815# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3816 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3817 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3818# else
3819 const unsigned offPTSrc = 0;
3820# endif
3821 for (unsigned iPT = 0, off = 0;
3822 iPT < RT_ELEMENTS(pPTDst->a);
3823 iPT++, off += PAGE_SIZE)
3824 {
3825 const SHWPTE PteDst = pPTDst->a[iPT];
3826
3827 /* skip not-present entries. */
3828 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3829 continue;
3830 Assert(PteDst.n.u1Present);
3831
3832 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3833 if (!PteSrc.n.u1Present)
3834 {
3835# ifdef IN_RING3
3836 PGMAssertHandlerAndFlagsInSync(pVM);
3837 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3838# endif
3839 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3840 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3841 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3842 cErrors++;
3843 continue;
3844 }
3845
3846 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3847# if 1 /** @todo sync accessed bit properly... */
3848 fIgnoreFlags |= X86_PTE_A;
3849# endif
3850
3851 /* match the physical addresses */
3852 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3853 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3854
3855# ifdef IN_RING3
3856 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3857 if (RT_FAILURE(rc))
3858 {
3859 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3860 {
3861 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3862 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3863 cErrors++;
3864 continue;
3865 }
3866 }
3867 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3868 {
3869 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3870 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3871 cErrors++;
3872 continue;
3873 }
3874# endif
3875
3876 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
3877 if (!pPhysPage)
3878 {
3879# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3880 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3881 {
3882 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3883 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3884 cErrors++;
3885 continue;
3886 }
3887# endif
3888 if (PteDst.n.u1Write)
3889 {
3890 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3891 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3892 cErrors++;
3893 }
3894 fIgnoreFlags |= X86_PTE_RW;
3895 }
3896 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
3897 {
3898 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3899 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3900 cErrors++;
3901 continue;
3902 }
3903
3904 /* flags */
3905 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3906 {
3907 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3908 {
3909 if (PteDst.n.u1Write)
3910 {
3911 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3912 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3913 cErrors++;
3914 continue;
3915 }
3916 fIgnoreFlags |= X86_PTE_RW;
3917 }
3918 else
3919 {
3920 if (PteDst.n.u1Present)
3921 {
3922 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3923 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3924 cErrors++;
3925 continue;
3926 }
3927 fIgnoreFlags |= X86_PTE_P;
3928 }
3929 }
3930 else
3931 {
3932 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3933 {
3934 if (PteDst.n.u1Write)
3935 {
3936 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3937 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3938 cErrors++;
3939 continue;
3940 }
3941 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3942 {
3943 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3944 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3945 cErrors++;
3946 continue;
3947 }
3948 if (PteDst.n.u1Dirty)
3949 {
3950 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3951 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3952 cErrors++;
3953 }
3954# if 0 /** @todo sync access bit properly... */
3955 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3956 {
3957                        AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3958 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3959 cErrors++;
3960 }
3961 fIgnoreFlags |= X86_PTE_RW;
3962# else
3963 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3964# endif
3965 }
3966 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3967 {
3968 /* access bit emulation (not implemented). */
3969 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3970 {
3971 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3972 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3973 cErrors++;
3974 continue;
3975 }
3976 if (!PteDst.n.u1Accessed)
3977 {
3978                        AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3979 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3980 cErrors++;
3981 }
3982 fIgnoreFlags |= X86_PTE_P;
3983 }
3984# ifdef DEBUG_sandervl
3985 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3986# endif
3987 }
3988
3989 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3990 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
3991 )
3992 {
3993 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3994 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3995 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3996 cErrors++;
3997 continue;
3998 }
3999 } /* foreach PTE */
4000 }
4001 else
4002 {
4003 /*
4004 * Big Page.
4005 */
4006 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4007 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4008 {
4009 if (PdeDst.n.u1Write)
4010 {
4011 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4012 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4013 cErrors++;
4014 continue;
4015 }
4016 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4017 {
4018                    AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4019 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4020 cErrors++;
4021 continue;
4022 }
4023# if 0 /** @todo sync access bit properly... */
4024 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4025 {
4026                    AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4027 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4028 cErrors++;
4029 }
4030 fIgnoreFlags |= X86_PTE_RW;
4031# else
4032 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4033# endif
4034 }
4035 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4036 {
4037 /* access bit emulation (not implemented). */
4038 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4039 {
4040 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4041 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4042 cErrors++;
4043 continue;
4044 }
4045 if (!PdeDst.n.u1Accessed)
4046 {
4047                    AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4048 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4049 cErrors++;
4050 }
4051 fIgnoreFlags |= X86_PTE_P;
4052 }
4053
4054 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4055 {
4056 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4057 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4058 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4059 cErrors++;
4060 }
4061
4062 /* iterate the page table. */
4063 for (unsigned iPT = 0, off = 0;
4064 iPT < RT_ELEMENTS(pPTDst->a);
4065 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4066 {
4067 const SHWPTE PteDst = pPTDst->a[iPT];
4068
4069 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4070 {
4071 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4072 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4073 cErrors++;
4074 }
4075
4076 /* skip not-present entries. */
4077 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4078 continue;
4079
4080 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4081
4082 /* match the physical addresses */
4083 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4084
4085# ifdef IN_RING3
4086 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4087 if (RT_FAILURE(rc))
4088 {
4089 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4090 {
4091 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4092 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4093 cErrors++;
4094 }
4095 }
4096 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4097 {
4098 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4099 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4100 cErrors++;
4101 continue;
4102 }
4103# endif
4104 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4105 if (!pPhysPage)
4106 {
4107# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4108 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4109 {
4110 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4111 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4112 cErrors++;
4113 continue;
4114 }
4115# endif
4116 if (PteDst.n.u1Write)
4117 {
4118 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4119 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4120 cErrors++;
4121 }
4122 fIgnoreFlags |= X86_PTE_RW;
4123 }
4124 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4125 {
4126 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4127 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4128 cErrors++;
4129 continue;
4130 }
4131
4132 /* flags */
4133 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4134 {
4135 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4136 {
4137 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4138 {
4139 if (PteDst.n.u1Write)
4140 {
4141 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4142 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4143 cErrors++;
4144 continue;
4145 }
4146 fIgnoreFlags |= X86_PTE_RW;
4147 }
4148 }
4149 else
4150 {
4151 if (PteDst.n.u1Present)
4152 {
4153 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4154 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4155 cErrors++;
4156 continue;
4157 }
4158 fIgnoreFlags |= X86_PTE_P;
4159 }
4160 }
4161
4162 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4163 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4164 )
4165 {
4166 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4167 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4168 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4169 cErrors++;
4170 continue;
4171 }
4172 } /* for each PTE */
4173 }
4174 }
4175 /* not present */
4176
4177 } /* for each PDE */
4178
4179 } /* for each PDPTE */
4180
4181 } /* for each PML4E */
4182
4183# ifdef DEBUG
4184 if (cErrors)
4185 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4186# endif
4187
4188#endif /* GST == 32BIT, PAE or AMD64 */
4189 return cErrors;
4190
4191#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4192}
4193#endif /* VBOX_STRICT */
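/*
 * Editor's sketch (not part of the VirtualBox sources): the big-page physical
 * base extraction that GST_GET_PDE_BIG_PG_GCPHYS performs in AssertCR3 above,
 * written with the architectural masks.  The demo helpers are invented for
 * this sketch; PSE-36 high bits are deliberately not decoded here (the 32-bit
 * guest path above rejects them via X86_PDE4M_PG_HIGH_MASK).
 */
#include <stdint.h>

/* 32-bit paging, 4 MB page: the base is in PDE bits 22..31. */
static inline uint64_t demoBigPageBase32Bit(uint32_t uPde)
{
    return (uint64_t)(uPde & UINT32_C(0xffc00000));
}

/* PAE / long mode, 2 MB page: the base is in PDE bits 21..51. */
static inline uint64_t demoBigPageBasePae(uint64_t uPde)
{
    return uPde & UINT64_C(0x000fffffffe00000);
}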
4194
4195
4196/**
4197 * Sets up the CR3 for shadow paging.
4198 *
4199 * @returns Strict VBox status code.
4200 * @retval VINF_SUCCESS.
4201 *
4202 * @param pVCpu The VMCPU handle.
4203 * @param GCPhysCR3 The physical address in the CR3 register.
4204 */
4205PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4206{
4207 PVM pVM = pVCpu->CTX_SUFF(pVM);
4208
4209 /* Update guest paging info. */
4210#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4211 || PGM_GST_TYPE == PGM_TYPE_PAE \
4212 || PGM_GST_TYPE == PGM_TYPE_AMD64
4213
4214 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4215
4216 /*
4217 * Map the page CR3 points at.
4218 */
4219 RTHCPTR HCPtrGuestCR3;
4220 RTHCPHYS HCPhysGuestCR3;
4221 pgmLock(pVM);
4222 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4223 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4224 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPage);
4225 /** @todo this needs some reworking wrt. locking. */
4226# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4227 HCPtrGuestCR3 = NIL_RTHCPTR;
4228 int rc = VINF_SUCCESS;
4229# else
4230 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3);
4231# endif
4232 pgmUnlock(pVM);
4233 if (RT_SUCCESS(rc))
4234 {
4235 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4236 if (RT_SUCCESS(rc))
4237 {
4238# ifdef IN_RC
4239 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4240# endif
4241# if PGM_GST_TYPE == PGM_TYPE_32BIT
4242 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4243# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4244 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4245# endif
4246 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))pVM->pgm.s.GCPtrCR3Mapping;
4247
4248# elif PGM_GST_TYPE == PGM_TYPE_PAE
4249 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4250 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4251# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4252 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4253# endif
4254 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RCPTRTYPE(uint8_t *))pVM->pgm.s.GCPtrCR3Mapping + off);
4255 Log(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4256
4257 /*
4258 * Map the 4 PDs too.
4259 */
4260 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVCpu->pgm.s);
4261 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4262 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4263 {
4264 if (pGuestPDPT->a[i].n.u1Present)
4265 {
4266 RTHCPTR HCPtr;
4267 RTHCPHYS HCPhys;
4268 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4269 pgmLock(pVM);
4270 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4271 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4272 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4273# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4274 HCPtr = NIL_RTHCPTR;
4275 int rc2 = VINF_SUCCESS;
4276# else
4277 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4278# endif
4279 pgmUnlock(pVM);
4280 if (RT_SUCCESS(rc2))
4281 {
4282 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4283 AssertRCReturn(rc, rc);
4284
4285 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4286# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4287 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4288# endif
4289 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))GCPtr;
4290 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4291# ifdef IN_RC
4292 PGM_INVL_PG(pVCpu, GCPtr);
4293# endif
4294 continue;
4295 }
4296                    AssertMsgFailed(("MapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4297 }
4298
4299 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4300# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4301 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4302# endif
4303 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4304 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4305# ifdef IN_RC
4306 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4307# endif
4308 }
4309
4310# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4311 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4312# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4313 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4314# endif
4315# endif
4316 }
4317 else
4318 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4319 }
4320 else
4321 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4322
4323#else /* prot/real stub */
4324 int rc = VINF_SUCCESS;
4325#endif
4326
4327 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4328# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4329 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4330 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4331 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4332 && PGM_GST_TYPE != PGM_TYPE_PROT))
4333
4334 Assert(!HWACCMIsNestedPagingActive(pVM));
4335
4336 /*
4337 * Update the shadow root page as well since that's not fixed.
4338 */
4339 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4340 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4341 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4342 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4343 PPGMPOOLPAGE pNewShwPageCR3;
4344
4345 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4346 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3);
4347 AssertFatalRC(rc);
4348 rc = VINF_SUCCESS;
4349
4350 /* Mark the page as locked; disallow flushing. */
4351 pgmPoolLockPage(pPool, pNewShwPageCR3);
4352
4353# ifdef IN_RC
4354 /* NOTE: We can't deal with jumps to ring 3 here as we're now in an inconsistent state! */
4355 bool fLog = VMMGCLogDisable(pVM);
4356 pgmLock(pVM);
4357# endif
4358
4359 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4360 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4361 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4362# ifdef IN_RING0
4363 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4364 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4365# elif defined(IN_RC)
4366 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4367 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4368# else
4369 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4370 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4371# endif
4372
4373# ifndef PGM_WITHOUT_MAPPINGS
4374 /*
4375 * Apply all hypervisor mappings to the new CR3.
4376 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4377 * make sure we check for conflicts in the new CR3 root.
4378 */
4379# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4380 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4381# endif
4382 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4383 AssertRCReturn(rc, rc);
4384# endif
4385
4386 /* Set the current hypervisor CR3. */
4387 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4388 SELMShadowCR3Changed(pVM, pVCpu);
4389
4390# ifdef IN_RC
4391 pgmUnlock(pVM);
4392 VMMGCLogRestore(pVM, fLog);
4393# endif
4394
4395 /* Clean up the old CR3 root. */
4396 if (pOldShwPageCR3)
4397 {
4398 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4399# ifndef PGM_WITHOUT_MAPPINGS
4400 /* Remove the hypervisor mappings from the shadow page table. */
4401 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4402# endif
4403 /* Mark the page as unlocked; allow flushing again. */
4404 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4405
4406 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4407 }
4408
4409# endif
4410
4411 return rc;
4412}
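/*
 * Editor's sketch (not part of the VirtualBox sources): the architectural PAE
 * layout behind the "map the 4 PDs too" loop in MapCR3 above.  A PAE CR3
 * points to a 4-entry PDPT and every present PDPTE supplies the physical base
 * of one page directory.  The DEMO_ constants and demoCollectPaePds() are
 * invented for this sketch.
 */
#include <stdint.h>
#include <stddef.h>

#define DEMO_PDPTE_P       UINT64_C(0x1)                /* present */
#define DEMO_PDPTE_PG_MASK UINT64_C(0x000ffffffffff000) /* PD base, bits 12..51 */

/* Collects the physical addresses of the (up to) four PAE page directories.
   Absent slots are set to 0; returns the number of present entries. */
static size_t demoCollectPaePds(const uint64_t paPdpte[4], uint64_t paPdPhys[4])
{
    size_t cPresent = 0;
    for (size_t i = 0; i < 4; i++)
    {
        if (paPdpte[i] & DEMO_PDPTE_P)
        {
            paPdPhys[i] = paPdpte[i] & DEMO_PDPTE_PG_MASK;
            cPresent++;
        }
        else
            paPdPhys[i] = 0;
    }
    return cPresent;
}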
4413
4414/**
4415 * Unmaps the shadow CR3.
4416 *
4417 * @returns VBox status, no specials.
4418 * @param pVCpu The VMCPU handle.
4419 */
4420PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4421{
4422 LogFlow(("UnmapCR3\n"));
4423
4424 int rc = VINF_SUCCESS;
4425 PVM pVM = pVCpu->CTX_SUFF(pVM);
4426
4427 /*
4428 * Update guest paging info.
4429 */
4430#if PGM_GST_TYPE == PGM_TYPE_32BIT
4431 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4432# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4433 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4434# endif
4435 pVCpu->pgm.s.pGst32BitPdRC = 0;
4436
4437#elif PGM_GST_TYPE == PGM_TYPE_PAE
4438 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4439# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4440 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4441# endif
4442 pVCpu->pgm.s.pGstPaePdptRC = 0;
4443 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4444 {
4445 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4446# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4447 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4448# endif
4449 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4450 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4451 }
4452
4453#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4454 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4455# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4456 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4457# endif
4458
4459#else /* prot/real mode stub */
4460 /* nothing to do */
4461#endif
4462
4463#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4464 /*
4465 * Update shadow paging info.
4466 */
4467# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4468 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4469 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4470
4471# if PGM_GST_TYPE != PGM_TYPE_REAL
4472 Assert(!HWACCMIsNestedPagingActive(pVM));
4473# endif
4474
4475# ifndef PGM_WITHOUT_MAPPINGS
4476 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4477 /* Remove the hypervisor mappings from the shadow page table. */
4478 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4479# endif
4480
4481 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4482 {
4483 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4484
4485 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4486
4487 /* Mark the page as unlocked; allow flushing again. */
4488 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4489
4490 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4491 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4492 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4493 pVCpu->pgm.s.pShwPageCR3RC = 0;
4494 pVCpu->pgm.s.iShwUser = 0;
4495 pVCpu->pgm.s.iShwUserTable = 0;
4496 }
4497# endif
4498#endif /* !IN_RC*/
4499
4500 return rc;
4501}
4502