VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0.cpp@ 108379

Last change on this file since 108379 was 108132, checked in by vboxsync, 6 weeks ago

VMM/PGM: Merge and deduplicate code targeting x86 & amd64 in PGM.cpp. Don't bother compiling pool stuff on arm and darwin.amd64. jiraref:VBP-1531

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 58.7 KB
 
1/* $Id: PGMR0.cpp 108132 2025-02-10 11:05:23Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, Ring-0.
4 */
5
6/*
7 * Copyright (C) 2007-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define VBOX_VMM_TARGET_X86
33#define LOG_GROUP LOG_GROUP_PGM
34#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
35#include <VBox/rawpci.h>
36#include <VBox/vmm/pgm.h>
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/gmm.h>
39#include "PGMInternal.h"
40#include <VBox/vmm/pdmdev.h>
41#include <VBox/vmm/vmcc.h>
42#include <VBox/vmm/gvm.h>
43#include "PGMInline.h"
44#include <VBox/log.h>
45#include <VBox/err.h>
46#include <iprt/assert.h>
47#include <iprt/mem.h>
48#include <iprt/memobj.h>
49#include <iprt/process.h>
50#include <iprt/rand.h>
51#include <iprt/string.h>
52#include <iprt/time.h>
53
54
55/*
56 * Instantiate the ring-0 header/code templates.
57 */
58/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
59#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
60#include "PGMR0Bth.h"
61#undef PGM_BTH_NAME
62
63#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
64#include "PGMR0Bth.h"
65#undef PGM_BTH_NAME
66
67#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
68#include "PGMR0Bth.h"
69#undef PGM_BTH_NAME
70
71#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
72#include "PGMR0Bth.h"
73#undef PGM_BTH_NAME
74
75
76/**
77 * Initializes the per-VM data for the PGM.
78 *
79 * This is called from under the GVMM lock, so it should only initialize the
80 * data so PGMR0CleanupVM and others will work smoothly.
81 *
82 * @returns VBox status code.
83 * @param pGVM Pointer to the global VM structure.
84 * @param hMemObj Handle to the memory object backing pGVM.
85 */
86VMMR0_INT_DECL(int) PGMR0InitPerVMData(PGVM pGVM, RTR0MEMOBJ hMemObj)
87{
88 AssertCompile(sizeof(pGVM->pgm.s) <= sizeof(pGVM->pgm.padding));
89 AssertCompile(sizeof(pGVM->pgmr0.s) <= sizeof(pGVM->pgmr0.padding));
90 AssertCompile(sizeof(pGVM->aCpus[0].pgm.s) <= sizeof(pGVM->aCpus[0].pgm.padding));
91 AssertCompile(sizeof(pGVM->aCpus[0].pgmr0.s) <= sizeof(pGVM->aCpus[0].pgmr0.padding));
92
93 /* Set the RAM range memory handles to NIL. */
94 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.acRamRangePages) == RT_ELEMENTS(pGVM->pgmr0.s.apRamRanges));
95 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahRamRangeMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.apRamRanges));
96 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahRamRangeMapObjs) == RT_ELEMENTS(pGVM->pgmr0.s.apRamRanges));
97 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahRamRangeMemObjs); i++)
98 {
99 pGVM->pgmr0.s.ahRamRangeMemObjs[i] = NIL_RTR0MEMOBJ;
100 pGVM->pgmr0.s.ahRamRangeMapObjs[i] = NIL_RTR0MEMOBJ;
101 }
102 Assert(pGVM->pgmr0.s.idRamRangeMax == 0); /* the structure is ZERO'ed */
103
104 /* Set the MMIO2 range memory handles to NIL. */
105 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahMmio2MemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.apMmio2RamRanges));
106 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahMmio2MapObjs) == RT_ELEMENTS(pGVM->pgmr0.s.apMmio2RamRanges));
107 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahMmio2MemObjs); i++)
108 {
109 pGVM->pgmr0.s.ahMmio2MemObjs[i] = NIL_RTR0MEMOBJ;
110 pGVM->pgmr0.s.ahMmio2MapObjs[i] = NIL_RTR0MEMOBJ;
111 }
112
113 /* Set the ROM range memory handles to NIL. */
114 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahRomRangeMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.apRomRanges));
115 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahRomRangeMapObjs) == RT_ELEMENTS(pGVM->pgmr0.s.apRomRanges));
116 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahRomRangeMemObjs); i++)
117 {
118 pGVM->pgmr0.s.ahRomRangeMemObjs[i] = NIL_RTR0MEMOBJ;
119 pGVM->pgmr0.s.ahRomRangeMapObjs[i] = NIL_RTR0MEMOBJ;
120 }
121
122 /* Set the page pool and physical handler related memory handles to NIL. */
123 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMapObjs));
124 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
125 {
126 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
127 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
128 }
129 pGVM->pgmr0.s.hPhysHandlerMemObj = NIL_RTR0MEMOBJ;
130 pGVM->pgmr0.s.hPhysHandlerMapObj = NIL_RTR0MEMOBJ;
131
132 /*
133 * Initialize the handler type table with return to ring-3 callbacks so we
134 * don't have to do anything special for ring-3 only registrations.
135 *
136 * Note! The random bits of the hType value are mainly there to prevent trouble
137 * with zero-initialized handles without needing to sacrifice handle zero.
138 */
139 for (size_t i = 0; i < RT_ELEMENTS(pGVM->pgm.s.aPhysHandlerTypes); i++)
140 {
141 pGVM->pgmr0.s.aPhysHandlerTypes[i].hType = i | (RTRandU64() & ~(uint64_t)PGMPHYSHANDLERTYPE_IDX_MASK);
142 pGVM->pgmr0.s.aPhysHandlerTypes[i].enmKind = PGMPHYSHANDLERKIND_INVALID;
143 pGVM->pgmr0.s.aPhysHandlerTypes[i].pfnHandler = pgmR0HandlerPhysicalHandlerToRing3;
144 pGVM->pgmr0.s.aPhysHandlerTypes[i].pfnPfHandler = pgmR0HandlerPhysicalPfHandlerToRing3;
145
146 pGVM->pgm.s.aPhysHandlerTypes[i].hType = pGVM->pgmr0.s.aPhysHandlerTypes[i].hType;
147 pGVM->pgm.s.aPhysHandlerTypes[i].enmKind = PGMPHYSHANDLERKIND_INVALID;
148 }
149
150 /*
151 * Get the physical address of the ZERO and MMIO-dummy pages.
152 */
153 AssertReturn(((uintptr_t)&pGVM->pgm.s.abZeroPg[0] & HOST_PAGE_OFFSET_MASK) == 0, VERR_INTERNAL_ERROR_2);
154 pGVM->pgm.s.HCPhysZeroPg = RTR0MemObjGetPagePhysAddr(hMemObj, RT_UOFFSETOF_DYN(GVM, pgm.s.abZeroPg) >> HOST_PAGE_SHIFT);
155 AssertReturn(pGVM->pgm.s.HCPhysZeroPg != NIL_RTHCPHYS, VERR_INTERNAL_ERROR_3);
156
157 AssertReturn(((uintptr_t)&pGVM->pgm.s.abMmioPg[0] & HOST_PAGE_OFFSET_MASK) == 0, VERR_INTERNAL_ERROR_2);
158 pGVM->pgm.s.HCPhysMmioPg = RTR0MemObjGetPagePhysAddr(hMemObj, RT_UOFFSETOF_DYN(GVM, pgm.s.abMmioPg) >> HOST_PAGE_SHIFT);
159 AssertReturn(pGVM->pgm.s.HCPhysMmioPg != NIL_RTHCPHYS, VERR_INTERNAL_ERROR_3);
160
161 pGVM->pgm.s.HCPhysInvMmioPg = pGVM->pgm.s.HCPhysMmioPg;
162
163 return RTCritSectInit(&pGVM->pgmr0.s.PoolGrowCritSect);
164}
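The hType initialization above keeps the table index in the low bits (PGMPHYSHANDLERTYPE_IDX_MASK) and randomizes the remaining bits, so a zero-initialized handle can never alias a valid entry. A minimal standalone sketch of that scheme, using an illustrative 6-bit index mask rather than the real PGMPHYSHANDLERTYPE_IDX_MASK value:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define EXAMPLE_IDX_MASK  UINT64_C(0x3f)   /* assumption for this sketch only */

    /* Build a handle that carries the table index in its low bits and random
       noise in the rest; the index is recovered with a simple mask. */
    static uint64_t exampleMakeTypeHandle(uint64_t iEntry)
    {
        uint64_t const uRand = ((uint64_t)rand() << 32) | (unsigned)rand();
        return iEntry | (uRand & ~EXAMPLE_IDX_MASK);
    }

    int main(void)
    {
        uint64_t const hType = exampleMakeTypeHandle(5);
        printf("hType=%#llx -> index=%llu\n",
               (unsigned long long)hType, (unsigned long long)(hType & EXAMPLE_IDX_MASK));
        return 0;
    }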
165
166
167/**
168 * Initialize the per-VM PGM for ring-0.
169 *
170 * @returns VBox status code.
171 * @param pGVM Pointer to the global VM structure.
172 */
173VMMR0_INT_DECL(int) PGMR0InitVM(PGVM pGVM)
174{
175 /*
176 * Set up the ring-0 context for our access handlers.
177 */
178 int rc = PGMR0HandlerPhysicalTypeSetUpContext(pGVM, PGMPHYSHANDLERKIND_WRITE, 0 /*fFlags*/,
179 pgmPhysRomWriteHandler, pgmPhysRomWritePfHandler,
180 "ROM write protection", pGVM->pgm.s.hRomPhysHandlerType);
181 AssertLogRelRCReturn(rc, rc);
182
183 /*
184 * Register the physical access handler doing dirty MMIO2 tracing.
185 */
186 rc = PGMR0HandlerPhysicalTypeSetUpContext(pGVM, PGMPHYSHANDLERKIND_WRITE, PGMPHYSHANDLER_F_KEEP_PGM_LOCK,
187 pgmPhysMmio2WriteHandler, pgmPhysMmio2WritePfHandler,
188 "MMIO2 dirty page tracing", pGVM->pgm.s.hMmio2DirtyPhysHandlerType);
189 AssertLogRelRCReturn(rc, rc);
190
191 /*
192 * The page pool.
193 */
194 return pgmR0PoolInitVM(pGVM);
195}
196
197
198/**
199 * Called at the end of the ring-0 initialization to seal access handler types.
200 *
201 * @param pGVM Pointer to the global VM structure.
202 */
203VMMR0_INT_DECL(void) PGMR0DoneInitVM(PGVM pGVM)
204{
205 /*
206 * Seal all the access handler types. Does both ring-3 and ring-0.
207 *
208 * Note! Since this is a void function and we don't have any ring-0 state
209 * machinery for marking the VM as bogus, this code will just
210 * override corrupted values as best as it can.
211 */
212 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes) == RT_ELEMENTS(pGVM->pgm.s.aPhysHandlerTypes));
213 for (size_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes); i++)
214 {
215 PPGMPHYSHANDLERTYPEINTR0 const pTypeR0 = &pGVM->pgmr0.s.aPhysHandlerTypes[i];
216 PPGMPHYSHANDLERTYPEINTR3 const pTypeR3 = &pGVM->pgm.s.aPhysHandlerTypes[i];
217 PGMPHYSHANDLERKIND const enmKindR3 = pTypeR3->enmKind;
218 PGMPHYSHANDLERKIND const enmKindR0 = pTypeR0->enmKind;
219 AssertLogRelMsgStmt(pTypeR0->hType == pTypeR3->hType,
220 ("i=%u %#RX64 vs %#RX64 %s\n", i, pTypeR0->hType, pTypeR3->hType, pTypeR0->pszDesc),
221 pTypeR3->hType = pTypeR0->hType);
222 switch (enmKindR3)
223 {
224 case PGMPHYSHANDLERKIND_ALL:
225 case PGMPHYSHANDLERKIND_MMIO:
226 if ( enmKindR0 == enmKindR3
227 || enmKindR0 == PGMPHYSHANDLERKIND_INVALID)
228 {
229 pTypeR3->fRing0Enabled = enmKindR0 == enmKindR3;
230 pTypeR0->uState = PGM_PAGE_HNDL_PHYS_STATE_ALL;
231 pTypeR3->uState = PGM_PAGE_HNDL_PHYS_STATE_ALL;
232 continue;
233 }
234 break;
235
236 case PGMPHYSHANDLERKIND_WRITE:
237 if ( enmKindR0 == enmKindR3
238 || enmKindR0 == PGMPHYSHANDLERKIND_INVALID)
239 {
240 pTypeR3->fRing0Enabled = enmKindR0 == enmKindR3;
241 pTypeR0->uState = PGM_PAGE_HNDL_PHYS_STATE_WRITE;
242 pTypeR3->uState = PGM_PAGE_HNDL_PHYS_STATE_WRITE;
243 continue;
244 }
245 break;
246
247 default:
248 AssertLogRelMsgFailed(("i=%u enmKindR3=%d\n", i, enmKindR3));
249 RT_FALL_THROUGH();
250 case PGMPHYSHANDLERKIND_INVALID:
251 AssertLogRelMsg(enmKindR0 == PGMPHYSHANDLERKIND_INVALID,
252 ("i=%u enmKind=%d %s\n", i, enmKindR0, pTypeR0->pszDesc));
253 AssertLogRelMsg(pTypeR0->pfnHandler == pgmR0HandlerPhysicalHandlerToRing3,
254 ("i=%u pfnHandler=%p %s\n", i, pTypeR0->pfnHandler, pTypeR0->pszDesc));
255 AssertLogRelMsg(pTypeR0->pfnPfHandler == pgmR0HandlerPhysicalPfHandlerToRing3,
256 ("i=%u pfnPfHandler=%p %s\n", i, pTypeR0->pfnPfHandler, pTypeR0->pszDesc));
257
258 /* Unused or bad ring-3 entry; make it and the ring-0 one harmless. */
259 pTypeR3->enmKind = PGMPHYSHANDLERKIND_END;
260 pTypeR3->fRing0DevInsIdx = false;
261 pTypeR3->fKeepPgmLock = false;
262 pTypeR3->uState = 0;
263 break;
264 }
265 pTypeR3->fRing0Enabled = false;
266
267 /* Make sure the entry is harmless and goes to ring-3. */
268 pTypeR0->enmKind = PGMPHYSHANDLERKIND_END;
269 pTypeR0->pfnHandler = pgmR0HandlerPhysicalHandlerToRing3;
270 pTypeR0->pfnPfHandler = pgmR0HandlerPhysicalPfHandlerToRing3;
271 pTypeR0->fRing0DevInsIdx = false;
272 pTypeR0->fKeepPgmLock = false;
273 pTypeR0->uState = 0;
274 pTypeR0->pszDesc = "invalid";
275 }
276}
277
278
279/**
280 * Cleans up any loose ends before the GVM structure is destroyed.
281 */
282VMMR0_INT_DECL(void) PGMR0CleanupVM(PGVM pGVM)
283{
284 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
285 {
286 if (pGVM->pgmr0.s.ahPoolMapObjs[i] != NIL_RTR0MEMOBJ)
287 {
288 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMapObjs[i], true /*fFreeMappings*/);
289 AssertRC(rc);
290 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
291 }
292
293 if (pGVM->pgmr0.s.ahPoolMemObjs[i] != NIL_RTR0MEMOBJ)
294 {
295 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMemObjs[i], true /*fFreeMappings*/);
296 AssertRC(rc);
297 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
298 }
299 }
300
301 if (pGVM->pgmr0.s.hPhysHandlerMapObj != NIL_RTR0MEMOBJ)
302 {
303 int rc = RTR0MemObjFree(pGVM->pgmr0.s.hPhysHandlerMapObj, true /*fFreeMappings*/);
304 AssertRC(rc);
305 pGVM->pgmr0.s.hPhysHandlerMapObj = NIL_RTR0MEMOBJ;
306 }
307
308 if (pGVM->pgmr0.s.hPhysHandlerMemObj != NIL_RTR0MEMOBJ)
309 {
310 int rc = RTR0MemObjFree(pGVM->pgmr0.s.hPhysHandlerMemObj, true /*fFreeMappings*/);
311 AssertRC(rc);
312 pGVM->pgmr0.s.hPhysHandlerMemObj = NIL_RTR0MEMOBJ;
313 }
314
315 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahRomRangeMemObjs); i++)
316 {
317 if (pGVM->pgmr0.s.ahRomRangeMapObjs[i] != NIL_RTR0MEMOBJ)
318 {
319 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahRomRangeMapObjs[i], true /*fFreeMappings*/);
320 AssertRC(rc);
321 pGVM->pgmr0.s.ahRomRangeMapObjs[i] = NIL_RTR0MEMOBJ;
322 }
323
324 if (pGVM->pgmr0.s.ahRomRangeMemObjs[i] != NIL_RTR0MEMOBJ)
325 {
326 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahRomRangeMemObjs[i], true /*fFreeMappings*/);
327 AssertRC(rc);
328 pGVM->pgmr0.s.ahRomRangeMemObjs[i] = NIL_RTR0MEMOBJ;
329 }
330 }
331
332 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahMmio2MemObjs); i++)
333 {
334 if (pGVM->pgmr0.s.ahMmio2MapObjs[i] != NIL_RTR0MEMOBJ)
335 {
336 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahMmio2MapObjs[i], true /*fFreeMappings*/);
337 AssertRC(rc);
338 pGVM->pgmr0.s.ahMmio2MapObjs[i] = NIL_RTR0MEMOBJ;
339 }
340
341 if (pGVM->pgmr0.s.ahMmio2MemObjs[i] != NIL_RTR0MEMOBJ)
342 {
343 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahMmio2MemObjs[i], true /*fFreeMappings*/);
344 AssertRC(rc);
345 pGVM->pgmr0.s.ahMmio2MemObjs[i] = NIL_RTR0MEMOBJ;
346 }
347 }
348
349 uint32_t const cRangesMax = RT_MIN(pGVM->pgmr0.s.idRamRangeMax, RT_ELEMENTS(pGVM->pgmr0.s.ahRamRangeMemObjs) - 1U) + 1U;
350 for (uint32_t i = 0; i < cRangesMax; i++)
351 {
352 if (pGVM->pgmr0.s.ahRamRangeMapObjs[i] != NIL_RTR0MEMOBJ)
353 {
354 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahRamRangeMapObjs[i], true /*fFreeMappings*/);
355 AssertRC(rc);
356 pGVM->pgmr0.s.ahRamRangeMapObjs[i] = NIL_RTR0MEMOBJ;
357 }
358
359 if (pGVM->pgmr0.s.ahRamRangeMemObjs[i] != NIL_RTR0MEMOBJ)
360 {
361 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahRamRangeMemObjs[i], true /*fFreeMappings*/);
362 AssertRC(rc);
363 pGVM->pgmr0.s.ahRamRangeMemObjs[i] = NIL_RTR0MEMOBJ;
364 }
365 }
366
367 if (RTCritSectIsInitialized(&pGVM->pgmr0.s.PoolGrowCritSect))
368 RTCritSectDelete(&pGVM->pgmr0.s.PoolGrowCritSect);
369}
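Every resource released above follows the same idempotent pattern: free the mapping object first, then the backing memory object, and reset both handles to NIL so running cleanup twice is harmless. A hedged helper sketch (the name pgmR0ExampleFreeObjPair is hypothetical, not part of this file) that each of the loops above effectively expands to:

    /* Illustrative only: free a map/mem object pair and NIL the handles so
       that a second cleanup pass does nothing. */
    static void pgmR0ExampleFreeObjPair(PRTR0MEMOBJ phMapObj, PRTR0MEMOBJ phMemObj)
    {
        if (*phMapObj != NIL_RTR0MEMOBJ)
        {
            int rc = RTR0MemObjFree(*phMapObj, true /*fFreeMappings*/);
            AssertRC(rc);
            *phMapObj = NIL_RTR0MEMOBJ;
        }
        if (*phMemObj != NIL_RTR0MEMOBJ)
        {
            int rc = RTR0MemObjFree(*phMemObj, true /*fFreeMappings*/);
            AssertRC(rc);
            *phMemObj = NIL_RTR0MEMOBJ;
        }
    }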
370
371
372/**
373 * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
374 *
375 * @returns The following VBox status codes.
376 * @retval VINF_SUCCESS on success. FF cleared.
377 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
378 *
379 * @param pGVM The global (ring-0) VM structure.
380 * @param idCpu The ID of the calling EMT.
381 * @param fRing3 Set if the caller is ring-3. Determines whether to
382 * return VINF_EM_NO_MEMORY or not.
383 *
384 * @thread EMT(idCpu)
385 *
386 * @remarks Must be called from within the PGM critical section. The caller
387 * must clear the new pages.
388 */
389int pgmR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu, bool fRing3)
390{
391 /*
392 * Validate inputs.
393 */
394 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
395 Assert(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf());
396 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
397
398 /*
399 * Check for error injection.
400 */
401 if (RT_LIKELY(!pGVM->pgm.s.fErrInjHandyPages))
402 { /* likely */ }
403 else
404 return VERR_NO_MEMORY;
405
406 /*
407 * Try to allocate a full set of handy pages.
408 */
409 uint32_t const iFirst = pGVM->pgm.s.cHandyPages;
410 AssertMsgReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), ("%#x\n", iFirst), VERR_PGM_HANDY_PAGE_IPE);
411
412 uint32_t const cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
413 if (!cPages)
414 return VINF_SUCCESS;
415
416 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
417 if (RT_SUCCESS(rc))
418 {
419 uint32_t const cHandyPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages); /** @todo allow allocating less... */
420 pGVM->pgm.s.cHandyPages = cHandyPages;
421 VM_FF_CLEAR(pGVM, VM_FF_PGM_NEED_HANDY_PAGES);
422 VM_FF_CLEAR(pGVM, VM_FF_PGM_NO_MEMORY);
423
424#ifdef VBOX_STRICT
425 for (uint32_t i = 0; i < cHandyPages; i++)
426 {
427 Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
428 Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
429 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
430 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_GMMPAGEDESC_PHYS);
431 Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
432 }
433#endif
434
435 /*
436 * Clear the pages.
437 */
438 for (uint32_t iPage = iFirst; iPage < cHandyPages; iPage++)
439 {
440 PGMMPAGEDESC pPage = &pGVM->pgm.s.aHandyPages[iPage];
441 if (!pPage->fZeroed)
442 {
443 void *pv = NULL;
444#ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
445 rc = SUPR0HCPhysToVirt(pPage->HCPhysGCPhys, &pv);
446#else
447 rc = GMMR0PageIdToVirt(pGVM, pPage->idPage, &pv);
448#endif
449 AssertMsgRCReturn(rc, ("idPage=%#x HCPhys=%RHp rc=%Rrc\n", pPage->idPage, pPage->HCPhysGCPhys, rc), rc);
450
451 RT_BZERO(pv, GUEST_PAGE_SIZE);
452 pPage->fZeroed = true;
453 }
454#ifdef VBOX_STRICT
455 else
456 {
457 void *pv = NULL;
458# ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
459 rc = SUPR0HCPhysToVirt(pPage->HCPhysGCPhys, &pv);
460# else
461 rc = GMMR0PageIdToVirt(pGVM, pPage->idPage, &pv);
462# endif
463 AssertMsgRCReturn(rc, ("idPage=%#x HCPhys=%RHp rc=%Rrc\n", pPage->idPage, pPage->HCPhysGCPhys, rc), rc);
464 AssertReturn(ASMMemIsZero(pv, GUEST_PAGE_SIZE), VERR_PGM_HANDY_PAGE_IPE);
465 }
466#endif
467 Log3(("PGMR0PhysAllocateHandyPages: idPage=%#x HCPhys=%RGp\n", pPage->idPage, pPage->HCPhysGCPhys));
468 }
469 }
470 else
471 {
472 /*
473 * We should never get here unless there is a genuine shortage of
474 * memory (or some internal error). Flag the error so the VM can be
475 * suspended ASAP and the user informed. If we're totally out of
476 * handy pages we will return failure.
477 */
478 /* Report the failure. */
479 LogRel(("PGM: Failed to procure handy pages; rc=%Rrc cHandyPages=%#x\n"
480 " cAllPages=%#x cPrivatePages=%#x cSharedPages=%#x cZeroPages=%#x\n",
481 rc, pGVM->pgm.s.cHandyPages,
482 pGVM->pgm.s.cAllPages, pGVM->pgm.s.cPrivatePages, pGVM->pgm.s.cSharedPages, pGVM->pgm.s.cZeroPages));
483
484 GMMMEMSTATSREQ Stats = { { SUPVMMR0REQHDR_MAGIC, sizeof(Stats) }, 0, 0, 0, 0, 0 };
485 if (RT_SUCCESS(GMMR0QueryMemoryStatsReq(pGVM, idCpu, &Stats)))
486 LogRel(("GMM: Statistics:\n"
487 " Allocated pages: %RX64\n"
488 " Free pages: %RX64\n"
489 " Shared pages: %RX64\n"
490 " Maximum pages: %RX64\n"
491 " Ballooned pages: %RX64\n",
492 Stats.cAllocPages, Stats.cFreePages, Stats.cSharedPages, Stats.cMaxPages, Stats.cBalloonedPages));
493
494 if ( rc != VERR_NO_MEMORY
495 && rc != VERR_NO_PHYS_MEMORY
496 && rc != VERR_LOCK_FAILED)
497 for (uint32_t iPage = 0; iPage < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); iPage++)
498 LogRel(("PGM: aHandyPages[#%#04x] = {.HCPhysGCPhys=%RHp, .idPage=%#08x, .idSharedPage=%#08x}\n",
499 iPage, pGVM->pgm.s.aHandyPages[iPage].HCPhysGCPhys, pGVM->pgm.s.aHandyPages[iPage].idPage,
500 pGVM->pgm.s.aHandyPages[iPage].idSharedPage));
501
502 /* Set the FFs and adjust rc. */
503 VM_FF_SET(pGVM, VM_FF_PGM_NEED_HANDY_PAGES);
504 VM_FF_SET(pGVM, VM_FF_PGM_NO_MEMORY);
505 if (!fRing3)
506 if ( rc == VERR_NO_MEMORY
507 || rc == VERR_NO_PHYS_MEMORY
508 || rc == VERR_LOCK_FAILED
509 || rc == VERR_MAP_FAILED)
510 rc = VINF_EM_NO_MEMORY;
511 }
512
513 LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
514 return rc;
515}
516
517
518/**
519 * Worker function for PGMR3PhysAllocateHandyPages / VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES.
520 *
521 * @returns The following VBox status codes.
522 * @retval VINF_SUCCESS on success. FF cleared.
523 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
524 *
525 * @param pGVM The global (ring-0) VM structure.
526 * @param idCpu The ID of the calling EMT.
527 *
528 * @thread EMT(idCpu)
529 *
530 * @remarks Must be called from within the PGM critical section. The caller
531 * must clear the new pages.
532 */
533VMMR0_INT_DECL(int) PGMR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu)
534{
535 /*
536 * Validate inputs.
537 */
538 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
539 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
540
541 /*
542 * Enter the PGM lock and call the worker.
543 */
544 int rc = PGM_LOCK(pGVM);
545 if (RT_SUCCESS(rc))
546 {
547 rc = pgmR0PhysAllocateHandyPages(pGVM, idCpu, true /*fRing3*/);
548 PGM_UNLOCK(pGVM);
549 }
550 return rc;
551}
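For contrast with the ring-3 entry point above, a hedged sketch of how a ring-0 caller that already owns the PGM lock would use the internal worker with fRing3=false, which is the path the pgmPhysEnsureHandyPage reference in the worker's doc comment alludes to (the wrapper name pgmR0ExampleEnsureHandyPages is hypothetical):

    /* Illustrative only: refill the handy-page array from ring-0 while holding
       the PGM lock; genuine out-of-memory conditions come back as
       VINF_EM_NO_MEMORY so the caller can bail out to ring-3 (the FFs are set
       by the worker). */
    static int pgmR0ExampleEnsureHandyPages(PGVM pGVM, VMCPUID idCpu)
    {
        PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
        int const rc = pgmR0PhysAllocateHandyPages(pGVM, idCpu, false /*fRing3*/);
        if (rc == VINF_EM_NO_MEMORY)
            Log(("example: need to go to ring-3, VM_FF_PGM_NO_MEMORY is set\n"));
        return rc;
    }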
552
553
554/**
555 * Flushes any changes pending in the handy page array.
556 *
557 * It is very important that this gets done when page sharing is enabled.
558 *
559 * @returns The following VBox status codes.
560 * @retval VINF_SUCCESS on success. FF cleared.
561 *
562 * @param pGVM The global (ring-0) VM structure.
563 * @param idCpu The ID of the calling EMT.
564 *
565 * @thread EMT(idCpu)
566 *
567 * @remarks Must be called from within the PGM critical section.
568 */
569VMMR0_INT_DECL(int) PGMR0PhysFlushHandyPages(PGVM pGVM, VMCPUID idCpu)
570{
571 /*
572 * Validate inputs.
573 */
574 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
575 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
576 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
577
578 /*
579 * Push any pending handy page updates to GMM; no new pages are allocated here.
580 */
581 uint32_t iFirst = pGVM->pgm.s.cHandyPages;
582 AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
583 uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
584 if (!cPages)
585 return VINF_SUCCESS;
586 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, 0, &pGVM->pgm.s.aHandyPages[iFirst]);
587
588 LogFlow(("PGMR0PhysFlushHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
589 return rc;
590}
591
592
593/**
594 * Allocate a large page at @a GCPhys.
595 *
596 * @returns The following VBox status codes.
597 * @retval VINF_SUCCESS on success.
598 * @retval VINF_EM_NO_MEMORY if we're out of memory.
599 *
600 * @param pGVM The global (ring-0) VM structure.
601 * @param idCpu The ID of the calling EMT.
602 * @param GCPhys The guest physical address of the page.
603 *
604 * @thread EMT(idCpu)
605 *
606 * @remarks Must be called from within the PGM critical section. The caller
607 * must clear the new pages.
608 */
609int pgmR0PhysAllocateLargePage(PGVM pGVM, VMCPUID idCpu, RTGCPHYS GCPhys)
610{
611 STAM_PROFILE_START(&pGVM->pgm.s.Stats.StatLargePageAlloc2, a);
612 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
613
614 /*
615 * Allocate a large page.
616 */
617 RTHCPHYS HCPhys = NIL_GMMPAGEDESC_PHYS;
618 uint32_t idPage = NIL_GMM_PAGEID;
619
620 if (true) /** @todo pre-allocate 2-3 pages on the allocation thread. */
621 {
622 uint64_t const nsAllocStart = RTTimeNanoTS();
623 if (nsAllocStart < pGVM->pgm.s.nsLargePageRetry)
624 {
625 LogFlowFunc(("returns VERR_TRY_AGAIN - %RU64 ns left of hold off period\n", pGVM->pgm.s.nsLargePageRetry - nsAllocStart));
626 return VERR_TRY_AGAIN;
627 }
628
629 int const rc = GMMR0AllocateLargePage(pGVM, idCpu, _2M, &idPage, &HCPhys);
630
631 uint64_t const nsAllocEnd = RTTimeNanoTS();
632 uint64_t const cNsElapsed = nsAllocEnd - nsAllocStart;
633 STAM_REL_PROFILE_ADD_PERIOD(&pGVM->pgm.s.StatLargePageAlloc, cNsElapsed);
634 if (cNsElapsed < RT_NS_100MS)
635 pGVM->pgm.s.cLargePageLongAllocRepeats = 0;
636 else
637 {
638 /* If a large page allocation takes more than 100ms back off for a
639 while so the host OS can reshuffle memory and make some more large
640 pages available. However if it took over a second, just disable it. */
641 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageOverflow);
642 pGVM->pgm.s.cLargePageLongAllocRepeats++;
643 if (cNsElapsed > RT_NS_1SEC)
644 {
645 LogRel(("PGMR0PhysAllocateLargePage: Disabling large pages after %'RU64 ns allocation time.\n", cNsElapsed));
646 PGMSetLargePageUsage(pGVM, false);
647 }
648 else
649 {
650 Log(("PGMR0PhysAllocateLargePage: Suspending large page allocations for %u sec after %'RU64 ns allocation time.\n",
651 30 * pGVM->pgm.s.cLargePageLongAllocRepeats, cNsElapsed));
652 pGVM->pgm.s.nsLargePageRetry = nsAllocEnd + RT_NS_30SEC * pGVM->pgm.s.cLargePageLongAllocRepeats;
653 }
654 }
655
656 if (RT_FAILURE(rc))
657 {
658 Log(("PGMR0PhysAllocateLargePage: Failed: %Rrc\n", rc));
659 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageAllocFailed);
660 if (rc == VERR_NOT_SUPPORTED)
661 {
662 LogRel(("PGM: Disabling large pages because of VERR_NOT_SUPPORTED status.\n"));
663 PGMSetLargePageUsage(pGVM, false);
664 }
665 return rc;
666 }
667 }
668
669 STAM_PROFILE_STOP_START(&pGVM->pgm.s.Stats.StatLargePageAlloc2, &pGVM->pgm.s.Stats.StatLargePageSetup, a);
670
671 /*
672 * Enter the pages into PGM.
673 */
674 bool fFlushTLBs = false;
675 VBOXSTRICTRC rc = VINF_SUCCESS;
676 unsigned cLeft = _2M / GUEST_PAGE_SIZE;
677 while (cLeft-- > 0)
678 {
679 PPGMPAGE const pPage = pgmPhysGetPage(pGVM, GCPhys);
680 AssertReturn(pPage && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM && PGM_PAGE_IS_ZERO(pPage), VERR_PGM_UNEXPECTED_PAGE_STATE);
681
682 /* Make sure there are no zero mappings. */
683 uint16_t const u16Tracking = PGM_PAGE_GET_TRACKING(pPage);
684 if (u16Tracking == 0)
685 Assert(PGM_PAGE_GET_PTE_INDEX(pPage) == 0);
686 else
687 {
688 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageZeroEvict);
689 VBOXSTRICTRC rc3 = pgmPoolTrackUpdateGCPhys(pGVM, GCPhys, pPage, true /*fFlushPTEs*/, &fFlushTLBs);
690 Log(("PGMR0PhysAllocateLargePage: GCPhys=%RGp: tracking=%#x rc3=%Rrc\n", GCPhys, u16Tracking, VBOXSTRICTRC_VAL(rc3)));
691 if (rc3 != VINF_SUCCESS && rc == VINF_SUCCESS)
692 rc = rc3; /** @todo not perfect... */
693 PGM_PAGE_SET_PTE_INDEX(pGVM, pPage, 0);
694 PGM_PAGE_SET_TRACKING(pGVM, pPage, 0);
695 }
696
697 /* Setup the new page. */
698 PGM_PAGE_SET_HCPHYS(pGVM, pPage, HCPhys);
699 PGM_PAGE_SET_STATE(pGVM, pPage, PGM_PAGE_STATE_ALLOCATED);
700 PGM_PAGE_SET_PDE_TYPE(pGVM, pPage, PGM_PAGE_PDE_TYPE_PDE);
701 PGM_PAGE_SET_PAGEID(pGVM, pPage, idPage);
702 Log3(("PGMR0PhysAllocateLargePage: GCPhys=%RGp: idPage=%#x HCPhys=%RGp (old tracking=%#x)\n",
703 GCPhys, idPage, HCPhys, u16Tracking));
704
705 /* advance */
706 idPage++;
707 HCPhys += GUEST_PAGE_SIZE;
708 GCPhys += GUEST_PAGE_SIZE;
709 }
710
711 STAM_COUNTER_ADD(&pGVM->pgm.s.Stats.StatRZPageReplaceZero, _2M / GUEST_PAGE_SIZE);
712 pGVM->pgm.s.cZeroPages -= _2M / GUEST_PAGE_SIZE;
713 pGVM->pgm.s.cPrivatePages += _2M / GUEST_PAGE_SIZE;
714
715 /*
716 * Flush all TLBs.
717 */
718 if (!fFlushTLBs)
719 { /* likely as we shouldn't normally map zero pages */ }
720 else
721 {
722 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageTlbFlush);
723 PGM_INVL_ALL_VCPU_TLBS(pGVM);
724 }
725 /** @todo this is a little expensive (~3000 ticks) since we'll have to
726 * invalidate everything. Add a version to the TLB? */
727 pgmPhysInvalidatePageMapTLB(pGVM, false /*fInRendezvous*/);
728 IEMTlbInvalidateAllPhysicalAllCpus(pGVM, idCpu, IEMTLBPHYSFLUSHREASON_ALLOCATED_LARGE);
729
730 STAM_PROFILE_STOP(&pGVM->pgm.s.Stats.StatLargePageSetup, a);
731#if 0 /** @todo returning info statuses here might not be a great idea... */
732 LogFlow(("PGMR0PhysAllocateLargePage: returns %Rrc\n", VBOXSTRICTRC_VAL(rc) ));
733 return VBOXSTRICTRC_TODO(rc);
734#else
735 LogFlow(("PGMR0PhysAllocateLargePage: returns VINF_SUCCESS (rc=%Rrc)\n", VBOXSTRICTRC_VAL(rc) ));
736 return VINF_SUCCESS;
737#endif
738}
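The timing policy in the allocation path above can be condensed into a small standalone sketch; the constants mirror RT_NS_100MS, RT_NS_1SEC and RT_NS_30SEC, and all names here are illustrative only:

    #include <stdbool.h>
    #include <stdint.h>

    #define EX_NS_100MS UINT64_C(100000000)
    #define EX_NS_1SEC  UINT64_C(1000000000)
    #define EX_NS_30SEC UINT64_C(30000000000)

    /* Returns false when large pages should be disabled outright; otherwise
       keeps them enabled and, for slow allocations, pushes out the retry
       deadline the allocator checks (cf. nsLargePageRetry) by 30s per
       consecutive slow run. */
    static bool examplePolicyAfterLargePageAlloc(uint64_t cNsElapsed, uint64_t nsNow,
                                                 uint32_t *pcLongRepeats, uint64_t *pnsRetry)
    {
        if (cNsElapsed < EX_NS_100MS)
        {
            *pcLongRepeats = 0;                 /* fast allocation: clear the back-off */
            return true;
        }
        *pcLongRepeats += 1;                    /* slow allocation */
        if (cNsElapsed > EX_NS_1SEC)
            return false;                       /* painfully slow: give up on large pages */
        *pnsRetry = nsNow + EX_NS_30SEC * *pcLongRepeats;
        return true;
    }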
739
740
741/**
742 * Allocate a large page at @a GCPhys.
743 *
744 * @returns The following VBox status codes.
745 * @retval VINF_SUCCESS on success.
746 * @retval VINF_EM_NO_MEMORY if we're out of memory.
747 *
748 * @param pGVM The global (ring-0) VM structure.
749 * @param idCpu The ID of the calling EMT.
750 * @param GCPhys The guest physical address of the page.
751 *
752 * @thread EMT(idCpu)
753 *
754 * @remarks Must be called from within the PGM critical section. The caller
755 * must clear the new pages.
756 */
757VMMR0_INT_DECL(int) PGMR0PhysAllocateLargePage(PGVM pGVM, VMCPUID idCpu, RTGCPHYS GCPhys)
758{
759 /*
760 * Validate inputs.
761 */
762 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
763 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
764
765 int rc = PGM_LOCK(pGVM);
766 AssertRCReturn(rc, rc);
767
768 /* The caller might have done this already, but since we're ring-3 callable we
769 need to make sure everything is fine before starting the allocation here. */
770 for (unsigned i = 0; i < _2M / GUEST_PAGE_SIZE; i++)
771 {
772 PPGMPAGE pPage;
773 rc = pgmPhysGetPageEx(pGVM, GCPhys + i * GUEST_PAGE_SIZE, &pPage);
774 AssertRCReturnStmt(rc, PGM_UNLOCK(pGVM), rc);
775 AssertReturnStmt(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM, PGM_UNLOCK(pGVM), VERR_PGM_PHYS_NOT_RAM);
776 AssertReturnStmt(PGM_PAGE_IS_ZERO(pPage), PGM_UNLOCK(pGVM), VERR_PGM_UNEXPECTED_PAGE_STATE);
777 }
778
779 /*
780 * Call common code.
781 */
782 rc = pgmR0PhysAllocateLargePage(pGVM, idCpu, GCPhys);
783
784 PGM_UNLOCK(pGVM);
785 return rc;
786}
787
788
789/**
790 * Validates an MMIO2 handle and translates it into a range index.
791 *
792 * @returns The zero-based MMIO2 index on success, a negative VBox status code on failure.
793 * @param pGVM The global (ring-0) VM structure.
794 * @param pDevIns The device instance owning the region.
795 * @param hMmio2 Handle to look up.
796 */
797DECLINLINE(int32_t) pgmR0PhysMmio2ValidateHandle(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
798{
799 /*
800 * We use the lookup table here as list walking is tedious in ring-0 when using
801 * ring-3 pointers and this probably will require some kind of refactoring anyway.
802 */
803 AssertReturn(hMmio2 <= RT_ELEMENTS(pGVM->pgm.s.aMmio2Ranges) && hMmio2 != 0, VERR_INVALID_HANDLE);
804 uint32_t const idx = hMmio2 - 1U;
805 AssertReturn(pGVM->pgm.s.aMmio2Ranges[idx].pDevInsR3 == pDevIns->pDevInsForR3, VERR_NOT_OWNER);
806 AssertReturn(pGVM->pgm.s.aMmio2Ranges[idx].idMmio2 == hMmio2, VERR_INVALID_HANDLE);
807 AssertReturn(pGVM->pgmr0.s.ahMmio2MapObjs[idx] != NIL_RTR0MEMOBJ, VERR_INVALID_HANDLE);
808 AssertReturn(pGVM->pgmr0.s.acMmio2RangePages[idx] != 0, VERR_INVALID_HANDLE);
809 return idx;
810}
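A short hedged sketch of the calling convention the validator above uses: non-negative return values are zero-based MMIO2 indexes, negative ones are VBox error statuses, so a caller can propagate the value directly (the helper name is hypothetical):

    /* Illustrative only: shows how a caller folds the index/status return value
       of pgmR0PhysMmio2ValidateHandle into its own status handling. */
    static int pgmR0ExampleQueryMmio2Pages(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
                                           uint32_t *pcPages)
    {
        int32_t const idx = pgmR0PhysMmio2ValidateHandle(pGVM, pDevIns, hMmio2);
        if (idx < 0)
            return idx;                     /* VERR_INVALID_HANDLE, VERR_NOT_OWNER, ... */
        *pcPages = pGVM->pgmr0.s.acMmio2RangePages[idx];
        return VINF_SUCCESS;
    }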
811
812
813/**
814 * Worker for PDMDEVHLPR0::pfnMmio2SetUpContext.
815 *
816 * @returns VBox status code.
817 * @param pGVM The global (ring-0) VM structure.
818 * @param pDevIns The device instance.
819 * @param hMmio2 The MMIO2 region to map into ring-0 address space.
820 * @param offSub The offset into the region.
821 * @param cbSub The size of the mapping, zero meaning all the rest.
822 * @param ppvMapping Where to return the ring-0 mapping address.
823 */
824VMMR0_INT_DECL(int) PGMR0PhysMMIO2MapKernel(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
825 size_t offSub, size_t cbSub, void **ppvMapping)
826{
827 *ppvMapping = NULL;
828 AssertReturn(!(offSub & HOST_PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
829 AssertReturn(!(cbSub & HOST_PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
830
831 /*
832 * Validate and translate hMmio2 into an MMIO2 index.
833 */
834 uint32_t const idxFirst = pgmR0PhysMmio2ValidateHandle(pGVM, pDevIns, hMmio2);
835 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
836
837#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
838 uint8_t * const pbR0 = pGVM->pgmr0.s.apbMmio2Backing[idxFirst];
839#else
840 RTR0MEMOBJ const hMemObj = pGVM->pgmr0.s.ahMmio2MemObjs[idxFirst];
841#endif
842 RTGCPHYS const cbReal = (RTGCPHYS)pGVM->pgmr0.s.acMmio2RangePages[idxFirst] << GUEST_PAGE_SHIFT;
843 ASMCompilerBarrier();
844
845 AssertReturn(offSub < cbReal, VERR_OUT_OF_RANGE);
846 if (cbSub == 0)
847 cbSub = cbReal - offSub;
848 else
849 AssertReturn(cbSub < cbReal && cbSub + offSub <= cbReal, VERR_OUT_OF_RANGE);
850
851#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
852 /*
853 * Just return the address of the existing ring-0 mapping.
854 */
855 AssertPtrReturn(pbR0, VERR_INTERNAL_ERROR_4);
856 *ppvMapping = &pbR0[offSub];
857 return VINF_SUCCESS;
858#else
859 /*
860 * Call IPRT to do the mapping. Cleanup is done indirectly by telling
861 * RTR0MemObjFree to include mappings. It can only be done once, so no
862 * risk of excessive mapping leaks.
863 */
864 RTR0MEMOBJ hMapObj;
865 int rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
866 if (RT_SUCCESS(rc))
867 *ppvMapping = RTR0MemObjAddress(hMapObj);
868 return rc;
869#endif
870}
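The offSub/cbSub clipping convention above (cbSub == 0 meaning "everything from offSub to the end of the region") in a tiny standalone form; the names and the plain int return convention are illustrative assumptions:

    #include <stddef.h>

    /* Resolve a sub-range request against a region of cbReal bytes; returns 0
       on success with *pcbSub filled in, -1 if the request falls outside the
       region. */
    static int exampleResolveSubRange(size_t cbReal, size_t offSub, size_t *pcbSub)
    {
        if (offSub >= cbReal)
            return -1;
        if (*pcbSub == 0)
            *pcbSub = cbReal - offSub;
        else if (*pcbSub >= cbReal || *pcbSub + offSub > cbReal)
            return -1;
        return 0;
    }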
871
872
873/**
874 * This is called during PGMR3Init to init the physical access handler allocator
875 * and tree.
876 *
877 * @returns VBox status code.
878 * @param pGVM Pointer to the global VM structure.
879 * @param cEntries Desired number of physical access handlers to reserve
880 * space for (will be adjusted).
881 * @thread EMT(0)
882 */
883VMMR0_INT_DECL(int) PGMR0PhysHandlerInitReqHandler(PGVM pGVM, uint32_t cEntries)
884{
885 /*
886 * Validate the input and state.
887 */
888 int rc = GVMMR0ValidateGVMandEMT(pGVM, 0);
889 AssertRCReturn(rc, rc);
890 VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE); /** @todo ring-0 safe state check. */
891
892 AssertReturn(pGVM->pgmr0.s.PhysHandlerAllocator.m_paNodes == NULL, VERR_WRONG_ORDER);
893 AssertReturn(pGVM->pgm.s.PhysHandlerAllocator.m_paNodes == NULL, VERR_WRONG_ORDER);
894
895 AssertLogRelMsgReturn(cEntries <= _64K, ("%#x\n", cEntries), VERR_OUT_OF_RANGE);
896
897 /*
898 * Calculate the table size and allocate it.
899 */
900 uint32_t cbTreeAndBitmap = 0;
901 uint32_t const cbTotalAligned = pgmHandlerPhysicalCalcTableSizes(&cEntries, &cbTreeAndBitmap);
902 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
903 rc = RTR0MemObjAllocPage(&hMemObj, cbTotalAligned, false);
904 if (RT_SUCCESS(rc))
905 {
906 RTR0MEMOBJ hMapObj = NIL_RTR0MEMOBJ;
907 rc = RTR0MemObjMapUser(&hMapObj, hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
908 if (RT_SUCCESS(rc))
909 {
910 uint8_t *pb = (uint8_t *)RTR0MemObjAddress(hMemObj);
911 if (!RTR0MemObjWasZeroInitialized(hMemObj))
912 RT_BZERO(pb, cbTotalAligned);
913
914 pGVM->pgmr0.s.PhysHandlerAllocator.initSlabAllocator(cEntries, (PPGMPHYSHANDLER)&pb[cbTreeAndBitmap],
915 (uint64_t *)&pb[sizeof(PGMPHYSHANDLERTREE)]);
916 pGVM->pgmr0.s.pPhysHandlerTree = (PPGMPHYSHANDLERTREE)pb;
917 pGVM->pgmr0.s.pPhysHandlerTree->initWithAllocator(&pGVM->pgmr0.s.PhysHandlerAllocator);
918 pGVM->pgmr0.s.hPhysHandlerMemObj = hMemObj;
919 pGVM->pgmr0.s.hPhysHandlerMapObj = hMapObj;
920
921 AssertCompile(sizeof(pGVM->pgm.s.PhysHandlerAllocator) == sizeof(pGVM->pgmr0.s.PhysHandlerAllocator));
922 RTR3PTR R3Ptr = RTR0MemObjAddressR3(hMapObj);
923 pGVM->pgm.s.pPhysHandlerTree = R3Ptr;
924 pGVM->pgm.s.PhysHandlerAllocator.m_paNodes = R3Ptr + cbTreeAndBitmap;
925 pGVM->pgm.s.PhysHandlerAllocator.m_pbmAlloc = R3Ptr + sizeof(PGMPHYSHANDLERTREE);
926 pGVM->pgm.s.PhysHandlerAllocator.m_cNodes = cEntries;
927 pGVM->pgm.s.PhysHandlerAllocator.m_cErrors = 0;
928 pGVM->pgm.s.PhysHandlerAllocator.m_idxAllocHint = 0;
929 pGVM->pgm.s.PhysHandlerAllocator.m_uPadding = 0;
930 return VINF_SUCCESS;
931 }
932
933 RTR0MemObjFree(hMemObj, true /*fFreeMappings*/);
934 }
935 return rc;
936}
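For orientation, a hedged sketch of the single page-aligned allocation that the code above carves up; the offsets are the ones used above, and the sizes come from pgmHandlerPhysicalCalcTableSizes:

    /* Illustrative layout of the shared allocation (ring-0 address pb, ring-3
     * address R3Ptr = RTR0MemObjAddressR3(hMapObj); both contexts use the same
     * offsets):
     *
     *   pb + 0                            PGMPHYSHANDLERTREE        (tree root)
     *   pb + sizeof(PGMPHYSHANDLERTREE)   allocation bitmap         (uint64_t words)
     *   pb + cbTreeAndBitmap              PGMPHYSHANDLER[cEntries]  (slab nodes)
     */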
937
938
939/**
940 * Updates a physical access handler type with ring-0 callback functions.
941 *
942 * The handler type must first have been registered in ring-3.
943 *
944 * @returns VBox status code.
945 * @param pGVM The global (ring-0) VM structure.
946 * @param enmKind The kind of access handler.
947 * @param fFlags PGMPHYSHANDLER_F_XXX
948 * @param pfnHandler Pointer to the ring-0 handler callback.
949 * @param pfnPfHandler Pointer to the ring-0 \#PF handler callback.
950 * Can be NULL (not recommended though).
951 * @param pszDesc The type description.
952 * @param hType The handle to do ring-0 callback registrations for.
953 * @thread EMT(0)
954 */
955VMMR0_INT_DECL(int) PGMR0HandlerPhysicalTypeSetUpContext(PGVM pGVM, PGMPHYSHANDLERKIND enmKind, uint32_t fFlags,
956 PFNPGMPHYSHANDLER pfnHandler, PFNPGMRZPHYSPFHANDLER pfnPfHandler,
957 const char *pszDesc, PGMPHYSHANDLERTYPE hType)
958{
959 /*
960 * Validate input.
961 */
962 AssertPtrReturn(pfnHandler, VERR_INVALID_POINTER);
963 AssertPtrNullReturn(pfnPfHandler, VERR_INVALID_POINTER);
964
965 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
966 AssertReturn( enmKind == PGMPHYSHANDLERKIND_WRITE
967 || enmKind == PGMPHYSHANDLERKIND_ALL
968 || enmKind == PGMPHYSHANDLERKIND_MMIO,
969 VERR_INVALID_PARAMETER);
970 AssertMsgReturn(!(fFlags & ~PGMPHYSHANDLER_F_VALID_MASK), ("%#x\n", fFlags), VERR_INVALID_FLAGS);
971
972 PPGMPHYSHANDLERTYPEINTR0 const pTypeR0 = &pGVM->pgmr0.s.aPhysHandlerTypes[hType & PGMPHYSHANDLERTYPE_IDX_MASK];
973 AssertMsgReturn(hType == pTypeR0->hType, ("%#RX64, expected=%#RX64\n", hType, pTypeR0->hType), VERR_INVALID_HANDLE);
974 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes) == RT_ELEMENTS(pGVM->pgm.s.aPhysHandlerTypes));
975 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes) == PGMPHYSHANDLERTYPE_IDX_MASK + 1);
976 AssertReturn(pTypeR0->enmKind == PGMPHYSHANDLERKIND_INVALID, VERR_ALREADY_INITIALIZED);
977
978 int rc = GVMMR0ValidateGVMandEMT(pGVM, 0);
979 AssertRCReturn(rc, rc);
980 VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE); /** @todo ring-0 safe state check. */
981
982 PPGMPHYSHANDLERTYPEINTR3 const pTypeR3 = &pGVM->pgm.s.aPhysHandlerTypes[hType & PGMPHYSHANDLERTYPE_IDX_MASK];
983 AssertMsgReturn(pTypeR3->enmKind == enmKind,
984 ("%#x: %d, expected %d\n", hType, pTypeR3->enmKind, enmKind),
985 VERR_INVALID_HANDLE);
986 AssertMsgReturn(pTypeR3->fKeepPgmLock == RT_BOOL(fFlags & PGMPHYSHANDLER_F_KEEP_PGM_LOCK),
987 ("%#x: %d, fFlags=%#x\n", hType, pTypeR3->fKeepPgmLock, fFlags),
988 VERR_INVALID_HANDLE);
989 AssertMsgReturn(pTypeR3->fRing0DevInsIdx == RT_BOOL(fFlags & PGMPHYSHANDLER_F_R0_DEVINS_IDX),
990 ("%#x: %d, fFlags=%#x\n", hType, pTypeR3->fRing0DevInsIdx, fFlags),
991 VERR_INVALID_HANDLE);
992 AssertMsgReturn(pTypeR3->fNotInHm == RT_BOOL(fFlags & PGMPHYSHANDLER_F_NOT_IN_HM),
993 ("%#x: %d, fFlags=%#x\n", hType, pTypeR3->fNotInHm, fFlags),
994 VERR_INVALID_HANDLE);
995
996 /*
997 * Update the entry.
998 */
999 pTypeR0->enmKind = enmKind;
1000 pTypeR0->uState = enmKind == PGMPHYSHANDLERKIND_WRITE
1001 ? PGM_PAGE_HNDL_PHYS_STATE_WRITE : PGM_PAGE_HNDL_PHYS_STATE_ALL;
1002 pTypeR0->fKeepPgmLock = RT_BOOL(fFlags & PGMPHYSHANDLER_F_KEEP_PGM_LOCK);
1003 pTypeR0->fRing0DevInsIdx = RT_BOOL(fFlags & PGMPHYSHANDLER_F_R0_DEVINS_IDX);
1004 pTypeR0->fNotInHm = RT_BOOL(fFlags & PGMPHYSHANDLER_F_NOT_IN_HM);
1005 pTypeR0->pfnHandler = pfnHandler;
1006 pTypeR0->pfnPfHandler = pfnPfHandler;
1007 pTypeR0->pszDesc = pszDesc;
1008
1009 pTypeR3->fRing0Enabled = true;
1010
1011 LogFlow(("PGMR0HandlerPhysicalTypeRegister: hType=%#x: enmKind=%d fFlags=%#x pfnHandler=%p pfnPfHandler=%p pszDesc=%s\n",
1012 hType, enmKind, fFlags, pfnHandler, pfnPfHandler, pszDesc));
1013 return VINF_SUCCESS;
1014}
1015
1016
1017#ifdef VBOX_WITH_PCI_PASSTHROUGH
1018/* Interface sketch. The interface belongs to a global PCI pass-through
1019 manager. It shall use the global VM handle, not the user VM handle to
1020 store the per-VM info (domain) since that is all ring-0 stuff, thus
1021 passing pGVM here. I've tentatively prefixed the functions 'GPciRawR0',
1022 we can discuss the PciRaw code re-organization when I'm back from
1023 vacation.
1024
1025 I've implemented the initial IOMMU set up below. For things to work
1026 reliably, we will probably need to add a whole bunch of checks and
1027 GPciRawR0GuestPageUpdate calls to the PGM code. For the present,
1028 assuming nested paging (enforced) and prealloc (enforced), no
1029 ballooning (check missing), page sharing (check missing) or live
1030 migration (check missing), it might work fine. At least if some
1031 VM power-off hook is present and can tear down the IOMMU page tables. */
1032
1033/**
1034 * Tells the global PCI pass-through manager that we are about to set up the
1035 * guest page to host page mappings for the specified VM.
1036 *
1037 * @returns VBox status code.
1038 *
1039 * @param pGVM The ring-0 VM structure.
1040 */
1041VMMR0_INT_DECL(int) GPciRawR0GuestPageBeginAssignments(PGVM pGVM)
1042{
1043 NOREF(pGVM);
1044 return VINF_SUCCESS;
1045}
1046
1047
1048/**
1049 * Assigns a host page mapping for a guest page.
1050 *
1051 * This is only used when setting up the mappings, i.e. between
1052 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
1053 *
1054 * @returns VBox status code.
1055 * @param pGVM The ring-0 VM structure.
1056 * @param GCPhys The address of the guest page (page aligned).
1057 * @param HCPhys The address of the host page (page aligned).
1058 */
1059VMMR0_INT_DECL(int) GPciRawR0GuestPageAssign(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
1060{
1061 AssertReturn(!(GCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
1062 AssertReturn(!(HCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
1063
1064 if (pGVM->rawpci.s.pfnContigMemInfo)
1065 /** @todo what do we do on failure? */
1066 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, HCPhys, GCPhys, HOST_PAGE_SIZE, PCIRAW_MEMINFO_MAP);
1067
1068 return VINF_SUCCESS;
1069}
1070
1071
1072/**
1073 * Indicates that the specified guest page doesn't exist or doesn't have a host
1074 * page mapping we trust PCI pass-through with.
1075 *
1076 * This is only used when setting up the mappings, i.e. between
1077 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
1078 *
1079 * @returns VBox status code.
1080 * @param pGVM The ring-0 VM structure.
1081 * @param GCPhys The address of the guest page (page aligned).
1082 */
1084VMMR0_INT_DECL(int) GPciRawR0GuestPageUnassign(PGVM pGVM, RTGCPHYS GCPhys)
1085{
1086 AssertReturn(!(GCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
1087
1088 if (pGVM->rawpci.s.pfnContigMemInfo)
1089 /** @todo what do we do on failure? */
1090 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, 0, GCPhys, HOST_PAGE_SIZE, PCIRAW_MEMINFO_UNMAP);
1091
1092 return VINF_SUCCESS;
1093}
1094
1095
1096/**
1097 * Tells the global PCI pass-through manager that we have completed setting up
1098 * the guest page to host page mappings for the specified VM.
1099 *
1100 * This complements GPciRawR0GuestPageBeginAssignments and will be called even
1101 * if some page assignment failed.
1102 *
1103 * @returns VBox status code.
1104 *
1105 * @param pGVM The ring-0 VM structure.
1106 */
1107VMMR0_INT_DECL(int) GPciRawR0GuestPageEndAssignments(PGVM pGVM)
1108{
1109 NOREF(pGVM);
1110 return VINF_SUCCESS;
1111}
1112
1113
1114/**
1115 * Tells the global PCI pass-through manager that a guest page mapping has
1116 * changed after the initial setup.
1117 *
1118 * @returns VBox status code.
1119 * @param pGVM The ring-0 VM structure.
1120 * @param GCPhys The address of the guest page (page aligned).
1121 * @param HCPhys The new host page address or NIL_RTHCPHYS if
1122 * now unassigned.
1123 */
1124VMMR0_INT_DECL(int) GPciRawR0GuestPageUpdate(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
1125{
1126 AssertReturn(!(GCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_4);
1127 AssertReturn(!(HCPhys & HOST_PAGE_OFFSET_MASK) || HCPhys == NIL_RTHCPHYS, VERR_INTERNAL_ERROR_4);
1128 NOREF(pGVM);
1129 return VINF_SUCCESS;
1130}
1131
1132#endif /* VBOX_WITH_PCI_PASSTHROUGH */
1133
1134
1135/**
1136 * Sets up the IOMMU when a raw PCI device is enabled.
1137 *
1138 * @note This is a hack that will probably be remodelled and refined later!
1139 *
1140 * @returns VBox status code.
1141 *
1142 * @param pGVM The global (ring-0) VM structure.
1143 */
1144VMMR0_INT_DECL(int) PGMR0PhysSetupIoMmu(PGVM pGVM)
1145{
1146 int rc = GVMMR0ValidateGVM(pGVM);
1147 if (RT_FAILURE(rc))
1148 return rc;
1149
1150#ifdef VBOX_WITH_PCI_PASSTHROUGH
1151# error fixme
1152 if (pGVM->pgm.s.fPciPassthrough)
1153 {
1154 /*
1155 * The Simplistic Approach - Enumerate all the pages and tell the
1156 * IOMMU about each of them.
1157 */
1158 PGM_LOCK_VOID(pGVM);
1159 rc = GPciRawR0GuestPageBeginAssignments(pGVM);
1160 if (RT_SUCCESS(rc))
1161 {
1162 for (PPGMRAMRANGE pRam = pGVM->pgm.s.pRamRangesXR0; RT_SUCCESS(rc) && pRam; pRam = pRam->pNextR0)
1163 {
1164 PPGMPAGE pPage = &pRam->aPages[0];
1165 RTGCPHYS GCPhys = pRam->GCPhys;
1166 uint32_t cLeft = pRam->cb >> GUEST_PAGE_SHIFT;
1167 while (cLeft-- > 0)
1168 {
1169 /* Only expose pages that are 100% safe for now. */
1170 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1171 && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
1172 && !PGM_PAGE_HAS_ANY_HANDLERS(pPage))
1173 rc = GPciRawR0GuestPageAssign(pGVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage));
1174 else
1175 rc = GPciRawR0GuestPageUnassign(pGVM, GCPhys);
1176
1177 /* next */
1178 pPage++;
1179 GCPhys += HOST_PAGE_SIZE;
1180 }
1181 }
1182
1183 int rc2 = GPciRawR0GuestPageEndAssignments(pGVM);
1184 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
1185 rc = rc2;
1186 }
1187 PGM_UNLOCK(pGVM);
1188 }
1189 else
1190#endif
1191 rc = VERR_NOT_SUPPORTED;
1192 return rc;
1193}
1194
1195
1196/**
1197 * \#PF Handler for nested paging.
1198 *
1199 * @returns VBox status code (appropriate for trap handling and GC return).
1200 * @param pGVM The global (ring-0) VM structure.
1201 * @param pGVCpu The global (ring-0) CPU structure of the calling
1202 * EMT.
1203 * @param enmShwPagingMode Paging mode for the nested page tables.
1204 * @param uErr The trap error code.
1205 * @param pCtx Pointer to the register context for the CPU.
1206 * @param GCPhysFault The fault address.
1207 */
1208VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
1209 PCPUMCTX pCtx, RTGCPHYS GCPhysFault)
1210{
1211 int rc;
1212
1213 LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pCtx->rip));
1214 STAM_PROFILE_START(&pGVCpu->pgm.s.StatRZTrap0e, a);
1215 STAM_STATS({ pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = NULL; } );
1216
1217 /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
1218 AssertMsg( enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
1219 || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
1220 ("enmShwPagingMode=%d\n", enmShwPagingMode));
1221
1222 /* Reserved shouldn't end up here. */
1223 Assert(!(uErr & X86_TRAP_PF_RSVD));
1224
1225#ifdef VBOX_WITH_STATISTICS
1226 /*
1227 * Error code stats.
1228 */
1229 if (uErr & X86_TRAP_PF_US)
1230 {
1231 if (!(uErr & X86_TRAP_PF_P))
1232 {
1233 if (uErr & X86_TRAP_PF_RW)
1234 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentWrite);
1235 else
1236 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentRead);
1237 }
1238 else if (uErr & X86_TRAP_PF_RW)
1239 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSWrite);
1240 else if (uErr & X86_TRAP_PF_RSVD)
1241 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSReserved);
1242 else if (uErr & X86_TRAP_PF_ID)
1243 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNXE);
1244 else
1245 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSRead);
1246 }
1247 else
1248 { /* Supervisor */
1249 if (!(uErr & X86_TRAP_PF_P))
1250 {
1251 if (uErr & X86_TRAP_PF_RW)
1252 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentWrite);
1253 else
1254 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentRead);
1255 }
1256 else if (uErr & X86_TRAP_PF_RW)
1257 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVWrite);
1258 else if (uErr & X86_TRAP_PF_ID)
1259 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSNXE);
1260 else if (uErr & X86_TRAP_PF_RSVD)
1261 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVReserved);
1262 }
1263#endif
1264
1265 /*
1266 * Call the worker.
1267 *
1268 * Note! We pretend the guest is in protected mode without paging, so we
1269 * can use existing code to build the nested page tables.
1270 */
1271/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
1272 bool fLockTaken = false;
1273 switch (enmShwPagingMode)
1274 {
1275 case PGMMODE_32_BIT:
1276 rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pGVCpu, uErr, pCtx, GCPhysFault, &fLockTaken);
1277 break;
1278 case PGMMODE_PAE:
1279 case PGMMODE_PAE_NX:
1280 rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pGVCpu, uErr, pCtx, GCPhysFault, &fLockTaken);
1281 break;
1282 case PGMMODE_AMD64:
1283 case PGMMODE_AMD64_NX:
1284 rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pGVCpu, uErr, pCtx, GCPhysFault, &fLockTaken);
1285 break;
1286 case PGMMODE_EPT:
1287 rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pGVCpu, uErr, pCtx, GCPhysFault, &fLockTaken);
1288 break;
1289 default:
1290 AssertFailed();
1291 rc = VERR_INVALID_PARAMETER;
1292 break;
1293 }
1294 if (fLockTaken)
1295 {
1296 PGM_LOCK_ASSERT_OWNER(pGVM);
1297 PGM_UNLOCK(pGVM);
1298 }
1299
1300 if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
1301 rc = VINF_SUCCESS;
1302 /*
1303 * Handle the case where we cannot interpret the instruction because we cannot get the guest physical address
1304 * via its page tables, see @bugref{6043}.
1305 */
1306 else if ( rc == VERR_PAGE_NOT_PRESENT /* SMP only ; disassembly might fail. */
1307 || rc == VERR_PAGE_TABLE_NOT_PRESENT /* seen with UNI & SMP */
1308 || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT /* seen with SMP */
1309 || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT) /* precaution */
1310 {
1311 Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pCtx->rip));
1312 /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
1313 single VCPU VMs though. */
1314 rc = VINF_SUCCESS;
1315 }
1316
1317 STAM_STATS({ if (!pGVCpu->pgmr0.s.pStatTrap0eAttributionR0)
1318 pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pGVCpu->pgm.s.Stats.StatRZTrap0eTime2Misc; });
1319 STAM_PROFILE_STOP_EX(&pGVCpu->pgm.s.Stats.StatRZTrap0e, pGVCpu->pgmr0.s.pStatTrap0eAttributionR0, a);
1320 return rc;
1321}
1322
1323
1324#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1325/**
1326 * Nested \#PF Handler for nested-guest execution using nested paging.
1327 *
1328 * @returns Strict VBox status code (appropriate for trap handling and GC return).
1329 * @param pGVCpu The global (ring-0) CPU structure of the calling
1330 * EMT.
1331 * @param enmShwPagingMode Paging mode for the nested page tables (must be PGMMODE_EPT).
1332 * @param uErr The trap error code.
1333 * @param pCtx Pointer to the register context for the CPU.
1334 * @param GCPhysNestedFault The nested-guest physical address causing the fault.
1335 * @param fIsLinearAddrValid Whether translation of a nested-guest linear address
1336 * caused this fault. If @c false, GCPtrNestedFault
1337 * must be 0.
1338 * @param GCPtrNestedFault The nested-guest linear address that caused this
1339 * fault.
1340 * @param pWalk Where to store the SLAT walk result.
1341 */
1342VMMR0DECL(VBOXSTRICTRC) PGMR0NestedTrap0eHandlerNestedPaging(PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
1343 PCPUMCTX pCtx, RTGCPHYS GCPhysNestedFault,
1344 bool fIsLinearAddrValid, RTGCPTR GCPtrNestedFault, PPGMPTWALK pWalk)
1345{
1346 Assert(enmShwPagingMode == PGMMODE_EPT);
1347 NOREF(enmShwPagingMode);
1348
1349 bool fLockTaken;
1350 VBOXSTRICTRC rcStrict = PGM_BTH_NAME_EPT_PROT(NestedTrap0eHandler)(pGVCpu, uErr, pCtx, GCPhysNestedFault,
1351 fIsLinearAddrValid, GCPtrNestedFault, pWalk, &fLockTaken);
1352 if (fLockTaken)
1353 {
1354 PGM_LOCK_ASSERT_OWNER(pGVCpu->CTX_SUFF(pVM));
1355 PGM_UNLOCK(pGVCpu->CTX_SUFF(pVM));
1356 }
1357 Assert(rcStrict != VINF_PGM_SYNCPAGE_MODIFIED_PDE); /* This rc isn't used with Nested Paging and nested-EPT. */
1358 return rcStrict;
1359}
1360#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
1361
1362
1363/**
1364 * \#PF Handler for deliberate nested paging misconfiguration (/reserved bit)
1365 * employed for MMIO pages.
1366 *
1367 * @returns VBox status code (appropriate for trap handling and GC return).
1368 * @param pGVM The global (ring-0) VM structure.
1369 * @param pGVCpu The global (ring-0) CPU structure of the calling
1370 * EMT.
1371 * @param enmShwPagingMode Paging mode for the nested page tables.
1372 * @param pCtx Pointer to the register context for the CPU.
1373 * @param GCPhysFault The fault address.
1374 * @param uErr The error code, UINT32_MAX if not available
1375 * (VT-x).
1376 */
1377VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode,
1378 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, uint32_t uErr)
1379{
1380#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1381 STAM_PROFILE_START(&pGVCpu->CTX_SUFF(pStats)->StatR0NpMiscfg, a);
1382 VBOXSTRICTRC rc;
1383
1384 /*
1385 * Try to look up the all-access physical handler for the address.
1386 */
1387 PGM_LOCK_VOID(pGVM);
1388 PPGMPHYSHANDLER pHandler;
1389 rc = pgmHandlerPhysicalLookup(pGVM, GCPhysFault, &pHandler);
1390 if (RT_SUCCESS(rc))
1391 {
1392 PCPGMPHYSHANDLERTYPEINT pHandlerType = PGMPHYSHANDLER_GET_TYPE_NO_NULL(pGVM, pHandler);
1393 if (RT_LIKELY( pHandlerType->enmKind != PGMPHYSHANDLERKIND_WRITE
1394 && !pHandlerType->fNotInHm /*paranoia*/ ))
1395 {
1396 /*
1397 * If the handler has aliased pages or pages that have been temporarily
1398 * disabled, we'll have to take a detour to make sure we resync them
1399 * to avoid lots of unnecessary exits.
1400 */
1401 PPGMPAGE pPage;
1402 if ( ( pHandler->cAliasedPages
1403 || pHandler->cTmpOffPages)
1404 && ( (pPage = pgmPhysGetPage(pGVM, GCPhysFault)) == NULL
1405 || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
1406 )
1407 {
1408 Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
1409 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
1410 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
1411 PGM_UNLOCK(pGVM);
1412 }
1413 else
1414 {
1415 if (pHandlerType->pfnPfHandler)
1416 {
1417 uint64_t const uUser = !pHandlerType->fRing0DevInsIdx ? pHandler->uUser
1418 : (uintptr_t)PDMDeviceRing0IdxToInstance(pGVM, pHandler->uUser);
1419 STAM_PROFILE_START(&pHandler->Stat, h);
1420 PGM_UNLOCK(pGVM);
1421
1422 Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pHandlerType->pfnPfHandler, uErr, GCPhysFault, uUser));
1423 rc = pHandlerType->pfnPfHandler(pGVM, pGVCpu, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pCtx,
1424 GCPhysFault, GCPhysFault, uUser);
1425
1426 STAM_PROFILE_STOP(&pHandler->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
1427 }
1428 else
1429 {
1430 PGM_UNLOCK(pGVM);
1431 Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr));
1432 rc = VINF_EM_RAW_EMULATE_INSTR;
1433 }
1434 }
1435 STAM_PROFILE_STOP(&pGVCpu->pgm.s.Stats.StatR0NpMiscfg, a);
1436 return rc;
1437 }
1438 }
1439 else
1440 AssertMsgReturn(rc == VERR_NOT_FOUND, ("%Rrc GCPhysFault=%RGp\n", VBOXSTRICTRC_VAL(rc), GCPhysFault), rc);
1441
1442 /*
1443 * Must be out of sync, so do a SyncPage and restart the instruction.
1444 *
1445 * ASSUMES that ALL handlers are page aligned and covers whole pages
1446 * (assumption asserted in PGMHandlerPhysicalRegisterEx).
1447 */
1448 Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr));
1449 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
1450 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
1451 PGM_UNLOCK(pGVM);
1452
1453 STAM_PROFILE_STOP(&pGVCpu->pgm.s.Stats.StatR0NpMiscfg, a);
1454 return rc;
1455
1456#else
1457 AssertLogRelFailed();
1458 return VERR_PGM_NOT_USED_IN_MODE;
1459#endif
1460}
1461