VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp@93928

Last change on this file since 93928 was 93928, checked in by vboxsync, 3 years ago

VMM: Nested VMX: bugref:10092 Doxygen.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 291.8 KB
 
1/* $Id: HMVMXR0.cpp 93928 2022-02-24 15:29:52Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_HM
23#define VMCPU_INCL_CPUM_GST_CTX
24#include <iprt/x86.h>
25#include <iprt/asm-amd64-x86.h>
26#include <iprt/thread.h>
27#include <iprt/mem.h>
28#include <iprt/mp.h>
29
30#include <VBox/vmm/pdmapi.h>
31#include <VBox/vmm/dbgf.h>
32#include <VBox/vmm/iem.h>
33#include <VBox/vmm/iom.h>
34#include <VBox/vmm/tm.h>
35#include <VBox/vmm/em.h>
36#include <VBox/vmm/gim.h>
37#include <VBox/vmm/apic.h>
38#include "HMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include <VBox/vmm/hmvmxinline.h>
41#include "HMVMXR0.h"
42#include "VMXInternal.h"
43#include "dtrace/VBoxVMM.h"
44
45/*********************************************************************************************************************************
46* Defined Constants And Macros *
47*********************************************************************************************************************************/
48#ifdef DEBUG_ramshankar
49# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
50# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
51# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
52# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
53# define HMVMX_ALWAYS_CLEAN_TRANSIENT
54# define HMVMX_ALWAYS_CHECK_GUEST_STATE
55# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
56# define HMVMX_ALWAYS_TRAP_PF
57# define HMVMX_ALWAYS_FLUSH_TLB
58# define HMVMX_ALWAYS_SWAP_EFER
59#endif
60
61
62/*********************************************************************************************************************************
63* Structures and Typedefs *
64*********************************************************************************************************************************/
65/**
66 * VMX page allocation information.
67 */
68typedef struct
69{
70 uint32_t fValid; /**< Whether to allocate this page (e.g., based on a CPU feature). */
71 uint32_t uPadding0; /**< Padding to ensure an array of these structs is aligned to a multiple of 8. */
72 PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
73 PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
74} VMXPAGEALLOCINFO;
75/** Pointer to VMX page-allocation info. */
76typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
77/** Pointer to a const VMX page-allocation info. */
78typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
79AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
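/*
 * Usage sketch (illustrative only, mirroring the real arrays in hmR0VmxAllocVmcsInfo and
 * hmR0VmxStructsAlloc further down in this file):
 *
 *     VMXPAGEALLOCINFO aAllocInfo[] =
 *     {   // fValid, uPadding0, where to store the host-physical address, where to store the ring-0 pointer
 *         { true,    0,         &pVmcsInfo->HCPhysVmcs,                   &pVmcsInfo->pvVmcs },
 *     };
 *     int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
 */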
80
81
82/*********************************************************************************************************************************
83* Internal Functions *
84*********************************************************************************************************************************/
85static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
86static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
87
88
89/**
90 * Checks if the given MSR is part of the lastbranch-from-IP MSR stack.
91 * @returns @c true if it's part of LBR stack, @c false otherwise.
92 *
93 * @param pVM The cross context VM structure.
94 * @param idMsr The MSR.
95 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
96 * Optional, can be NULL.
97 *
98 * @remarks Must only be called when LBR is enabled.
99 */
100DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
101{
102 Assert(pVM->hmr0.s.vmx.fLbr);
103 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
104 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
105 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
106 if (idxMsr < cLbrStack)
107 {
108 if (pidxMsr)
109 *pidxMsr = idxMsr;
110 return true;
111 }
112 return false;
113}
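/*
 * Worked example (hypothetical MSR numbers): with idLbrFromIpMsrFirst=0x680 and
 * idLbrFromIpMsrLast=0x68f (a 16-deep LBR stack), cLbrStack is 16; idMsr=0x683 yields
 * idxMsr=3 and returns true, while idMsr=0x690 yields idxMsr=16 and returns false.
 */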
114
115
116/**
117 * Checks if the given MSR is part of the lastbranch-to-IP MSR stack.
118 * @returns @c true if it's part of LBR stack, @c false otherwise.
119 *
120 * @param pVM The cross context VM structure.
121 * @param idMsr The MSR.
122 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
123 * Optional, can be NULL.
124 *
125 * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs
126 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
127 */
128DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
129{
130 Assert(pVM->hmr0.s.vmx.fLbr);
131 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
132 {
133 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
134 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
135 if (idxMsr < cLbrStack)
136 {
137 if (pidxMsr)
138 *pidxMsr = idxMsr;
139 return true;
140 }
141 }
142 return false;
143}
144
145
146/**
147 * Gets the active (in use) VMCS info. object for the specified VCPU.
148 *
149 * This is either the guest or nested-guest VMCS info. and need not necessarily
150 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
151 * if the VM-entry failed due to an invalid-guest state, we may have "cleared" the
152 * current VMCS while returning to ring-3. However, the VMCS info. object for that
153 * VMCS would still be active and returned here so that we could dump the VMCS
154 * fields to ring-3 for diagnostics. This function is thus only used to
155 * distinguish between the nested-guest and guest VMCS.
156 *
157 * @returns The active VMCS information.
158 * @param pVCpu The cross context virtual CPU structure.
159 *
160 * @thread EMT.
161 * @remarks This function may be called with preemption or interrupts disabled!
162 */
163DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
164{
165 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
166 return &pVCpu->hmr0.s.vmx.VmcsInfo;
167 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
168}
169
170
171/**
172 * Returns whether the VM-exit MSR-store area differs from the VM-entry MSR-load
173 * area.
174 *
175 * @returns @c true if it's different, @c false otherwise.
176 * @param pVmcsInfo The VMCS info. object.
177 */
178DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
179{
180 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
181 && pVmcsInfo->pvGuestMsrStore);
182}
183
184
185/**
186 * Sets the given Processor-based VM-execution controls.
187 *
188 * @param pVmxTransient The VMX-transient structure.
189 * @param uProcCtls The Processor-based VM-execution controls to set.
190 */
191static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
192{
193 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
194 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
195 {
196 pVmcsInfo->u32ProcCtls |= uProcCtls;
197 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
198 AssertRC(rc);
199 }
200}
201
202
203/**
204 * Removes the given Processor-based VM-execution controls.
205 *
206 * @param pVCpu The cross context virtual CPU structure.
207 * @param pVmxTransient The VMX-transient structure.
208 * @param uProcCtls The Processor-based VM-execution controls to remove.
209 *
210 * @remarks When executing a nested-guest, this will not remove any of the specified
211 * controls if the nested hypervisor has set any one of them.
212 */
213static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
214{
215 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
216 if (pVmcsInfo->u32ProcCtls & uProcCtls)
217 {
218#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
219 if ( !pVmxTransient->fIsNestedGuest
220 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
221#else
222 NOREF(pVCpu);
223 if (!pVmxTransient->fIsNestedGuest)
224#endif
225 {
226 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
227 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
228 AssertRC(rc);
229 }
230 }
231}
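/*
 * Usage sketch (assumed, illustrative only): a caller wanting RDTSC to cause VM-exits would do
 *     hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
 * and later undo it with
 *     hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
 * Both helpers only issue a VMWRITE when the cached u32ProcCtls value actually changes.
 */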
232
233
234/**
235 * Sets the TSC offset for the current VMCS.
236 *
237 * @param uTscOffset The TSC offset to set.
238 * @param pVmcsInfo The VMCS info. object.
239 */
240static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
241{
242 if (pVmcsInfo->u64TscOffset != uTscOffset)
243 {
244 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
245 AssertRC(rc);
246 pVmcsInfo->u64TscOffset = uTscOffset;
247 }
248}
249
250
251/**
252 * Loads the VMCS specified by the VMCS info. object.
253 *
254 * @returns VBox status code.
255 * @param pVmcsInfo The VMCS info. object.
256 *
257 * @remarks Can be called with interrupts disabled.
258 */
259static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
260{
261 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
262 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
263
264 int rc = VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
265 if (RT_SUCCESS(rc))
266 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
267 return rc;
268}
269
270
271/**
272 * Clears the VMCS specified by the VMCS info. object.
273 *
274 * @returns VBox status code.
275 * @param pVmcsInfo The VMCS info. object.
276 *
277 * @remarks Can be called with interrupts disabled.
278 */
279static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
280{
281 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
282 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
283
284 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
285 if (RT_SUCCESS(rc))
286 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
287 return rc;
288}
289
290
291/**
292 * Checks whether the MSR belongs to the set of guest MSRs that we restore
293 * lazily while leaving VT-x.
294 *
295 * @returns true if it does, false otherwise.
296 * @param pVCpu The cross context virtual CPU structure.
297 * @param idMsr The MSR to check.
298 */
299static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
300{
301 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
302 {
303 switch (idMsr)
304 {
305 case MSR_K8_LSTAR:
306 case MSR_K6_STAR:
307 case MSR_K8_SF_MASK:
308 case MSR_K8_KERNEL_GS_BASE:
309 return true;
310 }
311 }
312 return false;
313}
314
315
316/**
317 * Loads a set of guest MSRs to allow read/write passthru to the guest.
318 *
319 * The name of this function is slightly confusing. This function does NOT
320 * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
321 * common prefix for functions dealing with "lazy restoration" of the shared
322 * MSRs.
323 *
324 * @param pVCpu The cross context virtual CPU structure.
325 *
326 * @remarks No-long-jump zone!!!
327 */
328static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
329{
330 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
331 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
332
333 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
334 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
335 {
336 /*
337 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
338 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
339 * we can skip a few MSR writes.
340 *
341 * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
342 * guest MSR values in the guest-CPU context might be different to what's currently
343 * loaded in the CPU. In either case, we need to write the new guest MSR values to the
344 * CPU, see @bugref{8728}.
345 */
346 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
347 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
348 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
349 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
350 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
351 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
352 {
353#ifdef VBOX_STRICT
354 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
355 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
356 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
357 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
358#endif
359 }
360 else
361 {
362 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
363 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
364 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
365 /* The system call flag mask register isn't as benign and accepting of all
366 values as the above, so mask it to avoid #GP'ing on corrupted input. */
367 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
368 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
369 }
370 }
371 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
372}
373
374
375/**
376 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
377 *
378 * @returns @c true if found, @c false otherwise.
379 * @param pVmcsInfo The VMCS info. object.
380 * @param idMsr The MSR to find.
381 */
382static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
383{
384 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
385 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
386 Assert(pMsrs);
387 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
388 for (uint32_t i = 0; i < cMsrs; i++)
389 {
390 if (pMsrs[i].u32Msr == idMsr)
391 return true;
392 }
393 return false;
394}
395
396
397/**
398 * Performs lazy restoration of the set of host MSRs if they were previously
399 * loaded with guest MSR values.
400 *
401 * @param pVCpu The cross context virtual CPU structure.
402 *
403 * @remarks No-long-jump zone!!!
404 * @remarks The guest MSRs should have been saved back into the guest-CPU
405 * context by hmR0VmxImportGuestState()!!!
406 */
407static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
408{
409 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
410 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
411
412 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
413 {
414 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
415 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
416 {
417 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
418 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
419 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
420 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
421 }
422 }
423 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
424}
425
426
427/**
428 * Sets pfnStartVm to the best suited variant.
429 *
430 * This must be called whenever anything changes relative to the hmR0VmxStartVm
431 * variant selection:
432 * - pVCpu->hm.s.fLoadSaveGuestXcr0
433 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
434 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
435 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
436 * - Perhaps: CPUMCTX.fXStateMask (windows only)
437 *
438 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
439 * can be changed at runtime.
440 */
441static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
442{
443 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
444 {
445 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
446 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
447 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
448 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
449 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
450 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
451 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
452 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
453 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
454 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
455 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
456 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
457 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
458 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
459 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
460 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
461 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
462 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
463 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
464 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
465 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
466 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
467 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
468 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
469 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
470 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
471 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
472 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
473 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
474 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
475 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
476 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
477 };
478 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
479 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
480 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
481 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
482 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
483 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
484 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
485 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
486}
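/*
 * Worked example: with fLoadSaveGuestXcr0 set and only HM_WSF_IBPB_ENTRY enabled among the
 * world-switcher flags, idx = 1 | 2 = 3, selecting
 * hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit.
 */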
487
488
489/**
490 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
491 * stack.
492 *
493 * @returns Strict VBox status code (i.e. informational status codes too).
494 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
495 * @param pVCpu The cross context virtual CPU structure.
496 * @param uValue The value to push to the guest stack.
497 */
498static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
499{
500 /*
501 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
502 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
503 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
504 */
505 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
506 if (pCtx->sp == 1)
507 return VINF_EM_RESET;
508 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
509 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
510 AssertRC(rc);
511 return rc;
512}
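/*
 * Example of the expected wrap-around: with SS.base=0 and SP=0, the push stores the 2-byte
 * value at guest-physical 0xfffe and leaves SP=0xfffe; only SP=1 is treated as the
 * triple-fault case (VINF_EM_RESET) per the segment wrap-around rules cited above.
 */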
513
514
515/**
516 * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
517 * unreferenced local parameters in the template code...
518 */
519DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
520{
521 RT_NOREF(pVCpu);
522 return VMXWriteVmcs16(uFieldEnc, u16Val);
523}
524
525
526/**
527 * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
528 * unreferenced local parameters in the template code...
529 */
530DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
531{
532 RT_NOREF(pVCpu);
533 return VMXWriteVmcs32(uFieldEnc, u32Val);
534}
535
536
537/**
538 * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
539 * unreferenced local parameters in the template code...
540 */
541DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
542{
543 RT_NOREF(pVCpu);
544 return VMXWriteVmcs64(uFieldEnc, u64Val);
545}
546
547
548/**
549 * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
550 * unreferenced local parameters in the template code...
551 */
552DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
553{
554 RT_NOREF(pVCpu);
555 return VMXReadVmcs16(uFieldEnc, pu16Val);
556}
557
558
559/**
560 * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
561 * unreferenced local parameters in the template code...
562 */
563DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
564{
565 RT_NOREF(pVCpu);
566 return VMXReadVmcs32(uFieldEnc, pu32Val);
567}
568
569
570/**
571 * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
572 * unreferenced local parameters in the template code...
573 */
574DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
575{
576 RT_NOREF(pVCpu);
577 return VMXReadVmcs64(uFieldEnc, pu64Val);
578}
579
580
581/*
582 * Instantiate the code we share with the NEM darwin backend.
583 */
584#define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s
585#define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s
586
587#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
588#define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging
589#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
590#define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr
591
592#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
593#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
594#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
595#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
596
597#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
598#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
599#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
600#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
601
602#include "../VMMAll/VMXAllTemplate.cpp.h"
603
604#undef VMX_VMCS_WRITE_16
605#undef VMX_VMCS_WRITE_32
606#undef VMX_VMCS_WRITE_64
607#undef VMX_VMCS_WRITE_NW
608
609#undef VMX_VMCS_READ_16
610#undef VMX_VMCS_READ_32
611#undef VMX_VMCS_READ_64
612#undef VMX_VMCS_READ_NW
613
614#undef VM_IS_VMX_PREEMPT_TIMER_USED
615#undef VM_IS_VMX_NESTED_PAGING
616#undef VM_IS_VMX_UNRESTRICTED_GUEST
617#undef VCPU_2_VMXSTATS
618#undef VCPU_2_VMXSTATE
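/*
 * Note on the pattern above: VMXAllTemplate.cpp.h expands the VM-exit handling code shared
 * with the NEM darwin backend using the VCPU_2_VMXSTATE/VMX_VMCS_READ/WRITE mappings defined
 * above; the macros are #undef'd immediately afterwards so they cannot leak into the rest
 * of this file.
 */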
619
620
621/**
622 * Updates the VM's last error record.
623 *
624 * If there was a VMX instruction error, reads the error data from the VMCS and
625 * updates VCPU's last error record as well.
626 *
627 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
628 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
629 * VERR_VMX_INVALID_VMCS_FIELD.
630 * @param rc The error code.
631 */
632static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
633{
634 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
635 || rc == VERR_VMX_UNABLE_TO_START_VM)
636 {
637 AssertPtrReturnVoid(pVCpu);
638 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
639 }
640 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
641}
642
643
644/**
645 * Enters VMX root mode operation on the current CPU.
646 *
647 * @returns VBox status code.
648 * @param pHostCpu The HM physical-CPU structure.
649 * @param pVM The cross context VM structure. Can be
650 * NULL, after a resume.
651 * @param HCPhysCpuPage Physical address of the VMXON region.
652 * @param pvCpuPage Pointer to the VMXON region.
653 */
654static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
655{
656 Assert(pHostCpu);
657 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
658 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
659 Assert(pvCpuPage);
660 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
661
662 if (pVM)
663 {
664 /* Write the VMCS revision identifier to the VMXON region. */
665 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
666 }
667
668 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
669 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
670
671 /* Enable the VMX bit in CR4 if necessary. */
672 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
673
674 /* Record whether VMXE was already enabled prior to us enabling it above. */
675 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
676
677 /* Enter VMX root mode. */
678 int rc = VMXEnable(HCPhysCpuPage);
679 if (RT_FAILURE(rc))
680 {
681 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
682 if (!pHostCpu->fVmxeAlreadyEnabled)
683 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
684
685 if (pVM)
686 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
687 }
688
689 /* Restore interrupts. */
690 ASMSetFlags(fEFlags);
691 return rc;
692}
693
694
695/**
696 * Exits VMX root mode operation on the current CPU.
697 *
698 * @returns VBox status code.
699 * @param pHostCpu The HM physical-CPU structure.
700 */
701static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
702{
703 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
704
705 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
706 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
707
708 /* If we're for some reason not in VMX root mode, then don't leave it. */
709 RTCCUINTREG const uHostCr4 = ASMGetCR4();
710
711 int rc;
712 if (uHostCr4 & X86_CR4_VMXE)
713 {
714 /* Exit VMX root mode and clear the VMX bit in CR4. */
715 VMXDisable();
716
717 /* Clear CR4.VMXE only if it was clear prior to us setting it. */
718 if (!pHostCpu->fVmxeAlreadyEnabled)
719 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
720
721 rc = VINF_SUCCESS;
722 }
723 else
724 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
725
726 /* Restore interrupts. */
727 ASMSetFlags(fEFlags);
728 return rc;
729}
730
731
732/**
733 * Allocates pages as specified by an array of VMX page-allocation info
734 * objects.
735 *
736 * The pages' contents are zeroed after allocation.
737 *
738 * @returns VBox status code.
739 * @param phMemObj Where to return the handle to the allocation.
740 * @param paAllocInfo The pointer to the first element of the VMX
741 * page-allocation info object array.
742 * @param cEntries The number of elements in the @a paAllocInfo array.
743 */
744static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
745{
746 *phMemObj = NIL_RTR0MEMOBJ;
747
748 /* Figure out how many pages to allocate. */
749 uint32_t cPages = 0;
750 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
751 cPages += !!paAllocInfo[iPage].fValid;
752
753 /* Allocate the pages. */
754 if (cPages)
755 {
756 size_t const cbPages = cPages << HOST_PAGE_SHIFT;
757 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
758 if (RT_FAILURE(rc))
759 return rc;
760
761 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
762 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
763 RT_BZERO(pvFirstPage, cbPages);
764
765 uint32_t iPage = 0;
766 for (uint32_t i = 0; i < cEntries; i++)
767 if (paAllocInfo[i].fValid)
768 {
769 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
770 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
771 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
772 AssertPtr(pvPage);
773
774 Assert(paAllocInfo[i].pHCPhys);
775 Assert(paAllocInfo[i].ppVirt);
776 *paAllocInfo[i].pHCPhys = HCPhysPage;
777 *paAllocInfo[i].ppVirt = pvPage;
778
779 /* Move to next page. */
780 ++iPage;
781 }
782
783 /* Make sure all valid (requested) pages have been assigned. */
784 Assert(iPage == cPages);
785 }
786 return VINF_SUCCESS;
787}
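/*
 * Note: all requested pages above come from a single RTR0MemObj allocation and are zeroed
 * up front; iPage walks the backing pages in order, handing one page to each entry with
 * fValid set, while entries with fValid clear receive nothing.
 */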
788
789
790/**
791 * Frees pages allocated using hmR0VmxPagesAllocZ.
792 *
793 * @param phMemObj Pointer to the memory object handle. Will be set to
794 * NIL.
795 */
796DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
797{
798 /* We can cleanup wholesale since it's all one allocation. */
799 if (*phMemObj != NIL_RTR0MEMOBJ)
800 {
801 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
802 *phMemObj = NIL_RTR0MEMOBJ;
803 }
804}
805
806
807/**
808 * Initializes a VMCS info. object.
809 *
810 * @param pVmcsInfo The VMCS info. object.
811 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
812 */
813static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
814{
815 RT_ZERO(*pVmcsInfo);
816 RT_ZERO(*pVmcsInfoShared);
817
818 pVmcsInfo->pShared = pVmcsInfoShared;
819 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
820 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
821 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
822 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
823 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
824 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
825 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
826 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
827 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
828 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
829 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
830 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
831}
832
833
834/**
835 * Frees the VT-x structures for a VMCS info. object.
836 *
837 * @param pVmcsInfo The VMCS info. object.
838 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
839 */
840static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
841{
842 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
843 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
844}
845
846
847/**
848 * Allocates the VT-x structures for a VMCS info. object.
849 *
850 * @returns VBox status code.
851 * @param pVCpu The cross context virtual CPU structure.
852 * @param pVmcsInfo The VMCS info. object.
853 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
854 *
855 * @remarks The caller is expected to take care of any and all allocation failures.
856 * This function will not perform any cleanup for failures half-way
857 * through.
858 */
859static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
860{
861 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
862
863 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
864 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
865 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
866 VMXPAGEALLOCINFO aAllocInfo[] =
867 {
868 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
869 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
870 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
871 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
872 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
873 };
874
875 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
876 if (RT_FAILURE(rc))
877 return rc;
878
879 /*
880 * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas,
881 * because they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
882 */
883 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
884 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
885 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
886 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
887
888 /*
889 * Get the virtual-APIC page rather than allocating it again.
890 */
891 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
892 {
893 if (!fIsNstGstVmcs)
894 {
895 if (PDMHasApic(pVM))
896 {
897 rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
898 if (RT_FAILURE(rc))
899 return rc;
900 Assert(pVmcsInfo->pbVirtApic);
901 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
902 }
903 }
904 else
905 {
906 pVmcsInfo->pbVirtApic = &pVCpu->cpum.GstCtx.hwvirt.vmx.abVirtApicPage[0];
907 pVmcsInfo->HCPhysVirtApic = GVMMR0ConvertGVMPtr2HCPhys(pVM, pVmcsInfo->pbVirtApic);
908 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
909 }
910 }
911
912 return VINF_SUCCESS;
913}
914
915
916/**
917 * Free all VT-x structures for the VM.
918 *
919 * @returns IPRT status code.
920 * @param pVM The cross context VM structure.
921 */
922static void hmR0VmxStructsFree(PVMCC pVM)
923{
924 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
925#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
926 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
927 {
928 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
929 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
930 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
931 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
932 }
933#endif
934
935 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
936 {
937 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
938 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
939#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
940 if (pVM->cpum.ro.GuestFeatures.fVmx)
941 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
942#endif
943 }
944}
945
946
947/**
948 * Allocate all VT-x structures for the VM.
949 *
950 * @returns IPRT status code.
951 * @param pVM The cross context VM structure.
952 *
953 * @remarks This function will clean up on memory allocation failures.
954 */
955static int hmR0VmxStructsAlloc(PVMCC pVM)
956{
957 /*
958 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
959 * The VMCS size cannot be more than 4096 bytes.
960 *
961 * See Intel spec. Appendix A.1 "Basic VMX Information".
962 */
963 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
964 if (cbVmcs <= X86_PAGE_4K_SIZE)
965 { /* likely */ }
966 else
967 {
968 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
969 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
970 }
971
972 /*
973 * Allocate per-VM VT-x structures.
974 */
975 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
976 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
977 VMXPAGEALLOCINFO aAllocInfo[] =
978 {
979 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
980 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
981 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
982#ifdef VBOX_WITH_CRASHDUMP_MAGIC
983 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
984#endif
985 };
986
987 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
988 if (RT_SUCCESS(rc))
989 {
990#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
991 /* Allocate the shadow VMCS-fields array. */
992 if (fUseVmcsShadowing)
993 {
994 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
995 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
996 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
997 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
998 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
999 rc = VERR_NO_MEMORY;
1000 }
1001#endif
1002
1003 /*
1004 * Allocate per-VCPU VT-x structures.
1005 */
1006 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
1007 {
1008 /* Allocate the guest VMCS structures. */
1009 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1010 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
1011
1012#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1013 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
1014 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
1015 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
1016#endif
1017 }
1018 if (RT_SUCCESS(rc))
1019 return VINF_SUCCESS;
1020 }
1021 hmR0VmxStructsFree(pVM);
1022 return rc;
1023}
1024
1025
1026/**
1027 * Pre-initializes non-zero fields in VMX structures that will be allocated.
1028 *
1029 * @param pVM The cross context VM structure.
1030 */
1031static void hmR0VmxStructsInit(PVMCC pVM)
1032{
1033 /* Paranoia. */
1034 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
1035#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1036 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
1037#endif
1038
1039 /*
1040 * Initialize members up-front so we can cleanup en masse on allocation failures.
1041 */
1042#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1043 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
1044#endif
1045 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
1046 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
1047 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
1048 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1049 {
1050 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1051 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
1052 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
1053 }
1054}
1055
1056#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1057/**
1058 * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
1059 *
1060 * @returns @c true if the MSR is intercepted, @c false otherwise.
1061 * @param pbMsrBitmap The MSR bitmap.
1062 * @param offMsr The MSR byte offset.
1063 * @param iBit The bit offset from the byte offset.
1064 */
1065DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
1066{
1067 Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
1068 return ASMBitTest(pbMsrBitmap + offMsr, iBit);
1069}
1070#endif
1071
1072/**
1073 * Sets the permission bits for the specified MSR in the given MSR bitmap.
1074 *
1075 * If the passed VMCS is a nested-guest VMCS, this function ensures that the
1076 * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
1077 * VMX execution of the nested-guest, but only if the nested-guest is not itself
1078 * intercepting read/write access to this MSR.
1079 *
1080 * @param pVCpu The cross context virtual CPU structure.
1081 * @param pVmcsInfo The VMCS info. object.
1082 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1083 * @param idMsr The MSR value.
1084 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
1085 * include both a read -and- a write permission!
1086 *
1087 * @sa CPUMGetVmxMsrPermission.
1088 * @remarks Can be called with interrupts disabled.
1089 */
1090static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
1091{
1092 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
1093 Assert(pbMsrBitmap);
1094 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
1095
1096 /*
1097 * MSR-bitmap Layout:
1098 * Byte index MSR range Interpreted as
1099 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
1100 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
1101 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
1102 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
1103 *
1104 * A bit corresponding to an MSR within the above range causes a VM-exit
1105 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls outside the
1106 * above ranges, accessing it always causes a VM-exit.
1107 *
1108 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
1109 */
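/*
 * Worked example: for MSR 0xc0000082 (MSR_K8_LSTAR) the code below yields offMsr=0x400 and
 * iBit=0x82, so its read-intercept bit lives at byte 0x400 + (0x82 >> 3) = 0x410, bit 2,
 * and its write-intercept bit at byte 0xc10, bit 2.
 */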
1110 uint16_t const offBitmapRead = 0;
1111 uint16_t const offBitmapWrite = 0x800;
1112 uint16_t offMsr;
1113 int32_t iBit;
1114 if (idMsr <= UINT32_C(0x00001fff))
1115 {
1116 offMsr = 0;
1117 iBit = idMsr;
1118 }
1119 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
1120 {
1121 offMsr = 0x400;
1122 iBit = idMsr - UINT32_C(0xc0000000);
1123 }
1124 else
1125 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
1126
1127 /*
1128 * Set the MSR read permission.
1129 */
1130 uint16_t const offMsrRead = offBitmapRead + offMsr;
1131 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
1132 if (fMsrpm & VMXMSRPM_ALLOW_RD)
1133 {
1134#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1135 bool const fClear = !fIsNstGstVmcs ? true
1136 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
1137#else
1138 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1139 bool const fClear = true;
1140#endif
1141 if (fClear)
1142 ASMBitClear(pbMsrBitmap + offMsrRead, iBit);
1143 }
1144 else
1145 ASMBitSet(pbMsrBitmap + offMsrRead, iBit);
1146
1147 /*
1148 * Set the MSR write permission.
1149 */
1150 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
1151 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
1152 if (fMsrpm & VMXMSRPM_ALLOW_WR)
1153 {
1154#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1155 bool const fClear = !fIsNstGstVmcs ? true
1156 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
1157#else
1158 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1159 bool const fClear = true;
1160#endif
1161 if (fClear)
1162 ASMBitClear(pbMsrBitmap + offMsrWrite, iBit);
1163 }
1164 else
1165 ASMBitSet(pbMsrBitmap + offMsrWrite, iBit);
1166}
1167
1168
1169/**
1170 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1171 * area.
1172 *
1173 * @returns VBox status code.
1174 * @param pVCpu The cross context virtual CPU structure.
1175 * @param pVmcsInfo The VMCS info. object.
1176 * @param cMsrs The number of MSRs.
1177 */
1178static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
1179{
1180 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1181 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
1182 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
1183 {
1184 /* Commit the MSR counts to the VMCS and update the cache. */
1185 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
1186 {
1187 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1188 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
1189 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1190 pVmcsInfo->cEntryMsrLoad = cMsrs;
1191 pVmcsInfo->cExitMsrStore = cMsrs;
1192 pVmcsInfo->cExitMsrLoad = cMsrs;
1193 }
1194 return VINF_SUCCESS;
1195 }
1196
1197 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
1198 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1199 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1200}
1201
1202
1203/**
1204 * Adds a new (or updates the value of an existing) guest/host MSR
1205 * pair to be swapped during the world-switch as part of the
1206 * auto-load/store MSR area in the VMCS.
1207 *
1208 * @returns VBox status code.
1209 * @param pVCpu The cross context virtual CPU structure.
1210 * @param pVmxTransient The VMX-transient structure.
1211 * @param idMsr The MSR.
1212 * @param uGuestMsrValue Value of the guest MSR.
1213 * @param fSetReadWrite Whether to set the guest read/write access of this
1214 * MSR (thus not causing a VM-exit).
1215 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1216 * necessary.
1217 */
1218static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
1219 bool fSetReadWrite, bool fUpdateHostMsr)
1220{
1221 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1222 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1223 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1224 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1225 uint32_t i;
1226
1227 /* Paranoia. */
1228 Assert(pGuestMsrLoad);
1229
1230#ifndef DEBUG_bird
1231 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
1232#endif
1233
1234 /* Check if the MSR already exists in the VM-entry MSR-load area. */
1235 for (i = 0; i < cMsrs; i++)
1236 {
1237 if (pGuestMsrLoad[i].u32Msr == idMsr)
1238 break;
1239 }
1240
1241 bool fAdded = false;
1242 if (i == cMsrs)
1243 {
1244 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
1245 ++cMsrs;
1246 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1247 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
1248
1249 /* Set the guest to read/write this MSR without causing VM-exits. */
1250 if ( fSetReadWrite
1251 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
1252 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
1253
1254 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1255 fAdded = true;
1256 }
1257
1258 /* Update the MSR value for the newly added or already existing MSR. */
1259 pGuestMsrLoad[i].u32Msr = idMsr;
1260 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
1261
1262 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
1263 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1264 {
1265 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1266 pGuestMsrStore[i].u32Msr = idMsr;
1267 pGuestMsrStore[i].u64Value = uGuestMsrValue;
1268 }
1269
1270 /* Update the corresponding slot in the host MSR area. */
1271 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1272 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
1273 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
1274 pHostMsr[i].u32Msr = idMsr;
1275
1276 /*
1277 * Only if the caller requests to update the host MSR value AND we've newly added the
1278 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
1279 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
1280 *
1281 * We do this for performance reasons since reading MSRs may be quite expensive.
1282 */
1283 if (fAdded)
1284 {
1285 if (fUpdateHostMsr)
1286 {
1287 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1288 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1289 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
1290 }
1291 else
1292 {
1293 /* Someone else can do the work. */
1294 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1295 }
1296 }
1297 return VINF_SUCCESS;
1298}
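/*
 * Usage sketch (assumed, illustrative only): the EFER-swapping logic elsewhere in this file
 * adds a guest/host pair roughly like
 *     rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER,
 *                                     pVCpu->cpum.GstCtx.msrEFER, false, false);
 * after which the CPU loads/stores the guest value and reloads the host value on every
 * world switch.
 */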
1299
1300
1301/**
1302 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1303 * auto-load/store MSR area in the VMCS.
1304 *
1305 * @returns VBox status code.
1306 * @param pVCpu The cross context virtual CPU structure.
1307 * @param pVmxTransient The VMX-transient structure.
1308 * @param idMsr The MSR.
1309 */
1310static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
1311{
1312 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1313 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1314 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1315 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1316
1317#ifndef DEBUG_bird
1318 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
1319#endif
1320
1321 for (uint32_t i = 0; i < cMsrs; i++)
1322 {
1323 /* Find the MSR. */
1324 if (pGuestMsrLoad[i].u32Msr == idMsr)
1325 {
1326 /*
1327 * If it's the last MSR, we only need to reduce the MSR count.
1328 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
1329 */
1330 if (i < cMsrs - 1)
1331 {
1332 /* Remove it from the VM-entry MSR-load area. */
1333 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
1334 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
1335
1336 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
1337 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1338 {
1339 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1340 Assert(pGuestMsrStore[i].u32Msr == idMsr);
1341 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
1342 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
1343 }
1344
1345 /* Remove it from the VM-exit MSR-load area. */
1346 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1347 Assert(pHostMsr[i].u32Msr == idMsr);
1348 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
1349 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
1350 }
1351
1352 /* Reduce the count to reflect the removed MSR and bail. */
1353 --cMsrs;
1354 break;
1355 }
1356 }
1357
1358 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
1359 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
1360 {
1361 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1362 AssertRCReturn(rc, rc);
1363
1364 /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
1365 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1366 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
1367
1368 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1369 return VINF_SUCCESS;
1370 }
1371
1372 return VERR_NOT_FOUND;
1373}
1374
1375
1376/**
1377 * Updates the value of all host MSRs in the VM-exit MSR-load area.
1378 *
1379 * @param pVCpu The cross context virtual CPU structure.
1380 * @param pVmcsInfo The VMCS info. object.
1381 *
1382 * @remarks No-long-jump zone!!!
1383 */
1384static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1385{
1386 RT_NOREF(pVCpu);
1387 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1388
1389 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1390 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
1391 Assert(pHostMsrLoad);
1392 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
1393 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
1394 for (uint32_t i = 0; i < cMsrs; i++)
1395 {
1396 /*
1397 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
1398 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1399 */
1400 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
1401 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
1402 else
1403 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
1404 }
1405}
1406
1407
1408/**
1409 * Saves a set of host MSRs to allow read/write passthru access to the guest and
1410 * perform lazy restoration of the host MSRs while leaving VT-x.
1411 *
1412 * @param pVCpu The cross context virtual CPU structure.
1413 *
1414 * @remarks No-long-jump zone!!!
1415 */
1416static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
1417{
1418 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1419
1420 /*
1421 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
1422 */
1423 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
1424 {
1425 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
1426 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
1427 {
1428 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
1429 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
1430 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
1431 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1432 }
1433 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1434 }
1435}
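/*
 * Lazy-MSR flag summary: hmR0VmxLazySaveHostMsrs() above sets VMX_LAZY_MSRS_SAVED_HOST,
 * hmR0VmxLazyLoadGuestMsrs() then sets VMX_LAZY_MSRS_LOADED_GUEST once the guest values are
 * on the CPU, and hmR0VmxLazyRestoreHostMsrs() writes the saved host values back and clears
 * both flags when leaving VT-x.
 */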
1436
1437
1438#ifdef VBOX_STRICT
1439
1440/**
1441 * Verifies that our cached host EFER MSR value has not changed since we cached it.
1442 *
1443 * @param pVmcsInfo The VMCS info. object.
1444 */
1445static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
1446{
1447 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1448
1449 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
1450 {
1451 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
1452 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
1453 uint64_t uVmcsEferMsrVmcs;
1454 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
1455 AssertRC(rc);
1456
1457 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
1458 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
1459 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
1460 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
1461 }
1462}
1463
1464
1465/**
1466 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1467 * VMCS are correct.
1468 *
1469 * @param pVCpu The cross context virtual CPU structure.
1470 * @param pVmcsInfo The VMCS info. object.
1471 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1472 */
1473static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1474{
1475 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1476
1477 /* Read the various MSR-area counts from the VMCS. */
1478 uint32_t cEntryLoadMsrs;
1479 uint32_t cExitStoreMsrs;
1480 uint32_t cExitLoadMsrs;
1481 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
1482 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
1483 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
1484
1485 /* Verify all the MSR counts are the same. */
1486 Assert(cEntryLoadMsrs == cExitStoreMsrs);
1487 Assert(cExitStoreMsrs == cExitLoadMsrs);
1488 uint32_t const cMsrs = cExitLoadMsrs;
1489
1490 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
1491 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
1492
1493 /* Verify the MSR counts are within the allocated page size. */
1494 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
1495
1496 /* Verify the relevant contents of the MSR areas match. */
1497 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1498 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1499 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1500 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
1501 for (uint32_t i = 0; i < cMsrs; i++)
1502 {
1503 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1504 if (fSeparateExitMsrStorePage)
1505 {
1506 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
1507 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
1508 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
1509 }
1510
1511 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
1512 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
1513 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
1514
1515 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
1516 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
1517 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1518 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
1519
1520 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
1521 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
1522 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
1523 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
1524
1525 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
1526 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1527 {
1528 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
1529 if (fIsEferMsr)
1530 {
1531 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
1532 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
1533 }
1534 else
1535 {
1536 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
1537 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1538 if ( pVM->hmr0.s.vmx.fLbr
1539 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1540 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1541 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
1542 {
1543 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
1544 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
1545 pGuestMsrLoad->u32Msr, cMsrs));
1546 }
1547 else if (!fIsNstGstVmcs)
1548 {
1549 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
1550 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
1551 }
1552 else
1553 {
1554 /*
1555 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
1556 * execute a nested-guest with MSR passthrough.
1557 *
1558 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
1559 * allow passthrough too.
1560 */
1561 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
1562 Assert(pvMsrBitmapNstGst);
1563 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
1564 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
1565 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
1566 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
1567 }
1568 }
1569 }
1570
1571 /* Move to the next MSR. */
1572 pHostMsrLoad++;
1573 pGuestMsrLoad++;
1574 pGuestMsrStore++;
1575 }
1576}
1577
1578#endif /* VBOX_STRICT */
1579
1580/**
1581 * Flushes the TLB using EPT.
1582 *
1584 * @param pVCpu The cross context virtual CPU structure of the calling
1585 * EMT. Can be NULL depending on @a enmTlbFlush.
1586 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
1587 * enmTlbFlush.
1588 * @param enmTlbFlush Type of flush.
1589 *
1590 * @remarks The caller is responsible for making sure this function is called only
1591 *          when NestedPaging is supported, and for providing an @a enmTlbFlush that
1592 *          is supported by the CPU.
1593 * @remarks Can be called with interrupts disabled.
1594 */
1595static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
1596{
1597 uint64_t au64Descriptor[2];
1598 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
1599 au64Descriptor[0] = 0;
1600 else
1601 {
1602 Assert(pVCpu);
1603 Assert(pVmcsInfo);
1604 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
1605 }
1606 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1607
1608 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
1609 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
1610
1611 if ( RT_SUCCESS(rc)
1612 && pVCpu)
1613 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1614}
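
/*
 * A minimal usage sketch for the flush helper above (illustrative only): a single-context
 * flush invalidates translations tagged with this VMCS' EPTP, while an all-contexts flush
 * needs neither a VCPU nor a VMCS info. object:
 *
 *     hmR0VmxFlushEpt(pVCpu, pVmcsInfo, VMXTLBFLUSHEPT_SINGLE_CONTEXT);
 *     hmR0VmxFlushEpt(NULL, NULL, VMXTLBFLUSHEPT_ALL_CONTEXTS);
 */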
1615
1616
1617/**
1618 * Flushes the TLB using VPID.
1619 *
1621 * @param pVCpu The cross context virtual CPU structure of the calling
1622 * EMT. Can be NULL depending on @a enmTlbFlush.
1623 * @param enmTlbFlush Type of flush.
1624 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1625 * on @a enmTlbFlush).
1626 *
1627 * @remarks Can be called with interrupts disabled.
1628 */
1629static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
1630{
1631 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
1632
1633 uint64_t au64Descriptor[2];
1634 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1635 {
1636 au64Descriptor[0] = 0;
1637 au64Descriptor[1] = 0;
1638 }
1639 else
1640 {
1641 AssertPtr(pVCpu);
1642 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1643 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1644 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
1645 au64Descriptor[1] = GCPtr;
1646 }
1647
1648 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
1649 AssertMsg(rc == VINF_SUCCESS,
1650 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
1651
1652 if ( RT_SUCCESS(rc)
1653 && pVCpu)
1654 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1655 NOREF(rc);
1656}
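
/*
 * Note on the INVVPID descriptor built above: the VPID occupies bits 15:0 of the first
 * qword (bits 63:16 are reserved and must be zero, hence the UINT16_MAX assertion), and
 * the second qword carries the linear address, which is only consulted for
 * individual-address flushes. See Intel spec. "INVVPID - Invalidate Translations Based
 * on VPID".
 */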
1657
1658
1659/**
1660 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
1661 * otherwise there is nothing really to invalidate.
1662 *
1663 * @returns VBox status code.
1664 * @param pVCpu The cross context virtual CPU structure.
1665 * @param GCVirt Guest virtual address of the page to invalidate.
1666 */
1667VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
1668{
1669 AssertPtr(pVCpu);
1670 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
1671
1672 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
1673 {
1674 /*
1675          * We must invalidate the guest TLB entry in either case; we cannot ignore it even for
1676          * the EPT case. See @bugref{6043} and @bugref{6177}.
1677          *
1678          * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
1679          * as this function may be called in a loop with individual addresses.
1680 */
1681 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1682 if (pVM->hmr0.s.vmx.fVpid)
1683 {
1684 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1685 {
1686 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
1687 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1688 }
1689 else
1690 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1691 }
1692 else if (pVM->hmr0.s.fNestedPaging)
1693 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1694 }
1695
1696 return VINF_SUCCESS;
1697}
1698
1699
1700/**
1701 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1702 * case where neither EPT nor VPID is supported by the CPU.
1703 *
1704 * @param pHostCpu The HM physical-CPU structure.
1705 * @param pVCpu The cross context virtual CPU structure.
1706 *
1707 * @remarks Called with interrupts disabled.
1708 */
1709static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1710{
1711 AssertPtr(pVCpu);
1712 AssertPtr(pHostCpu);
1713
1714 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1715
1716 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1717 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1718 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1719 pVCpu->hmr0.s.fForceTLBFlush = false;
1720 return;
1721}
1722
1723
1724/**
1725 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1726 *
1727 * @param pHostCpu The HM physical-CPU structure.
1728 * @param pVCpu The cross context virtual CPU structure.
1729 * @param pVmcsInfo The VMCS info. object.
1730 *
1731 * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
1732 *          nomenclature. We keep the name "ASID" to avoid confusion in comparisons,
1733 *          since the host-CPU copies are named "ASID".
1734 *
1735 * @remarks Called with interrupts disabled.
1736 */
1737static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1738{
1739#ifdef VBOX_WITH_STATISTICS
1740 bool fTlbFlushed = false;
1741# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1742# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1743 if (!fTlbFlushed) \
1744 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1745 } while (0)
1746#else
1747# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1748# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1749#endif
1750
1751 AssertPtr(pVCpu);
1752 AssertPtr(pHostCpu);
1753 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1754
1755 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1756 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
1757 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1758 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
1759
1760 /*
1761 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
1762      * ran on last. If the TLB flush count changed, another VCPU has hit the ASID limit
1763      * while flushing the TLB, or the host CPU came back online after a suspend/resume, so
1764      * we cannot reuse the current ASID anymore.
1765 */
1766 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1767 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1768 {
1769 ++pHostCpu->uCurrentAsid;
1770 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1771 {
1772 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1773 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1774 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1775 }
1776
1777 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1778 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1779 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1780
1781 /*
1782 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1783 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1784 */
1785 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1786 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1787 HMVMX_SET_TAGGED_TLB_FLUSHED();
1788 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1789 }
1790 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
1791 {
1792 /*
1793          * Changes to the EPT paging structure by the VMM require flushing by EPT as the CPU
1794          * creates guest-physical (i.e. only EPT-tagged) mappings while traversing the EPT
1795          * tables when EPT is in use. Flushing by VPID will only flush linear (only
1796          * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
1797          * mappings; see @bugref{6568}.
1798 *
1799 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
1800 */
1801 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1802 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1803 HMVMX_SET_TAGGED_TLB_FLUSHED();
1804 }
1805 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1806 {
1807 /*
1808 * The nested-guest specifies its own guest-physical address to use as the APIC-access
1809 * address which requires flushing the TLB of EPT cached structures.
1810 *
1811 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
1812 */
1813 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1814 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1815 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1816 HMVMX_SET_TAGGED_TLB_FLUSHED();
1817 }
1818
1819
1820 pVCpu->hmr0.s.fForceTLBFlush = false;
1821 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1822
1823 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
1824 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
1825 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1826 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1827 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1828 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1829 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1830 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1831 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1832
1833 /* Update VMCS with the VPID. */
1834 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1835 AssertRC(rc);
1836
1837#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1838}
1839
1840
1841/**
1842 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1843 *
1844 * @param pHostCpu The HM physical-CPU structure.
1845 * @param pVCpu The cross context virtual CPU structure.
1846 * @param pVmcsInfo The VMCS info. object.
1847 *
1848 * @remarks Called with interrupts disabled.
1849 */
1850static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1851{
1852 AssertPtr(pVCpu);
1853 AssertPtr(pHostCpu);
1854 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1855 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
1856 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
1857
1858 /*
1859 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1860 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1861 */
1862 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1863 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1864 {
1865 pVCpu->hmr0.s.fForceTLBFlush = true;
1866 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1867 }
1868
1869 /* Check for explicit TLB flushes. */
1870 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1871 {
1872 pVCpu->hmr0.s.fForceTLBFlush = true;
1873 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1874 }
1875
1876 /* Check for TLB flushes while switching to/from a nested-guest. */
1877 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1878 {
1879 pVCpu->hmr0.s.fForceTLBFlush = true;
1880 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1881 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1882 }
1883
1884 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1885 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1886
1887 if (pVCpu->hmr0.s.fForceTLBFlush)
1888 {
1889 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
1890 pVCpu->hmr0.s.fForceTLBFlush = false;
1891 }
1892}
1893
1894
1895/**
1896 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1897 *
1898 * @param pHostCpu The HM physical-CPU structure.
1899 * @param pVCpu The cross context virtual CPU structure.
1900 *
1901 * @remarks Called with interrupts disabled.
1902 */
1903static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1904{
1905 AssertPtr(pVCpu);
1906 AssertPtr(pHostCpu);
1907 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1908 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
1909 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
1910
1911 /*
1912 * Force a TLB flush for the first world switch if the current CPU differs from the one we
1913      * ran on last. If the TLB flush count changed, another VCPU has hit the ASID limit
1914      * while flushing the TLB, or the host CPU came back online after a suspend/resume, so
1915      * we cannot reuse the current ASID anymore.
1916 */
1917 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1918 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1919 {
1920 pVCpu->hmr0.s.fForceTLBFlush = true;
1921 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1922 }
1923
1924 /* Check for explicit TLB flushes. */
1925 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1926 {
1927 /*
1928 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
1929 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
1930 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
1931          * include fExplicitFlush too) - an obscure corner case.
1932 */
1933 pVCpu->hmr0.s.fForceTLBFlush = true;
1934 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1935 }
1936
1937 /* Check for TLB flushes while switching to/from a nested-guest. */
1938 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1939 {
1940 pVCpu->hmr0.s.fForceTLBFlush = true;
1941 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1942 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1943 }
1944
1945 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1946 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1947 if (pVCpu->hmr0.s.fForceTLBFlush)
1948 {
1949 ++pHostCpu->uCurrentAsid;
1950 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1951 {
1952 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1953 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1954 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1955 }
1956
1957 pVCpu->hmr0.s.fForceTLBFlush = false;
1958 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1959 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1960 if (pHostCpu->fFlushAsidBeforeUse)
1961 {
1962 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
1963 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
1964 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1965 {
1966 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
1967 pHostCpu->fFlushAsidBeforeUse = false;
1968 }
1969 else
1970 {
1971 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
1972 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
1973 }
1974 }
1975 }
1976
1977 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1978 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1979 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1980 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1981 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1982 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1983 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1984
1985 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1986 AssertRC(rc);
1987}
1988
1989
1990/**
1991 * Flushes the guest TLB entry based on CPU capabilities.
1992 *
1993 * @param pHostCpu The HM physical-CPU structure.
1994 * @param pVCpu The cross context virtual CPU structure.
1995 * @param pVmcsInfo The VMCS info. object.
1996 *
1997 * @remarks Called with interrupts disabled.
1998 */
1999static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2000{
2001#ifdef HMVMX_ALWAYS_FLUSH_TLB
2002 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2003#endif
2004 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2005 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
2006 {
2007 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
2008 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
2009 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
2010 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
2011 default:
2012 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2013 break;
2014 }
2015 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2016}
2017
2018
2019/**
2020 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2021 * TLB entries from the host TLB before VM-entry.
2022 *
2023 * @returns VBox status code.
2024 * @param pVM The cross context VM structure.
2025 */
2026static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
2027{
2028 /*
2029 * Determine optimal flush type for nested paging.
2030     * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up
2031     * unrestricted guest execution (see hmR3InitFinalizeR0()).
2032 */
2033 if (pVM->hmr0.s.fNestedPaging)
2034 {
2035 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2036 {
2037 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2038 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
2039 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2040 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
2041 else
2042 {
2043                /* Shouldn't happen. EPT is supported but no suitable flush type is supported. */
2044 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2045 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2046 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2047 }
2048
2049 /* Make sure the write-back cacheable memory type for EPT is supported. */
2050 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
2051 {
2052 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2053 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2054 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2055 }
2056
2057 /* EPT requires a page-walk length of 4. */
2058 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2059 {
2060 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2061 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2062 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2063 }
2064 }
2065 else
2066 {
2067 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2068 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2069 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2070 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2071 }
2072 }
2073
2074 /*
2075 * Determine optimal flush type for VPID.
2076 */
2077 if (pVM->hmr0.s.vmx.fVpid)
2078 {
2079 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2080 {
2081 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2082 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
2083 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2084 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
2085 else
2086 {
2087                /* Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU. Ignore the VPID capability. */
2088 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2089 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
2090 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2091 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2092 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2093 pVM->hmr0.s.vmx.fVpid = false;
2094 }
2095 }
2096 else
2097 {
2098 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2099            Log4Func(("VPID supported without INVVPID support. Ignoring VPID.\n"));
2100 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2101 pVM->hmr0.s.vmx.fVpid = false;
2102 }
2103 }
2104
2105 /*
2106 * Setup the handler for flushing tagged-TLBs.
2107 */
2108 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
2109 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
2110 else if (pVM->hmr0.s.fNestedPaging)
2111 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
2112 else if (pVM->hmr0.s.vmx.fVpid)
2113 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
2114 else
2115 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
2116
2117
2118 /*
2119 * Copy out the result to ring-3.
2120 */
2121 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
2122 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
2123 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
2124 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
2125 return VINF_SUCCESS;
2126}
2127
2128
2129/**
2130 * Sets up the LBR MSR ranges based on the host CPU.
2131 *
2132 * @returns VBox status code.
2133 * @param pVM The cross context VM structure.
2134 *
2135 * @sa nemR3DarwinSetupLbrMsrRange
2136 */
2137static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
2138{
2139 Assert(pVM->hmr0.s.vmx.fLbr);
2140 uint32_t idLbrFromIpMsrFirst;
2141 uint32_t idLbrFromIpMsrLast;
2142 uint32_t idLbrToIpMsrFirst;
2143 uint32_t idLbrToIpMsrLast;
2144 uint32_t idLbrTosMsr;
2145
2146 /*
2147 * Determine the LBR MSRs supported for this host CPU family and model.
2148 *
2149 * See Intel spec. 17.4.8 "LBR Stack".
2150 * See Intel "Model-Specific Registers" spec.
2151 */
2152 uint32_t const uFamilyModel = (pVM->cpum.ro.HostFeatures.uFamily << 8)
2153 | pVM->cpum.ro.HostFeatures.uModel;
2154 switch (uFamilyModel)
2155 {
2156 case 0x0f01: case 0x0f02:
2157 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
2158 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
2159 idLbrToIpMsrFirst = 0x0;
2160 idLbrToIpMsrLast = 0x0;
2161 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
2162 break;
2163
2164 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
2165 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
2166 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
2167 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2168 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
2169 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2170 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
2171 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2172 break;
2173
2174 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
2175 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
2176 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
2177 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
2178 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2179 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
2180 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2181 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
2182 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2183 break;
2184
2185 case 0x0617: case 0x061d: case 0x060f:
2186 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
2187 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
2188 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
2189 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
2190 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
2191 break;
2192
2193 /* Atom and related microarchitectures we don't care about:
2194 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
2195 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
2196 case 0x0636: */
2197 /* All other CPUs: */
2198 default:
2199 {
2200 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
2201 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
2202 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2203 }
2204 }
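
    /*
     * For instance, an assumed Kaby Lake host with family 6, model 0x9E yields
     * uFamilyModel = (0x06 << 8) | 0x9E = 0x069e, which selects the 32-entry LBR stack
     * group (MSR_LASTBRANCH_0..31_FROM_IP / _TO_IP) in the switch above.
     */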
2205
2206 /*
2207 * Validate.
2208 */
2209 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
2210 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
2211 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
2212 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
2213 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
2214 {
2215 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
2216 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
2217 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2218 }
2219 NOREF(pVCpu0);
2220
2221 /*
2222     * Record the LBR info in the VM structure for later use.
2223 */
2224 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
2225
2226 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
2227 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
2228
2229 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
2230 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
2231 return VINF_SUCCESS;
2232}
2233
2234
2235#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2236/**
2237 * Sets up the shadow VMCS fields arrays.
2238 *
2239 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
2240 * executing the guest.
2241 *
2242 * @returns VBox status code.
2243 * @param pVM The cross context VM structure.
2244 */
2245static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
2246{
2247 /*
2248 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
2249 * when the host does not support it.
2250 */
2251 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
2252 if ( !fGstVmwriteAll
2253 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
2254 { /* likely. */ }
2255 else
2256 {
2257 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
2258 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
2259 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2260 }
2261
2262 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
2263 uint32_t cRwFields = 0;
2264 uint32_t cRoFields = 0;
2265 for (uint32_t i = 0; i < cVmcsFields; i++)
2266 {
2267 VMXVMCSFIELD VmcsField;
2268 VmcsField.u = g_aVmcsFields[i];
2269
2270 /*
2271 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
2272 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
2273 * in the shadow VMCS fields array as they would be redundant.
2274 *
2275 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
2276 * we must not include it in the shadow VMCS fields array. Guests attempting to
2277 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
2278 * the required behavior.
2279 */
2280 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
2281 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
2282 {
2283 /*
2284 * Read-only fields are placed in a separate array so that while syncing shadow
2285 * VMCS fields later (which is more performance critical) we can avoid branches.
2286 *
2287 * However, if the guest can write to all fields (including read-only fields),
2288             * we treat it as a read/write field. Otherwise, writing to these fields would
2289 * cause a VMWRITE instruction error while syncing the shadow VMCS.
2290 */
2291 if ( fGstVmwriteAll
2292 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
2293 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
2294 else
2295 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
2296 }
2297 }
2298
2299 /* Update the counts. */
2300 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
2301 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
2302 return VINF_SUCCESS;
2303}
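
/*
 * A concrete example of the FULL/HIGH filtering above: the VMCS link pointer is encoded
 * as 0x2800 (VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, access type FULL) and 0x2801 (its HIGH
 * half). Only the 0x2800 encoding can end up in the shadow field arrays, since a 64-bit
 * VMWRITE to the FULL field already covers both halves.
 */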
2304
2305
2306/**
2307 * Sets up the VMREAD and VMWRITE bitmaps.
2308 *
2309 * @param pVM The cross context VM structure.
2310 */
2311static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
2312{
2313 /*
2314 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
2315 */
2316 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
2317 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
2318 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
2319 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
2320 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
2321
2322 /*
2323 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
2324 * VMREAD and VMWRITE bitmaps.
2325 */
2326 {
2327 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
2328 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
2329 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
2330 {
2331 uint32_t const uVmcsField = paShadowVmcsFields[i];
2332 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2333 Assert(uVmcsField >> 3 < cbBitmap);
2334 ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
2335 ASMBitClear(pbVmwriteBitmap + (uVmcsField >> 3), uVmcsField & 7);
2336 }
2337 }
2338
2339 /*
2340 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
2341 * if the host supports VMWRITE to all supported VMCS fields.
2342 */
2343 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
2344 {
2345 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
2346 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
2347 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
2348 {
2349 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
2350 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2351 Assert(uVmcsField >> 3 < cbBitmap);
2352 ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
2353 }
2354 }
2355}
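
/*
 * A worked example of the bit addressing above: the guest CR0 field is encoded as 0x6800
 * per the Intel spec, so it lands in byte 0x6800 >> 3 = 0xD00 of the 4K bitmap at bit
 * 0x6800 & 7 = 0. Clearing that bit in both bitmaps lets the guest VMREAD/VMWRITE the
 * field against the shadow VMCS without causing a VM-exit.
 */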
2356#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
2357
2358
2359/**
2360 * Sets up the virtual-APIC page address for the VMCS.
2361 *
2362 * @param pVmcsInfo The VMCS info. object.
2363 */
2364DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
2365{
2366 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
2367 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
2368 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2369 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
2370 AssertRC(rc);
2371}
2372
2373
2374/**
2375 * Sets up the MSR-bitmap address for the VMCS.
2376 *
2377 * @param pVmcsInfo The VMCS info. object.
2378 */
2379DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
2380{
2381 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
2382 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
2383 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2384 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
2385 AssertRC(rc);
2386}
2387
2388
2389/**
2390 * Sets up the APIC-access page address for the VMCS.
2391 *
2392 * @param pVCpu The cross context virtual CPU structure.
2393 */
2394DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
2395{
2396 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
2397 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
2398 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2399 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
2400 AssertRC(rc);
2401}
2402
2403#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2404
2405/**
2406 * Sets up the VMREAD bitmap address for the VMCS.
2407 *
2408 * @param pVCpu The cross context virtual CPU structure.
2409 */
2410DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
2411{
2412 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
2413 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
2414 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2415 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
2416 AssertRC(rc);
2417}
2418
2419
2420/**
2421 * Sets up the VMWRITE bitmap address for the VMCS.
2422 *
2423 * @param pVCpu The cross context virtual CPU structure.
2424 */
2425DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
2426{
2427 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
2428 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
2429 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2430 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
2431 AssertRC(rc);
2432}
2433
2434#endif
2435
2436/**
2437 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
2438 * in the VMCS.
2439 *
2440 * @returns VBox status code.
2441 * @param pVmcsInfo The VMCS info. object.
2442 */
2443DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
2444{
2445 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
2446 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
2447 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2448
2449 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
2450 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
2451 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
2452
2453 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
2454 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
2455 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2456
2457 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
2458 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
2459 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
2460 return VINF_SUCCESS;
2461}
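
/*
 * For reference, each entry in the areas wired up above is a 16-byte VMXAUTOMSR record
 * following the Intel layout: the MSR index (u32Msr), a reserved dword and the 64-bit
 * MSR value (u64Value). Populating one guest VM-entry load slot therefore looks roughly
 * like this (illustrative only; the real population happens elsewhere in this file):
 *
 *     VMXAUTOMSR *pGuestMsrLoad = (VMXAUTOMSR *)pVmcsInfo->pvGuestMsrLoad;
 *     pGuestMsrLoad[0].u32Msr   = MSR_K8_LSTAR;
 *     pGuestMsrLoad[0].u64Value = 0;
 */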
2462
2463
2464/**
2465 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
2466 *
2467 * @param pVCpu The cross context virtual CPU structure.
2468 * @param pVmcsInfo The VMCS info. object.
2469 */
2470static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2471{
2472 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
2473
2474 /*
2475 * By default, ensure guest attempts to access any MSR cause VM-exits.
2476 * This shall later be relaxed for specific MSRs as necessary.
2477 *
2478 * Note: For nested-guests, the entire bitmap will be merged prior to
2479 * executing the nested-guest using hardware-assisted VMX and hence there
2480 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
2481 */
2482 Assert(pVmcsInfo->pvMsrBitmap);
2483 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
2484
2485 /*
2486 * The guest can access the following MSRs (read, write) without causing
2487 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
2488 */
2489 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2490 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
2491 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
2492 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
2493 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2494 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
2495
2496 /*
2497     * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
2498     * associated with them. We never need to intercept accesses (writes need to be
2499     * executed without causing a VM-exit; reads will #GP fault anyway).
2500 *
2501 * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
2502 * read/write them. We swap the guest/host MSR value using the
2503 * auto-load/store MSR area.
2504 */
2505 if (pVM->cpum.ro.GuestFeatures.fIbpb)
2506 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
2507 if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
2508 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
2509 if (pVM->cpum.ro.GuestFeatures.fIbrs)
2510 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
2511
2512 /*
2513 * Allow full read/write access for the following MSRs (mandatory for VT-x)
2514 * required for 64-bit guests.
2515 */
2516 if (pVM->hmr0.s.fAllow64BitGuests)
2517 {
2518 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
2519 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
2520 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
2521 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2522 }
2523
2524 /*
2525 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
2526 */
2527#ifdef VBOX_STRICT
2528 Assert(pVmcsInfo->pvMsrBitmap);
2529 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
2530 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
2531#endif
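    /*
     * Illustrative only (hence disabled): right after the default-deny fill and the
     * relaxations above, an MSR that was not relaxed (IA32_TSC is used here purely as an
     * example) should still report exit-on-read and exit-on-write.
     */
#if 0
    uint32_t const fMsrpmTsc = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_IA32_TSC);
    Assert((fMsrpmTsc & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR);
#endif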
2532}
2533
2534
2535/**
2536 * Sets up pin-based VM-execution controls in the VMCS.
2537 *
2538 * @returns VBox status code.
2539 * @param pVCpu The cross context virtual CPU structure.
2540 * @param pVmcsInfo The VMCS info. object.
2541 */
2542static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2543{
2544 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2545 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
2546 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
2547
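    /*
     * Quick reminder of the allowed0/allowed1 convention used here and in the other
     * control setups: allowed0 bits are forced to 1, and only allowed1 bits may be 1.
     * The "(fVal & fZap) != fVal" check below catches a requested bit the CPU cannot
     * set; e.g. fVal = 0x49 with fZap = 0x0b gives fVal & fZap = 0x09, so bit 6 of the
     * request would be rejected. (The values are made up for illustration.)
     */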
2548 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2549 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2550
2551 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
2552 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2553
2554 /* Enable the VMX-preemption timer. */
2555 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
2556 {
2557 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
2558 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
2559 }
2560
2561#if 0
2562 /* Enable posted-interrupt processing. */
2563 if (pVM->hm.s.fPostedIntrs)
2564 {
2565 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
2566 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
2567 fVal |= VMX_PIN_CTLS_POSTED_INT;
2568 }
2569#endif
2570
2571 if ((fVal & fZap) != fVal)
2572 {
2573 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2574 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
2575 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2576 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2577 }
2578
2579 /* Commit it to the VMCS and update our cache. */
2580 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
2581 AssertRC(rc);
2582 pVmcsInfo->u32PinCtls = fVal;
2583
2584 return VINF_SUCCESS;
2585}
2586
2587
2588/**
2589 * Sets up secondary processor-based VM-execution controls in the VMCS.
2590 *
2591 * @returns VBox status code.
2592 * @param pVCpu The cross context virtual CPU structure.
2593 * @param pVmcsInfo The VMCS info. object.
2594 */
2595static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2596{
2597 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2598 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
2599 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2600
2601 /* WBINVD causes a VM-exit. */
2602 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
2603 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
2604
2605 /* Enable EPT (aka nested-paging). */
2606 if (pVM->hmr0.s.fNestedPaging)
2607 fVal |= VMX_PROC_CTLS2_EPT;
2608
2609    /* Enable the INVPCID instruction if we expose it to the guest and it is supported
2610       by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
2611 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
2612 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
2613 fVal |= VMX_PROC_CTLS2_INVPCID;
2614
2615 /* Enable VPID. */
2616 if (pVM->hmr0.s.vmx.fVpid)
2617 fVal |= VMX_PROC_CTLS2_VPID;
2618
2619 /* Enable unrestricted guest execution. */
2620 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
2621 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
2622
2623#if 0
2624 if (pVM->hm.s.fVirtApicRegs)
2625 {
2626 /* Enable APIC-register virtualization. */
2627 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
2628 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
2629
2630 /* Enable virtual-interrupt delivery. */
2631 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
2632 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
2633 }
2634#endif
2635
2636    /* Virtualize APIC accesses if supported by the CPU. The virtual-APIC page is
2637       where the TPR shadow resides. */
2638 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
2639 * done dynamically. */
2640 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
2641 {
2642 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
2643 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
2644 }
2645
2646    /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
2647       by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
2648 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
2649 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
2650 fVal |= VMX_PROC_CTLS2_RDTSCP;
2651
2652 /* Enable Pause-Loop exiting. */
2653 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
2654 && pVM->hm.s.vmx.cPleGapTicks
2655 && pVM->hm.s.vmx.cPleWindowTicks)
2656 {
2657 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
2658
2659 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
2660 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
2661 }
2662
2663 if ((fVal & fZap) != fVal)
2664 {
2665 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2666 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
2667 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2668 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2669 }
2670
2671 /* Commit it to the VMCS and update our cache. */
2672 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
2673 AssertRC(rc);
2674 pVmcsInfo->u32ProcCtls2 = fVal;
2675
2676 return VINF_SUCCESS;
2677}
2678
2679
2680/**
2681 * Sets up processor-based VM-execution controls in the VMCS.
2682 *
2683 * @returns VBox status code.
2684 * @param pVCpu The cross context virtual CPU structure.
2685 * @param pVmcsInfo The VMCS info. object.
2686 */
2687static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2688{
2689 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2690 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
2691 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2692
2693 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
2694 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2695 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2696 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2697 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2698 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2699 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2700
2701    /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; check that the hardware doesn't fix it to always-set or always-clear. */
2702 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
2703 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
2704 {
2705 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2706 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2707 }
2708
2709 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2710 if (!pVM->hmr0.s.fNestedPaging)
2711 {
2712 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
2713 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
2714 | VMX_PROC_CTLS_CR3_LOAD_EXIT
2715 | VMX_PROC_CTLS_CR3_STORE_EXIT;
2716 }
2717
2718 /* Use TPR shadowing if supported by the CPU. */
2719 if ( PDMHasApic(pVM)
2720 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
2721 {
2722 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2723 /* CR8 writes cause a VM-exit based on TPR threshold. */
2724 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
2725 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
2726 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
2727 }
2728 else
2729 {
2730 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
2731 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
2732 if (pVM->hmr0.s.fAllow64BitGuests)
2733 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2734 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2735 }
2736
2737 /* Use MSR-bitmaps if supported by the CPU. */
2738 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2739 {
2740 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
2741 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2742 }
2743
2744 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2745 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2746 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
2747
2748 if ((fVal & fZap) != fVal)
2749 {
2750 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2751 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
2752 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2753 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2754 }
2755
2756 /* Commit it to the VMCS and update our cache. */
2757 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
2758 AssertRC(rc);
2759 pVmcsInfo->u32ProcCtls = fVal;
2760
2761 /* Set up MSR permissions that don't change through the lifetime of the VM. */
2762 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2763 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
2764
2765 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
2766 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2767 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
2768
2769 /* Sanity check, should not really happen. */
2770 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
2771 { /* likely */ }
2772 else
2773 {
2774 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2775 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2776 }
2777
2778 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
2779 return VINF_SUCCESS;
2780}
2781
2782
2783/**
2784 * Sets up miscellaneous (everything other than Pin, Processor and secondary
2785 * Processor-based VM-execution) control fields in the VMCS.
2786 *
2787 * @returns VBox status code.
2788 * @param pVCpu The cross context virtual CPU structure.
2789 * @param pVmcsInfo The VMCS info. object.
2790 */
2791static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2792{
2793#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2794 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
2795 {
2796 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
2797 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
2798 }
2799#endif
2800
2801 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2802 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2803 AssertRC(rc);
2804
2805 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2806 if (RT_SUCCESS(rc))
2807 {
2808 uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
2809 uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
2810
2811 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
2812 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
2813
2814 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
2815 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
2816
2817 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
2818 {
2819 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
2820 AssertRC(rc);
2821 }
2822 return VINF_SUCCESS;
2823 }
2824 else
2825 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2826 return rc;
2827}
2828
2829
2830/**
2831 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2832 *
2833 * We shall set up those exception intercepts that don't change during the
2834 * lifetime of the VM here. The rest are done dynamically while loading the
2835 * guest state.
2836 *
2837 * @param pVCpu The cross context virtual CPU structure.
2838 * @param pVmcsInfo The VMCS info. object.
2839 */
2840static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2841{
2842 /*
2843 * The following exceptions are always intercepted:
2844 *
2845 * #AC - To prevent the guest from hanging the CPU and for dealing with
2846 * split-lock detecting host configs.
2847 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
2848 * recursive #DBs can cause a CPU hang.
2849 * #PF - To sync our shadow page tables when nested-paging is not used.
2850 */
2851 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
2852 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
2853 | RT_BIT(X86_XCPT_DB)
2854 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
2855
2856 /* Commit it to the VMCS. */
2857 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
2858 AssertRC(rc);
2859
2860 /* Update our cache of the exception bitmap. */
2861 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
2862}
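
/*
 * Bit n of the exception bitmap set up above corresponds to vector n; e.g.
 * RT_BIT(X86_XCPT_PF) sets bit 14, so #PF (vector 14) traps to the host whenever nested
 * paging is not used.
 */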
2863
2864
2865#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2866/**
2867 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
2868 *
2869 * @returns VBox status code.
2870 * @param pVmcsInfo The VMCS info. object.
2871 */
2872static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
2873{
2874 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2875 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2876 AssertRC(rc);
2877
2878 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2879 if (RT_SUCCESS(rc))
2880 {
2881 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2882 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2883
2884 /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
2885 Assert(!pVmcsInfo->u64Cr0Mask);
2886 Assert(!pVmcsInfo->u64Cr4Mask);
2887 return VINF_SUCCESS;
2888 }
2889 LogRelFunc(("Failed to set up the VMCS link pointer in the nested-guest VMCS. rc=%Rrc\n", rc));
2890 return rc;
2891}
2892#endif
2893
2894
2895/**
2896 * Selector implementation for the hardware-assisted VMX start-VM function (pfnStartVm).
2897 */
2898static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
2899{
2900 hmR0VmxUpdateStartVmFunction(pVCpu);
2901 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
2902}
2903
2904
2905/**
2906 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
2907 * VMX.
2908 *
2909 * @returns VBox status code.
2910 * @param pVCpu The cross context virtual CPU structure.
2911 * @param pVmcsInfo The VMCS info. object.
2912 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2913 */
2914static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2915{
2916 Assert(pVmcsInfo->pvVmcs);
2917 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2918
2919 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
2920 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2921 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
2922
2923 LogFlowFunc(("\n"));
2924
2925 /*
2926 * Initialize the VMCS using VMCLEAR before loading the VMCS.
2927 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
2928 */
2929 int rc = hmR0VmxClearVmcs(pVmcsInfo);
2930 if (RT_SUCCESS(rc))
2931 {
2932 rc = hmR0VmxLoadVmcs(pVmcsInfo);
2933 if (RT_SUCCESS(rc))
2934 {
2935 /*
2936 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
2937 * The host is always 64-bit since we no longer support 32-bit hosts.
2938 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
2939 */
2940 if (!fIsNstGstVmcs)
2941 {
2942 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
2943 if (RT_SUCCESS(rc))
2944 {
2945 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
2946 if (RT_SUCCESS(rc))
2947 {
2948 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
2949 if (RT_SUCCESS(rc))
2950 {
2951 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
2952#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2953 /*
2954 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
2955 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
2956 * making it fit for use when VMCS shadowing is later enabled.
2957 */
2958 if (pVmcsInfo->pvShadowVmcs)
2959 {
2960 VMXVMCSREVID VmcsRevId;
2961 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2962 VmcsRevId.n.fIsShadowVmcs = 1;
2963 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
2964 rc = vmxHCClearShadowVmcs(pVmcsInfo);
2965 if (RT_SUCCESS(rc))
2966 { /* likely */ }
2967 else
2968 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
2969 }
2970#endif
2971 }
2972 else
2973 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
2974 }
2975 else
2976 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
2977 }
2978 else
2979 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
2980 }
2981 else
2982 {
2983#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2984 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
2985 if (RT_SUCCESS(rc))
2986 { /* likely */ }
2987 else
2988 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
2989#else
2990 AssertFailed();
2991#endif
2992 }
2993 }
2994 else
2995 LogRelFunc(("Failed to load the %s. rc=%Rrc\n", rc, pszVmcs));
2996 }
2997 else
2998 LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", rc, pszVmcs));
2999
3000 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
3001 if (RT_SUCCESS(rc))
3002 {
3003 rc = hmR0VmxClearVmcs(pVmcsInfo);
3004 if (RT_SUCCESS(rc))
3005 { /* likely */ }
3006 else
3007 LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", rc, pszVmcs));
3008 }
3009
3010 /*
3011 * Update the last-error record both for failures and success, so we
3012 * can propagate the status code back to ring-3 for diagnostics.
3013 */
3014 hmR0VmxUpdateErrorRecord(pVCpu, rc);
3015 NOREF(pszVmcs);
3016 return rc;
3017}
3018
3019
3020/**
3021 * Does global VT-x initialization (called during module initialization).
3022 *
3023 * @returns VBox status code.
3024 */
3025VMMR0DECL(int) VMXR0GlobalInit(void)
3026{
3027#ifdef HMVMX_USE_FUNCTION_TABLE
3028 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
3029# ifdef VBOX_STRICT
3030 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
3031 Assert(g_aVMExitHandlers[i].pfn);
3032# endif
3033#endif
3034 return VINF_SUCCESS;
3035}
3036
3037
3038/**
3039 * Does global VT-x termination (called during module termination).
3040 */
3041VMMR0DECL(void) VMXR0GlobalTerm()
3042{
3043 /* Nothing to do currently. */
3044}
3045
3046
3047/**
3048 * Sets up and activates VT-x on the current CPU.
3049 *
3050 * @returns VBox status code.
3051 * @param pHostCpu The HM physical-CPU structure.
3052 * @param pVM The cross context VM structure. Can be
3053 * NULL after a host resume operation.
3054 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
3055 * fEnabledByHost is @c true).
3056 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
3057 * @a fEnabledByHost is @c true).
3058 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
3059 * enable VT-x on the host.
3060 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
3061 */
3062VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
3063 PCSUPHWVIRTMSRS pHwvirtMsrs)
3064{
3065 AssertPtr(pHostCpu);
3066 AssertPtr(pHwvirtMsrs);
3067 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3068
3069 /* Enable VT-x if it's not already enabled by the host. */
3070 if (!fEnabledByHost)
3071 {
3072 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
3073 if (RT_FAILURE(rc))
3074 return rc;
3075 }
3076
3077 /*
3078 * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
3079 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
3080 * invalidated when flushing by VPID.
3081 */
3082 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3083 {
3084 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
3085 pHostCpu->fFlushAsidBeforeUse = false;
3086 }
3087 else
3088 pHostCpu->fFlushAsidBeforeUse = true;
3089
3090 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
3091 ++pHostCpu->cTlbFlushes;
3092
3093 return VINF_SUCCESS;
3094}
3095
3096
3097/**
3098 * Deactivates VT-x on the current CPU.
3099 *
3100 * @returns VBox status code.
3101 * @param pHostCpu The HM physical-CPU structure.
3102 * @param pvCpuPage Pointer to the VMXON region.
3103 * @param HCPhysCpuPage Physical address of the VMXON region.
3104 *
3105 * @remarks This function should never be called when SUPR0EnableVTx() or
3106 * similar was used to enable VT-x on the host.
3107 */
3108VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
3109{
3110 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
3111
3112 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3113 return hmR0VmxLeaveRootMode(pHostCpu);
3114}
3115
3116
3117/**
3118 * Does per-VM VT-x initialization.
3119 *
3120 * @returns VBox status code.
3121 * @param pVM The cross context VM structure.
3122 */
3123VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
3124{
3125 AssertPtr(pVM);
3126 LogFlowFunc(("pVM=%p\n", pVM));
3127
3128 hmR0VmxStructsInit(pVM);
3129 int rc = hmR0VmxStructsAlloc(pVM);
3130 if (RT_FAILURE(rc))
3131 {
3132 LogRelFunc(("Failed to allocated VMX structures. rc=%Rrc\n", rc));
3133 return rc;
3134 }
3135
3136 /* Setup the crash dump page. */
3137#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3138 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
3139 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
3140#endif
3141 return VINF_SUCCESS;
3142}
3143
3144
3145/**
3146 * Does per-VM VT-x termination.
3147 *
3148 * @returns VBox status code.
3149 * @param pVM The cross context VM structure.
3150 */
3151VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
3152{
3153 AssertPtr(pVM);
3154 LogFlowFunc(("pVM=%p\n", pVM));
3155
3156#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3157 if (pVM->hmr0.s.vmx.pbScratch)
3158 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
3159#endif
3160 hmR0VmxStructsFree(pVM);
3161 return VINF_SUCCESS;
3162}
3163
3164
3165/**
3166 * Sets up the VM for execution using hardware-assisted VMX.
3167 * This function is only called once per-VM during initialization.
3168 *
3169 * @returns VBox status code.
3170 * @param pVM The cross context VM structure.
3171 */
3172VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
3173{
3174 AssertPtr(pVM);
3175 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3176
3177 LogFlowFunc(("pVM=%p\n", pVM));
3178
3179 /*
3180 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
3181 * without causing a #GP.
3182 */
3183 RTCCUINTREG const uHostCr4 = ASMGetCR4();
3184 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
3185 { /* likely */ }
3186 else
3187 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
3188
3189 /*
3190 * Check that nested paging is supported if enabled and copy over the flag to the
3191 * ring-0 only structure.
3192 */
3193 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
3194 AssertReturn( !fNestedPaging
3195 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
3196 VERR_INCOMPATIBLE_CONFIG);
3197 pVM->hmr0.s.fNestedPaging = fNestedPaging;
3198 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
3199
3200 /*
3201 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
3202 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
3203 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
3204 */
3205 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
3206 AssertReturn( !fUnrestrictedGuest
3207 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
3208 && fNestedPaging),
3209 VERR_INCOMPATIBLE_CONFIG);
3210 if ( !fUnrestrictedGuest
3211 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
3212 || !pVM->hm.s.vmx.pRealModeTSS))
3213 {
3214 LogRelFunc(("Invalid real-on-v86 state.\n"));
3215 return VERR_INTERNAL_ERROR;
3216 }
3217 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
3218
3219 /* Initialize these always, see hmR3InitFinalizeR0().*/
3220 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
3221 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
3222
3223 /* Setup the tagged-TLB flush handlers. */
3224 int rc = hmR0VmxSetupTaggedTlb(pVM);
3225 if (RT_FAILURE(rc))
3226 {
3227 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
3228 return rc;
3229 }
3230
3231 /* Determine LBR capabilities. */
3232 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
3233 if (pVM->hmr0.s.vmx.fLbr)
3234 {
3235 rc = hmR0VmxSetupLbrMsrRange(pVM);
3236 if (RT_FAILURE(rc))
3237 {
3238 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
3239 return rc;
3240 }
3241 }
3242
3243#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3244 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
3245 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
3246 {
3247 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
3248 if (RT_SUCCESS(rc))
3249 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
3250 else
3251 {
3252 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
3253 return rc;
3254 }
3255 }
3256#endif
3257
3258 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3259 {
3260 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
3261 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
3262
3263 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
3264
3265 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
3266 if (RT_SUCCESS(rc))
3267 {
3268#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3269 if (pVM->cpum.ro.GuestFeatures.fVmx)
3270 {
3271 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
3272 if (RT_SUCCESS(rc))
3273 { /* likely */ }
3274 else
3275 {
3276 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
3277 return rc;
3278 }
3279 }
3280#endif
3281 }
3282 else
3283 {
3284 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
3285 return rc;
3286 }
3287 }
3288
3289 return VINF_SUCCESS;
3290}
3291
3292
3293/**
3294 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
3295 * the VMCS.
3296 * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
3297 */
3298static uint64_t hmR0VmxExportHostControlRegs(void)
3299{
3300 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
3301 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
3302 uint64_t uHostCr4 = ASMGetCR4();
3303 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
3304 return uHostCr4;
3305}
3306
3307
3308/**
3309 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
3310 * the host-state area in the VMCS.
3311 *
3312 * @returns VBox status code.
3313 * @param pVCpu The cross context virtual CPU structure.
3314 * @param uHostCr4 The host CR4 value.
3315 */
3316static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
3317{
3318 /*
3319 * If we've executed guest code using hardware-assisted VMX, the host-state bits
3320 * will be messed up. We should -not- save the messed up state without restoring
3321 * the original host-state, see @bugref{7240}.
3322 *
3323 * This apparently can happen (most likely the FPU changes), deal with it rather than
3324 * asserting. Was observed booting Solaris 10u10 32-bit guest.
3325 */
3326 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
3327 {
3328 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
3329 pVCpu->idCpu));
3330 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
3331 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
3332 }
3333
3334 /*
3335 * Get all the host info.
3336 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
3337 * without also checking the cpuid bit.
3338 */
3339 uint32_t fRestoreHostFlags;
3340#if RT_INLINE_ASM_EXTERNAL
3341 if (uHostCr4 & X86_CR4_FSGSBASE)
3342 {
3343 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
3344 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3345 }
3346 else
3347 {
3348 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
3349 fRestoreHostFlags = 0;
3350 }
3351 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
3352 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
3353 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
3354 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
3355#else
3356 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
3357 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
3358 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
3359 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
3360 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
3361 if (uHostCr4 & X86_CR4_FSGSBASE)
3362 {
3363 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
3364 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
3365 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3366 }
3367 else
3368 {
3369 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
3370 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
3371 fRestoreHostFlags = 0;
3372 }
3373 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
3374 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
3375 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
3376 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
3377 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
3378#endif
3379
3380 /*
3381 * Determine if the host segment registers are suitable for VT-x. Otherwise load zero
3382 * (null selectors) so VM-entry succeeds, and restore the original values before we get preempted.
3383 *
3384 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
3385 */
3386 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
3387 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
3388 {
3389 if (!(uSelAll & X86_SEL_LDT))
3390 {
3391#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3392 do { \
3393 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3394 if ((a_uVmcsVar) & X86_SEL_RPL) \
3395 { \
3396 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3397 (a_uVmcsVar) = 0; \
3398 } \
3399 } while (0)
3400 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3401 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3402 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3403 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3404#undef VMXLOCAL_ADJUST_HOST_SEG
3405 }
3406 else
3407 {
3408#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3409 do { \
3410 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3411 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
3412 { \
3413 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
3414 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3415 else \
3416 { \
3417 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
3418 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
3419 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3420 } \
3421 (a_uVmcsVar) = 0; \
3422 } \
3423 } while (0)
3424 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3425 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3426 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3427 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3428#undef VMXLOCAL_ADJUST_HOST_SEG
3429 }
3430 }
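    /*
     * Example of the adjustment above: a 64-bit Windows host typically leaves DS and ES
     * holding the user-mode data selector (e.g. 0x2B, i.e. RPL=3) while in the kernel.
     * A non-zero RPL fails the VM-entry host-state checks, so zero goes into the VMCS
     * for those fields and VMX_RESTORE_HOST_SEL_DS/ES is set so the original values are
     * put back after the world switch. (Selector values are illustrative; they depend
     * on the host OS.)
     */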
3431
3432 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
3433 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
3434 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
3435 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
3436 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
3437 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
3438 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
3439 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
3440
3441 /*
3442 * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps
3443 * them to the maximum limit (0xffff) on every VM-exit.
3444 */
3445 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
3446 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
3447
3448 /*
3449 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
3450 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
3451 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
3452 * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
3453 * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
3454 * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
3455 * at 0xffff on hosts where we are sure it won't cause trouble.
3456 */
3457#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
3458 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
3459#else
3460 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
3461#endif
3462 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
3463
3464 /*
3465 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
3466 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
3467 * RPL should be too in most cases.
3468 */
3469 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
3470 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
3471 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
3472 VERR_VMX_INVALID_HOST_STATE);
3473
3474 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
3475 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
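    /*
     * To illustrate the lookup above: with TR = 0x0040, the descriptor is read at
     * HostGdtr.uAddr + 0x40 (masking off the RPL/TI bits is the "scaling by 8" mentioned
     * earlier), and since a 64-bit TSS descriptor occupies 16 bytes, X86DESC64_BASE()
     * also picks up base bits 63:32 from the second half of the entry. (The TR value is
     * illustrative only.)
     */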
3476
3477 /*
3478 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
3479 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
3480 * restoration if the host has something else. Task switching is not supported in 64-bit
3481 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
3482 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
3483 *
3484 * [1] See Intel spec. 3.5 "System Descriptor Types".
3485 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
3486 */
3487 Assert(pDesc->System.u4Type == 11);
3488 if ( pDesc->System.u16LimitLow != 0x67
3489 || pDesc->System.u4LimitHigh)
3490 {
3491 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
3492
3493 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
3494 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
3495 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
3496 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
3497 {
3498 /* The GDT is read-only but the writable GDT is available. */
3499 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
3500 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
3501 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
3502 AssertRCReturn(rc, rc);
3503 }
3504 }
3505
3506 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
3507
3508 /*
3509 * Do all the VMCS updates in one block to assist nested virtualization.
3510 */
3511 int rc;
3512 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
3513 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
3514 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
3515 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
3516 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
3517 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
3518 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
3519 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
3520 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
3521 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
3522 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
3523 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
3524
3525 return VINF_SUCCESS;
3526}
3527
3528
3529/**
3530 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
3531 * host-state area of the VMCS.
3532 *
3533 * These MSRs will be automatically restored on the host after every successful
3534 * VM-exit.
3535 *
3536 * @param pVCpu The cross context virtual CPU structure.
3537 *
3538 * @remarks No-long-jump zone!!!
3539 */
3540static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
3541{
3542 AssertPtr(pVCpu);
3543
3544 /*
3545 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
3546 * rather than swapping them on every VM-entry.
3547 */
3548 hmR0VmxLazySaveHostMsrs(pVCpu);
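    /* On 64-bit Intel hosts the lazily handled set is essentially the SYSCALL MSRs
       (LSTAR, STAR, SF_MASK) plus KERNEL_GS_BASE; saving the host values here lets
       them be swapped only when actually needed instead of going through the
       auto-load/store area on every VM-entry. */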
3549
3550 /*
3551 * Host Sysenter MSRs.
3552 */
3553 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
3554 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
3555 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
3556
3557 /*
3558 * Host EFER MSR.
3559 *
3560 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
3561 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
3562 */
3563 if (g_fHmVmxSupportsVmcsEfer)
3564 {
3565 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
3566 AssertRC(rc);
3567 }
3568
3569 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3570 * hmR0VmxExportGuestEntryExitCtls(). */
3571}
3572
3573
3574/**
3575 * Figures out if we need to swap the EFER MSR which is particularly expensive.
3576 *
3577 * We check all relevant bits. For now, that's everything besides LMA/LME, as
3578 * these two bits are handled by VM-entry, see hmR0VmxExportGuestEntryExitCtls().
3579 *
3580 * @returns true if we need to load guest EFER, false otherwise.
3581 * @param pVCpu The cross context virtual CPU structure.
3582 * @param pVmxTransient The VMX-transient structure.
3583 *
3584 * @remarks Requires EFER, CR4.
3585 * @remarks No-long-jump zone!!!
3586 */
3587static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3588{
3589#ifdef HMVMX_ALWAYS_SWAP_EFER
3590 RT_NOREF2(pVCpu, pVmxTransient);
3591 return true;
3592#else
3593 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3594 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
3595 uint64_t const u64GuestEfer = pCtx->msrEFER;
3596
3597# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3598 /*
3599 * For nested-guests, we shall honor swapping the EFER MSR when requested by
3600 * the nested-guest.
3601 */
3602 if ( pVmxTransient->fIsNestedGuest
3603 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
3604 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
3605 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
3606 return true;
3607# else
3608 RT_NOREF(pVmxTransient);
3609# endif
3610
3611 /*
3612 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
3613 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
3614 */
3615 if ( CPUMIsGuestInLongModeEx(pCtx)
3616 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3617 return true;
3618
3619 /*
3620 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
3621 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
3622 *
3623 * See Intel spec. 4.5 "IA-32e Paging".
3624 * See Intel spec. 4.1.1 "Three Paging Modes".
3625 *
3626 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
3627 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
3628 */
3629 Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
3630 Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
3631 if ( (pCtx->cr4 & X86_CR4_PAE)
3632 && (pCtx->cr0 & X86_CR0_PG))
3633 {
3634 /*
3635 * If nested paging is not used, verify that the guest paging mode matches the
3636 * shadow paging mode which is/will be placed in the VMCS (which is what will
3637 * actually be used while executing the guest and not the CR4 shadow value).
3638 */
3639 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
3640 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
3641 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
3642 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
3643 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
3644 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
3645 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3646 {
3647 /* Verify that the host is NX capable. */
3648 Assert(pVCpu->CTX_SUFF(pVM)->cpum.ro.HostFeatures.fNoExecute);
3649 return true;
3650 }
3651 }
3652
3653 return false;
3654#endif
3655}
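/*
 * A quick worked example of the above: a host with EFER.NXE=1 running a PAE guest that
 * has EFER.NXE=0 must swap EFER, otherwise PTEs with bit 63 set would act as execute-
 * disable mappings instead of faulting on a reserved bit as the guest expects. On the
 * other hand, a guest whose EFER differs from the host's only in LMA/LME needs no swap,
 * since those bits are covered by the VM-entry/VM-exit controls.
 */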
3656
3657
3658/**
3659 * Exports the guest's RSP into the guest-state area in the VMCS.
3660 *
3661 * @param pVCpu The cross context virtual CPU structure.
3662 *
3663 * @remarks No-long-jump zone!!!
3664 */
3665static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
3666{
3667 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
3668 {
3669 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
3670
3671 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
3672 AssertRC(rc);
3673
3674 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
3675 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
3676 }
3677}
3678
3679
3680/**
3681 * Exports the guest hardware-virtualization state.
3682 *
3683 * @returns VBox status code.
3684 * @param pVCpu The cross context virtual CPU structure.
3685 * @param pVmxTransient The VMX-transient structure.
3686 *
3687 * @remarks No-long-jump zone!!!
3688 */
3689static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3690{
3691 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
3692 {
3693#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3694 /*
3695 * Check if the VMX feature is exposed to the guest and if the host CPU supports
3696 * VMCS shadowing.
3697 */
3698 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
3699 {
3700 /*
3701 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
3702 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
3703 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
3704 *
3705 * We check for VMX root mode here in case the guest executes VMXOFF without
3706 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
3707 * not clear the current VMCS pointer.
3708 */
3709 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3710 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
3711 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
3712 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
3713 {
3714 /* Paranoia. */
3715 Assert(!pVmxTransient->fIsNestedGuest);
3716
3717 /*
3718 * For performance reasons, also check if the nested hypervisor's current VMCS
3719 * was newly loaded or modified before copying it to the shadow VMCS.
3720 */
3721 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
3722 {
3723 int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
3724 AssertRCReturn(rc, rc);
3725 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
3726 }
3727 vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
3728 }
3729 else
3730 vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
3731 }
3732#else
3733 NOREF(pVmxTransient);
3734#endif
3735 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
3736 }
3737 return VINF_SUCCESS;
3738}
3739
3740
3741/**
3742 * Exports the guest debug registers into the guest-state area in the VMCS.
3743 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3744 *
3745 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
3746 *
3747 * @returns VBox status code.
3748 * @param pVCpu The cross context virtual CPU structure.
3749 * @param pVmxTransient The VMX-transient structure.
3750 *
3751 * @remarks No-long-jump zone!!!
3752 */
3753static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
3754{
3755 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3756
3757 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
3758 * stepping. */
3759 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3760 if (pVmxTransient->fIsNestedGuest)
3761 {
3762 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
3763 AssertRC(rc);
3764
3765 /*
3766 * We don't want to always intercept MOV DRx for nested-guests as it causes
3767 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
3768 * Instead, they are strictly only requested when the nested hypervisor intercepts
3769 * them -- handled while merging VMCS controls.
3770 *
3771 * If neither the outer nor the nested hypervisor is intercepting MOV DRx,
3772 * then the nested-guest debug state should be actively loaded on the host so that
3773 * the nested-guest reads its own debug registers without causing VM-exits.
3774 */
3775 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
3776 && !CPUMIsGuestDebugStateActive(pVCpu))
3777 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3778 return VINF_SUCCESS;
3779 }
3780
3781#ifdef VBOX_STRICT
3782 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3783 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
3784 {
3785 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3786 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
3787 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
3788 }
3789#endif
3790
3791 bool fSteppingDB = false;
3792 bool fInterceptMovDRx = false;
3793 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
3794 if (pVCpu->hm.s.fSingleInstruction)
3795 {
3796 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3797 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
3798 {
3799 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
3800 Assert(fSteppingDB == false);
3801 }
3802 else
3803 {
3804 pVCpu->cpum.GstCtx.eflags.u32 |= X86_EFL_TF;
3805 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
3806 pVCpu->hmr0.s.fClearTrapFlag = true;
3807 fSteppingDB = true;
3808 }
3809 }
3810
3811 uint64_t u64GuestDr7;
3812 if ( fSteppingDB
3813 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3814 {
3815 /*
3816 * Use the combined guest and host DRx values found in the hypervisor register set
3817 * because the hypervisor debugger has breakpoints active or someone is single stepping
3818 * on the host side without a monitor trap flag.
3819 *
3820 * Note! DBGF expects a clean DR6 state before executing guest code.
3821 */
3822 if (!CPUMIsHyperDebugStateActive(pVCpu))
3823 {
3824 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3825 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3826 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3827 }
3828
3829 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
3830 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
3831 pVCpu->hmr0.s.fUsingHyperDR7 = true;
3832 fInterceptMovDRx = true;
3833 }
3834 else
3835 {
3836 /*
3837 * If the guest has enabled debug registers, we need to load them prior to
3838 * executing guest code so they'll trigger at the right time.
3839 */
3840 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
3841 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
3842 {
3843 if (!CPUMIsGuestDebugStateActive(pVCpu))
3844 {
3845 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3846 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3847 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3848 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3849 }
3850 Assert(!fInterceptMovDRx);
3851 }
3852 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3853 {
3854 /*
3855 * If no debugging enabled, we'll lazy load DR0-3. Unlike on AMD-V, we
3856 * must intercept #DB in order to maintain a correct DR6 guest value, and
3857 * because we need to intercept it to prevent nested #DBs from hanging the
3858 * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
3859 */
3860 fInterceptMovDRx = true;
3861 }
3862
3863 /* Update DR7 with the actual guest value. */
3864 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
3865 pVCpu->hmr0.s.fUsingHyperDR7 = false;
3866 }
3867
3868 if (fInterceptMovDRx)
3869 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
3870 else
3871 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
3872
3873 /*
3874 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
3875 * monitor-trap flag and update our cache.
3876 */
3877 if (uProcCtls != pVmcsInfo->u32ProcCtls)
3878 {
3879 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
3880 AssertRC(rc);
3881 pVmcsInfo->u32ProcCtls = uProcCtls;
3882 }
3883
3884 /*
3885 * Update guest DR7.
3886 */
3887 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
3888 AssertRC(rc);
3889
3890 /*
3891 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
3892 * we need to clear interrupt inhibition if any as otherwise it causes a VM-entry failure.
3893 *
3894 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
3895 */
3896 if (fSteppingDB)
3897 {
3898 Assert(pVCpu->hm.s.fSingleInstruction);
3899 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
3900
3901 uint32_t fIntrState = 0;
3902 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
3903 AssertRC(rc);
3904
3905 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
3906 {
3907 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
3908 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
3909 AssertRC(rc);
3910 }
3911 }
3912
3913 return VINF_SUCCESS;
3914}
3915
3916
3917/**
3918 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3919 * areas.
3920 *
3921 * These MSRs will automatically be loaded to the host CPU on every successful
3922 * VM-entry and stored from the host CPU on every successful VM-exit.
3923 *
3924 * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
3925 * actual host MSR values are not updated here for performance reasons. See
3926 * hmR0VmxExportHostMsrs().
3927 *
3928 * We also export the guest sysenter MSRs into the guest-state area in the VMCS.
3929 *
3930 * @returns VBox status code.
3931 * @param pVCpu The cross context virtual CPU structure.
3932 * @param pVmxTransient The VMX-transient structure.
3933 *
3934 * @remarks No-long-jump zone!!!
3935 */
3936static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3937{
3938 AssertPtr(pVCpu);
3939 AssertPtr(pVmxTransient);
3940
3941 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3942 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3943
3944 /*
3945 * MSRs that we use the auto-load/store MSR area in the VMCS.
3946 * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
3947 * nothing to do here. The host MSR values are updated when it's safe in
3948 * hmR0VmxLazySaveHostMsrs().
3949 *
3950 * For nested-guests, the guest's MSRs from the VM-entry MSR-load area are already
3951 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
3952 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
3953 * for any MSRs that are not part of the lazy MSRs, so we do not need to place
3954 * those MSRs into the auto-load/store MSR area. Nothing to do here.
3955 */
3956 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
3957 {
3958 /* No auto-load/store MSRs currently. */
3959 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
3960 }
3961
3962 /*
3963 * Guest Sysenter MSRs.
3964 */
3965 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
3966 {
3967 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
3968
3969 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
3970 {
3971 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
3972 AssertRC(rc);
3973 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
3974 }
3975
3976 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
3977 {
3978 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
3979 AssertRC(rc);
3980 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
3981 }
3982
3983 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
3984 {
3985 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
3986 AssertRC(rc);
3987 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
3988 }
3989 }
3990
3991 /*
3992 * Guest/host EFER MSR.
3993 */
3994 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
3995 {
3996 /* Whether we are using the VMCS to swap the EFER MSR must have been
3997 determined earlier while exporting VM-entry/VM-exit controls. */
3998 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
3999 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
4000
4001 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
4002 {
4003 /*
4004 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
4005 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
4006 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
4007 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
4008 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
4009 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
4010 * during VM-entry.
4011 */
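            /*
             * Numerically (EFER.LME is bit 8, EFER.LMA bit 10): a guest that has written
             * EFER = 0x100 (LME only) while CR0.PG is still 0 gets exported below with
             * LME masked off (0x000); once the guest sets CR0.PG=1 the architectural
             * value becomes 0x500 (LMA | LME) and is exported unchanged, matching the
             * "IA-32e mode guest" VM-entry control.
             */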
4012 uint64_t uGuestEferMsr = pCtx->msrEFER;
4013 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
4014 {
4015 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
4016 uGuestEferMsr &= ~MSR_K6_EFER_LME;
4017 else
4018 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
4019 }
4020
4021 /*
4022 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4023 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4024 */
4025 if (g_fHmVmxSupportsVmcsEfer)
4026 {
4027 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
4028 AssertRC(rc);
4029 }
4030 else
4031 {
4032 /*
4033 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
4034 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
4035 */
4036 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
4037 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4038 AssertRCReturn(rc, rc);
4039 }
4040
4041 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
4042 }
4043 else if (!g_fHmVmxSupportsVmcsEfer)
4044 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
4045
4046 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
4047 }
4048
4049 /*
4050 * Other MSRs.
4051 */
4052 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
4053 {
4054 /* Speculation Control (R/W). */
4055 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
4056 if (pVM->cpum.ro.GuestFeatures.fIbrs)
4057 {
4058 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
4059 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4060 AssertRCReturn(rc, rc);
4061 }
4062
4063 /* Last Branch Record. */
4064 if (pVM->hmr0.s.vmx.fLbr)
4065 {
4066 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
4067 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
4068 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
4069 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
4070 Assert(cLbrStack <= 32);
4071 for (uint32_t i = 0; i < cLbrStack; i++)
4072 {
4073 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
4074 pVmcsInfoShared->au64LbrFromIpMsr[i],
4075 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4076 AssertRCReturn(rc, rc);
4077
4078 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
4079 if (idToIpMsrStart != 0)
4080 {
4081 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
4082 pVmcsInfoShared->au64LbrToIpMsr[i],
4083 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4084 AssertRCReturn(rc, rc);
4085 }
4086 }
4087
4088 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
4089 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
4090 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
4091 false /* fUpdateHostMsr */);
4092 AssertRCReturn(rc, rc);
4093 }
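        /*
         * Sizing example: on a core with a 16-deep LBR stack and the usual Intel MSR
         * layout (from-IP MSRs at 0x680..0x68F, to-IP MSRs at 0x6C0..0x6CF, TOS at
         * 0x1C9), the loop above queues 32 branch-record MSRs plus the top-of-stack
         * MSR, i.e. 33 auto-load/store slots per VM-entry. (The MSR numbers here are
         * only typical values; the actual ranges come from hmR0VmxSetupLbrMsrRange().)
         */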
4094
4095 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
4096 }
4097
4098 return VINF_SUCCESS;
4099}
4100
4101
4102/**
4103 * Wrapper for running the guest code in VT-x.
4104 *
4105 * @returns VBox status code, no informational status codes.
4106 * @param pVCpu The cross context virtual CPU structure.
4107 * @param pVmxTransient The VMX-transient structure.
4108 *
4109 * @remarks No-long-jump zone!!!
4110 */
4111DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4112{
4113 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
4114 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
4115
4116 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
4117 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
4118#ifdef VBOX_WITH_STATISTICS
4119 if (fResumeVM)
4120 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
4121 else
4122 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
4123#endif
4124 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
4125 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
4126 return rc;
4127}
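/*
 * In other words: the first execution after a VMCLEAR (launch state "clear", e.g. right
 * after hmR0VmxSetupVmcs()) must use VMLAUNCH, while every subsequent run of the same
 * VMCS uses VMRESUME; pfnStartVm receives that distinction via fResume.
 */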
4128
4129
4130/**
4131 * Reports world-switch error and dumps some useful debug info.
4132 *
4133 * @param pVCpu The cross context virtual CPU structure.
4134 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4135 * @param pVmxTransient The VMX-transient structure (only
4136 * exitReason updated).
4137 */
4138static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
4139{
4140 Assert(pVCpu);
4141 Assert(pVmxTransient);
4142 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4143
4144 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
4145 switch (rcVMRun)
4146 {
4147 case VERR_VMX_INVALID_VMXON_PTR:
4148 AssertFailed();
4149 break;
4150 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4151 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4152 {
4153 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4154 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4155 AssertRC(rc);
4156 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
4157
4158 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4159 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4160 Cannot do it here as we may have been long preempted. */
4161
4162#ifdef VBOX_STRICT
4163 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4164 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4165 pVmxTransient->uExitReason));
4166 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
4167 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4168 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4169 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4170 else
4171 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4172 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4173 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4174
4175 static struct
4176 {
4177 /** Name of the field to log. */
4178 const char *pszName;
4179 /** The VMCS field. */
4180 uint32_t uVmcsField;
4181 /** Whether host support of this field needs to be checked. */
4182 bool fCheckSupport;
4183 } const s_aVmcsFields[] =
4184 {
4185 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
4186 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
4187 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
4188 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
4189 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
4190 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
4191 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
4192 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
4193 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
4194 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
4195 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
4196 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
4197 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
4198 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
4199 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
4200 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
4201 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
4202 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
4203 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
4204 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
4205 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
4206 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
4207 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
4208 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
4209 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
4210 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
4211 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
4212 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
4213 /* The order of selector fields below are fixed! */
4214 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
4215 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
4216 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
4217 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
4218 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
4219 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
4220 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
4221 /* End of ordered selector fields. */
4222 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
4223 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
4224 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
4225 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
4226 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
4227 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
4228 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
4229 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
4230 };
4231
4232 RTGDTR HostGdtr;
4233 ASMGetGDTR(&HostGdtr);
4234
4235 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
4236 for (uint32_t i = 0; i < cVmcsFields; i++)
4237 {
4238 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
4239
4240 bool fSupported;
4241 if (!s_aVmcsFields[i].fCheckSupport)
4242 fSupported = true;
4243 else
4244 {
4245 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4246 switch (uVmcsField)
4247 {
4248 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
4249 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
4250 case VMX_VMCS32_CTRL_PROC_EXEC2:
4251 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
4252 break;
4253 default:
4254 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
4255 }
4256 }
4257
4258 if (fSupported)
4259 {
4260 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
4261 switch (uWidth)
4262 {
4263 case VMX_VMCSFIELD_WIDTH_16BIT:
4264 {
4265 uint16_t u16Val;
4266 rc = VMXReadVmcs16(uVmcsField, &u16Val);
4267 AssertRC(rc);
4268 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
4269
4270 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
4271 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
4272 {
4273 if (u16Val < HostGdtr.cbGdt)
4274 {
4275 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
4276 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
4277 "Host FS", "Host GS", "Host TR" };
4278 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
4279 Assert(idxSel < RT_ELEMENTS(s_apszSel));
4280 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
4281 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
4282 }
4283 else
4284 Log4((" Selector value exceeds GDT limit!\n"));
4285 }
4286 break;
4287 }
4288
4289 case VMX_VMCSFIELD_WIDTH_32BIT:
4290 {
4291 uint32_t u32Val;
4292 rc = VMXReadVmcs32(uVmcsField, &u32Val);
4293 AssertRC(rc);
4294 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
4295 break;
4296 }
4297
4298 case VMX_VMCSFIELD_WIDTH_64BIT:
4299 case VMX_VMCSFIELD_WIDTH_NATURAL:
4300 {
4301 uint64_t u64Val;
4302 rc = VMXReadVmcs64(uVmcsField, &u64Val);
4303 AssertRC(rc);
4304 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
4305 break;
4306 }
4307 }
4308 }
4309 }
4310
4311 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4312 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4313 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4314 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4315 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4316 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4317#endif /* VBOX_STRICT */
4318 break;
4319 }
4320
4321 default:
4322 /* Impossible */
4323 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4324 break;
4325 }
4326}
4327
4328
4329/**
4330 * Sets up the usage of TSC-offsetting and updates the VMCS.
4331 *
4332 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
4333 * VMX-preemption timer.
4334 *
4335 * @returns VBox status code.
4336 * @param pVCpu The cross context virtual CPU structure.
4337 * @param pVmxTransient The VMX-transient structure.
4338 * @param idCurrentCpu The current CPU number.
4339 *
4340 * @remarks No-long-jump zone!!!
4341 */
4342static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
4343{
4344 bool fOffsettedTsc;
4345 bool fParavirtTsc;
4346 uint64_t uTscOffset;
4347 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4348 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4349
4350 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
4351 {
4352 /* The TMCpuTickGetDeadlineAndTscOffset function is expensive (calling it on
4353 every entry slowed down the bs2-test1 CPUID testcase by ~33% on a 10980xe). */
4354 uint64_t cTicksToDeadline;
4355 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
4356 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
4357 {
4358 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
4359 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4360 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
4361 if ((int64_t)cTicksToDeadline > 0)
4362 { /* hopefully */ }
4363 else
4364 {
4365 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
4366 cTicksToDeadline = 0;
4367 }
4368 }
4369 else
4370 {
4371 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
4372 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
4373 &pVCpu->hmr0.s.vmx.uTscDeadline,
4374 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
4375 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
4376 if (cTicksToDeadline >= 128)
4377 { /* hopefully */ }
4378 else
4379 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
4380 }
4381
4382 /* Make sure the returned values have sane upper and lower boundaries. */
4383 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
4384 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
4385 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768); /* 1/32768th of a second, ~30us. */
4386 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
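        /* Rough magnitudes, assuming for illustration a 3.2 GHz TSC and a preemption-timer
           shift of 5 (i.e. one timer tick per 32 TSC ticks): the clamps above come to
           50,000,000 and 97,656 TSC ticks, which the shift turns into roughly 1,562,500
           and 3,051 timer ticks before the final UINT32_MAX clamp below. */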
4387
4388 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
4389 * preemption timers here. We probably need to clamp the preemption timer,
4390 * after converting the timer value to the host. */
4391 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4392 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
4393 AssertRC(rc);
4394 }
4395 else
4396 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4397
4398 if (fParavirtTsc)
4399 {
4400 /* Currently neither Hyper-V nor KVM needs to update its paravirt. TSC
4401 information before every VM-entry, hence disable it for performance's sake. */
4402#if 0
4403 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
4404 AssertRC(rc);
4405#endif
4406 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
4407 }
4408
4409 if ( fOffsettedTsc
4410 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
4411 {
4412 if (pVmxTransient->fIsNestedGuest)
4413 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
4414 hmR0VmxSetTscOffsetVmcs(pVmcsInfo, uTscOffset);
4415 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4416 }
4417 else
4418 {
4419 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4420 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4421 }
4422}
4423
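/*
 * A compiled-out sketch (hypothetical helper, for illustration) of the deadline clamping
 * done in hmR0VmxUpdateTscOffsettingAndPreemptTimer above: the deadline in TSC ticks is
 * bounded to roughly [1/32768, 1/64] of a second, scaled down by the CPU's
 * preemption-timer shift and truncated to fit the 32-bit VMCS field.
 */
#if 0
static uint32_t hmR0VmxExampleClampPreemptTimer(uint64_t cTicksToDeadline, uint64_t u64CpuHz, uint8_t cPreemptTimerShift)
{
    cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64);     /* Upper bound: ~15.6 ms. */
    cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768);  /* Lower bound: ~30 us. */
    cTicksToDeadline >>= cPreemptTimerShift;                        /* Preemption timer counts at TSC >> shift. */
    return (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);     /* Must fit the 32-bit VMCS field. */
}
#endif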
4424
4425/**
4426 * Worker for VMXR0ImportStateOnDemand.
4427 *
4428 * @returns VBox status code.
4429 * @param pVCpu The cross context virtual CPU structure.
4430 * @param pVmcsInfo The VMCS info. object.
4431 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4432 */
4433static int hmR0VmxImportGuestState(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint64_t fWhat)
4434{
4435 int rc = VINF_SUCCESS;
4436 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4437 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
4438 uint32_t u32Val;
4439
4440 /*
4441 * Note! This is a hack to work around a mysterious BSOD observed with release builds
4442 * on Windows 10 64-bit hosts. Profile and debug builds are not affected and
4443 * neither are other host platforms.
4444 *
4445 * Committing this temporarily as it prevents BSOD.
4446 *
4447 * Update: This is very likely a compiler optimization bug, see @bugref{9180}.
4448 */
4449#ifdef RT_OS_WINDOWS
4450 if (pVM == 0 || pVM == (void *)(uintptr_t)-1)
4451 return VERR_HM_IPE_1;
4452#endif
4453
4454 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatImportGuestState, x);
4455
4456 /*
4457 * We disable interrupts to make the updating of the state and in particular
4458 * the fExtrn modification atomic with respect to preemption hooks.
4459 */
4460 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
4461
4462 fWhat &= pCtx->fExtrn;
4463 if (fWhat)
4464 {
4465 do
4466 {
4467 if (fWhat & CPUMCTX_EXTRN_RIP)
4468 vmxHCImportGuestRip(pVCpu);
4469
4470 if (fWhat & CPUMCTX_EXTRN_RFLAGS)
4471 vmxHCImportGuestRFlags(pVCpu, pVmcsInfo);
4472
4473 if (fWhat & (CPUMCTX_EXTRN_INHIBIT_INT | CPUMCTX_EXTRN_INHIBIT_NMI))
4474 vmxHCImportGuestIntrState(pVCpu, pVmcsInfo);
4475
4476 if (fWhat & CPUMCTX_EXTRN_RSP)
4477 {
4478 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RSP, &pCtx->rsp);
4479 AssertRC(rc);
4480 }
4481
4482 if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
4483 {
4484 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
4485 bool const fRealOnV86Active = pVmcsInfoShared->RealMode.fRealOnV86Active;
4486 if (fWhat & CPUMCTX_EXTRN_CS)
4487 {
4488 vmxHCImportGuestSegReg(pVCpu, X86_SREG_CS);
4489 vmxHCImportGuestRip(pVCpu);
4490 if (fRealOnV86Active)
4491 pCtx->cs.Attr.u = pVmcsInfoShared->RealMode.AttrCS.u;
4492 EMHistoryUpdatePC(pVCpu, pCtx->cs.u64Base + pCtx->rip, true /* fFlattened */);
4493 }
4494 if (fWhat & CPUMCTX_EXTRN_SS)
4495 {
4496 vmxHCImportGuestSegReg(pVCpu, X86_SREG_SS);
4497 if (fRealOnV86Active)
4498 pCtx->ss.Attr.u = pVmcsInfoShared->RealMode.AttrSS.u;
4499 }
4500 if (fWhat & CPUMCTX_EXTRN_DS)
4501 {
4502 vmxHCImportGuestSegReg(pVCpu, X86_SREG_DS);
4503 if (fRealOnV86Active)
4504 pCtx->ds.Attr.u = pVmcsInfoShared->RealMode.AttrDS.u;
4505 }
4506 if (fWhat & CPUMCTX_EXTRN_ES)
4507 {
4508 vmxHCImportGuestSegReg(pVCpu, X86_SREG_ES);
4509 if (fRealOnV86Active)
4510 pCtx->es.Attr.u = pVmcsInfoShared->RealMode.AttrES.u;
4511 }
4512 if (fWhat & CPUMCTX_EXTRN_FS)
4513 {
4514 vmxHCImportGuestSegReg(pVCpu, X86_SREG_FS);
4515 if (fRealOnV86Active)
4516 pCtx->fs.Attr.u = pVmcsInfoShared->RealMode.AttrFS.u;
4517 }
4518 if (fWhat & CPUMCTX_EXTRN_GS)
4519 {
4520 vmxHCImportGuestSegReg(pVCpu, X86_SREG_GS);
4521 if (fRealOnV86Active)
4522 pCtx->gs.Attr.u = pVmcsInfoShared->RealMode.AttrGS.u;
4523 }
4524 }
4525
4526 if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
4527 {
4528 if (fWhat & CPUMCTX_EXTRN_LDTR)
4529 vmxHCImportGuestLdtr(pVCpu);
4530
4531 if (fWhat & CPUMCTX_EXTRN_GDTR)
4532 {
4533 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_GDTR_BASE, &pCtx->gdtr.pGdt); AssertRC(rc);
4534 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRC(rc);
4535 pCtx->gdtr.cbGdt = u32Val;
4536 }
4537
4538 /* Guest IDTR. */
4539 if (fWhat & CPUMCTX_EXTRN_IDTR)
4540 {
4541 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_IDTR_BASE, &pCtx->idtr.pIdt); AssertRC(rc);
4542 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRC(rc);
4543 pCtx->idtr.cbIdt = u32Val;
4544 }
4545
4546 /* Guest TR. */
4547 if (fWhat & CPUMCTX_EXTRN_TR)
4548 {
4549 /* Real-mode emulation using virtual-8086 mode has the fake TSS (pRealModeTSS) in TR,
4550 so we don't need to import that one. */
4551 if (!pVmcsInfo->pShared->RealMode.fRealOnV86Active)
4552 vmxHCImportGuestTr(pVCpu);
4553 }
4554 }
4555
4556 if (fWhat & CPUMCTX_EXTRN_DR7)
4557 {
4558 if (!pVCpu->hmr0.s.fUsingHyperDR7)
4559 {
4560 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_DR7, &pCtx->dr[7]);
4561 AssertRC(rc);
4562 }
4563 }
4564
4565 if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
4566 {
4567 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, &pCtx->SysEnter.eip); AssertRC(rc);
4568 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, &pCtx->SysEnter.esp); AssertRC(rc);
4569 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRC(rc);
4570 pCtx->SysEnter.cs = u32Val;
4571 }
4572
4573 if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
4574 {
4575 if ( pVM->hmr0.s.fAllow64BitGuests
4576 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
4577 pCtx->msrKERNELGSBASE = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
4578 }
4579
4580 if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
4581 {
4582 if ( pVM->hmr0.s.fAllow64BitGuests
4583 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
4584 {
4585 pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
4586 pCtx->msrSTAR = ASMRdMsr(MSR_K6_STAR);
4587 pCtx->msrSFMASK = ASMRdMsr(MSR_K8_SF_MASK);
4588 }
4589 }
4590
4591 if (fWhat & (CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS))
4592 {
4593 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
4594 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
4595 uint32_t const cMsrs = pVmcsInfo->cExitMsrStore;
4596 Assert(pMsrs);
4597 Assert(cMsrs <= VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
4598 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
4599 for (uint32_t i = 0; i < cMsrs; i++)
4600 {
4601 uint32_t const idMsr = pMsrs[i].u32Msr;
4602 switch (idMsr)
4603 {
4604 case MSR_K8_TSC_AUX: CPUMSetGuestTscAux(pVCpu, pMsrs[i].u64Value); break;
4605 case MSR_IA32_SPEC_CTRL: CPUMSetGuestSpecCtrl(pVCpu, pMsrs[i].u64Value); break;
4606 case MSR_K6_EFER: /* Can't be changed without causing a VM-exit */ break;
4607 default:
4608 {
4609 uint32_t idxLbrMsr;
4610 if (pVM->hmr0.s.vmx.fLbr)
4611 {
4612 if (hmR0VmxIsLbrBranchFromMsr(pVM, idMsr, &idxLbrMsr))
4613 {
4614 Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrFromIpMsr));
4615 pVmcsInfoShared->au64LbrFromIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
4616 break;
4617 }
4618 if (hmR0VmxIsLbrBranchToMsr(pVM, idMsr, &idxLbrMsr))
4619 {
4620 Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrToIpMsr));
4621 pVmcsInfoShared->au64LbrToIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
4622 break;
4623 }
4624 if (idMsr == pVM->hmr0.s.vmx.idLbrTosMsr)
4625 {
4626 pVmcsInfoShared->u64LbrTosMsr = pMsrs[i].u64Value;
4627 break;
4628 }
4629 /* Fallthru (no break) */
4630 }
4631 pCtx->fExtrn = 0;
4632 pVCpu->hm.s.u32HMError = pMsrs->u32Msr;
4633 ASMSetFlags(fEFlags);
4634 AssertMsgFailed(("Unexpected MSR in auto-load/store area. idMsr=%#RX32 cMsrs=%u\n", idMsr, cMsrs));
4635 return VERR_HM_UNEXPECTED_LD_ST_MSR;
4636 }
4637 }
4638 }
4639 }
4640
4641 if (fWhat & CPUMCTX_EXTRN_CR_MASK)
4642 {
4643 if (fWhat & CPUMCTX_EXTRN_CR0)
4644 {
4645 uint64_t u64Cr0;
4646 uint64_t u64Shadow;
4647 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR0, &u64Cr0); AssertRC(rc);
4648 rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, &u64Shadow); AssertRC(rc);
4649#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
4650 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4651 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
4652#else
4653 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
4654 {
4655 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4656 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
4657 }
4658 else
4659 {
4660 /*
4661 * We've merged the guest and nested-guest's CR0 guest/host mask while executing
4662 * the nested-guest using hardware-assisted VMX. Accordingly we need to
4663 * re-construct CR0. See @bugref{9180#c95} for details.
4664 */
4665 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
4666 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
4667 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4668 | (pVmcsNstGst->u64GuestCr0.u & pVmcsNstGst->u64Cr0Mask.u)
4669 | (u64Shadow & (pVmcsInfoGst->u64Cr0Mask & ~pVmcsNstGst->u64Cr0Mask.u));
4670 }
4671#endif
4672 VMMRZCallRing3Disable(pVCpu); /* May call into PGM which has Log statements. */
4673 CPUMSetGuestCR0(pVCpu, u64Cr0);
4674 VMMRZCallRing3Enable(pVCpu);
4675 }
4676
4677 if (fWhat & CPUMCTX_EXTRN_CR4)
4678 {
4679 uint64_t u64Cr4;
4680 uint64_t u64Shadow;
4681 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR4, &u64Cr4); AssertRC(rc);
4682 rc |= VMXReadVmcsNw(VMX_VMCS_CTRL_CR4_READ_SHADOW, &u64Shadow); AssertRC(rc);
4683#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
4684 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4685 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
4686#else
4687 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
4688 {
4689 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4690 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
4691 }
4692 else
4693 {
4694 /*
4695 * We've merged the guest and nested-guest's CR4 guest/host mask while executing
4696 * the nested-guest using hardware-assisted VMX. Accordingly we need to
4697 * re-construct CR4. See @bugref{9180#c95} for details.
4698 */
4699 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
4700 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
4701 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4702 | (pVmcsNstGst->u64GuestCr4.u & pVmcsNstGst->u64Cr4Mask.u)
4703 | (u64Shadow & (pVmcsInfoGst->u64Cr4Mask & ~pVmcsNstGst->u64Cr4Mask.u));
4704 }
4705#endif
4706 pCtx->cr4 = u64Cr4;
4707 }
4708
4709 if (fWhat & CPUMCTX_EXTRN_CR3)
4710 {
4711 /* CR0.PG bit changes are always intercepted, so it's up to date. */
4712 if ( pVM->hmr0.s.vmx.fUnrestrictedGuest
4713 || ( pVM->hmr0.s.fNestedPaging
4714 && CPUMIsGuestPagingEnabledEx(pCtx)))
4715 {
4716 uint64_t u64Cr3;
4717 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR3, &u64Cr3); AssertRC(rc);
4718 if (pCtx->cr3 != u64Cr3)
4719 {
4720 pCtx->cr3 = u64Cr3;
4721 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
4722 }
4723
4724 /*
4725 * If the guest is in PAE mode, sync back the PDPE's into the guest state.
4726 * CR4.PAE, CR0.PG, EFER MSR changes are always intercepted, so they're up to date.
4727 */
4728 if (CPUMIsGuestInPAEModeEx(pCtx))
4729 {
4730 X86PDPE aPaePdpes[4];
4731 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &aPaePdpes[0].u); AssertRC(rc);
4732 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &aPaePdpes[1].u); AssertRC(rc);
4733 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &aPaePdpes[2].u); AssertRC(rc);
4734 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &aPaePdpes[3].u); AssertRC(rc);
4735 if (memcmp(&aPaePdpes[0], &pCtx->aPaePdpes[0], sizeof(aPaePdpes)))
4736 {
4737 memcpy(&pCtx->aPaePdpes[0], &aPaePdpes[0], sizeof(aPaePdpes));
4738 /* PGM now updates PAE PDPTEs while updating CR3. */
4739 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
4740 }
4741 }
4742 }
4743 }
4744 }
4745
4746#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4747 if (fWhat & CPUMCTX_EXTRN_HWVIRT)
4748 {
4749 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING)
4750 && !CPUMIsGuestInVmxNonRootMode(pCtx))
4751 {
4752 Assert(CPUMIsGuestInVmxRootMode(pCtx));
4753 rc = vmxHCCopyShadowToNstGstVmcs(pVCpu, pVmcsInfo);
4754 if (RT_SUCCESS(rc))
4755 { /* likely */ }
4756 else
4757 break;
4758 }
4759 }
4760#endif
4761 } while (0);
4762
4763 if (RT_SUCCESS(rc))
4764 {
4765 /* Update fExtrn. */
4766 pCtx->fExtrn &= ~fWhat;
4767
4768 /* If everything has been imported, clear the HM keeper bit. */
4769 if (!(pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL))
4770 {
4771 pCtx->fExtrn &= ~CPUMCTX_EXTRN_KEEPER_HM;
4772 Assert(!pCtx->fExtrn);
4773 }
4774 }
4775 }
4776 else
4777 AssertMsg(!pCtx->fExtrn || (pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL), ("%#RX64\n", pCtx->fExtrn));
4778
4779 /*
4780 * Restore interrupts.
4781 */
4782 ASMSetFlags(fEFlags);
4783
4784 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatImportGuestState, x);
4785
4786 if (RT_SUCCESS(rc))
4787 { /* likely */ }
4788 else
4789 return rc;
4790
4791 /*
4792 * Honor any pending CR3 updates.
4793 *
4794 * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> VMXR0CallRing3Callback()
4795 * -> VMMRZCallRing3Disable() -> hmR0VmxImportGuestState() -> Sets VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp
4796 * -> continue with VM-exit handling -> hmR0VmxImportGuestState() and here we are.
4797 *
4798 * The reason for such complicated handling is because VM-exits that call into PGM expect CR3 to be up-to-date and thus
4799 * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that
4800 * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should
4801 * -NOT- check if CPUMCTX_EXTRN_CR3 is set!
4802 *
4803 * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here.
4804 *
4805 * The force-flag is checked first as it's cheaper for potential superfluous calls to this function.
4806 */
4807 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)
4808 && VMMRZCallRing3IsEnabled(pVCpu))
4809 {
4810 Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & CPUMCTX_EXTRN_CR3));
4811 PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
4812 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
4813 }
4814
4815 return VINF_SUCCESS;
4816}
4817
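/*
 * A compiled-out sketch (hypothetical helper, for illustration) of the CR0/CR4
 * reconstruction performed while importing the guest state above, for the non-nested
 * case: bits not covered by the guest/host mask are taken from the VMCS guest value,
 * while host-owned (masked) bits are taken from the read shadow, i.e. what the guest
 * believes it wrote.
 */
#if 0
static uint64_t hmR0VmxExampleReconstructCrX(uint64_t uGuestCrX, uint64_t uReadShadow, uint64_t fGstHostMask)
{
    return (uGuestCrX  & ~fGstHostMask)   /* Guest-owned bits: taken as-is from the VMCS. */
         | (uReadShadow &  fGstHostMask); /* Host-owned bits: taken from the read shadow. */
}
#endif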
4818
4819/**
4820 * Saves the guest state from the VMCS into the guest-CPU context.
4821 *
4822 * @returns VBox status code.
4823 * @param pVCpu The cross context virtual CPU structure.
4824 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4825 */
4826VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
4827{
4828 AssertPtr(pVCpu);
4829 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4830 return hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fWhat);
4831}
4832
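/*
 * A compiled-out sketch (hypothetical helper, for illustration) of the fExtrn bookkeeping
 * used by the import path above: fExtrn tracks which parts of the guest-CPU context still
 * live only in the VMCS, only those bits are read back, and successfully imported bits
 * are cleared so that repeated or overlapping import requests stay cheap.
 */
#if 0
static uint64_t hmR0VmxExampleImportBookkeeping(uint64_t fExtrn, uint64_t fWhat)
{
    fWhat &= fExtrn;    /* Only state still owned by the VMCS needs importing. */
    /* ... read the VMCS fields corresponding to the bits set in fWhat ... */
    fExtrn &= ~fWhat;   /* Those parts now live in the guest-CPU context. */
    return fExtrn;
}
#endif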
4833
4834/**
4835 * Does the necessary state syncing before returning to ring-3 for any reason
4836 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
4837 *
4838 * @returns VBox status code.
4839 * @param pVCpu The cross context virtual CPU structure.
4840 * @param fImportState Whether to import the guest state from the VMCS back
4841 * to the guest-CPU context.
4842 *
4843 * @remarks No-long-jmp zone!!!
4844 */
4845static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
4846{
4847 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4848 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4849
4850 RTCPUID const idCpu = RTMpCpuId();
4851 Log4Func(("HostCpuId=%u\n", idCpu));
4852
4853 /*
4854 * !!! IMPORTANT !!!
4855 * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
4856 */
4857
4858 /* Save the guest state if necessary. */
4859 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4860 if (fImportState)
4861 {
4862 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4863 AssertRCReturn(rc, rc);
4864 }
4865
4866 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
4867 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4868 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
4869
4870 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
4871#ifdef VBOX_STRICT
4872 if (CPUMIsHyperDebugStateActive(pVCpu))
4873 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT);
4874#endif
4875 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4876 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
4877 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
4878
4879 /* Restore host-state bits that VT-x only restores partially. */
4880 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4881 {
4882 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
4883 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4884 }
4885 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4886
4887 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4888 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4889 {
4890 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
4891 if (!fImportState)
4892 {
4893 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
4894 AssertRCReturn(rc, rc);
4895 }
4896 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4897 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
4898 }
4899 else
4900 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
4901
4902 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4903 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4904
4905 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
4906 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
4907 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
4908 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
4909 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
4910 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
4911 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
4912 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
4913 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
4914 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4915
4916 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4917
4918 /** @todo This partially defeats the purpose of having preemption hooks.
4919 * The problem is, deregistering the hooks should be moved to a place that
4920 * lasts until the EMT is about to be destroyed, not done every time we leave HM
4921 * context.
4922 */
4923 int rc = hmR0VmxClearVmcs(pVmcsInfo);
4924 AssertRCReturn(rc, rc);
4925
4926#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4927 /*
4928 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
4929 * clear a shadow VMCS before allowing that VMCS to become active on another
4930 * logical processor. We may or may not be importing guest state which clears
4931 * it, so cover for it here.
4932 *
4933 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
4934 */
4935 if ( pVmcsInfo->pvShadowVmcs
4936 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
4937 {
4938 rc = vmxHCClearShadowVmcs(pVmcsInfo);
4939 AssertRCReturn(rc, rc);
4940 }
4941
4942 /*
4943 * Flag that we need to re-export the host state if we switch to this VMCS before
4944 * executing guest or nested-guest code.
4945 */
4946 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
4947#endif
4948
4949 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
4950 NOREF(idCpu);
4951 return VINF_SUCCESS;
4952}
4953
4954
4955/**
4956 * Leaves the VT-x session.
4957 *
4958 * @returns VBox status code.
4959 * @param pVCpu The cross context virtual CPU structure.
4960 *
4961 * @remarks No-long-jmp zone!!!
4962 */
4963static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
4964{
4965 HM_DISABLE_PREEMPT(pVCpu);
4966 HMVMX_ASSERT_CPU_SAFE(pVCpu);
4967 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4968 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4969
4970 /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
4971 and done this from the VMXR0ThreadCtxCallback(). */
4972 if (!pVCpu->hmr0.s.fLeaveDone)
4973 {
4974 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
4975 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
4976 pVCpu->hmr0.s.fLeaveDone = true;
4977 }
4978 Assert(!pVCpu->cpum.GstCtx.fExtrn);
4979
4980 /*
4981 * !!! IMPORTANT !!!
4982 * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
4983 */
4984
4985 /* Deregister hook now that we've left HM context before re-enabling preemption. */
4986 /** @todo Deregistering here means we need to VMCLEAR always
4987 * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need
4988 * for calling VMMR0ThreadCtxHookDisable here! */
4989 VMMR0ThreadCtxHookDisable(pVCpu);
4990
4991 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
4992 int rc = HMR0LeaveCpu(pVCpu);
4993 HM_RESTORE_PREEMPT();
4994 return rc;
4995}
4996
4997
4998/**
4999 * Take necessary actions before going back to ring-3.
5000 *
5001 * An action requires us to go back to ring-3. This function does the necessary
5002 * steps before we can safely return to ring-3. This is not the same as longjmps
5003 * to ring-3; this is voluntary and prepares the guest so it may continue
5004 * executing outside HM (recompiler/IEM).
5005 *
5006 * @returns VBox status code.
5007 * @param pVCpu The cross context virtual CPU structure.
5008 * @param rcExit The reason for exiting to ring-3. Can be
5009 * VINF_VMM_UNKNOWN_RING3_CALL.
5010 */
5011static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
5012{
5013 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5014
5015 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5016 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
5017 {
5018 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
5019 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
5020 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
5021 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
5022 }
5023
5024 /* Please, no longjumps here (any logging could otherwise trigger a flush that jumps back to ring-3). NO LOGGING BEFORE THIS POINT! */
5025 VMMRZCallRing3Disable(pVCpu);
5026 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
5027
5028 /*
5029 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
5030 * We need to do this only on returns to ring-3 and not for longjmps to ring3.
5031 *
5032 * This is because execution may continue from ring-3 and we would need to inject
5033 * the event from there (hence place it back in TRPM).
5034 */
5035 if (pVCpu->hm.s.Event.fPending)
5036 {
5037 vmxHCPendingEventToTrpmTrap(pVCpu);
5038 Assert(!pVCpu->hm.s.Event.fPending);
5039
5040 /* Clear the events from the VMCS. */
5041 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
5042 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
5043 }
5044#ifdef VBOX_STRICT
5045 /*
5046 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
5047 * fatal), we don't care about verifying duplicate injection of events. Errors like
5048 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
5049 * function so those should and will be checked below.
5050 */
5051 else if (RT_SUCCESS(rcExit))
5052 {
5053 /*
5054 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
5055 * This can be pretty hard to debug otherwise, interrupts might get injected twice
5056 * occasionally, see @bugref{9180#c42}.
5057 *
5058 * However, if the VM-entry failed, any VM entry-interruption info. field would
5059 * be left unmodified as the event would not have been injected to the guest. In
5060 * such cases, don't assert, we're not going to continue guest execution anyway.
5061 */
5062 uint32_t uExitReason;
5063 uint32_t uEntryIntInfo;
5064 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
5065 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
5066 AssertRC(rc);
5067 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
5068 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
5069 }
5070#endif
5071
5072 /*
5073 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
5074 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
5075 * (e.g. TPR below threshold).
5076 */
5077 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5078 {
5079 vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
5080 vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
5081 }
5082
5083 /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
5084 and if we're injecting an event we should have a TRPM trap pending. */
5085 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
5086#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
5087 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
5088#endif
5089
5090 /* Save guest state and restore host state bits. */
5091 int rc = hmR0VmxLeaveSession(pVCpu);
5092 AssertRCReturn(rc, rc);
5093 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
5094
5095 /* Thread-context hooks are unregistered at this point!!! */
5096 /* Ring-3 callback notifications are unregistered at this point!!! */
5097
5098 /* Sync recompiler state. */
5099 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
5100 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
5101 | CPUM_CHANGED_LDTR
5102 | CPUM_CHANGED_GDTR
5103 | CPUM_CHANGED_IDTR
5104 | CPUM_CHANGED_TR
5105 | CPUM_CHANGED_HIDDEN_SEL_REGS);
5106 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
5107 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
5108 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
5109
5110 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
5111
5112 /* Update the exit-to-ring 3 reason. */
5113 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
5114
5115 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
5116 if ( rcExit != VINF_EM_RAW_INTERRUPT
5117 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5118 {
5119 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
5120 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5121 }
5122
5123 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
5124 VMMRZCallRing3Enable(pVCpu);
5125 return rc;
5126}
5127
5128
5129/**
5130 * VMMRZCallRing3() callback wrapper which saves the guest state before we
5131 * longjump due to a ring-0 assertion.
5132 *
5133 * @returns VBox status code.
5134 * @param pVCpu The cross context virtual CPU structure.
5135 */
5136VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
5137{
5138 /*
5139 * !!! IMPORTANT !!!
5140 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
5141 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
5142 */
5143 VMMR0AssertionRemoveNotification(pVCpu);
5144 VMMRZCallRing3Disable(pVCpu);
5145 HM_DISABLE_PREEMPT(pVCpu);
5146
5147 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5148 vmxHCImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
5149 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
5150 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
5151
5152 /* Restore host-state bits that VT-x only restores partially. */
5153 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
5154 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
5155 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
5156
5157 /* Restore the lazy host MSRs as we're leaving VT-x context. */
5158 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
5159 hmR0VmxLazyRestoreHostMsrs(pVCpu);
5160
5161 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
5162 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
5163 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
5164
5165 /* Clear the current VMCS data back to memory (the shadow VMCS, if any, would have been
5166 cleared as part of importing the guest state above). */
5167 hmR0VmxClearVmcs(pVmcsInfo);
5168
5169 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
5170 VMMR0ThreadCtxHookDisable(pVCpu);
5171
5172 /* Leave HM context. This takes care of local init (term). */
5173 HMR0LeaveCpu(pVCpu);
5174 HM_RESTORE_PREEMPT();
5175 return VINF_SUCCESS;
5176}
5177
5178
5179/**
5180 * Enters the VT-x session.
5181 *
5182 * @returns VBox status code.
5183 * @param pVCpu The cross context virtual CPU structure.
5184 */
5185VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
5186{
5187 AssertPtr(pVCpu);
5188 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
5189 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5190
5191 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5192 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5193 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5194
5195#ifdef VBOX_STRICT
5196 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
5197 RTCCUINTREG uHostCr4 = ASMGetCR4();
5198 if (!(uHostCr4 & X86_CR4_VMXE))
5199 {
5200 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
5201 return VERR_VMX_X86_CR4_VMXE_CLEARED;
5202 }
5203#endif
5204
5205 /*
5206 * Do the EMT scheduled L1D and MDS flush here if needed.
5207 */
5208 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5209 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5210 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5211 hmR0MdsClear();
5212
5213 /*
5214 * Load the appropriate VMCS as the current and active one.
5215 */
5216 PVMXVMCSINFO pVmcsInfo;
5217 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
5218 if (!fInNestedGuestMode)
5219 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
5220 else
5221 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5222 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
5223 if (RT_SUCCESS(rc))
5224 {
5225 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
5226 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
5227 pVCpu->hmr0.s.fLeaveDone = false;
5228 Log4Func(("Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5229 }
5230 return rc;
5231}
5232
5233
5234/**
5235 * The thread-context callback.
5236 *
5237 * This is used together with RTThreadCtxHookCreate() on platforms which
5238 * support it, and directly from VMMR0EmtPrepareForBlocking() and
5239 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
5240 *
5241 * @param enmEvent The thread-context event.
5242 * @param pVCpu The cross context virtual CPU structure.
5243 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
5244 * @thread EMT(pVCpu)
5245 */
5246VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
5247{
5248 AssertPtr(pVCpu);
5249 RT_NOREF1(fGlobalInit);
5250
5251 switch (enmEvent)
5252 {
5253 case RTTHREADCTXEVENT_OUT:
5254 {
5255 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5256 VMCPU_ASSERT_EMT(pVCpu);
5257
5258 /* No longjmps (logger flushes, locks) in this fragile context. */
5259 VMMRZCallRing3Disable(pVCpu);
5260 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
5261
5262 /* Restore host-state (FPU, debug etc.) */
5263 if (!pVCpu->hmr0.s.fLeaveDone)
5264 {
5265 /*
5266 * Do -not- import the guest-state here as we might already be in the middle of importing
5267 * it, esp. bad if we're holding the PGM lock, see comment in hmR0VmxImportGuestState().
5268 */
5269 hmR0VmxLeave(pVCpu, false /* fImportState */);
5270 pVCpu->hmr0.s.fLeaveDone = true;
5271 }
5272
5273 /* Leave HM context, takes care of local init (term). */
5274 int rc = HMR0LeaveCpu(pVCpu);
5275 AssertRC(rc);
5276
5277 /* Restore longjmp state. */
5278 VMMRZCallRing3Enable(pVCpu);
5279 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
5280 break;
5281 }
5282
5283 case RTTHREADCTXEVENT_IN:
5284 {
5285 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5286 VMCPU_ASSERT_EMT(pVCpu);
5287
5288 /* Do the EMT scheduled L1D and MDS flush here if needed. */
5289 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5290 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5291 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5292 hmR0MdsClear();
5293
5294 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
5295 VMMRZCallRing3Disable(pVCpu);
5296 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
5297
5298 /* Initialize the bare minimum state required for HM. This takes care of
5299 initializing VT-x if necessary (onlined CPUs, local init etc.) */
5300 int rc = hmR0EnterCpu(pVCpu);
5301 AssertRC(rc);
5302 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5303 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5304
5305 /* Load the active VMCS as the current one. */
5306 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5307 rc = hmR0VmxLoadVmcs(pVmcsInfo);
5308 AssertRC(rc);
5309 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5310 pVCpu->hmr0.s.fLeaveDone = false;
5311
5312 /* Restore longjmp state. */
5313 VMMRZCallRing3Enable(pVCpu);
5314 break;
5315 }
5316
5317 default:
5318 break;
5319 }
5320}
5321
5322
5323/**
5324 * Exports the host state into the VMCS host-state area.
5325 * Sets up the VM-exit MSR-load area.
5326 *
5327 * The CPU state will be loaded from these fields on every successful VM-exit.
5328 *
5329 * @returns VBox status code.
5330 * @param pVCpu The cross context virtual CPU structure.
5331 *
5332 * @remarks No-long-jump zone!!!
5333 */
5334static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
5335{
5336 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5337
5338 int rc = VINF_SUCCESS;
5339 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
5340 {
5341 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
5342
5343 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
5344 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5345
5346 hmR0VmxExportHostMsrs(pVCpu);
5347
5348 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
5349 }
5350 return rc;
5351}
5352
5353
5354/**
5355 * Saves the host state in the VMCS host-state.
5356 *
5357 * @returns VBox status code.
5358 * @param pVCpu The cross context virtual CPU structure.
5359 *
5360 * @remarks No-long-jump zone!!!
5361 */
5362VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
5363{
5364 AssertPtr(pVCpu);
5365 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5366
5367 /*
5368 * Export the host state here while entering HM context.
5369 * When thread-context hooks are used, we might get preempted and have to re-save the host
5370 * state but most of the time we won't be, so do it here before we disable interrupts.
5371 */
5372 return hmR0VmxExportHostState(pVCpu);
5373}
5374
5375
5376/**
5377 * Exports the guest state into the VMCS guest-state area.
5378 *
5379 * This will typically be done before VM-entry when the guest-CPU state and the
5380 * VMCS state may potentially be out of sync.
5381 *
5382 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
5383 * VM-entry controls.
5384 * Sets up the appropriate VMX non-root function to execute guest code based on
5385 * the guest CPU mode.
5386 *
5387 * @returns VBox strict status code.
5388 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5389 * without unrestricted guest execution and the VMMDev is not presently
5390 * mapped (e.g. EFI32).
5391 *
5392 * @param pVCpu The cross context virtual CPU structure.
5393 * @param pVmxTransient The VMX-transient structure.
5394 *
5395 * @remarks No-long-jump zone!!!
5396 */
5397static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5398{
5399 AssertPtr(pVCpu);
5400 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5401 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5402
5403 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
5404
5405 /*
5406 * Determine real-on-v86 mode.
5407 * Used when the guest is in real-mode and unrestricted guest execution is not used.
5408 */
5409 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
5410 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
5411 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
5412 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
5413 else
5414 {
5415 Assert(!pVmxTransient->fIsNestedGuest);
5416 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
5417 }
5418
5419 /*
5420 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
5421 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
5422 */
5423 int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
5424 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5425
5426 rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
5427 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5428
5429 VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
5430 if (rcStrict == VINF_SUCCESS)
5431 { /* likely */ }
5432 else
5433 {
5434 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
5435 return rcStrict;
5436 }
5437
5438 rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
5439 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5440
5441 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
5442 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5443
5444 vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
5445 vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
5446 vmxHCExportGuestRip(pVCpu);
5447 hmR0VmxExportGuestRsp(pVCpu);
5448 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5449
5450 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5451 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5452
5453 /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
5454 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
5455 | HM_CHANGED_GUEST_CR2
5456 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
5457 | HM_CHANGED_GUEST_X87
5458 | HM_CHANGED_GUEST_SSE_AVX
5459 | HM_CHANGED_GUEST_OTHER_XSAVE
5460 | HM_CHANGED_GUEST_XCRx
5461 | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
5462 | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
5463 | HM_CHANGED_GUEST_TSC_AUX
5464 | HM_CHANGED_GUEST_OTHER_MSRS
5465 | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
5466
5467 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
5468 return rc;
5469}
5470
5471
5472/**
5473 * Exports the state shared between the host and guest into the VMCS.
5474 *
5475 * @param pVCpu The cross context virtual CPU structure.
5476 * @param pVmxTransient The VMX-transient structure.
5477 *
5478 * @remarks No-long-jump zone!!!
5479 */
5480static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5481{
5482 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5483 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5484
5485 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
5486 {
5487 int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
5488 AssertRC(rc);
5489 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
5490
5491 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
5492 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
5493 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5494 }
5495
5496 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
5497 {
5498 hmR0VmxLazyLoadGuestMsrs(pVCpu);
5499 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
5500 }
5501
5502 AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
5503 ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
5504}
5505
5506
5507/**
5508 * Worker for loading the guest-state bits in the inner VT-x execution loop.
5509 *
5510 * @returns Strict VBox status code (i.e. informational status codes too).
5511 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5512 * without unrestricted guest execution and the VMMDev is not presently
5513 * mapped (e.g. EFI32).
5514 *
5515 * @param pVCpu The cross context virtual CPU structure.
5516 * @param pVmxTransient The VMX-transient structure.
5517 *
5518 * @remarks No-long-jump zone!!!
5519 */
5520static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5521{
5522 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5523 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5524
5525#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
5526 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5527#endif
5528
5529 /*
5530 * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
5531 * change. First try to export only these without going through all other changed-flag checks.
5532 */
5533 VBOXSTRICTRC rcStrict;
5534 uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
5535 uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
5536 uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5537
5538 /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often).*/
5539 if ( (fCtxChanged & fMinimalMask)
5540 && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
5541 {
5542 vmxHCExportGuestRip(pVCpu);
5543 hmR0VmxExportGuestRsp(pVCpu);
5544 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5545 rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5546 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
5547 }
5548 /* If anything else also changed, go through the full export routine and export as required. */
5549 else if (fCtxChanged & fCtxMask)
5550 {
5551 rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
5552 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5553 { /* likely */}
5554 else
5555 {
5556 AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
5557 VBOXSTRICTRC_VAL(rcStrict)));
5558 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5559 return rcStrict;
5560 }
5561 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
5562 }
5563 /* Nothing changed, nothing to load here. */
5564 else
5565 rcStrict = VINF_SUCCESS;
5566
5567#ifdef VBOX_STRICT
5568 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
5569 uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5570 AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
5571#endif
5572 return rcStrict;
5573}
5574
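/*
 * A compiled-out sketch (hypothetical helper, for illustration) of the fast-path test in
 * hmR0VmxExportGuestStateOptimal above: take the minimal export path only when at least
 * one of the frequently changing bits (RIP/RSP/RFLAGS/HWVIRT) is dirty and nothing else
 * that we would have to export is.
 */
#if 0
static bool hmR0VmxExampleCanUseMinimalExport(uint64_t fCtxChanged, uint64_t fCtxMask, uint64_t fMinimalMask)
{
    return (fCtxChanged & fMinimalMask)                    /* Something on the fast path changed... */
        && !(fCtxChanged & (fCtxMask & ~fMinimalMask));    /* ...and nothing outside it did. */
}
#endif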
5575
5576/**
5577 * Maps the APIC-access page for virtualizing APIC accesses.
5578 *
5579 * This can cause longjmps to ring-3 due to the acquisition of the PGM lock. Hence,
5580 * this is not done as part of exporting guest state; see @bugref{8721}.
5581 *
5582 * @returns VBox status code.
5583 * @param pVCpu The cross context virtual CPU structure.
5584 * @param GCPhysApicBase The guest-physical address of the APIC access page.
5585 */
5586static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu, RTGCPHYS GCPhysApicBase)
5587{
5588 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5589 Assert(GCPhysApicBase);
5590
5591 LogFunc(("Mapping HC APIC-access page at %#RGp\n", GCPhysApicBase));
5592
5593 /* Unalias the existing mapping. */
5594 int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
5595 AssertRCReturn(rc, rc);
5596
5597 /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
5598 Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
5599 rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
5600 AssertRCReturn(rc, rc);
5601
5602 return VINF_SUCCESS;
5603}
5604
5605
5606/**
5607 * Worker function passed to RTMpOnSpecific() that is to be called on the target
5608 * CPU.
5609 *
5610 * @param idCpu The ID for the CPU the function is called on.
5611 * @param pvUser1 Null, not used.
5612 * @param pvUser2 Null, not used.
5613 */
5614static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
5615{
5616 RT_NOREF3(idCpu, pvUser1, pvUser2);
5617 VMXDispatchHostNmi();
5618}
5619
5620
5621/**
5622 * Dispatches an NMI on the host CPU that received it.
5623 *
5624 * @returns VBox status code.
5625 * @param pVCpu The cross context virtual CPU structure.
5626 * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
5627 * executing when receiving the host NMI in VMX non-root
5628 * operation.
5629 */
5630static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
5631{
5632 RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
5633 Assert(idCpu != NIL_RTCPUID);
5634
5635 /*
5636 * We don't want to delay dispatching the NMI any more than we have to. However,
5637 * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
5638 * after executing guest or nested-guest code for the following reasons:
5639 *
5640 * - We would need to perform VMREADs with interrupts disabled, which is orders of
5641 * magnitude worse when we run as a nested hypervisor without VMCS shadowing
5642 * supported by the host hypervisor.
5643 *
5644 * - It affects the common VM-exit scenario and keeps interrupts disabled for a
5645 * longer period of time just for handling an edge case like host NMIs which do
5646 * not occur nearly as frequently as other VM-exits.
5647 *
5648 * Let's cover the most likely scenario first. Check if we are on the target CPU
5649 * and dispatch the NMI right away. This should be much faster than calling into
5650 * RTMpOnSpecific() machinery.
5651 */
5652 bool fDispatched = false;
5653 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
5654 if (idCpu == RTMpCpuId())
5655 {
5656 VMXDispatchHostNmi();
5657 fDispatched = true;
5658 }
5659 ASMSetFlags(fEFlags);
5660 if (fDispatched)
5661 {
5662 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
5663 return VINF_SUCCESS;
5664 }
5665
5666 /*
5667 * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
5668 * there should be no race or recursion even if we are unlucky enough to be preempted
5669 * (to the target CPU) without dispatching the host NMI above.
5670 */
5671 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
5672 return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
5673}
5674
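/*
 * A compiled-out sketch (hypothetical helper, for illustration) of the dispatch pattern
 * used by hmR0VmxExitHostNmi above: run a worker on a specific host CPU, taking the fast
 * path when we are already on that CPU and falling back to RTMpOnSpecific() otherwise.
 */
#if 0
static int hmR0VmxExampleRunOnCpu(RTCPUID idCpu, PFNRTMPWORKER pfnWorker)
{
    RTCCUINTREG const fEFlags = ASMIntDisableFlags();   /* Don't get migrated between the check and the call. */
    bool const fOnTargetCpu = (idCpu == RTMpCpuId());
    if (fOnTargetCpu)
        pfnWorker(idCpu, NULL /* pvUser1 */, NULL /* pvUser2 */);
    ASMSetFlags(fEFlags);
    if (fOnTargetCpu)
        return VINF_SUCCESS;
    return RTMpOnSpecific(idCpu, pfnWorker, NULL /* pvUser1 */, NULL /* pvUser2 */);
}
#endif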
5675
5676#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5677/**
5678 * Merges the guest with the nested-guest MSR bitmap in preparation of executing the
5679 * nested-guest using hardware-assisted VMX.
5680 *
5681 * @param pVCpu The cross context virtual CPU structure.
5682 * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
5683 * @param pVmcsInfoGst The guest VMCS info. object.
5684 */
5685static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
5686{
5687 uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
5688 uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
5689 Assert(pu64MsrBitmap);
5690
5691 /*
5692 * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
5693 * MSR that is intercepted by the guest is also intercepted while executing the
5694 * nested-guest using hardware-assisted VMX.
5695 *
5696 * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
5697 * nested-guest VM-exit even if the outer guest is not intercepting some
5698 * MSRs. We cannot assume the caller has initialized the nested-guest
5699 * MSR bitmap in this case.
5700 *
5701 * The nested hypervisor may also switch whether it uses MSR bitmaps for
5702 * each of its VM-entries, hence initializing it once per-VM while setting
5703 * up the nested-guest VMCS is not sufficient.
5704 */
5705 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5706 if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5707 {
5708 uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
5709 uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
5710 Assert(pu64MsrBitmapNstGst);
5711 Assert(pu64MsrBitmapGst);
5712
5713 /** @todo Detect and use EVEX.POR? */
5714 uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
5715 for (uint32_t i = 0; i < cFrags; i++)
5716 pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
5717 }
5718 else
5719 ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
5720}
5721
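/*
 * A compiled-out sketch (hypothetical helper, for illustration) of the bitmap merge in
 * hmR0VmxMergeMsrBitmapNested above: in VMX MSR bitmaps a set bit means "intercept", so
 * OR-ing the guest and nested-guest bitmaps yields a bitmap that traps an MSR access
 * whenever either party wants it trapped.
 */
#if 0
static void hmR0VmxExampleMergeMsrBitmaps(uint64_t *pau64Dst, const uint64_t *pau64NstGst, const uint64_t *pau64Gst)
{
    for (uint32_t i = 0; i < X86_PAGE_4K_SIZE / sizeof(uint64_t); i++)
        pau64Dst[i] = pau64NstGst[i] | pau64Gst[i];   /* Union of the intercepts. */
}
#endif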
5722
5723/**
5724 * Merges the guest VMCS in to the nested-guest VMCS controls in preparation of
5725 * hardware-assisted VMX execution of the nested-guest.
5726 *
5727 * For a guest, we don't modify these controls once we set up the VMCS and hence
5728 * this function is never called.
5729 *
5730 * For nested-guests, since the nested hypervisor provides these controls on every
5731 * nested-guest VM-entry and could potentially change them every time, we need to
5732 * merge them before every nested-guest VM-entry.
5733 *
5734 * @returns VBox status code.
5735 * @param pVCpu The cross context virtual CPU structure.
5736 */
5737static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
5738{
5739 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5740 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
5741 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5742
5743 /*
5744 * Merge the controls with the requirements of the guest VMCS.
5745 *
5746 * We do not need to validate the nested-guest VMX features specified in the nested-guest
5747 * VMCS with the features supported by the physical CPU as it's already done by the
5748 * VMLAUNCH/VMRESUME instruction emulation.
5749 *
5750 * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
5751 * derived from the VMX features supported by the physical CPU.
5752 */
5753
5754 /* Pin-based VM-execution controls. */
5755 uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
5756
5757 /* Processor-based VM-execution controls. */
5758 uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
5759 | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
5760 | VMX_PROC_CTLS_NMI_WINDOW_EXIT
5761 | VMX_PROC_CTLS_MOV_DR_EXIT
5762 | VMX_PROC_CTLS_USE_TPR_SHADOW
5763 | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
5764
5765 /* Secondary processor-based VM-execution controls. */
5766 uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
5767 | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
5768 | VMX_PROC_CTLS2_INVPCID
5769 | VMX_PROC_CTLS2_VMCS_SHADOWING
5770 | VMX_PROC_CTLS2_RDTSCP
5771 | VMX_PROC_CTLS2_XSAVES_XRSTORS
5772 | VMX_PROC_CTLS2_APIC_REG_VIRT
5773 | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
5774 | VMX_PROC_CTLS2_VMFUNC));
5775
5776 /*
5777 * VM-entry controls:
5778 * These controls contain state that depends on the nested-guest state (primarily
5779 * EFER MSR) and is thus not constant between VMLAUNCH/VMRESUME and the nested-guest
5780 * VM-exit. Although the nested hypervisor cannot change it, we need to in order to
5781 * properly continue executing the nested-guest if the EFER MSR changes but does not
5782 * cause a nested-guest VM-exit.
5783 *
5784 * VM-exit controls:
5785 * These controls specify the host state on return. We cannot use the controls from
5786 * the nested hypervisor state as-is, as they would contain the guest state rather than
5787 * the host state. Since the host state is subject to change (e.g. preemption, trips
5788 * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
5789 * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
5790 *
5791 * VM-entry MSR-load:
5792 * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
5793 * context by the VMLAUNCH/VMRESUME instruction emulation.
5794 *
5795 * VM-exit MSR-store:
5796 * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
5797 * back into the VM-exit MSR-store area.
5798 *
5799 * VM-exit MSR-load areas:
5800 * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
5801 * can entirely ignore what the nested hypervisor wants to load here.
5802 */
5803
5804 /*
5805 * Exception bitmap.
5806 *
5807 * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
5808 * here (and avoid doing anything while exporting nested-guest state), but to keep the
5809 * code more flexible if intercepting exceptions become more dynamic in the future we do
5810 * it as part of exporting the nested-guest state.
5811 */
5812 uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
5813
5814 /*
5815 * CR0/CR4 guest/host mask.
5816 *
5817 * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
5818 * cause VM-exits, so we need to merge them here.
5819 */
5820 uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
5821 uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
5822
5823 /*
5824 * Page-fault error-code mask and match.
5825 *
5826 * Although we require unrestricted guest execution (and thereby nested-paging) for
5827 * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
5828 * normally intercept #PFs, it might intercept them for debugging purposes.
5829 *
5830 * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
5831 * If the outer guest is intercepting #PFs, we must intercept all #PFs.
5832 */
5833 uint32_t u32XcptPFMask;
5834 uint32_t u32XcptPFMatch;
5835 if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
5836 {
5837 u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
5838 u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
5839 }
5840 else
5841 {
5842 u32XcptPFMask = 0;
5843 u32XcptPFMatch = 0;
5844 }
5845
5846 /*
5847 * Pause-Loop exiting.
5848 */
5849 /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
5850 * pVM->hm.s.vmx.cPleWindowTicks defaults to zero, I cannot see how
5851 * this will work... */
5852 uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
5853 uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
5854
5855 /*
5856 * Pending debug exceptions.
5857 * Currently just copy whatever the nested-guest provides us.
5858 */
5859 uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
5860
5861 /*
5862 * I/O Bitmap.
5863 *
5864 * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
5865 * intercept all I/O port accesses.
5866 */
5867 Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
5868 Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
5869
5870 /*
5871 * VMCS shadowing.
5872 *
5873 * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
5874 * enabled while executing the nested-guest.
5875 */
5876 Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
5877
5878 /*
5879 * APIC-access page.
5880 */
5881 RTHCPHYS HCPhysApicAccess;
5882 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5883 {
5884 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
5885 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
5886
5887 /** @todo NSTVMX: This is not really correct but currently is required to make
5888 * things work. We need to re-enable the page handler when we fallback to
5889 * IEM execution of the nested-guest! */
5890 PGMHandlerPhysicalPageTempOff(pVM, GCPhysApicAccess, GCPhysApicAccess);
5891
5892 void *pvPage;
5893 PGMPAGEMAPLOCK PgLockApicAccess;
5894 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
5895 if (RT_SUCCESS(rc))
5896 {
5897 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
5898 AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
5899
5900 /** @todo Handle proper releasing of page-mapping lock later. */
5901 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
5902 }
5903 else
5904 return rc;
5905 }
5906 else
5907 HCPhysApicAccess = 0;
5908
5909 /*
5910 * Virtual-APIC page and TPR threshold.
5911 */
5912 RTHCPHYS HCPhysVirtApic;
5913 uint32_t u32TprThreshold;
5914 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
5915 {
5916 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
5917 RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
5918
5919 void *pvPage;
5920 PGMPAGEMAPLOCK PgLockVirtApic;
5921 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
5922 if (RT_SUCCESS(rc))
5923 {
5924 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
5925 AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
5926
5927 /** @todo Handle proper releasing of page-mapping lock later. */
5928 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
5929 }
5930 else
5931 return rc;
5932
5933 u32TprThreshold = pVmcsNstGst->u32TprThreshold;
5934 }
5935 else
5936 {
5937 HCPhysVirtApic = 0;
5938 u32TprThreshold = 0;
5939
5940 /*
5941 * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not
5942 * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC will
5943 * be taken care of by EPT/shadow paging.
5944 */
5945 if (pVM->hmr0.s.fAllow64BitGuests)
5946 u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
5947 | VMX_PROC_CTLS_CR8_LOAD_EXIT;
5948 }
5949
5950 /*
5951 * Validate basic assumptions.
5952 */
5953 PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5954 Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
5955 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
5956 Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
5957
5958 /*
5959 * Commit it to the nested-guest VMCS.
5960 */
5961 int rc = VINF_SUCCESS;
5962 if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
5963 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
5964 if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
5965 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
5966 if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
5967 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
5968 if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
5969 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
5970 if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
5971 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
5972 if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
5973 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
5974 if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
5975 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
5976 if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
5977 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
5978 if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
5979 && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
5980 {
5981 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
5982 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
5983 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
5984 }
5985 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
5986 {
5987 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
5988 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
5989 }
5990 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5991 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
5992 rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
5993 AssertRC(rc);
5994
5995 /*
5996 * Update the nested-guest VMCS cache.
5997 */
5998 pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
5999 pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
6000 pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
6001 pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
6002 pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
6003 pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
6004 pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
6005 pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
6006 pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
6007
6008 /*
6009 * We need to flush the TLB if we are switching the APIC-access page address.
6010 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
6011 */
6012 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6013 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
6014
6015 /*
6016 * MSR bitmap.
6017 *
6018 * The MSR bitmap address has already been initialized while setting up the nested-guest
6019 * VMCS; here we need to merge the MSR bitmaps (see the conceptual sketch after this function).
6020 */
6021 if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
6022 hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
6023
6024 return VINF_SUCCESS;
6025}
6026#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
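/*
 * Editorial sketch -- not upstream code and referenced nowhere: the merge done by
 * hmR0VmxMergeMsrBitmapNested() conceptually boils down to OR-ing the outer VMM's
 * and the nested hypervisor's 4K MSR bitmaps, since an intercept ('1' bit)
 * requested by either side must survive; the real function may additionally
 * special-case individual MSRs.
 */
DECLINLINE(void) hmVmxExampleMergeMsrBitmaps(uint64_t *pau64Dst, uint64_t const *pau64Outer,
                                             uint64_t const *pau64NstGst)
{
    for (uint32_t i = 0; i < X86_PAGE_4K_SIZE / sizeof(uint64_t); i++)
        pau64Dst[i] = pau64Outer[i] | pau64NstGst[i];
}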
6027
6028
6029/**
6030 * Does the preparations before executing guest code in VT-x.
6031 *
6032 * This may cause longjmps to ring-3 and may even result in rescheduling to the
6033 * recompiler/IEM. We must be cautious about committing guest-state
6034 * information into the VMCS here, since we cannot assume that we will
6035 * assuredly execute the guest in VT-x mode.
6036 *
6037 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
6038 * the common-state (TRPM/forceflags), we must undo those changes so that the
6039 * recompiler/IEM can (and should) use them when it resumes guest execution.
6040 * Otherwise such operations must be done when we can no longer exit to ring-3.
6041 *
6042 * @returns Strict VBox status code (i.e. informational status codes too).
6043 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
6044 * have been disabled.
6045 * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
6046 * pending events).
6047 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
6048 * double-fault into the guest.
6049 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
6050 * dispatched directly.
6051 * @retval VINF_* scheduling changes, we have to go back to ring-3.
6052 *
6053 * @param pVCpu The cross context virtual CPU structure.
6054 * @param pVmxTransient The VMX-transient structure.
6055 * @param fStepping Whether we are single-stepping the guest in the
6056 * hypervisor debugger. Makes us ignore some of the reasons
6057 * for returning to ring-3, and return VINF_EM_DBG_STEPPED
6058 * if event dispatching took place.
6059 */
6060static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
6061{
6062 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6063
6064 Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
6065
6066#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
6067 if (pVmxTransient->fIsNestedGuest)
6068 {
6069 RT_NOREF2(pVCpu, fStepping);
6070 Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
6071 return VINF_EM_RESCHEDULE_REM;
6072 }
6073#endif
6074
6075 /*
6076 * Check and process force flag actions, some of which might require us to go back to ring-3.
6077 */
6078 VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping);
6079 if (rcStrict == VINF_SUCCESS)
6080 {
6081 /* FFs don't get set all the time. */
6082#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6083 if ( pVmxTransient->fIsNestedGuest
6084 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6085 {
6086 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6087 return VINF_VMX_VMEXIT;
6088 }
6089#endif
6090 }
6091 else
6092 return rcStrict;
6093
6094 /*
6095 * Virtualize memory-mapped accesses to the physical APIC (may take locks).
6096 */
6097 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6098 if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
6099 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6100 && PDMHasApic(pVM))
6101 {
6102 /* Get the APIC base MSR from the virtual APIC device. */
6103 uint64_t const uApicBaseMsr = APICGetBaseMsrNoCheck(pVCpu);
6104
6105 /* Map the APIC access page. */
6106 int rc = hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & PAGE_BASE_GC_MASK);
6107 AssertRCReturn(rc, rc);
6108
6109 /* Update the per-VCPU cache of the APIC base MSR corresponding to the mapped APIC access page. */
6110 pVCpu->hm.s.vmx.u64GstMsrApicBase = uApicBaseMsr;
6111 }
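/*
 * Editorial note (illustration): IA32_APIC_BASE packs the 4K-aligned physical base
 * address together with flag bits (bit 8 = BSP, bit 10 = x2APIC enable, bit 11 =
 * xAPIC global enable), so the PAGE_BASE_GC_MASK above strips the flags; e.g. a
 * raw MSR value of 0xFEE00900 yields 0xFEE00000 as the page to map.
 */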
6112
6113#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6114 /*
6115 * Merge guest VMCS controls with the nested-guest VMCS controls.
6116 *
6117 * Even if we have not executed the guest prior to this (e.g. when resuming from a
6118 * saved state), we should be okay with merging controls as we initialize the
6119 * guest VMCS controls as part of VM setup phase.
6120 */
6121 if ( pVmxTransient->fIsNestedGuest
6122 && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
6123 {
6124 int rc = hmR0VmxMergeVmcsNested(pVCpu);
6125 AssertRCReturn(rc, rc);
6126 pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
6127 }
6128#endif
6129
6130 /*
6131 * Evaluate events to be injected into the guest.
6132 *
6133 * Events in TRPM can be injected without inspecting the guest state.
6134 * If any new events (interrupts/NMI) are currently pending, we try to set up the
6135 * guest to cause a VM-exit the next time it is ready to receive the event.
6136 */
6137 if (TRPMHasTrap(pVCpu))
6138 vmxHCTrpmTrapToPendingEvent(pVCpu);
6139
6140 uint32_t fIntrState;
6141 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
6142 &fIntrState);
6143
6144#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6145 /*
6146 * While evaluating pending events if something failed (unlikely) or if we were
6147 * preparing to run a nested-guest but performed a nested-guest VM-exit, we should bail.
6148 */
6149 if (rcStrict != VINF_SUCCESS)
6150 return rcStrict;
6151 if ( pVmxTransient->fIsNestedGuest
6152 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6153 {
6154 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6155 return VINF_VMX_VMEXIT;
6156 }
6157#else
6158 Assert(rcStrict == VINF_SUCCESS);
6159#endif
6160
6161 /*
6162 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus
6163 * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
6164 * also result in triple-faulting the VM.
6165 *
6166 * With nested-guests, the above does not apply since unrestricted guest execution is a
6167 * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
6168 */
6169 rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
6170 fIntrState, fStepping);
6171 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6172 { /* likely */ }
6173 else
6174 {
6175 AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
6176 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
6177 return rcStrict;
6178 }
6179
6180 /*
6181 * A longjump might result in importing CR3 even for VM-exits that don't necessarily
6182 * import CR3 themselves. We will need to update them here, as even as late as the above
6183 * vmxHCInjectPendingEvent() call may lazily import guest-CPU state on demand, causing
6184 * the below force flags to be set.
6185 */
6186 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
6187 {
6188 Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
6189 int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
6190 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
6191 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
6192 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6193 }
6194
6195#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6196 /* Paranoia. */
6197 Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6198#endif
6199
6200 /*
6201 * No longjmps to ring-3 from this point on!!!
6202 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
6203 * This also disables flushing of the R0-logger instance (if any).
6204 */
6205 VMMRZCallRing3Disable(pVCpu);
6206
6207 /*
6208 * Export the guest state bits.
6209 *
6210 * We cannot perform longjmps while loading the guest state because we do not preserve the
6211 * host/guest state (although the VMCS will be preserved) across longjmps which can cause
6212 * CPU migration.
6213 *
6214 * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
6215 * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
6216 */
6217 rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
6218 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6219 { /* likely */ }
6220 else
6221 {
6222 VMMRZCallRing3Enable(pVCpu);
6223 return rcStrict;
6224 }
6225
6226 /*
6227 * We disable interrupts so that we don't miss any interrupts that would flag preemption
6228 * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
6229 * preemption disabled for a while. Since this is purely to aid the
6230 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily re-enable and
6231 * disable interrupts on NT.
6232 *
6233 * We need to check for force-flags that could've possibly been altered since we last
6234 * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
6235 * see @bugref{6398}).
6236 *
6237 * We also check a couple of other force-flags as a last opportunity to get the EMT back
6238 * to ring-3 before executing guest code.
6239 */
6240 pVmxTransient->fEFlags = ASMIntDisableFlags();
6241
6242 if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
6243 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6244 || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
6245 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
6246 {
6247 if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
6248 {
6249#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6250 /*
6251 * If we are executing a nested-guest, make sure that we intercept subsequent
6252 * events. The one we are injecting might be part of VM-entry. This is mainly to keep
6253 * the VM-exit instruction emulation happy.
6254 */
6255 if (pVmxTransient->fIsNestedGuest)
6256 CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
6257#endif
6258
6259 /*
6260 * We've injected any pending events. This is really the point of no return (to ring-3).
6261 *
6262 * Note! The caller expects to continue with interrupts & longjmps disabled on successful
6263 * returns from this function, so do -not- enable them here.
6264 */
6265 pVCpu->hm.s.Event.fPending = false;
6266 return VINF_SUCCESS;
6267 }
6268
6269 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
6270 rcStrict = VINF_EM_RAW_INTERRUPT;
6271 }
6272 else
6273 {
6274 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6275 rcStrict = VINF_EM_RAW_TO_R3;
6276 }
6277
6278 ASMSetFlags(pVmxTransient->fEFlags);
6279 VMMRZCallRing3Enable(pVCpu);
6280
6281 return rcStrict;
6282}
6283
6284
6285/**
6286 * Final preparations before executing guest code using hardware-assisted VMX.
6287 *
6288 * We can no longer get preempted to a different host CPU and there are no returns
6289 * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
6290 * failures), this function is not intended to fail sans unrecoverable hardware
6291 * errors.
6292 *
6293 * @param pVCpu The cross context virtual CPU structure.
6294 * @param pVmxTransient The VMX-transient structure.
6295 *
6296 * @remarks Called with preemption disabled.
6297 * @remarks No-long-jump zone!!!
6298 */
6299static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
6300{
6301 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6302 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6303 Assert(!pVCpu->hm.s.Event.fPending);
6304
6305 /*
6306 * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
6307 */
6308 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6309 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
6310
6311 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6312 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6313 PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
6314 RTCPUID const idCurrentCpu = pHostCpu->idCpu;
6315
6316 if (!CPUMIsGuestFPUStateActive(pVCpu))
6317 {
6318 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6319 if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
6320 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
6321 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6322 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
6323 }
6324
6325 /*
6326 * Re-export the host state bits as we may've been preempted (only happens when
6327 * thread-context hooks are used or when the VM start function changes) or if
6328 * the host CR0 is modified while loading the guest FPU state above.
6329 *
6330 * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
6331 * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
6332 * see @bugref{8432}.
6333 *
6334 * This may also happen when switching to/from a nested-guest VMCS without leaving
6335 * ring-0.
6336 */
6337 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
6338 {
6339 hmR0VmxExportHostState(pVCpu);
6340 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
6341 }
6342 Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
6343
6344 /*
6345 * Export the state shared between host and guest (FPU, debug, lazy MSRs).
6346 */
6347 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
6348 hmR0VmxExportSharedState(pVCpu, pVmxTransient);
6349 AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
6350
6351 /*
6352 * Store status of the shared guest/host debug state at the time of VM-entry.
6353 */
6354 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
6355 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
6356
6357 /*
6358 * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
6359 * more than one conditional check. The post-run side of our code shall determine
6360 * if it needs to sync. the virtual APIC TPR with the TPR-shadow.
6361 */
6362 if (pVmcsInfo->pbVirtApic)
6363 pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
6364
6365 /*
6366 * Update the host MSRs values in the VM-exit MSR-load area.
6367 */
6368 if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
6369 {
6370 if (pVmcsInfo->cExitMsrLoad > 0)
6371 hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
6372 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
6373 }
6374
6375 /*
6376 * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
6377 * VMX-preemption timer based on the next virtual sync clock deadline.
6378 */
6379 if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
6380 || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
6381 {
6382 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
6383 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
6384 }
6385
6386 /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
6387 bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
6388 if (!fIsRdtscIntercepted)
6389 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
6390 else
6391 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
6392
6393 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
6394 hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
6395 Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
6396 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
6397 pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
6398 pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
6399
6400 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
6401
6402 TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
6403 as we're about to start executing the guest. */
6404
6405 /*
6406 * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
6407 *
6408 * This is done this late as updating the TSC offsetting/preemption timer above
6409 * figures out if we can skip intercepting RDTSCP by calculating the number of
6410 * host CPU ticks till the next virtual sync deadline (for the dynamic case).
6411 */
6412 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
6413 && !fIsRdtscIntercepted)
6414 {
6415 vmxHCImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
6416
6417 /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
6418 it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
6419 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
6420 true /* fSetReadWrite */, true /* fUpdateHostMsr */);
6421 AssertRC(rc);
6422 Assert(!pVmxTransient->fRemoveTscAuxMsr);
6423 pVmxTransient->fRemoveTscAuxMsr = true;
6424 }
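/*
 * Editorial note (illustration, per the Intel SDM MSR-area format): each entry in
 * the VM-entry/VM-exit MSR-load/store areas is 128 bits wide -- bits 31:0 hold the
 * MSR index (here MSR_K8_TSC_AUX), bits 63:32 are reserved and bits 127:64 hold
 * the value loaded on VM-entry or stored on VM-exit. The fRemoveTscAuxMsr flag
 * set above makes hmR0VmxPostRunGuest() drop the entry again after the VM-exit,
 * keeping the area minimal when RDTSCP interception toggles from run to run.
 */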
6425
6426#ifdef VBOX_STRICT
6427 Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
6428 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
6429 hmR0VmxCheckHostEferMsr(pVmcsInfo);
6430 AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
6431#endif
6432
6433#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
6434 /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
6435 * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
6436 * see @bugref{9180#c54}. */
6437 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
6438 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
6439 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
6440#endif
6441}
6442
6443
6444/**
6445 * First C routine invoked after running guest code using hardware-assisted VMX.
6446 *
6447 * @param pVCpu The cross context virtual CPU structure.
6448 * @param pVmxTransient The VMX-transient structure.
6449 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
6450 *
6451 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
6452 *
6453 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
6454 * unconditionally when it is safe to do so.
6455 */
6456static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
6457{
6458 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
6459 ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
6460 pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
6461 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
6462 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
6463 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
6464
6465 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6466 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
6467 {
6468 uint64_t uGstTsc;
6469 if (!pVmxTransient->fIsNestedGuest)
6470 uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6471 else
6472 {
6473 uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6474 uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
6475 }
6476 TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
6477 }
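/*
 * Editorial note (illustration): while in VMX non-root mode with TSC offsetting
 * active, RDTSC returns host-TSC + the offset programmed in the current VMCS,
 * which is what the additions above reconstruct. For a nested guest that VMCS
 * offset is the combined outer + nested offset, so CPUMRemoveNestedGuestTscOffset()
 * strips the nested hypervisor's share to recover the outer guest's TSC for TM.
 */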
6478
6479 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
6480 TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
6481 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6482
6483 pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
6484 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
6485#ifdef VBOX_STRICT
6486 hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
6487#endif
6488 Assert(!ASMIntAreEnabled());
6489 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
6490 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6491
6492#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
6493 /*
6494 * Clean all the VMCS fields in the transient structure before reading
6495 * anything from the VMCS.
6496 */
6497 pVmxTransient->uExitReason = 0;
6498 pVmxTransient->uExitIntErrorCode = 0;
6499 pVmxTransient->uExitQual = 0;
6500 pVmxTransient->uGuestLinearAddr = 0;
6501 pVmxTransient->uExitIntInfo = 0;
6502 pVmxTransient->cbExitInstr = 0;
6503 pVmxTransient->ExitInstrInfo.u = 0;
6504 pVmxTransient->uEntryIntInfo = 0;
6505 pVmxTransient->uEntryXcptErrorCode = 0;
6506 pVmxTransient->cbEntryInstr = 0;
6507 pVmxTransient->uIdtVectoringInfo = 0;
6508 pVmxTransient->uIdtVectoringErrorCode = 0;
6509#endif
6510
6511 /*
6512 * Save the basic VM-exit reason and check if the VM-entry failed.
6513 * See Intel spec. 24.9.1 "Basic VM-exit Information".
6514 */
6515 uint32_t uExitReason;
6516 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
6517 AssertRC(rc);
6518 pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
6519 pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
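/*
 * Editorial note (illustration): the basic reason occupies bits 15:0 of the raw
 * exit-reason field and the VM-entry-failure flag is bit 31, e.g. a raw value of
 * 0x80000021 decodes to basic reason 33 (invalid guest state) with the entry
 * failure bit set.
 */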
6520
6521 /*
6522 * Log the VM-exit before logging anything else as otherwise it might be a
6523 * tad confusing what happens before and after the world-switch.
6524 */
6525 HMVMX_LOG_EXIT(pVCpu, uExitReason);
6526
6527 /*
6528 * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
6529 * bitmap permissions, if it was added before VM-entry.
6530 */
6531 if (pVmxTransient->fRemoveTscAuxMsr)
6532 {
6533 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
6534 pVmxTransient->fRemoveTscAuxMsr = false;
6535 }
6536
6537 /*
6538 * Check if VMLAUNCH/VMRESUME succeeded.
6539 * If this failed, we cause a guru meditation and cease further execution.
6540 *
6541 * However, if we are executing a nested-guest we might fail if we use the
6542 * fast path rather than fully emulating VMLAUNCH/VMRESUME instruction in IEM.
6543 */
6544 if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
6545 {
6546 /*
6547 * Update the VM-exit history array here even if the VM-entry failed due to:
6548 * - Invalid guest state.
6549 * - MSR loading.
6550 * - Machine-check event.
6551 *
6552 * In any of the above cases we will still have a "valid" VM-exit reason
6553 * despite @a fVMEntryFailed being true.
6554 *
6555 * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
6556 *
6557 * Note! We don't have CS or RIP at this point. Will probably address that later
6558 * by amending the history entry added here.
6559 */
6560 EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
6561 UINT64_MAX, pVCpu->hmr0.s.uTscExit);
6562
6563 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
6564 {
6565 VMMRZCallRing3Enable(pVCpu);
6566 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6567
6568#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
6569 hmR0VmxReadAllRoFieldsVmcs(pVmxTransient);
6570#endif
6571
6572 /*
6573 * Always import the guest-interruptibility state as we need it when evaluating
6574 * events for injection on re-entry.
6575 *
6576 * We don't import CR0 (when unrestricted guest execution is unavailable) despite
6577 * checking for real-mode while exporting the state because all bits that cause
6578 * mode changes wrt CR0 are intercepted.
6579 */
6580 uint64_t const fImportMask = CPUMCTX_EXTRN_INHIBIT_INT
6581 | CPUMCTX_EXTRN_INHIBIT_NMI
6582#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
6583 | HMVMX_CPUMCTX_EXTRN_ALL
6584#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
6585 | CPUMCTX_EXTRN_RFLAGS
6586#endif
6587 ;
6588 rc = vmxHCImportGuestState(pVCpu, pVmcsInfo, fImportMask);
6589 AssertRC(rc);
6590
6591 /*
6592 * Sync the TPR shadow with our APIC state.
6593 */
6594 if ( !pVmxTransient->fIsNestedGuest
6595 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
6596 {
6597 Assert(pVmcsInfo->pbVirtApic);
6598 if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
6599 {
6600 rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
6601 AssertRC(rc);
6602 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
6603 }
6604 }
6605
6606 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6607 Assert( pVmxTransient->fWasGuestDebugStateActive == false
6608 || pVmxTransient->fWasHyperDebugStateActive == false);
6609 return;
6610 }
6611 }
6612#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6613 else if (pVmxTransient->fIsNestedGuest)
6614 AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
6615#endif
6616 else
6617 Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
6618
6619 VMMRZCallRing3Enable(pVCpu);
6620}
6621
6622
6623/**
6624 * Runs the guest code using hardware-assisted VMX the normal way.
6625 *
6626 * @returns VBox status code.
6627 * @param pVCpu The cross context virtual CPU structure.
6628 * @param pcLoops Pointer to the number of executed loops.
6629 */
6630static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
6631{
6632 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6633 Assert(pcLoops);
6634 Assert(*pcLoops <= cMaxResumeLoops);
6635 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6636
6637#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6638 /*
6639 * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
6640 * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
6641 * guest VMCS while entering the VMX ring-0 session.
6642 */
6643 if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6644 {
6645 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
6646 if (RT_SUCCESS(rc))
6647 { /* likely */ }
6648 else
6649 {
6650 LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
6651 return rc;
6652 }
6653 }
6654#endif
6655
6656 VMXTRANSIENT VmxTransient;
6657 RT_ZERO(VmxTransient);
6658 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6659
6660 /* Paranoia. */
6661 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
6662
6663 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6664 for (;;)
6665 {
6666 Assert(!HMR0SuspendPending());
6667 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6668 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6669
6670 /*
6671 * Preparatory work for running guest code, this may force us to
6672 * return to ring-3.
6673 *
6674 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6675 */
6676 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6677 if (rcStrict != VINF_SUCCESS)
6678 break;
6679
6680 /* Interrupts are disabled at this point! */
6681 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6682 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6683 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6684 /* Interrupts are re-enabled at this point! */
6685
6686 /*
6687 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6688 */
6689 if (RT_SUCCESS(rcRun))
6690 { /* very likely */ }
6691 else
6692 {
6693 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6694 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6695 return rcRun;
6696 }
6697
6698 /*
6699 * Profile the VM-exit.
6700 */
6701 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6702 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
6703 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6704 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6705 HMVMX_START_EXIT_DISPATCH_PROF();
6706
6707 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6708
6709 /*
6710 * Handle the VM-exit.
6711 */
6712#ifdef HMVMX_USE_FUNCTION_TABLE
6713 rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
6714#else
6715 rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
6716#endif
6717 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6718 if (rcStrict == VINF_SUCCESS)
6719 {
6720 if (++(*pcLoops) <= cMaxResumeLoops)
6721 continue;
6722 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6723 rcStrict = VINF_EM_RAW_INTERRUPT;
6724 }
6725 break;
6726 }
6727
6728 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6729 return rcStrict;
6730}
6731
6732
6733#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6734/**
6735 * Runs the nested-guest code using hardware-assisted VMX.
6736 *
6737 * @returns VBox status code.
6738 * @param pVCpu The cross context virtual CPU structure.
6739 * @param pcLoops Pointer to the number of executed loops.
6740 *
6741 * @sa hmR0VmxRunGuestCodeNormal.
6742 */
6743static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
6744{
6745 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6746 Assert(pcLoops);
6747 Assert(*pcLoops <= cMaxResumeLoops);
6748 Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6749
6750 /*
6751 * Switch to the nested-guest VMCS as we may have transitioned from executing the
6752 * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
6753 * loaded the nested-guest VMCS while entering the VMX ring-0 session.
6754 */
6755 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6756 {
6757 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
6758 if (RT_SUCCESS(rc))
6759 { /* likely */ }
6760 else
6761 {
6762 LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
6763 return rc;
6764 }
6765 }
6766
6767 VMXTRANSIENT VmxTransient;
6768 RT_ZERO(VmxTransient);
6769 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6770 VmxTransient.fIsNestedGuest = true;
6771
6772 /* Paranoia. */
6773 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);
6774
6775 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6776 for (;;)
6777 {
6778 Assert(!HMR0SuspendPending());
6779 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6780 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6781
6782 /*
6783 * Preparatory work for running guest code, this may force us to
6784 * return to ring-3.
6785 *
6786 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6787 */
6788 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6789 if (rcStrict != VINF_SUCCESS)
6790 break;
6791
6792 /* Interrupts are disabled at this point! */
6793 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6794 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6795 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6796 /* Interrupts are re-enabled at this point! */
6797
6798 /*
6799 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6800 */
6801 if (RT_SUCCESS(rcRun))
6802 { /* very likely */ }
6803 else
6804 {
6805 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6806 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6807 return rcRun;
6808 }
6809
6810 /*
6811 * Undo temporary disabling of the APIC-access page monitoring we did in hmR0VmxMergeVmcsNested.
6812 * This is needed for NestedTrap0eHandler (and IEM) to cause nested-guest APIC-access VM-exits.
6813 */
6814 if (VmxTransient.pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6815 {
6816 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
6817 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
6818 PGMHandlerPhysicalReset(pVCpu->CTX_SUFF(pVM), GCPhysApicAccess);
6819 }
6820
6821 /*
6822 * Profile the VM-exit.
6823 */
6824 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6825 STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
6826 STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6827 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6828 HMVMX_START_EXIT_DISPATCH_PROF();
6829
6830 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6831
6832 /*
6833 * Handle the VM-exit.
6834 */
6835 rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient);
6836 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6837 if (rcStrict == VINF_SUCCESS)
6838 {
6839 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6840 {
6841 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6842 rcStrict = VINF_VMX_VMEXIT;
6843 }
6844 else
6845 {
6846 if (++(*pcLoops) <= cMaxResumeLoops)
6847 continue;
6848 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6849 rcStrict = VINF_EM_RAW_INTERRUPT;
6850 }
6851 }
6852 else
6853 Assert(rcStrict != VINF_VMX_VMEXIT);
6854 break;
6855 }
6856
6857 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6858 return rcStrict;
6859}
6860#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6861
6862
6863/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
6864 * probes.
6865 *
6866 * The following few functions and associated structure contain the bloat
6867 * necessary for providing detailed debug events and dtrace probes as well as
6868 * reliable host side single stepping. This works on the principle of
6869 * "subclassing" the normal execution loop and workers. We replace the loop
6870 * method completely and override selected helpers to add necessary adjustments
6871 * to their core operation.
6872 *
6873 * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
6874 * any performance for debug and analysis features.
6875 *
6876 * @{
6877 */
6878
6879/**
6880 * Single steps guest code using hardware-assisted VMX.
6881 *
6882 * This is -not- the same as the guest single-stepping itself (say using EFLAGS.TF)
6883 * but single-stepping through the hypervisor debugger.
6884 *
6885 * @returns Strict VBox status code (i.e. informational status codes too).
6886 * @param pVCpu The cross context virtual CPU structure.
6887 * @param pcLoops Pointer to the number of executed loops.
6888 *
6889 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
6890 */
6891static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
6892{
6893 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6894 Assert(pcLoops);
6895 Assert(*pcLoops <= cMaxResumeLoops);
6896
6897 VMXTRANSIENT VmxTransient;
6898 RT_ZERO(VmxTransient);
6899 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6900
6901 /* Set HMCPU indicators. */
6902 bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
6903 pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
6904 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
6905 pVCpu->hmr0.s.fUsingDebugLoop = true;
6906
6907 /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
6908 VMXRUNDBGSTATE DbgState;
6909 vmxHCRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
6910 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
6911
6912 /*
6913 * The loop.
6914 */
6915 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6916 for (;;)
6917 {
6918 Assert(!HMR0SuspendPending());
6919 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6920 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6921 bool fStepping = pVCpu->hm.s.fSingleInstruction;
6922
6923 /* Set up VM-execution controls the next two can respond to. */
6924 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
6925
6926 /*
6927 * Preparatory work for running guest code, this may force us to
6928 * return to ring-3.
6929 *
6930 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6931 */
6932 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
6933 if (rcStrict != VINF_SUCCESS)
6934 break;
6935
6936 /* Interrupts are disabled at this point! */
6937 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6938
6939 /* Override any obnoxious code in the above two calls. */
6940 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
6941
6942 /*
6943 * Finally execute the guest.
6944 */
6945 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6946
6947 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6948 /* Interrupts are re-enabled at this point! */
6949
6950 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
6951 if (RT_SUCCESS(rcRun))
6952 { /* very likely */ }
6953 else
6954 {
6955 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6956 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6957 return rcRun;
6958 }
6959
6960 /* Profile the VM-exit. */
6961 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6962 STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll);
6963 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6964 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6965 HMVMX_START_EXIT_DISPATCH_PROF();
6966
6967 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6968
6969 /*
6970 * Handle the VM-exit - we quit earlier on certain VM-exits, see vmxHCRunDebugHandleExit().
6971 */
6972 rcStrict = vmxHCRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
6973 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6974 if (rcStrict != VINF_SUCCESS)
6975 break;
6976 if (++(*pcLoops) > cMaxResumeLoops)
6977 {
6978 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6979 rcStrict = VINF_EM_RAW_INTERRUPT;
6980 break;
6981 }
6982
6983 /*
6984 * Stepping: Did the RIP change, if so, consider it a single step.
6985 * Otherwise, make sure one of the TFs gets set.
6986 */
6987 if (fStepping)
6988 {
6989 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
6990 AssertRC(rc);
6991 if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
6992 || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
6993 {
6994 rcStrict = VINF_EM_DBG_STEPPED;
6995 break;
6996 }
6997 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
6998 }
6999
7000 /*
7001 * Update when dtrace settings change (DBGF kicks us, so no need to check).
7002 */
7003 if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
7004 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
7005
7006 /* Restore all controls applied by vmxHCPreRunGuestDebugStateApply above. */
7007 rcStrict = vmxHCRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
7008 Assert(rcStrict == VINF_SUCCESS);
7009 }
7010
7011 /*
7012 * Clear the X86_EFL_TF if necessary.
7013 */
7014 if (pVCpu->hmr0.s.fClearTrapFlag)
7015 {
7016 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
7017 AssertRC(rc);
7018 pVCpu->hmr0.s.fClearTrapFlag = false;
7019 pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
7020 }
7021 /** @todo there seem to be issues with the resume flag when the monitor trap
7022 * flag is pending without being used. Seen early in bios init when
7023 * accessing APIC page in protected mode. */
7024
7025/** @todo we need to do vmxHCRunDebugStateRevert here too, in case we broke
7026 * out of the above loop. */
7027
7028 /* Restore HMCPU indicators. */
7029 pVCpu->hmr0.s.fUsingDebugLoop = false;
7030 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
7031 pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
7032
7033 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
7034 return rcStrict;
7035}
7036
7037/** @} */
7038
7039
7040/**
7041 * Checks if any expensive dtrace probes are enabled and we should go to the
7042 * debug loop.
7043 *
7044 * @returns true if we should use debug loop, false if not.
7045 */
7046static bool hmR0VmxAnyExpensiveProbesEnabled(void)
7047{
7048 /* It's probably faster to OR the raw 32-bit counter variables together.
7049 Since the variables are in an array and the probes are next to one
7050 another (more or less), we have good locality. So, better read
7051 eight or nine cache lines every time and only have one conditional, than
7052 128+ conditionals, right? */
7053 return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
7054 | VBOXVMM_XCPT_DE_ENABLED_RAW()
7055 | VBOXVMM_XCPT_DB_ENABLED_RAW()
7056 | VBOXVMM_XCPT_BP_ENABLED_RAW()
7057 | VBOXVMM_XCPT_OF_ENABLED_RAW()
7058 | VBOXVMM_XCPT_BR_ENABLED_RAW()
7059 | VBOXVMM_XCPT_UD_ENABLED_RAW()
7060 | VBOXVMM_XCPT_NM_ENABLED_RAW()
7061 | VBOXVMM_XCPT_DF_ENABLED_RAW()
7062 | VBOXVMM_XCPT_TS_ENABLED_RAW()
7063 | VBOXVMM_XCPT_NP_ENABLED_RAW()
7064 | VBOXVMM_XCPT_SS_ENABLED_RAW()
7065 | VBOXVMM_XCPT_GP_ENABLED_RAW()
7066 | VBOXVMM_XCPT_PF_ENABLED_RAW()
7067 | VBOXVMM_XCPT_MF_ENABLED_RAW()
7068 | VBOXVMM_XCPT_AC_ENABLED_RAW()
7069 | VBOXVMM_XCPT_XF_ENABLED_RAW()
7070 | VBOXVMM_XCPT_VE_ENABLED_RAW()
7071 | VBOXVMM_XCPT_SX_ENABLED_RAW()
7072 | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
7073 | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
7074 ) != 0
7075 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
7076 | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
7077 | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
7078 | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
7079 | VBOXVMM_INSTR_INVD_ENABLED_RAW()
7080 | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
7081 | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
7082 | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
7083 | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
7084 | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
7085 | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
7086 | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
7087 | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
7088 | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
7089 | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
7090 | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
7091 | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
7092 | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
7093 | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
7094 | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
7095 | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
7096 | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
7097 | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
7098 | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
7099 | VBOXVMM_INSTR_STR_ENABLED_RAW()
7100 | VBOXVMM_INSTR_LTR_ENABLED_RAW()
7101 | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
7102 | VBOXVMM_INSTR_RSM_ENABLED_RAW()
7103 | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
7104 | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
7105 | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
7106 | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
7107 | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
7108 | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
7109 | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
7110 | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
7111 | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
7112 | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
7113 | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
7114 | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
7115 | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
7116 | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
7117 | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
7118 | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
7119 | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
7120 | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
7121 ) != 0
7122 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
7123 | VBOXVMM_EXIT_HALT_ENABLED_RAW()
7124 | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
7125 | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
7126 | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
7127 | VBOXVMM_EXIT_INVD_ENABLED_RAW()
7128 | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
7129 | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
7130 | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
7131 | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
7132 | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
7133 | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
7134 | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
7135 | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
7136 | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
7137 | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
7138 | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
7139 | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
7140 | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
7141 | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
7142 | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
7143 | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
7144 | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
7145 | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
7146 | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
7147 | VBOXVMM_EXIT_STR_ENABLED_RAW()
7148 | VBOXVMM_EXIT_LTR_ENABLED_RAW()
7149 | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
7150 | VBOXVMM_EXIT_RSM_ENABLED_RAW()
7151 | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
7152 | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
7153 | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
7154 | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
7155 | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
7156 | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
7157 | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
7158 | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
7159 | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
7160 | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
7161 | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
7162 | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
7163 | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
7164 | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
7165 | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
7166 | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
7167 | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
7168 | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
7169 | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
7170 | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
7171 | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
7172 | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
7173 ) != 0;
7174}
7175
7176
7177/**
7178 * Runs the guest using hardware-assisted VMX.
7179 *
7180 * @returns Strict VBox status code (i.e. informational status codes too).
7181 * @param pVCpu The cross context virtual CPU structure.
7182 */
7183VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
7184{
7185 AssertPtr(pVCpu);
7186 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
7187 Assert(VMMRZCallRing3IsEnabled(pVCpu));
7188 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7189 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
7190
7191 VBOXSTRICTRC rcStrict;
7192 uint32_t cLoops = 0;
7193 for (;;)
7194 {
7195#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7196 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
7197#else
7198 NOREF(pCtx);
7199 bool const fInNestedGuestMode = false;
7200#endif
7201 if (!fInNestedGuestMode)
7202 {
7203 if ( !pVCpu->hm.s.fUseDebugLoop
7204 && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
7205 && !DBGFIsStepping(pVCpu)
7206 && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
7207 rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
7208 else
7209 rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
7210 }
7211#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7212 else
7213 rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
7214
7215 if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
7216 {
7217 Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
7218 continue;
7219 }
7220 if (rcStrict == VINF_VMX_VMEXIT)
7221 {
7222 Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
7223 continue;
7224 }
7225#endif
7226 break;
7227 }
7228
7229 int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
7230 switch (rcLoop)
7231 {
7232 case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
7233 case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
7234 }
7235
7236 int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
7237 if (RT_FAILURE(rc2))
7238 {
7239 pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
7240 rcStrict = rc2;
7241 }
7242 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7243 Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
7244 return rcStrict;
7245}
7246