VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/NEMR3Native-win.cpp

最後變更 在這個檔案是 108434,由 vboxsync 提交於 11 天 前

VMM/NEM/Hyper-V: Started implementing a NEM/Hyper-V specific APIC emulation utilizing the LocalApicEmulation feature of Hyper-V, bugref:9993

  • 屬性 svn:eol-style 設為 native
  • 屬性 svn:keywords 設為 Author Date Id Revision
檔案大小: 162.2 KB
 
1/* $Id: NEMR3Native-win.cpp 108434 2025-03-04 11:24:21Z vboxsync $ */
2/** @file
3 * NEM - Native execution manager, native ring-3 Windows backend.
4 *
5 * Log group 2: Exit logging.
6 * Log group 3: Log context on exit.
7 * Log group 5: Ring-3 memory management
8 * Log group 6: Ring-0 memory management
9 * Log group 12: API intercepts.
10 */
11
12/*
13 * Copyright (C) 2018-2024 Oracle and/or its affiliates.
14 *
15 * This file is part of VirtualBox base platform packages, as
16 * available from https://www.virtualbox.org.
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation, in version 3 of the
21 * License.
22 *
23 * This program is distributed in the hope that it will be useful, but
24 * WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 * General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, see <https://www.gnu.org/licenses>.
30 *
31 * SPDX-License-Identifier: GPL-3.0-only
32 */
33
34
35/*********************************************************************************************************************************
36* Header Files *
37*********************************************************************************************************************************/
38#define LOG_GROUP LOG_GROUP_NEM
39#define VMCPU_INCL_CPUM_GST_CTX
40#include <iprt/nt/nt-and-windows.h>
41#include <iprt/nt/hyperv.h>
42#include <iprt/nt/vid.h>
43#include <WinHvPlatform.h>
44
45#ifndef _WIN32_WINNT_WIN10
46# error "Missing _WIN32_WINNT_WIN10"
47#endif
48#ifndef _WIN32_WINNT_WIN10_RS1 /* Missing define, causing trouble for us. */
49# define _WIN32_WINNT_WIN10_RS1 (_WIN32_WINNT_WIN10 + 1)
50#endif
51#include <sysinfoapi.h>
52#include <debugapi.h>
53#include <errhandlingapi.h>
54#include <fileapi.h>
55#include <winerror.h> /* no api header for this. */
56
57#include <VBox/vmm/nem.h>
58#include <VBox/vmm/iem.h>
59#include <VBox/vmm/em.h>
60#include <VBox/vmm/pdmapic.h>
61#include <VBox/vmm/pdm.h>
62#include <VBox/vmm/dbgftrace.h>
63#include "NEMInternal.h"
64#include <VBox/vmm/vmcc.h>
65
66#include <iprt/ldr.h>
67#include <iprt/path.h>
68#include <iprt/string.h>
69#include <iprt/system.h>
70#include <iprt/utf16.h>
71
72#ifndef NTDDI_WIN10_VB /* Present in W10 2004 SDK, quite possibly earlier. */
73HRESULT WINAPI WHvQueryGpaRangeDirtyBitmap(WHV_PARTITION_HANDLE, WHV_GUEST_PHYSICAL_ADDRESS, UINT64, UINT64 *, UINT32);
74# define WHvMapGpaRangeFlagTrackDirtyPages ((WHV_MAP_GPA_RANGE_FLAGS)0x00000008)
75#endif
76
77
78/*********************************************************************************************************************************
79* Defined Constants And Macros *
80*********************************************************************************************************************************/
81#ifdef LOG_ENABLED
82# define NEM_WIN_INTERCEPT_NT_IO_CTLS
83#endif
84
85/** VID I/O control detection: Fake partition handle input. */
86#define NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE ((HANDLE)(uintptr_t)38479125)
87/** VID I/O control detection: Fake partition ID return. */
88#define NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID UINT64_C(0xfa1e000042424242)
89/** VID I/O control detection: The property we get via VidGetPartitionProperty. */
90#define NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_CODE HvPartitionPropertyProcessorVendor
91/** VID I/O control detection: Fake property value return. */
92#define NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_VALUE UINT64_C(0xf00dface01020304)
93/** VID I/O control detection: Fake CPU index input. */
94#define NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX UINT32_C(42)
95/** VID I/O control detection: Fake timeout input. */
96#define NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT UINT32_C(0x00080286)
97
98
99#ifndef NTDDI_WIN10_RS4 /* We use this to support older SDKs. */
100# define NTDDI_WIN10_RS4 (NTDDI_WIN10 + 5) /* 17134 */
101#endif
102#ifndef NTDDI_WIN10_RS5 /* We use this to support older SDKs. */
103# define NTDDI_WIN10_RS5 (NTDDI_WIN10 + 6) /* ????? */
104#endif
105#ifndef NTDDI_WIN10_19H1 /* We use this to support older SDKs. */
106# define NTDDI_WIN10_19H1 (NTDDI_WIN10 + 7) /* 18362 */
107#endif
108#ifndef NTDDI_WIN10_VB
109# define NTDDI_WIN10_VB (NTDDI_WIN10 + 8) /* 19040 */
110#endif
111#ifndef NTDDI_WIN10_MN
112# define NTDDI_WIN10_MN (NTDDI_WIN10 + 9) /* ????? */
113#endif
114#ifndef NTDDI_WIN10_FE
115# define NTDDI_WIN10_FE (NTDDI_WIN10 + 10) /* ????? */
116#endif
117#ifndef NTDDI_WIN10_CO
118# define NTDDI_WIN10_CO (NTDDI_WIN10 + 11) /* 22000 */
119#endif
120#ifndef NTDDI_WIN10_NI
121# define NTDDI_WIN10_NI (NTDDI_WIN10 + 12) /* 22621 */
122#endif
123#ifndef NTDDI_WIN10_CU
124# define NTDDI_WIN10_CU (NTDDI_WIN10 + 13) /* ????? */
125#endif
126#ifndef NTDDI_WIN10_ZN
127# define NTDDI_WIN10_ZN (NTDDI_WIN10 + 14) /* ????? */
128#endif
129#ifndef NTDDI_WIN10_GA
130# define NTDDI_WIN10_GA (NTDDI_WIN10 + 15) /* ????? */
131#endif
132#ifndef NTDDI_WIN10_GE
133# define NTDDI_WIN10_GE (NTDDI_WIN10 + 16) /* 26100 */
134#endif
135
136#define MY_NTDDI_WIN10_17134 NTDDI_WIN10_RS4
137#define MY_NTDDI_WIN10_18362 NTDDI_WIN10_19H1
138#define MY_NTDDI_WIN10_19040 NTDDI_WIN10_VB
139#define MY_NTDDI_WIN11_22000 NTDDI_WIN10_CO
140#define MY_NTDDI_WIN11_22621 NTDDI_WIN10_NI
141#define MY_NTDDI_WIN11_26100 NTDDI_WIN10_GE
142
143
144/*********************************************************************************************************************************
145* Global Variables *
146*********************************************************************************************************************************/
147/** @name APIs imported from WinHvPlatform.dll
148 * @{ */
149static decltype(WHvGetCapability) * g_pfnWHvGetCapability;
150static decltype(WHvCreatePartition) * g_pfnWHvCreatePartition;
151static decltype(WHvSetupPartition) * g_pfnWHvSetupPartition;
152static decltype(WHvDeletePartition) * g_pfnWHvDeletePartition;
153static decltype(WHvGetPartitionProperty) * g_pfnWHvGetPartitionProperty;
154static decltype(WHvSetPartitionProperty) * g_pfnWHvSetPartitionProperty;
155static decltype(WHvMapGpaRange) * g_pfnWHvMapGpaRange;
156static decltype(WHvUnmapGpaRange) * g_pfnWHvUnmapGpaRange;
157static decltype(WHvTranslateGva) * g_pfnWHvTranslateGva;
158static decltype(WHvQueryGpaRangeDirtyBitmap) * g_pfnWHvQueryGpaRangeDirtyBitmap;
159static decltype(WHvCreateVirtualProcessor) * g_pfnWHvCreateVirtualProcessor;
160static decltype(WHvDeleteVirtualProcessor) * g_pfnWHvDeleteVirtualProcessor;
161static decltype(WHvRunVirtualProcessor) * g_pfnWHvRunVirtualProcessor;
162static decltype(WHvCancelRunVirtualProcessor) * g_pfnWHvCancelRunVirtualProcessor;
163static decltype(WHvGetVirtualProcessorRegisters) * g_pfnWHvGetVirtualProcessorRegisters;
164static decltype(WHvSetVirtualProcessorRegisters) * g_pfnWHvSetVirtualProcessorRegisters;
165static decltype(WHvSuspendPartitionTime) * g_pfnWHvSuspendPartitionTime;
166static decltype(WHvResumePartitionTime) * g_pfnWHvResumePartitionTime;
167decltype(WHvGetVirtualProcessorState) * g_pfnWHvGetVirtualProcessorState = NULL;
168decltype(WHvSetVirtualProcessorState) * g_pfnWHvSetVirtualProcessorState = NULL;
169decltype(WHvGetVirtualProcessorInterruptControllerState) *g_pfnWHvGetVirtualProcessorInterruptControllerState = NULL;
170decltype(WHvSetVirtualProcessorInterruptControllerState) *g_pfnWHvSetVirtualProcessorInterruptControllerState = NULL;
171decltype(WHvGetVirtualProcessorInterruptControllerState2) *g_pfnWHvGetVirtualProcessorInterruptControllerState2 = NULL;
172decltype(WHvSetVirtualProcessorInterruptControllerState2) *g_pfnWHvSetVirtualProcessorInterruptControllerState2 = NULL;
173decltype(WHvRequestInterrupt) * g_pfnWHvRequestInterrupt;
174/** @} */
175
176/** @name APIs imported from Vid.dll
177 * @{ */
178static decltype(VidGetHvPartitionId) *g_pfnVidGetHvPartitionId;
179static decltype(VidGetPartitionProperty) *g_pfnVidGetPartitionProperty;
180#ifdef LOG_ENABLED
181static decltype(VidStartVirtualProcessor) *g_pfnVidStartVirtualProcessor;
182static decltype(VidStopVirtualProcessor) *g_pfnVidStopVirtualProcessor;
183static decltype(VidMessageSlotMap) *g_pfnVidMessageSlotMap;
184static decltype(VidMessageSlotHandleAndGetNext) *g_pfnVidMessageSlotHandleAndGetNext;
185static decltype(VidGetVirtualProcessorState) *g_pfnVidGetVirtualProcessorState;
186static decltype(VidSetVirtualProcessorState) *g_pfnVidSetVirtualProcessorState;
187static decltype(VidGetVirtualProcessorRunningStatus) *g_pfnVidGetVirtualProcessorRunningStatus;
188#endif
189/** @} */
190
191/** The Windows build number. */
192static uint32_t g_uBuildNo = 17134;
193
194
195
196/**
 * Import instructions.
 *
 * One entry per API that is resolved at runtime.  nemR3WinInitProbeAndLoad
 * walks this table, looks pszName up in the DLL selected by idxDll (the index
 * matches the order of its DLL name array: WinHvPlatform.dll, then vid.dll) and
 * stores the result in *ppfn.  When a lookup fails *ppfn is set to NULL, and
 * the failure is only turned into an error status if fOptional is false.
 */
199static const struct
200{
201 uint8_t idxDll; /**< 0 for WinHvPlatform.dll, 1 for vid.dll. */
202 bool fOptional; /**< Set if import is optional. */
203 PFNRT *ppfn; /**< The function pointer variable. */
204 const char *pszName; /**< The function name. */
205} g_aImports[] =
206{
/* Builds one table entry; the variable is always named g_pfn<ApiName> and the
   lookup string is the stringified API name. */
207#define NEM_WIN_IMPORT(a_idxDll, a_fOptional, a_Name) { (a_idxDll), (a_fOptional), (PFNRT *)&RT_CONCAT(g_pfn,a_Name), #a_Name }
208 NEM_WIN_IMPORT(0, false, WHvGetCapability),
209 NEM_WIN_IMPORT(0, false, WHvCreatePartition),
210 NEM_WIN_IMPORT(0, false, WHvSetupPartition),
211 NEM_WIN_IMPORT(0, false, WHvDeletePartition),
212 NEM_WIN_IMPORT(0, false, WHvGetPartitionProperty),
213 NEM_WIN_IMPORT(0, false, WHvSetPartitionProperty),
214 NEM_WIN_IMPORT(0, false, WHvMapGpaRange),
215 NEM_WIN_IMPORT(0, false, WHvUnmapGpaRange),
216 NEM_WIN_IMPORT(0, false, WHvTranslateGva),
217 NEM_WIN_IMPORT(0, true, WHvQueryGpaRangeDirtyBitmap),
218 NEM_WIN_IMPORT(0, false, WHvCreateVirtualProcessor),
219 NEM_WIN_IMPORT(0, false, WHvDeleteVirtualProcessor),
220 NEM_WIN_IMPORT(0, false, WHvRunVirtualProcessor),
221 NEM_WIN_IMPORT(0, false, WHvCancelRunVirtualProcessor),
222 NEM_WIN_IMPORT(0, false, WHvGetVirtualProcessorRegisters),
223 NEM_WIN_IMPORT(0, false, WHvSetVirtualProcessorRegisters),
224 NEM_WIN_IMPORT(0, true, WHvSuspendPartitionTime),
225 NEM_WIN_IMPORT(0, true, WHvResumePartitionTime),
226 NEM_WIN_IMPORT(0, true, WHvRequestInterrupt),
227 NEM_WIN_IMPORT(0, true, WHvGetVirtualProcessorState),
228 NEM_WIN_IMPORT(0, true, WHvSetVirtualProcessorState),
229 NEM_WIN_IMPORT(0, true, WHvGetVirtualProcessorInterruptControllerState),
230 NEM_WIN_IMPORT(0, true, WHvSetVirtualProcessorInterruptControllerState),
231 NEM_WIN_IMPORT(0, true, WHvGetVirtualProcessorInterruptControllerState2),
232 NEM_WIN_IMPORT(0, true, WHvSetVirtualProcessorInterruptControllerState2),
233
/* vid.dll imports.  The ones inside the LOG_ENABLED section are marked as
   mandatory (fOptional = false) and only exist in logging builds. */
234 NEM_WIN_IMPORT(1, true, VidGetHvPartitionId),
235 NEM_WIN_IMPORT(1, true, VidGetPartitionProperty),
236#ifdef LOG_ENABLED
237 NEM_WIN_IMPORT(1, false, VidMessageSlotMap),
238 NEM_WIN_IMPORT(1, false, VidMessageSlotHandleAndGetNext),
239 NEM_WIN_IMPORT(1, false, VidStartVirtualProcessor),
240 NEM_WIN_IMPORT(1, false, VidStopVirtualProcessor),
241 NEM_WIN_IMPORT(1, false, VidGetVirtualProcessorState),
242 NEM_WIN_IMPORT(1, false, VidSetVirtualProcessorState),
243 NEM_WIN_IMPORT(1, false, VidGetVirtualProcessorRunningStatus),
244#endif
245#undef NEM_WIN_IMPORT
246};
247
248
249/** The real NtDeviceIoControlFile API in NTDLL. */
250static decltype(NtDeviceIoControlFile) *g_pfnNtDeviceIoControlFile;
251/** Pointer to the NtDeviceIoControlFile import table entry. */
252static decltype(NtDeviceIoControlFile) **g_ppfnVidNtDeviceIoControlFile;
253#ifdef LOG_ENABLED
254/** Info about the VidGetHvPartitionId I/O control interface. */
255static NEMWINIOCTL g_IoCtlGetHvPartitionId;
256/** Info about the VidGetPartitionProperty I/O control interface. */
257static NEMWINIOCTL g_IoCtlGetPartitionProperty;
258/** Info about the VidStartVirtualProcessor I/O control interface. */
259static NEMWINIOCTL g_IoCtlStartVirtualProcessor;
260/** Info about the VidStopVirtualProcessor I/O control interface. */
261static NEMWINIOCTL g_IoCtlStopVirtualProcessor;
262/** Info about the VidMessageSlotHandleAndGetNext I/O control interface. */
263static NEMWINIOCTL g_IoCtlMessageSlotHandleAndGetNext;
264/** Info about the VidMessageSlotMap I/O control interface - for logging. */
265static NEMWINIOCTL g_IoCtlMessageSlotMap;
266/** Info about the VidGetVirtualProcessorState I/O control interface - for logging. */
267static NEMWINIOCTL g_IoCtlGetVirtualProcessorState;
268/** Info about the VidSetVirtualProcessorState I/O control interface - for logging. */
269static NEMWINIOCTL g_IoCtlSetVirtualProcessorState;
270/** Pointer to what nemR3WinIoctlDetector_ForLogging should fill in. */
271static NEMWINIOCTL *g_pIoCtlDetectForLogging;
272#endif
273
274#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
275/** Mapping slot for CPU #0.
276 * @{ */
277static VID_MESSAGE_MAPPING_HEADER *g_pMsgSlotMapping = NULL;
278static const HV_MESSAGE_HEADER *g_pHvMsgHdr;
279static const HV_X64_INTERCEPT_MESSAGE_HEADER *g_pX64MsgHdr;
280/** @} */
281#endif
282
283
284/*
285 * Let the preprocessor alias the APIs to import variables for better autocompletion.
286 */
287#ifndef IN_SLICKEDIT
288# define WHvGetCapability g_pfnWHvGetCapability
289# define WHvCreatePartition g_pfnWHvCreatePartition
290# define WHvSetupPartition g_pfnWHvSetupPartition
291# define WHvDeletePartition g_pfnWHvDeletePartition
292# define WHvGetPartitionProperty g_pfnWHvGetPartitionProperty
293# define WHvSetPartitionProperty g_pfnWHvSetPartitionProperty
294# define WHvMapGpaRange g_pfnWHvMapGpaRange
295# define WHvUnmapGpaRange g_pfnWHvUnmapGpaRange
296# define WHvTranslateGva g_pfnWHvTranslateGva
297# define WHvQueryGpaRangeDirtyBitmap g_pfnWHvQueryGpaRangeDirtyBitmap
298# define WHvCreateVirtualProcessor g_pfnWHvCreateVirtualProcessor
299# define WHvDeleteVirtualProcessor g_pfnWHvDeleteVirtualProcessor
300# define WHvRunVirtualProcessor g_pfnWHvRunVirtualProcessor
301# define WHvGetRunExitContextSize g_pfnWHvGetRunExitContextSize
302# define WHvCancelRunVirtualProcessor g_pfnWHvCancelRunVirtualProcessor
303# define WHvGetVirtualProcessorRegisters g_pfnWHvGetVirtualProcessorRegisters
304# define WHvSetVirtualProcessorRegisters g_pfnWHvSetVirtualProcessorRegisters
305# define WHvSuspendPartitionTime g_pfnWHvSuspendPartitionTime
306# define WHvResumePartitionTime g_pfnWHvResumePartitionTime
307# define WHvRequestInterrupt g_pfnWHvRequestInterrupt
308# define WHvGetVirtualProcessorState g_pfnWHvGetVirtualProcessorState
309# define WHvSetVirtualProcessorState g_pfnWHvSetVirtualProcessorState
310# define WHvGetVirtualProcessorInterruptControllerState g_pfnWHvGetVirtualProcessorInterruptControllerState
311# define WHvGetVirtualProcessorInterruptControllerState2 g_pfnWHvGetVirtualProcessorInterruptControllerState2
312
313# define VidMessageSlotHandleAndGetNext g_pfnVidMessageSlotHandleAndGetNext
314# define VidStartVirtualProcessor g_pfnVidStartVirtualProcessor
315# define VidStopVirtualProcessor g_pfnVidStopVirtualProcessor
316
317#endif
318
319#if 0 /* unused */
320/** WHV_MEMORY_ACCESS_TYPE names */
321static const char * const g_apszWHvMemAccesstypes[4] = { "read", "write", "exec", "!undefined!" };
322#endif
323
324
325/*********************************************************************************************************************************
326* Internal Functions *
327*********************************************************************************************************************************/
328DECLINLINE(int) nemR3NativeGCPhys2R3PtrReadOnly(PVM pVM, RTGCPHYS GCPhys, const void **ppv);
329DECLINLINE(int) nemR3NativeGCPhys2R3PtrWriteable(PVM pVM, RTGCPHYS GCPhys, void **ppv);
330
331/*
332 * Instantiate the code we used to share with ring-0.
333 */
334#include "../VMMAll/NEMAllNativeTemplate-win.cpp.h"
335
336
337
338#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
339/**
340 * Wrapper that logs the call from VID.DLL.
341 *
342 * This is very handy for figuring out why an API call fails.
343 */
344static NTSTATUS WINAPI
345nemR3WinLogWrapper_NtDeviceIoControlFile(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
346 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
347 PVOID pvOutput, ULONG cbOutput)
348{
349
350 char szFunction[32];
351 const char *pszFunction;
352 if (uFunction == g_IoCtlMessageSlotHandleAndGetNext.uFunction)
353 pszFunction = "VidMessageSlotHandleAndGetNext";
354 else if (uFunction == g_IoCtlStartVirtualProcessor.uFunction)
355 pszFunction = "VidStartVirtualProcessor";
356 else if (uFunction == g_IoCtlStopVirtualProcessor.uFunction)
357 pszFunction = "VidStopVirtualProcessor";
358 else if (uFunction == g_IoCtlMessageSlotMap.uFunction)
359 pszFunction = "VidMessageSlotMap";
360 else if (uFunction == g_IoCtlGetVirtualProcessorState.uFunction)
361 pszFunction = "VidGetVirtualProcessorState";
362 else if (uFunction == g_IoCtlSetVirtualProcessorState.uFunction)
363 pszFunction = "VidSetVirtualProcessorState";
364 else
365 {
366 RTStrPrintf(szFunction, sizeof(szFunction), "%#x", uFunction);
367 pszFunction = szFunction;
368 }
369
370 if (cbInput > 0 && pvInput)
371 Log12(("VID!NtDeviceIoControlFile: %s/input: %.*Rhxs\n", pszFunction, RT_MIN(cbInput, 32), pvInput));
372 NTSTATUS rcNt = g_pfnNtDeviceIoControlFile(hFile, hEvt, pfnApcCallback, pvApcCtx, pIos, uFunction,
373 pvInput, cbInput, pvOutput, cbOutput);
374 if (!hEvt && !pfnApcCallback && !pvApcCtx)
375 Log12(("VID!NtDeviceIoControlFile: hFile=%#zx pIos=%p->{s:%#x, i:%#zx} uFunction=%s Input=%p LB %#x Output=%p LB %#x) -> %#x; Caller=%p\n",
376 hFile, pIos, pIos->Status, pIos->Information, pszFunction, pvInput, cbInput, pvOutput, cbOutput, rcNt, ASMReturnAddress()));
377 else
378 Log12(("VID!NtDeviceIoControlFile: hFile=%#zx hEvt=%#zx Apc=%p/%p pIos=%p->{s:%#x, i:%#zx} uFunction=%s Input=%p LB %#x Output=%p LB %#x) -> %#x; Caller=%p\n",
379 hFile, hEvt, RT_CB_LOG_CAST(pfnApcCallback), pvApcCtx, pIos, pIos->Status, pIos->Information, pszFunction,
380 pvInput, cbInput, pvOutput, cbOutput, rcNt, ASMReturnAddress()));
381 if (cbOutput > 0 && pvOutput)
382 {
383 Log12(("VID!NtDeviceIoControlFile: %s/output: %.*Rhxs\n", pszFunction, RT_MIN(cbOutput, 32), pvOutput));
384 if (uFunction == 0x2210cc && g_pMsgSlotMapping == NULL && cbOutput >= sizeof(void *))
385 {
386 g_pMsgSlotMapping = *(VID_MESSAGE_MAPPING_HEADER **)pvOutput;
387 g_pHvMsgHdr = (const HV_MESSAGE_HEADER *)(g_pMsgSlotMapping + 1);
388 g_pX64MsgHdr = (const HV_X64_INTERCEPT_MESSAGE_HEADER *)(g_pHvMsgHdr + 1);
389 Log12(("VID!NtDeviceIoControlFile: Message slot mapping: %p\n", g_pMsgSlotMapping));
390 }
391 }
392 if ( g_pMsgSlotMapping
393 && ( uFunction == g_IoCtlMessageSlotHandleAndGetNext.uFunction
394 || uFunction == g_IoCtlStopVirtualProcessor.uFunction
395 || uFunction == g_IoCtlMessageSlotMap.uFunction
396 ))
397 Log12(("VID!NtDeviceIoControlFile: enmVidMsgType=%#x cb=%#x msg=%#x payload=%u cs:rip=%04x:%08RX64 (%s)\n",
398 g_pMsgSlotMapping->enmVidMsgType, g_pMsgSlotMapping->cbMessage,
399 g_pHvMsgHdr->MessageType, g_pHvMsgHdr->PayloadSize,
400 g_pX64MsgHdr->CsSegment.Selector, g_pX64MsgHdr->Rip, pszFunction));
401
402 return rcNt;
403}
404#endif /* NEM_WIN_INTERCEPT_NT_IO_CTLS */
405
406
407/**
408 * Patches the call table of VID.DLL so we can intercept NtDeviceIoControlFile.
409 *
410 * This is for used to figure out the I/O control codes and in logging builds
411 * for logging API calls that WinHvPlatform.dll does.
412 *
413 * @returns VBox status code.
414 * @param hLdrModVid The VID module handle.
415 * @param pErrInfo Where to return additional error information.
416 */
417static int nemR3WinInitVidIntercepts(RTLDRMOD hLdrModVid, PRTERRINFO pErrInfo)
418{
419 /*
420 * Locate the real API.
421 */
422 g_pfnNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) *)RTLdrGetSystemSymbol("NTDLL.DLL", "NtDeviceIoControlFile");
423 AssertReturn(g_pfnNtDeviceIoControlFile != NULL,
424 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Failed to resolve NtDeviceIoControlFile from NTDLL.DLL"));
425
426 /*
427 * Locate the PE header and get what we need from it.
428 */
429 uint8_t const *pbImage = (uint8_t const *)RTLdrGetNativeHandle(hLdrModVid);
430 IMAGE_DOS_HEADER const *pMzHdr = (IMAGE_DOS_HEADER const *)pbImage;
431 AssertReturn(pMzHdr->e_magic == IMAGE_DOS_SIGNATURE,
432 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL mapping doesn't start with MZ signature: %#x", pMzHdr->e_magic));
433 IMAGE_NT_HEADERS const *pNtHdrs = (IMAGE_NT_HEADERS const *)&pbImage[pMzHdr->e_lfanew];
434 AssertReturn(pNtHdrs->Signature == IMAGE_NT_SIGNATURE,
435 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL has invalid PE signaturre: %#x @%#x",
436 pNtHdrs->Signature, pMzHdr->e_lfanew));
437
438 uint32_t const cbImage = pNtHdrs->OptionalHeader.SizeOfImage;
439 IMAGE_DATA_DIRECTORY const ImportDir = pNtHdrs->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
440
441 /*
442 * Walk the import descriptor table looking for NTDLL.DLL.
443 */
444 AssertReturn( ImportDir.Size > 0
445 && ImportDir.Size < cbImage,
446 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory size: %#x", ImportDir.Size));
447 AssertReturn( ImportDir.VirtualAddress > 0
448 && ImportDir.VirtualAddress <= cbImage - ImportDir.Size,
449 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory RVA: %#x", ImportDir.VirtualAddress));
450
451 for (PIMAGE_IMPORT_DESCRIPTOR pImps = (PIMAGE_IMPORT_DESCRIPTOR)&pbImage[ImportDir.VirtualAddress];
452 pImps->Name != 0 && pImps->FirstThunk != 0;
453 pImps++)
454 {
455 AssertReturn(pImps->Name < cbImage,
456 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory entry name: %#x", pImps->Name));
457 const char *pszModName = (const char *)&pbImage[pImps->Name];
458 if (RTStrICmpAscii(pszModName, "ntdll.dll"))
459 continue;
460 AssertReturn(pImps->FirstThunk < cbImage,
461 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad FirstThunk: %#x", pImps->FirstThunk));
462 AssertReturn(pImps->OriginalFirstThunk < cbImage,
463 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad FirstThunk: %#x", pImps->FirstThunk));
464
465 /*
466 * Walk the thunks table(s) looking for NtDeviceIoControlFile.
467 */
468 uintptr_t *puFirstThunk = (uintptr_t *)&pbImage[pImps->FirstThunk]; /* update this. */
469 if ( pImps->OriginalFirstThunk != 0
470 && pImps->OriginalFirstThunk != pImps->FirstThunk)
471 {
472 uintptr_t const *puOrgThunk = (uintptr_t const *)&pbImage[pImps->OriginalFirstThunk]; /* read from this. */
473 uintptr_t cLeft = (cbImage - (RT_MAX(pImps->FirstThunk, pImps->OriginalFirstThunk)))
474 / sizeof(*puFirstThunk);
475 while (cLeft-- > 0 && *puOrgThunk != 0)
476 {
477 if (!(*puOrgThunk & IMAGE_ORDINAL_FLAG64)) /* ASSUMES 64-bit */
478 {
479 AssertReturn(*puOrgThunk > 0 && *puOrgThunk < cbImage,
480 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad thunk entry: %#x", *puOrgThunk));
481
482 const char *pszSymbol = (const char *)&pbImage[*puOrgThunk + 2];
483 if (strcmp(pszSymbol, "NtDeviceIoControlFile") == 0)
484 g_ppfnVidNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) **)puFirstThunk;
485 }
486
487 puOrgThunk++;
488 puFirstThunk++;
489 }
490 }
491 else
492 {
493 /* No original thunk table, so scan the resolved symbols for a match
494 with the NtDeviceIoControlFile address. */
495 uintptr_t const uNeedle = (uintptr_t)g_pfnNtDeviceIoControlFile;
496 uintptr_t cLeft = (cbImage - pImps->FirstThunk) / sizeof(*puFirstThunk);
497 while (cLeft-- > 0 && *puFirstThunk != 0)
498 {
499 if (*puFirstThunk == uNeedle)
500 g_ppfnVidNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) **)puFirstThunk;
501 puFirstThunk++;
502 }
503 }
504 }
505
506 if (g_ppfnVidNtDeviceIoControlFile != NULL)
507 {
508 /* Make the thunk writable we can freely modify it. */
509 DWORD fOldProt = PAGE_READONLY;
510 VirtualProtect((void *)(uintptr_t)g_ppfnVidNtDeviceIoControlFile, sizeof(uintptr_t), PAGE_EXECUTE_READWRITE, &fOldProt);
511
512#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
513 *g_ppfnVidNtDeviceIoControlFile = nemR3WinLogWrapper_NtDeviceIoControlFile;
514#endif
515 return VINF_SUCCESS;
516 }
517 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Failed to patch NtDeviceIoControlFile import in VID.DLL!");
518}
519
520
521/**
522 * Worker for nemR3NativeInit that probes and load the native API.
523 *
524 * @returns VBox status code.
525 * @param fForced Whether the HMForced flag is set and we should
526 * fail if we cannot initialize.
527 * @param pErrInfo Where to always return error info.
528 */
529static int nemR3WinInitProbeAndLoad(bool fForced, PRTERRINFO pErrInfo)
530{
531 /*
532 * Check that the DLL files we need are present, but without loading them.
533 * We'd like to avoid loading them unnecessarily.
534 */
535 WCHAR wszPath[MAX_PATH + 64];
536 UINT cwcPath = GetSystemDirectoryW(wszPath, MAX_PATH);
537 if (cwcPath >= MAX_PATH || cwcPath < 2)
538 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "GetSystemDirectoryW failed (%#x / %u)", cwcPath, GetLastError());
539
540 if (wszPath[cwcPath - 1] != '\\' || wszPath[cwcPath - 1] != '/')
541 wszPath[cwcPath++] = '\\';
542 RTUtf16CopyAscii(&wszPath[cwcPath], RT_ELEMENTS(wszPath) - cwcPath, "WinHvPlatform.dll");
543 if (GetFileAttributesW(wszPath) == INVALID_FILE_ATTRIBUTES)
544 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "The native API dll was not found (%ls)", wszPath);
545
546 /*
547 * Check that we're in a VM and that the hypervisor identifies itself as Hyper-V.
548 */
549 if (!ASMHasCpuId())
550 return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "No CPUID support");
551 if (!RTX86IsValidStdRange(ASMCpuId_EAX(0)))
552 return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "No CPUID leaf #1");
553 if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_HVP))
554 return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Not in a hypervisor partition (HVP=0)");
555
556 uint32_t cMaxHyperLeaf = 0;
557 uint32_t uEbx = 0;
558 uint32_t uEcx = 0;
559 uint32_t uEdx = 0;
560 ASMCpuIdExSlow(0x40000000, 0, 0, 0, &cMaxHyperLeaf, &uEbx, &uEcx, &uEdx);
561 if (!RTX86IsValidHypervisorRange(cMaxHyperLeaf))
562 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Invalid hypervisor CPUID range (%#x %#x %#x %#x)",
563 cMaxHyperLeaf, uEbx, uEcx, uEdx);
564 if ( uEbx != UINT32_C(0x7263694d) /* Micr */
565 || uEcx != UINT32_C(0x666f736f) /* osof */
566 || uEdx != UINT32_C(0x76482074) /* t Hv */)
567 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE,
568 "Not Hyper-V CPUID signature: %#x %#x %#x (expected %#x %#x %#x)",
569 uEbx, uEcx, uEdx, UINT32_C(0x7263694d), UINT32_C(0x666f736f), UINT32_C(0x76482074));
570 if (cMaxHyperLeaf < UINT32_C(0x40000005))
571 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Too narrow hypervisor CPUID range (%#x)", cMaxHyperLeaf);
572
573 /** @todo would be great if we could recognize a root partition from the
574 * CPUID info, but I currently don't dare do that. */
575
576 /*
577 * Now try load the DLLs and resolve the APIs.
578 */
579 static const char * const s_apszDllNames[2] = { "WinHvPlatform.dll", "vid.dll" };
580 RTLDRMOD ahMods[2] = { NIL_RTLDRMOD, NIL_RTLDRMOD };
581 int rc = VINF_SUCCESS;
582 for (unsigned i = 0; i < RT_ELEMENTS(s_apszDllNames); i++)
583 {
584 int rc2 = RTLdrLoadSystem(s_apszDllNames[i], true /*fNoUnload*/, &ahMods[i]);
585 if (RT_FAILURE(rc2))
586 {
587 if (!RTErrInfoIsSet(pErrInfo))
588 RTErrInfoSetF(pErrInfo, rc2, "Failed to load API DLL: %s: %Rrc", s_apszDllNames[i], rc2);
589 else
590 RTErrInfoAddF(pErrInfo, rc2, "; %s: %Rrc", s_apszDllNames[i], rc2);
591 ahMods[i] = NIL_RTLDRMOD;
592 rc = VERR_NEM_INIT_FAILED;
593 }
594 }
595 if (RT_SUCCESS(rc))
596 rc = nemR3WinInitVidIntercepts(ahMods[1], pErrInfo);
597 if (RT_SUCCESS(rc))
598 {
599 for (unsigned i = 0; i < RT_ELEMENTS(g_aImports); i++)
600 {
601 int rc2 = RTLdrGetSymbol(ahMods[g_aImports[i].idxDll], g_aImports[i].pszName, (void **)g_aImports[i].ppfn);
602 if (RT_SUCCESS(rc2))
603 {
604 if (g_aImports[i].fOptional)
605 LogRel(("NEM: info: Found optional import %s!%s.\n",
606 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName));
607 }
608 else
609 {
610 *g_aImports[i].ppfn = NULL;
611
612 LogRel(("NEM: %s: Failed to import %s!%s: %Rrc\n",
613 g_aImports[i].fOptional ? "info" : fForced ? "fatal" : "error",
614 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName, rc2));
615 if (!g_aImports[i].fOptional)
616 {
617 if (RTErrInfoIsSet(pErrInfo))
618 RTErrInfoAddF(pErrInfo, rc2, ", %s!%s",
619 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName);
620 else
621 rc = RTErrInfoSetF(pErrInfo, rc2, "Failed to import: %s!%s",
622 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName);
623 Assert(RT_FAILURE(rc));
624 }
625 }
626 }
627 if (RT_SUCCESS(rc))
628 {
629 Assert(!RTErrInfoIsSet(pErrInfo));
630 }
631 }
632
633 for (unsigned i = 0; i < RT_ELEMENTS(ahMods); i++)
634 RTLdrClose(ahMods[i]);
635 return rc;
636}
637
638
639/**
640 * Wrapper for different WHvGetCapability signatures.
641 */
642DECLINLINE(HRESULT) WHvGetCapabilityWrapper(WHV_CAPABILITY_CODE enmCap, WHV_CAPABILITY *pOutput, uint32_t cbOutput)
643{
644 return g_pfnWHvGetCapability(enmCap, pOutput, cbOutput, NULL);
645}
646
647
648/**
649 * Worker for nemR3NativeInit that gets the hypervisor capabilities.
650 *
651 * @returns VBox status code.
652 * @param pVM The cross context VM structure.
653 * @param pErrInfo Where to always return error info.
654 */
655static int nemR3WinInitCheckCapabilities(PVM pVM, PRTERRINFO pErrInfo)
656{
657#define NEM_LOG_REL_CAP_EX(a_szField, a_szFmt, a_Value) LogRel(("NEM: %-38s= " a_szFmt "\n", a_szField, a_Value))
658#define NEM_LOG_REL_CAP_SUB_EX(a_szField, a_szFmt, a_Value) LogRel(("NEM: %36s: " a_szFmt "\n", a_szField, a_Value))
659#define NEM_LOG_REL_CAP_SUB(a_szField, a_Value) NEM_LOG_REL_CAP_SUB_EX(a_szField, "%d", a_Value)
660
661 /*
662 * Is the hypervisor present with the desired capability?
663 *
664 * In build 17083 this translates into:
665 * - CPUID[0x00000001].HVP is set
666 * - CPUID[0x40000000] == "Microsoft Hv"
667 * - CPUID[0x40000001].eax == "Hv#1"
668 * - CPUID[0x40000003].ebx[12] is set.
669 * - VidGetExoPartitionProperty(INVALID_HANDLE_VALUE, 0x60000, &Ignored) returns
670 * a non-zero value.
671 */
672 /**
673 * @todo Someone at Microsoft please explain weird API design:
674 * 1. Pointless CapabilityCode duplication int the output;
675 * 2. No output size.
676 */
677 WHV_CAPABILITY Caps;
678 RT_ZERO(Caps);
679 SetLastError(0);
680 HRESULT hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeHypervisorPresent, &Caps, sizeof(Caps));
681 DWORD rcWin = GetLastError();
682 if (FAILED(hrc))
683 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
684 "WHvGetCapability/WHvCapabilityCodeHypervisorPresent failed: %Rhrc (Last=%#x/%u)",
685 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
686 if (!Caps.HypervisorPresent)
687 {
688 if (!RTPathExists(RTPATH_NT_PASSTHRU_PREFIX "Device\\VidExo"))
689 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE,
690 "WHvCapabilityCodeHypervisorPresent is FALSE! Make sure you have enabled the 'Windows Hypervisor Platform' feature.");
691 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "WHvCapabilityCodeHypervisorPresent is FALSE! (%u)", rcWin);
692 }
693 LogRel(("NEM: WHvCapabilityCodeHypervisorPresent is TRUE, so this might work...\n"));
694
695
696 /*
697 * Check what extended VM exits are supported.
698 */
699 RT_ZERO(Caps);
700 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeExtendedVmExits, &Caps, sizeof(Caps));
701 if (FAILED(hrc))
702 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
703 "WHvGetCapability/WHvCapabilityCodeExtendedVmExits failed: %Rhrc (Last=%#x/%u)",
704 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
705 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeExtendedVmExits", "%'#018RX64", Caps.ExtendedVmExits.AsUINT64);
706#define NEM_LOG_REL_CAP_VM_EXIT(a_Field) NEM_LOG_REL_CAP_SUB(#a_Field, Caps.ExtendedVmExits.a_Field)
707 NEM_LOG_REL_CAP_VM_EXIT(X64CpuidExit);
708 NEM_LOG_REL_CAP_VM_EXIT(X64MsrExit);
709 NEM_LOG_REL_CAP_VM_EXIT(ExceptionExit);
710#if WDK_NTDDI_VERSION >= MY_NTDDI_WIN10_19040
711 NEM_LOG_REL_CAP_VM_EXIT(X64RdtscExit);
712 NEM_LOG_REL_CAP_VM_EXIT(X64ApicSmiExitTrap);
713 NEM_LOG_REL_CAP_VM_EXIT(HypercallExit);
714 NEM_LOG_REL_CAP_VM_EXIT(X64ApicInitSipiExitTrap);
715#endif
716#if WDK_NTDDI_VERSION >= MY_NTDDI_WIN11_22000 /** @todo Could some of these may have been added earlier... */
717 NEM_LOG_REL_CAP_VM_EXIT(X64ApicWriteLint0ExitTrap);
718 NEM_LOG_REL_CAP_VM_EXIT(X64ApicWriteLint1ExitTrap);
719 NEM_LOG_REL_CAP_VM_EXIT(X64ApicWriteSvrExitTrap);
720 NEM_LOG_REL_CAP_VM_EXIT(UnknownSynicConnection);
721 NEM_LOG_REL_CAP_VM_EXIT(RetargetUnknownVpciDevice);
722 NEM_LOG_REL_CAP_VM_EXIT(X64ApicWriteLdrExitTrap);
723 NEM_LOG_REL_CAP_VM_EXIT(X64ApicWriteDfrExitTrap);
724 NEM_LOG_REL_CAP_VM_EXIT(GpaAccessFaultExit);
725#endif
726#undef NEM_LOG_REL_CAP_VM_EXIT
727 uint64_t const fKnownVmExits = RT_BIT_64(15) - 1U;
728 if (Caps.ExtendedVmExits.AsUINT64 & ~fKnownVmExits)
729 NEM_LOG_REL_CAP_SUB_EX("Unknown VM exit defs", "%#RX64", Caps.ExtendedVmExits.AsUINT64 & ~fKnownVmExits);
730 pVM->nem.s.fExtendedMsrExit = RT_BOOL(Caps.ExtendedVmExits.X64MsrExit);
731 pVM->nem.s.fExtendedCpuIdExit = RT_BOOL(Caps.ExtendedVmExits.X64CpuidExit);
732 pVM->nem.s.fExtendedXcptExit = RT_BOOL(Caps.ExtendedVmExits.ExceptionExit);
733 /** @todo RECHECK: WHV_EXTENDED_VM_EXITS typedef. */
734
735 /*
736 * Check features in case they end up defining any.
737 */
738 RT_ZERO(Caps);
739 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeFeatures, &Caps, sizeof(Caps));
740 if (FAILED(hrc))
741 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
742 "WHvGetCapability/WHvCapabilityCodeFeatures failed: %Rhrc (Last=%#x/%u)",
743 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
744 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeFeatures", "%'#018RX64", Caps.Features.AsUINT64);
745#define NEM_LOG_REL_CAP_FEATURE(a_Field) NEM_LOG_REL_CAP_SUB(#a_Field, Caps.Features.a_Field)
746#if WDK_NTDDI_VERSION >= MY_NTDDI_WIN10_18362
747 NEM_LOG_REL_CAP_FEATURE(PartialUnmap);
748 NEM_LOG_REL_CAP_FEATURE(LocalApicEmulation);
749 NEM_LOG_REL_CAP_FEATURE(Xsave);
750 NEM_LOG_REL_CAP_FEATURE(DirtyPageTracking);
751 NEM_LOG_REL_CAP_FEATURE(SpeculationControl);
752#endif
753#if WDK_NTDDI_VERSION >= MY_NTDDI_WIN10_19040
754 NEM_LOG_REL_CAP_FEATURE(ApicRemoteRead);
755 NEM_LOG_REL_CAP_FEATURE(IdleSuspend);
756#endif
757#if WDK_NTDDI_VERSION >= MY_NTDDI_WIN11_22000 /** @todo Could some of these may have been added earlier... */
758 NEM_LOG_REL_CAP_FEATURE(VirtualPciDeviceSupport);
759 NEM_LOG_REL_CAP_FEATURE(IommuSupport);
760 NEM_LOG_REL_CAP_FEATURE(VpHotAddRemove);
761#endif
762#undef NEM_LOG_REL_CAP_FEATURE
763 const uint64_t fKnownFeatures = RT_BIT_64(10) - 1U;
764 if (Caps.Features.AsUINT64 & ~fKnownFeatures)
765 NEM_LOG_REL_CAP_SUB_EX("Unknown features", "%#RX64", Caps.ExtendedVmExits.AsUINT64 & ~fKnownVmExits);
766 pVM->nem.s.fSpeculationControl = RT_BOOL(Caps.Features.SpeculationControl);
767 pVM->nem.s.fLocalApicEmulation = RT_BOOL(Caps.Features.LocalApicEmulation);
768 /** @todo RECHECK: WHV_CAPABILITY_FEATURES typedef. */
769
770 /*
771 * Check supported exception exit bitmap bits.
772 * We don't currently require this, so we just log failure.
773 */
774 RT_ZERO(Caps);
775 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeExceptionExitBitmap, &Caps, sizeof(Caps));
776 if (SUCCEEDED(hrc))
777 LogRel(("NEM: Supported exception exit bitmap: %#RX64\n", Caps.ExceptionExitBitmap));
778 else
779 LogRel(("NEM: Warning! WHvGetCapability/WHvCapabilityCodeExceptionExitBitmap failed: %Rhrc (Last=%#x/%u)",
780 hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
781
782 /*
783 * Check that the CPU vendor is supported.
784 */
785 RT_ZERO(Caps);
786 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorVendor, &Caps, sizeof(Caps));
787 if (FAILED(hrc))
788 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
789 "WHvGetCapability/WHvCapabilityCodeProcessorVendor failed: %Rhrc (Last=%#x/%u)",
790 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
791 switch (Caps.ProcessorVendor)
792 {
793 /** @todo RECHECK: WHV_PROCESSOR_VENDOR typedef. */
794 case WHvProcessorVendorIntel:
795 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d - Intel", Caps.ProcessorVendor);
796 pVM->nem.s.enmCpuVendor = CPUMCPUVENDOR_INTEL;
797 break;
798 case WHvProcessorVendorAmd:
799 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d - AMD", Caps.ProcessorVendor);
800 pVM->nem.s.enmCpuVendor = CPUMCPUVENDOR_AMD;
801 break;
802 default:
803 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d", Caps.ProcessorVendor);
804 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Unknown processor vendor: %d", Caps.ProcessorVendor);
805 }
806
807 /*
808 * CPU features, guessing these are virtual CPU features?
809 */
810 RT_ZERO(Caps);
811 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorFeatures, &Caps, sizeof(Caps));
812 if (FAILED(hrc))
813 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
814 "WHvGetCapability/WHvCapabilityCodeProcessorFeatures failed: %Rhrc (Last=%#x/%u)",
815 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
816 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorFeatures", "%'#018RX64", Caps.ProcessorFeatures.AsUINT64);
817#define NEM_LOG_REL_CPU_FEATURE(a_Field) NEM_LOG_REL_CAP_SUB(#a_Field, Caps.ProcessorFeatures.a_Field)
818 NEM_LOG_REL_CPU_FEATURE(Sse3Support);
819 NEM_LOG_REL_CPU_FEATURE(LahfSahfSupport);
820 NEM_LOG_REL_CPU_FEATURE(Ssse3Support);
821 NEM_LOG_REL_CPU_FEATURE(Sse4_1Support);
822 NEM_LOG_REL_CPU_FEATURE(Sse4_2Support);
823 NEM_LOG_REL_CPU_FEATURE(Sse4aSupport);
824 NEM_LOG_REL_CPU_FEATURE(XopSupport);
825 NEM_LOG_REL_CPU_FEATURE(PopCntSupport);
826 NEM_LOG_REL_CPU_FEATURE(Cmpxchg16bSupport);
827 NEM_LOG_REL_CPU_FEATURE(Altmovcr8Support);
828 NEM_LOG_REL_CPU_FEATURE(LzcntSupport);
829 NEM_LOG_REL_CPU_FEATURE(MisAlignSseSupport);
830 NEM_LOG_REL_CPU_FEATURE(MmxExtSupport);
831 NEM_LOG_REL_CPU_FEATURE(Amd3DNowSupport);
832 NEM_LOG_REL_CPU_FEATURE(ExtendedAmd3DNowSupport);
833 NEM_LOG_REL_CPU_FEATURE(Page1GbSupport);
834 NEM_LOG_REL_CPU_FEATURE(AesSupport);
835 NEM_LOG_REL_CPU_FEATURE(PclmulqdqSupport);
836 NEM_LOG_REL_CPU_FEATURE(PcidSupport);
837 NEM_LOG_REL_CPU_FEATURE(Fma4Support);
838 NEM_LOG_REL_CPU_FEATURE(F16CSupport);
839 NEM_LOG_REL_CPU_FEATURE(RdRandSupport);
840 NEM_LOG_REL_CPU_FEATURE(RdWrFsGsSupport);
841 NEM_LOG_REL_CPU_FEATURE(SmepSupport);
842 NEM_LOG_REL_CPU_FEATURE(EnhancedFastStringSupport);
843 NEM_LOG_REL_CPU_FEATURE(Bmi1Support);
844 NEM_LOG_REL_CPU_FEATURE(Bmi2Support);
845 NEM_LOG_REL_CPU_FEATURE(Reserved1);
846 NEM_LOG_REL_CPU_FEATURE(MovbeSupport);
847 NEM_LOG_REL_CPU_FEATURE(Npiep1Support);
848 NEM_LOG_REL_CPU_FEATURE(DepX87FPUSaveSupport);
849 NEM_LOG_REL_CPU_FEATURE(RdSeedSupport);
850 NEM_LOG_REL_CPU_FEATURE(AdxSupport);
851 NEM_LOG_REL_CPU_FEATURE(IntelPrefetchSupport);
852 NEM_LOG_REL_CPU_FEATURE(SmapSupport);
853 NEM_LOG_REL_CPU_FEATURE(HleSupport);
854 NEM_LOG_REL_CPU_FEATURE(RtmSupport);
855 NEM_LOG_REL_CPU_FEATURE(RdtscpSupport);
856 NEM_LOG_REL_CPU_FEATURE(ClflushoptSupport);
857 NEM_LOG_REL_CPU_FEATURE(ClwbSupport);
858 NEM_LOG_REL_CPU_FEATURE(ShaSupport);
859 NEM_LOG_REL_CPU_FEATURE(X87PointersSavedSupport);
860#if WDK_NTDDI_VERSION >= MY_NTDDI_WIN10_17134 /** @todo maybe some of these were added earlier... */
861 NEM_LOG_REL_CPU_FEATURE(InvpcidSupport);
862 NEM_LOG_REL_CPU_FEATURE(IbrsSupport);
863 NEM_LOG_REL_CPU_FEATURE(StibpSupport);
864 NEM_LOG_REL_CPU_FEATURE(IbpbSupport);
865 NEM_LOG_REL_CPU_FEATURE(Reserved2);
866 NEM_LOG_REL_CPU_FEATURE(SsbdSupport);
867 NEM_LOG_REL_CPU_FEATURE(FastShortRepMovSupport);
868 NEM_LOG_REL_CPU_FEATURE(Reserved3);
869 NEM_LOG_REL_CPU_FEATURE(RdclNo);
870 NEM_LOG_REL_CPU_FEATURE(IbrsAllSupport);
871 NEM_LOG_REL_CPU_FEATURE(Reserved4);
872 NEM_LOG_REL_CPU_FEATURE(SsbNo);
873 NEM_LOG_REL_CPU_FEATURE(RsbANo);
874#endif
875#if WDK_NTDDI_VERSION >= MY_NTDDI_WIN10_19040
876 NEM_LOG_REL_CPU_FEATURE(Reserved5);
877 NEM_LOG_REL_CPU_FEATURE(RdPidSupport);
878 NEM_LOG_REL_CPU_FEATURE(UmipSupport);
879 NEM_LOG_REL_CPU_FEATURE(MdsNoSupport);
880 NEM_LOG_REL_CPU_FEATURE(MdClearSupport);
881#endif
882#if WDK_NTDDI_VERSION >= MY_NTDDI_WIN11_22000
883 NEM_LOG_REL_CPU_FEATURE(TaaNoSupport);
884 NEM_LOG_REL_CPU_FEATURE(TsxCtrlSupport);
885 NEM_LOG_REL_CPU_FEATURE(Reserved6);
886#endif
887#undef NEM_LOG_REL_CPU_FEATURE
888 pVM->nem.s.uCpuFeatures.u64 = Caps.ProcessorFeatures.AsUINT64;
889 /** @todo RECHECK: WHV_PROCESSOR_FEATURES typedef. */
890
891 /*
892 * The cache line flush size.
893 */
894 RT_ZERO(Caps);
895 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorClFlushSize, &Caps, sizeof(Caps));
896 if (FAILED(hrc))
897 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
898 "WHvGetCapability/WHvCapabilityCodeProcessorClFlushSize failed: %Rhrc (Last=%#x/%u)",
899 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
900 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorClFlushSize", "2^%u", Caps.ProcessorClFlushSize);
901 if (Caps.ProcessorClFlushSize < 8 && Caps.ProcessorClFlushSize > 9)
902 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Unsupported cache line flush size: %u", Caps.ProcessorClFlushSize);
903 pVM->nem.s.cCacheLineFlushShift = Caps.ProcessorClFlushSize;
904
905 /*
906 * See if they've added more properties that we're not aware of.
907 */
908 /** @todo RECHECK: WHV_CAPABILITY_CODE typedef. */
909 if (!IsDebuggerPresent()) /* Too noisy when in debugger, so skip. */
910 {
911 static const struct
912 {
913 uint32_t iMin, iMax; } s_aUnknowns[] =
914 {
915 { 0x0004, 0x000f },
916 { 0x1003, 0x100f },
917 { 0x2000, 0x200f },
918 { 0x3000, 0x300f },
919 { 0x4000, 0x400f },
920 };
921 for (uint32_t j = 0; j < RT_ELEMENTS(s_aUnknowns); j++)
922 for (uint32_t i = s_aUnknowns[j].iMin; i <= s_aUnknowns[j].iMax; i++)
923 {
924 RT_ZERO(Caps);
925 hrc = WHvGetCapabilityWrapper((WHV_CAPABILITY_CODE)i, &Caps, sizeof(Caps));
926 if (SUCCEEDED(hrc))
927 LogRel(("NEM: Warning! Unknown capability %#x returning: %.*Rhxs\n", i, sizeof(Caps), &Caps));
928 }
929 }
930
931 /*
932 * For proper operation, we require CPUID exits.
933 */
934 if (!pVM->nem.s.fExtendedCpuIdExit)
935 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended CPUID exit support");
936 if (!pVM->nem.s.fExtendedMsrExit)
937 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended MSR exit support");
938 if (!pVM->nem.s.fExtendedXcptExit)
939 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended exception exit support");
940
941#undef NEM_LOG_REL_CAP_EX
942#undef NEM_LOG_REL_CAP_SUB_EX
943#undef NEM_LOG_REL_CAP_SUB
944 return VINF_SUCCESS;
945}
946
947#ifdef LOG_ENABLED
948
949/**
950 * Used to fill in g_IoCtlGetHvPartitionId.
951 */
952static NTSTATUS WINAPI
953nemR3WinIoctlDetector_GetHvPartitionId(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
954 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
955 PVOID pvOutput, ULONG cbOutput)
956{
957 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
958 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
959 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
960 AssertLogRelMsgReturn(cbInput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
961 RT_NOREF(pvInput);
962
963 AssertLogRelMsgReturn(RT_VALID_PTR(pvOutput), ("pvOutput=%p\n", pvOutput), STATUS_INVALID_PARAMETER_9);
964 AssertLogRelMsgReturn(cbOutput == sizeof(HV_PARTITION_ID), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
965 *(HV_PARTITION_ID *)pvOutput = NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID;
966
967 g_IoCtlGetHvPartitionId.cbInput = cbInput;
968 g_IoCtlGetHvPartitionId.cbOutput = cbOutput;
969 g_IoCtlGetHvPartitionId.uFunction = uFunction;
970
971 return STATUS_SUCCESS;
972}
973
974
975/**
976 * Used to fill in g_IoCtlGetHvPartitionId.
977 */
978static NTSTATUS WINAPI
979nemR3WinIoctlDetector_GetPartitionProperty(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
980 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
981 PVOID pvOutput, ULONG cbOutput)
982{
983 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
984 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
985 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
986 AssertLogRelMsgReturn(cbInput == sizeof(VID_PARTITION_PROPERTY_CODE), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
987 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
988 AssertLogRelMsgReturn(*(VID_PARTITION_PROPERTY_CODE *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_CODE,
989 ("*pvInput=%#x, expected %#x\n", *(HV_PARTITION_PROPERTY_CODE *)pvInput,
990 NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_CODE), STATUS_INVALID_PARAMETER_9);
991 AssertLogRelMsgReturn(RT_VALID_PTR(pvOutput), ("pvOutput=%p\n", pvOutput), STATUS_INVALID_PARAMETER_9);
992 AssertLogRelMsgReturn(cbOutput == sizeof(HV_PARTITION_PROPERTY), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
993 *(HV_PARTITION_PROPERTY *)pvOutput = NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_VALUE;
994
995 g_IoCtlGetPartitionProperty.cbInput = cbInput;
996 g_IoCtlGetPartitionProperty.cbOutput = cbOutput;
997 g_IoCtlGetPartitionProperty.uFunction = uFunction;
998
999 return STATUS_SUCCESS;
1000}
1001
1002
1003/**
1004 * Used to fill in g_IoCtlStartVirtualProcessor.
1005 */
1006static NTSTATUS WINAPI
1007nemR3WinIoctlDetector_StartVirtualProcessor(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
1008 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
1009 PVOID pvOutput, ULONG cbOutput)
1010{
1011 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
1012 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
1013 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
1014 AssertLogRelMsgReturn(cbInput == sizeof(HV_VP_INDEX), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
1015 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
1016 AssertLogRelMsgReturn(*(HV_VP_INDEX *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
1017 ("*piCpu=%u\n", *(HV_VP_INDEX *)pvInput), STATUS_INVALID_PARAMETER_9);
1018 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
1019 RT_NOREF(pvOutput);
1020
1021 g_IoCtlStartVirtualProcessor.cbInput = cbInput;
1022 g_IoCtlStartVirtualProcessor.cbOutput = cbOutput;
1023 g_IoCtlStartVirtualProcessor.uFunction = uFunction;
1024
1025 return STATUS_SUCCESS;
1026}
1027
1028
1029/**
1030 * Used to fill in g_IoCtlStartVirtualProcessor.
1031 */
1032static NTSTATUS WINAPI
1033nemR3WinIoctlDetector_StopVirtualProcessor(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
1034 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
1035 PVOID pvOutput, ULONG cbOutput)
1036{
1037 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
1038 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
1039 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
1040 AssertLogRelMsgReturn(cbInput == sizeof(HV_VP_INDEX), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
1041 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
1042 AssertLogRelMsgReturn(*(HV_VP_INDEX *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
1043 ("*piCpu=%u\n", *(HV_VP_INDEX *)pvInput), STATUS_INVALID_PARAMETER_9);
1044 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
1045 RT_NOREF(pvOutput);
1046
1047 g_IoCtlStopVirtualProcessor.cbInput = cbInput;
1048 g_IoCtlStopVirtualProcessor.cbOutput = cbOutput;
1049 g_IoCtlStopVirtualProcessor.uFunction = uFunction;
1050
1051 return STATUS_SUCCESS;
1052}
1053
1054
1055/**
1056 * Used to fill in g_IoCtlMessageSlotHandleAndGetNext
1057 */
1058static NTSTATUS WINAPI
1059nemR3WinIoctlDetector_MessageSlotHandleAndGetNext(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
1060 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
1061 PVOID pvOutput, ULONG cbOutput)
1062{
1063 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
1064 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
1065 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
1066
1067 if (g_uBuildNo >= 17758)
1068 {
1069 /* No timeout since about build 17758, it's now always an infinite wait. So, a somewhat compatible change. */
1070 AssertLogRelMsgReturn(cbInput == RT_UOFFSETOF(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT, cMillies),
1071 ("cbInput=%#x\n", cbInput),
1072 STATUS_INVALID_PARAMETER_8);
1073 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
1074 PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT pVidIn = (PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT)pvInput;
1075 AssertLogRelMsgReturn( pVidIn->iCpu == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX
1076 && pVidIn->fFlags == VID_MSHAGN_F_HANDLE_MESSAGE,
1077 ("iCpu=%u fFlags=%#x cMillies=%#x\n", pVidIn->iCpu, pVidIn->fFlags, pVidIn->cMillies),
1078 STATUS_INVALID_PARAMETER_9);
1079 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
1080 }
1081 else
1082 {
1083 AssertLogRelMsgReturn(cbInput == sizeof(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT), ("cbInput=%#x\n", cbInput),
1084 STATUS_INVALID_PARAMETER_8);
1085 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
1086 PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT pVidIn = (PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT)pvInput;
1087 AssertLogRelMsgReturn( pVidIn->iCpu == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX
1088 && pVidIn->fFlags == VID_MSHAGN_F_HANDLE_MESSAGE
1089 && pVidIn->cMillies == NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT,
1090 ("iCpu=%u fFlags=%#x cMillies=%#x\n", pVidIn->iCpu, pVidIn->fFlags, pVidIn->cMillies),
1091 STATUS_INVALID_PARAMETER_9);
1092 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
1093 RT_NOREF(pvOutput);
1094 }
1095
1096 g_IoCtlMessageSlotHandleAndGetNext.cbInput = cbInput;
1097 g_IoCtlMessageSlotHandleAndGetNext.cbOutput = cbOutput;
1098 g_IoCtlMessageSlotHandleAndGetNext.uFunction = uFunction;
1099
1100 return STATUS_SUCCESS;
1101}
1102
1103/**
1104 * Used to fill in what g_pIoCtlDetectForLogging points to.
1105 */
1106static NTSTATUS WINAPI nemR3WinIoctlDetector_ForLogging(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
1107 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
1108 PVOID pvOutput, ULONG cbOutput)
1109{
1110 RT_NOREF(hFile, hEvt, pfnApcCallback, pvApcCtx, pIos, pvInput, pvOutput);
1111
1112 g_pIoCtlDetectForLogging->cbInput = cbInput;
1113 g_pIoCtlDetectForLogging->cbOutput = cbOutput;
1114 g_pIoCtlDetectForLogging->uFunction = uFunction;
1115
1116 return STATUS_SUCCESS;
1117}
1118
1119#endif /* LOG_ENABLED */
1120
1121/**
1122 * Worker for nemR3NativeInit that detect I/O control function numbers for VID.
1123 *
1124 * We use the function numbers directly in ring-0 and to name functions when
1125 * logging NtDeviceIoControlFile calls.
1126 *
1127 * @note We could alternatively do this by disassembling the respective
1128 * functions, but hooking NtDeviceIoControlFile and making fake calls
1129 * more easily provides the desired information.
1130 *
1131 * @returns VBox status code.
1132 * @param pVM The cross context VM structure. Will set I/O
1133 * control info members.
1134 * @param pErrInfo Where to always return error info.
1135 */
1136static int nemR3WinInitDiscoverIoControlProperties(PVM pVM, PRTERRINFO pErrInfo)
1137{
1138 RT_NOREF(pVM, pErrInfo);
1139
1140 /*
1141 * Probe the I/O control information for select VID APIs so we can use
1142 * them directly from ring-0 and better log them.
1143 *
1144 */
1145#ifdef LOG_ENABLED
1146 decltype(NtDeviceIoControlFile) * const pfnOrg = *g_ppfnVidNtDeviceIoControlFile;
1147
1148 /* VidGetHvPartitionId - must work due to our memory management. */
1149 BOOL fRet;
1150 if (g_pfnVidGetHvPartitionId)
1151 {
1152 HV_PARTITION_ID idHvPartition = HV_PARTITION_ID_INVALID;
1153 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_GetHvPartitionId;
1154 fRet = g_pfnVidGetHvPartitionId(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, &idHvPartition);
1155 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1156 AssertReturn(fRet && idHvPartition == NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID && g_IoCtlGetHvPartitionId.uFunction != 0,
1157 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
1158 "Problem figuring out VidGetHvPartitionId: fRet=%u idHvPartition=%#x dwErr=%u",
1159 fRet, idHvPartition, GetLastError()) );
1160 LogRel(("NEM: VidGetHvPartitionId -> fun:%#x in:%#x out:%#x\n",
1161 g_IoCtlGetHvPartitionId.uFunction, g_IoCtlGetHvPartitionId.cbInput, g_IoCtlGetHvPartitionId.cbOutput));
1162 }
1163
1164 /* VidGetPartitionProperty - must work as it's fallback for VidGetHvPartitionId. */
1165 if (g_ppfnVidNtDeviceIoControlFile)
1166 {
1167 HV_PARTITION_PROPERTY uPropValue = ~NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_VALUE;
1168 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_GetPartitionProperty;
1169 fRet = g_pfnVidGetPartitionProperty(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_CODE,
1170 &uPropValue);
1171 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1172 AssertReturn( fRet
1173 && uPropValue == NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_VALUE
1174 && g_IoCtlGetHvPartitionId.uFunction != 0,
1175 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
1176 "Problem figuring out VidGetPartitionProperty: fRet=%u uPropValue=%#x dwErr=%u",
1177 fRet, uPropValue, GetLastError()) );
1178 LogRel(("NEM: VidGetPartitionProperty -> fun:%#x in:%#x out:%#x\n",
1179 g_IoCtlGetPartitionProperty.uFunction, g_IoCtlGetPartitionProperty.cbInput, g_IoCtlGetPartitionProperty.cbOutput));
1180 }
1181
1182 /* VidStartVirtualProcessor */
1183 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_StartVirtualProcessor;
1184 fRet = g_pfnVidStartVirtualProcessor(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX);
1185 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1186 AssertStmt(fRet && g_IoCtlStartVirtualProcessor.uFunction != 0,
1187 RTERRINFO_LOG_REL_SET_F(pErrInfo, VERR_NEM_RING3_ONLY,
1188 "Problem figuring out VidStartVirtualProcessor: fRet=%u dwErr=%u", fRet, GetLastError()) );
1189 LogRel(("NEM: VidStartVirtualProcessor -> fun:%#x in:%#x out:%#x\n", g_IoCtlStartVirtualProcessor.uFunction,
1190 g_IoCtlStartVirtualProcessor.cbInput, g_IoCtlStartVirtualProcessor.cbOutput));
1191
1192 /* VidStopVirtualProcessor */
1193 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_StopVirtualProcessor;
1194 fRet = g_pfnVidStopVirtualProcessor(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX);
1195 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1196 AssertStmt(fRet && g_IoCtlStopVirtualProcessor.uFunction != 0,
1197 RTERRINFO_LOG_REL_SET_F(pErrInfo, VERR_NEM_RING3_ONLY,
1198 "Problem figuring out VidStopVirtualProcessor: fRet=%u dwErr=%u", fRet, GetLastError()) );
1199 LogRel(("NEM: VidStopVirtualProcessor -> fun:%#x in:%#x out:%#x\n", g_IoCtlStopVirtualProcessor.uFunction,
1200 g_IoCtlStopVirtualProcessor.cbInput, g_IoCtlStopVirtualProcessor.cbOutput));
1201
1202 /* VidMessageSlotHandleAndGetNext */
1203 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_MessageSlotHandleAndGetNext;
1204 fRet = g_pfnVidMessageSlotHandleAndGetNext(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE,
1205 NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX, VID_MSHAGN_F_HANDLE_MESSAGE,
1206 NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT);
1207 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1208 AssertStmt(fRet && g_IoCtlMessageSlotHandleAndGetNext.uFunction != 0,
1209 RTERRINFO_LOG_REL_SET_F(pErrInfo, VERR_NEM_RING3_ONLY,
1210 "Problem figuring out VidMessageSlotHandleAndGetNext: fRet=%u dwErr=%u",
1211 fRet, GetLastError()) );
1212 LogRel(("NEM: VidMessageSlotHandleAndGetNext -> fun:%#x in:%#x out:%#x\n",
1213 g_IoCtlMessageSlotHandleAndGetNext.uFunction, g_IoCtlMessageSlotHandleAndGetNext.cbInput,
1214 g_IoCtlMessageSlotHandleAndGetNext.cbOutput));
1215
1216 /* The following are only for logging: */
1217 union
1218 {
1219 VID_MAPPED_MESSAGE_SLOT MapSlot;
1220 HV_REGISTER_NAME Name;
1221 HV_REGISTER_VALUE Value;
1222 } uBuf;
1223
1224 /* VidMessageSlotMap */
1225 g_pIoCtlDetectForLogging = &g_IoCtlMessageSlotMap;
1226 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging;
1227 fRet = g_pfnVidMessageSlotMap(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, &uBuf.MapSlot, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX);
1228 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1229 Assert(fRet);
1230 LogRel(("NEM: VidMessageSlotMap -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction,
1231 g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput));
1232
1233 /* VidGetVirtualProcessorState */
1234 uBuf.Name = HvRegisterExplicitSuspend;
1235 g_pIoCtlDetectForLogging = &g_IoCtlGetVirtualProcessorState;
1236 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging;
1237 fRet = g_pfnVidGetVirtualProcessorState(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
1238 &uBuf.Name, 1, &uBuf.Value);
1239 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1240 Assert(fRet);
1241 LogRel(("NEM: VidGetVirtualProcessorState -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction,
1242 g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput));
1243
1244 /* VidSetVirtualProcessorState */
1245 uBuf.Name = HvRegisterExplicitSuspend;
1246 g_pIoCtlDetectForLogging = &g_IoCtlSetVirtualProcessorState;
1247 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging;
1248 fRet = g_pfnVidSetVirtualProcessorState(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
1249 &uBuf.Name, 1, &uBuf.Value);
1250 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1251 Assert(fRet);
1252 LogRel(("NEM: VidSetVirtualProcessorState -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction,
1253 g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput));
1254
1255 g_pIoCtlDetectForLogging = NULL;
1256#endif /* LOG_ENABLED */
1257
1258 return VINF_SUCCESS;
1259}
1260
1261
1262/**
1263 * Creates and sets up a Hyper-V (exo) partition.
1264 *
1265 * @returns VBox status code.
1266 * @param pVM The cross context VM structure.
1267 * @param pErrInfo Where to always return error info.
1268 */
1269static int nemR3WinInitCreatePartition(PVM pVM, PRTERRINFO pErrInfo)
1270{
1271 AssertReturn(!pVM->nem.s.hPartition, RTErrInfoSet(pErrInfo, VERR_WRONG_ORDER, "Wrong initalization order"));
1272 AssertReturn(!pVM->nem.s.hPartitionDevice, RTErrInfoSet(pErrInfo, VERR_WRONG_ORDER, "Wrong initalization order"));
1273
1274 /*
1275 * Create the partition.
1276 */
1277 WHV_PARTITION_HANDLE hPartition;
1278 HRESULT hrc = WHvCreatePartition(&hPartition);
1279 if (FAILED(hrc))
1280 return RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED, "WHvCreatePartition failed with %Rhrc (Last=%#x/%u)",
1281 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1282
1283 int rc = VINF_SUCCESS;
1284
1285 /*
1286 * Set partition properties, most importantly the CPU count.
1287 */
1288 /**
1289 * @todo Someone at Microsoft please explain another weird API:
1290 * - Why this API doesn't take the WHV_PARTITION_PROPERTY_CODE value as an
1291 * argument rather than as part of the struct. That is so weird if you've
1292 * used any other NT or windows API, including WHvGetCapability().
1293 * - Why use PVOID when WHV_PARTITION_PROPERTY is what's expected. We
1294 * technically only need 9 bytes for setting/getting
1295 * WHVPartitionPropertyCodeProcessorClFlushSize, but the API insists on 16. */
1296 WHV_PARTITION_PROPERTY Property;
1297 RT_ZERO(Property);
1298 Property.ProcessorCount = pVM->cCpus;
1299 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorCount, &Property, sizeof(Property));
1300 if (SUCCEEDED(hrc))
1301 {
1302 RT_ZERO(Property);
1303 Property.ExtendedVmExits.X64CpuidExit = pVM->nem.s.fExtendedCpuIdExit; /** @todo Register fixed results and restrict cpuid exits */
1304 Property.ExtendedVmExits.X64MsrExit = pVM->nem.s.fExtendedMsrExit;
1305 Property.ExtendedVmExits.ExceptionExit = pVM->nem.s.fExtendedXcptExit;
1306 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeExtendedVmExits, &Property, sizeof(Property));
1307 if (SUCCEEDED(hrc))
1308 {
1309 RT_ZERO(Property);
1310 /*
1311 * If the APIC is enabled and LocalApicEmulation is supported we'll use Hyper-V's APIC emulation
1312 * for best performance.
1313 */
1314 PCFGMNODE pCfgmApic = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices/apic");
1315 if ( pCfgmApic
1316 && pVM->nem.s.fLocalApicEmulation
1317 && 0) /** @todo Finish */
1318 {
1319 /* If setting this fails log an error but continue. */
1320 Property.LocalApicEmulationMode = WHvX64LocalApicEmulationModeXApic;
1321 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeLocalApicEmulationMode , &Property, sizeof(Property));
1322 if (FAILED(hrc))
1323 {
1324 LogRel(("NEM: Failed setting WHvPartitionPropertyCodeLocalApicEmulationMode to WHvX64LocalApicEmulationModeXApic: %Rhrc (Last=%#x/%u)",
1325 hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1326 pVM->nem.s.fLocalApicEmulation = false;
1327 }
1328 else
1329 {
1330 /* Rewrite the configuration tree to point to our APIC emulation. */
1331 PCFGMNODE pCfgmDev = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices");
1332 Assert(pCfgmDev);
1333
1334 PCFGMNODE pCfgmApicHv = NULL;
1335 rc = CFGMR3InsertNode(pCfgmDev, "apic-nem", &pCfgmApicHv);
1336 if (RT_SUCCESS(rc))
1337 {
1338 rc = CFGMR3CopyTree(pCfgmApicHv, pCfgmApic, CFGM_COPY_FLAGS_IGNORE_EXISTING_KEYS | CFGM_COPY_FLAGS_IGNORE_EXISTING_VALUES);
1339 if (RT_SUCCESS(rc))
1340 CFGMR3RemoveNode(pCfgmApic);
1341 }
1342
1343 if (RT_FAILURE(rc))
1344 rc = RTErrInfoSetF(pErrInfo, rc, "Failed replace APIC device config with Hyper-V one");
1345 }
1346 }
1347 else
1348 pVM->nem.s.fLocalApicEmulation = false;
1349
1350
1351 if (RT_SUCCESS(rc))
1352 {
1353 /*
1354 * We'll continue setup in nemR3NativeInitAfterCPUM.
1355 */
1356 pVM->nem.s.fCreatedEmts = false;
1357 pVM->nem.s.hPartition = hPartition;
1358 LogRel(("NEM: Created partition %p.\n", hPartition));
1359 return VINF_SUCCESS;
1360 }
1361 }
1362
1363 rc = RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED,
1364 "Failed setting WHvPartitionPropertyCodeExtendedVmExits to %'#RX64: %Rhrc",
1365 Property.ExtendedVmExits.AsUINT64, hrc);
1366 }
1367 else
1368 rc = RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED,
1369 "Failed setting WHvPartitionPropertyCodeProcessorCount to %u: %Rhrc (Last=%#x/%u)",
1370 pVM->cCpus, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1371 WHvDeletePartition(hPartition);
1372
1373 Assert(!pVM->nem.s.hPartitionDevice);
1374 Assert(!pVM->nem.s.hPartition);
1375 return rc;
1376}
1377
1378
1379/**
1380 * Makes sure APIC and firmware will not allow X2APIC mode.
1381 *
1382 * This is rather ugly.
1383 *
1384 * @returns VBox status code
1385 * @param pVM The cross context VM structure.
1386 */
1387static int nemR3WinDisableX2Apic(PVM pVM)
1388{
1389 /*
1390 * First make sure the 'Mode' config value of the APIC isn't set to X2APIC.
1391 * This defaults to APIC, so no need to change unless it's X2APIC.
1392 */
1393 PCFGMNODE pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices/apic/0/Config");
1394 if (!pCfg)
1395 pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices/apic-nem/0/Config");
1396 if (pCfg)
1397 {
1398 uint8_t bMode = 0;
1399 int rc = CFGMR3QueryU8(pCfg, "Mode", &bMode);
1400 AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_CFGM_VALUE_NOT_FOUND, ("%Rrc\n", rc), rc);
1401 if (RT_SUCCESS(rc) && bMode == PDMAPICMODE_X2APIC)
1402 {
1403 LogRel(("NEM: Adjusting APIC configuration from X2APIC to APIC max mode. X2APIC is not supported by the WinHvPlatform API!\n"));
1404 LogRel(("NEM: Disable Hyper-V if you need X2APIC for your guests!\n"));
1405 rc = CFGMR3RemoveValue(pCfg, "Mode");
1406 rc = CFGMR3InsertInteger(pCfg, "Mode", PDMAPICMODE_APIC);
1407 AssertLogRelRCReturn(rc, rc);
1408 }
1409 }
1410
1411 /*
1412 * Now the firmwares.
1413 * These also defaults to APIC and only needs adjusting if configured to X2APIC (2).
1414 */
1415 static const char * const s_apszFirmwareConfigs[] =
1416 {
1417 "/Devices/efi/0/Config",
1418 "/Devices/pcbios/0/Config",
1419 };
1420 for (unsigned i = 0; i < RT_ELEMENTS(s_apszFirmwareConfigs); i++)
1421 {
1422 pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices/APIC/0/Config");
1423 if (pCfg)
1424 {
1425 uint8_t bMode = 0;
1426 int rc = CFGMR3QueryU8(pCfg, "APIC", &bMode);
1427 AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_CFGM_VALUE_NOT_FOUND, ("%Rrc\n", rc), rc);
1428 if (RT_SUCCESS(rc) && bMode == 2)
1429 {
1430 LogRel(("NEM: Adjusting %s/Mode from 2 (X2APIC) to 1 (APIC).\n", s_apszFirmwareConfigs[i]));
1431 rc = CFGMR3RemoveValue(pCfg, "APIC");
1432 rc = CFGMR3InsertInteger(pCfg, "APIC", 1);
1433 AssertLogRelRCReturn(rc, rc);
1434 }
1435 }
1436 }
1437
1438 return VINF_SUCCESS;
1439}
1440
1441
1442/**
1443 * Try initialize the native API.
1444 *
1445 * This may only do part of the job, more can be done in
1446 * nemR3NativeInitAfterCPUM() and nemR3NativeInitCompleted().
1447 *
1448 * @returns VBox status code.
1449 * @param pVM The cross context VM structure.
1450 * @param fFallback Whether we're in fallback mode or use-NEM mode. In
1451 * the latter we'll fail if we cannot initialize.
1452 * @param fForced Whether the HMForced flag is set and we should
1453 * fail if we cannot initialize.
1454 */
1455int nemR3NativeInit(PVM pVM, bool fFallback, bool fForced)
1456{
1457 g_uBuildNo = RTSystemGetNtBuildNo();
1458
1459 /*
1460 * Some state init.
1461 */
1462#ifdef NEM_WIN_WITH_A20
1463 pVM->nem.s.fA20Enabled = true;
1464#endif
1465#if 0
1466 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1467 {
1468 PNEMCPU pNemCpu = &pVM->apCpusR3[idCpu]->nem.s;
1469 }
1470#endif
1471
1472 /*
1473 * Error state.
1474 * The error message will be non-empty on failure and 'rc' will be set too.
1475 */
1476 RTERRINFOSTATIC ErrInfo;
1477 PRTERRINFO pErrInfo = RTErrInfoInitStatic(&ErrInfo);
1478 int rc = nemR3WinInitProbeAndLoad(fForced, pErrInfo);
1479 if (RT_SUCCESS(rc))
1480 {
1481 /*
1482 * Check the capabilties of the hypervisor, starting with whether it's present.
1483 */
1484 rc = nemR3WinInitCheckCapabilities(pVM, pErrInfo);
1485 if (RT_SUCCESS(rc))
1486 {
1487 /*
1488 * Discover the VID I/O control function numbers we need (for interception
1489 * only these days).
1490 */
1491 rc = nemR3WinInitDiscoverIoControlProperties(pVM, pErrInfo);
1492 if (RT_SUCCESS(rc))
1493 {
1494 /*
1495 * Create and initialize a partition.
1496 */
1497 rc = nemR3WinInitCreatePartition(pVM, pErrInfo);
1498 if (RT_SUCCESS(rc))
1499 {
1500 /*
1501 * Set ourselves as the execution engine and make config adjustments.
1502 */
1503 VM_SET_MAIN_EXECUTION_ENGINE(pVM, VM_EXEC_ENGINE_NATIVE_API);
1504 Log(("NEM: Marked active!\n"));
1505 nemR3WinDisableX2Apic(pVM);
1506 nemR3DisableCpuIsaExt(pVM, "MONITOR"); /* MONITOR is not supported by Hyper-V (MWAIT is sometimes). */
1507 PGMR3EnableNemMode(pVM);
1508
1509 /*
1510 * Register release statistics
1511 */
1512 STAMR3Register(pVM, (void *)&pVM->nem.s.cMappedPages, STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1513 "/NEM/PagesCurrentlyMapped", STAMUNIT_PAGES, "Number guest pages currently mapped by the VM");
1514 STAMR3Register(pVM, (void *)&pVM->nem.s.StatMapPage, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1515 "/NEM/PagesMapCalls", STAMUNIT_PAGES, "Calls to WHvMapGpaRange/HvCallMapGpaPages");
1516 STAMR3Register(pVM, (void *)&pVM->nem.s.StatMapPageFailed, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1517 "/NEM/PagesMapFails", STAMUNIT_PAGES, "Calls to WHvMapGpaRange/HvCallMapGpaPages that failed");
1518 STAMR3Register(pVM, (void *)&pVM->nem.s.StatUnmapPage, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1519 "/NEM/PagesUnmapCalls", STAMUNIT_PAGES, "Calls to WHvUnmapGpaRange/HvCallUnmapGpaPages");
1520 STAMR3Register(pVM, (void *)&pVM->nem.s.StatUnmapPageFailed, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1521 "/NEM/PagesUnmapFails", STAMUNIT_PAGES, "Calls to WHvUnmapGpaRange/HvCallUnmapGpaPages that failed");
1522 STAMR3Register(pVM, &pVM->nem.s.StatProfMapGpaRange, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
1523 "/NEM/PagesMapGpaRange", STAMUNIT_TICKS_PER_CALL, "Profiling calls to WHvMapGpaRange for bigger stuff");
1524 STAMR3Register(pVM, &pVM->nem.s.StatProfUnmapGpaRange, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
1525 "/NEM/PagesUnmapGpaRange", STAMUNIT_TICKS_PER_CALL, "Profiling calls to WHvUnmapGpaRange for bigger stuff");
1526 STAMR3Register(pVM, &pVM->nem.s.StatProfMapGpaRangePage, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
1527 "/NEM/PagesMapGpaRangePage", STAMUNIT_TICKS_PER_CALL, "Profiling calls to WHvMapGpaRange for single pages");
1528 STAMR3Register(pVM, &pVM->nem.s.StatProfUnmapGpaRangePage, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
1529 "/NEM/PagesUnmapGpaRangePage", STAMUNIT_TICKS_PER_CALL, "Profiling calls to WHvUnmapGpaRange for single pages");
1530
1531 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1532 {
1533 PNEMCPU pNemCpu = &pVM->apCpusR3[idCpu]->nem.s;
1534 STAMR3RegisterF(pVM, &pNemCpu->StatExitPortIo, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of port I/O exits", "/NEM/CPU%u/ExitPortIo", idCpu);
1535 STAMR3RegisterF(pVM, &pNemCpu->StatExitMemUnmapped, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of unmapped memory exits", "/NEM/CPU%u/ExitMemUnmapped", idCpu);
1536 STAMR3RegisterF(pVM, &pNemCpu->StatExitMemIntercept, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of intercepted memory exits", "/NEM/CPU%u/ExitMemIntercept", idCpu);
1537 STAMR3RegisterF(pVM, &pNemCpu->StatExitHalt, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of HLT exits", "/NEM/CPU%u/ExitHalt", idCpu);
1538 STAMR3RegisterF(pVM, &pNemCpu->StatExitInterruptWindow, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of interrupt window exits", "/NEM/CPU%u/ExitInterruptWindow", idCpu);
1539 STAMR3RegisterF(pVM, &pNemCpu->StatExitCpuId, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of CPUID exits", "/NEM/CPU%u/ExitCpuId", idCpu);
1540 STAMR3RegisterF(pVM, &pNemCpu->StatExitMsr, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of MSR access exits", "/NEM/CPU%u/ExitMsr", idCpu);
1541 STAMR3RegisterF(pVM, &pNemCpu->StatExitException, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of exception exits", "/NEM/CPU%u/ExitException", idCpu);
1542 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionBp, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #BP exits", "/NEM/CPU%u/ExitExceptionBp", idCpu);
1543 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionDb, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #DB exits", "/NEM/CPU%u/ExitExceptionDb", idCpu);
1544 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionGp, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #GP exits", "/NEM/CPU%u/ExitExceptionGp", idCpu);
1545 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionGpMesa, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #GP exits from mesa driver", "/NEM/CPU%u/ExitExceptionGpMesa", idCpu);
1546 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionUd, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #UD exits", "/NEM/CPU%u/ExitExceptionUd", idCpu);
1547 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionUdHandled, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of handled #UD exits", "/NEM/CPU%u/ExitExceptionUdHandled", idCpu);
1548 STAMR3RegisterF(pVM, &pNemCpu->StatExitUnrecoverable, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of unrecoverable exits", "/NEM/CPU%u/ExitUnrecoverable", idCpu);
1549 STAMR3RegisterF(pVM, &pNemCpu->StatGetMsgTimeout, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of get message timeouts/alerts", "/NEM/CPU%u/GetMsgTimeout", idCpu);
1550 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuSuccess, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of successful CPU stops", "/NEM/CPU%u/StopCpuSuccess", idCpu);
1551 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPending, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pending CPU stops", "/NEM/CPU%u/StopCpuPending", idCpu);
1552 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPendingAlerts,STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pending CPU stop alerts", "/NEM/CPU%u/StopCpuPendingAlerts", idCpu);
1553 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPendingOdd, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of odd pending CPU stops (see code)", "/NEM/CPU%u/StopCpuPendingOdd", idCpu);
1554 STAMR3RegisterF(pVM, &pNemCpu->StatCancelChangedState, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel changed state", "/NEM/CPU%u/CancelChangedState", idCpu);
1555 STAMR3RegisterF(pVM, &pNemCpu->StatCancelAlertedThread, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel alerted EMT", "/NEM/CPU%u/CancelAlertedEMT", idCpu);
1556 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnFFPre, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pre execution FF breaks", "/NEM/CPU%u/BreakOnFFPre", idCpu);
1557 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnFFPost, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of post execution FF breaks", "/NEM/CPU%u/BreakOnFFPost", idCpu);
1558 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnCancel, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel execution breaks", "/NEM/CPU%u/BreakOnCancel", idCpu);
1559 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnStatus, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of status code breaks", "/NEM/CPU%u/BreakOnStatus", idCpu);
1560 STAMR3RegisterF(pVM, &pNemCpu->StatImportOnDemand, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of on-demand state imports", "/NEM/CPU%u/ImportOnDemand", idCpu);
1561 STAMR3RegisterF(pVM, &pNemCpu->StatImportOnReturn, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of state imports on loop return", "/NEM/CPU%u/ImportOnReturn", idCpu);
1562 STAMR3RegisterF(pVM, &pNemCpu->StatImportOnReturnSkipped, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of skipped state imports on loop return", "/NEM/CPU%u/ImportOnReturnSkipped", idCpu);
1563 STAMR3RegisterF(pVM, &pNemCpu->StatQueryCpuTick, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of TSC queries", "/NEM/CPU%u/QueryCpuTick", idCpu);
1564 }
1565
1566#if defined(VBOX_WITH_R0_MODULES) && !defined(VBOX_WITH_MINIMAL_R0)
1567 if (!SUPR3IsDriverless())
1568 {
1569 PUVM pUVM = pVM->pUVM;
1570 STAMR3RegisterRefresh(pUVM, &pVM->nem.s.R0Stats.cPagesAvailable, STAMTYPE_U64, STAMVISIBILITY_ALWAYS,
1571 STAMUNIT_PAGES, STAM_REFRESH_GRP_NEM, "Free pages available to the hypervisor",
1572 "/NEM/R0Stats/cPagesAvailable");
1573 STAMR3RegisterRefresh(pUVM, &pVM->nem.s.R0Stats.cPagesInUse, STAMTYPE_U64, STAMVISIBILITY_ALWAYS,
1574 STAMUNIT_PAGES, STAM_REFRESH_GRP_NEM, "Pages in use by hypervisor",
1575 "/NEM/R0Stats/cPagesInUse");
1576 }
1577#endif /* VBOX_WITH_R0_MODULES && !VBOX_WITH_MINIMAL_R0 */
1578
1579 }
1580 }
1581 }
1582 }
1583
1584 /*
1585 * We only fail if in forced mode, otherwise just log the complaint and return.
1586 */
1587 Assert(pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API || RTErrInfoIsSet(pErrInfo));
1588 if ( (fForced || !fFallback)
1589 && pVM->bMainExecutionEngine != VM_EXEC_ENGINE_NATIVE_API)
1590 return VMSetError(pVM, RT_SUCCESS_NP(rc) ? VERR_NEM_NOT_AVAILABLE : rc, RT_SRC_POS, "%s", pErrInfo->pszMsg);
1591
1592 if (RTErrInfoIsSet(pErrInfo))
1593 LogRel(("NEM: Not available: %s\n", pErrInfo->pszMsg));
1594 return VINF_SUCCESS;
1595}
1596
1597
1598/**
1599 * This is called after CPUMR3Init is done.
1600 *
1601 * @returns VBox status code.
1602 * @param pVM The VM handle..
1603 */
1604int nemR3NativeInitAfterCPUM(PVM pVM)
1605{
1606 /*
1607 * Validate sanity.
1608 */
1609 WHV_PARTITION_HANDLE hPartition = pVM->nem.s.hPartition;
1610 AssertReturn(hPartition != NULL, VERR_WRONG_ORDER);
1611 AssertReturn(!pVM->nem.s.hPartitionDevice, VERR_WRONG_ORDER);
1612 AssertReturn(!pVM->nem.s.fCreatedEmts, VERR_WRONG_ORDER);
1613 AssertReturn(pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API, VERR_WRONG_ORDER);
1614
1615 /*
1616 * Determine whether we can and should export/import IA32_SPEC_CTRL.
1617 */
1618 pVM->nem.s.fDoIa32SpecCtrl = pVM->nem.s.fSpeculationControl
1619 && g_CpumHostFeatures.s.fSpecCtrlMsr
1620 && pVM->cpum.ro.GuestFeatures.fSpecCtrlMsr;
1621
1622 /*
1623 * Continue setting up the partition now that we've got most of the CPUID feature stuff.
1624 */
1625 WHV_PARTITION_PROPERTY Property;
1626 HRESULT hrc;
1627
1628#if 0
1629 /* Not sure if we really need to set the vendor.
1630 Update: Apparently we don't. WHvPartitionPropertyCodeProcessorVendor was removed in 17110. */
1631 RT_ZERO(Property);
1632 Property.ProcessorVendor = pVM->nem.s.enmCpuVendor == CPUMCPUVENDOR_AMD ? WHvProcessorVendorAmd
1633 : WHvProcessorVendorIntel;
1634 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorVendor, &Property, sizeof(Property));
1635 if (FAILED(hrc))
1636 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1637 "Failed to set WHvPartitionPropertyCodeProcessorVendor to %u: %Rhrc (Last=%#x/%u)",
1638 Property.ProcessorVendor, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1639#endif
1640
1641 /* Not sure if we really need to set the cache line flush size. */
1642 RT_ZERO(Property);
1643 Property.ProcessorClFlushSize = pVM->nem.s.cCacheLineFlushShift;
1644 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorClFlushSize, &Property, sizeof(Property));
1645 if (FAILED(hrc))
1646 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1647 "Failed to set WHvPartitionPropertyCodeProcessorClFlushSize to %u: %Rhrc (Last=%#x/%u)",
1648 pVM->nem.s.cCacheLineFlushShift, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1649
1650 /* Intercept #DB, #BP and #UD exceptions. */
1651 RT_ZERO(Property);
1652 Property.ExceptionExitBitmap = RT_BIT_64(WHvX64ExceptionTypeDebugTrapOrFault)
1653 | RT_BIT_64(WHvX64ExceptionTypeBreakpointTrap)
1654 | RT_BIT_64(WHvX64ExceptionTypeInvalidOpcodeFault);
1655
1656 /* Intercept #GP to workaround the buggy mesa vmwgfx driver. */
1657 PVMCPU pVCpu = pVM->apCpusR3[0]; /** @todo In theory per vCPU, in practice same for all. */
1658 if (pVCpu->nem.s.fTrapXcptGpForLovelyMesaDrv)
1659 Property.ExceptionExitBitmap |= RT_BIT_64(WHvX64ExceptionTypeGeneralProtectionFault);
1660
1661 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeExceptionExitBitmap, &Property, sizeof(Property));
1662 if (FAILED(hrc))
1663 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1664 "Failed to set WHvPartitionPropertyCodeExceptionExitBitmap to %#RX64: %Rhrc (Last=%#x/%u)",
1665 Property.ExceptionExitBitmap, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1666
1667
1668 /*
1669 * Sync CPU features with CPUM.
1670 */
1671 /** @todo sync CPU features with CPUM. */
1672
1673 /* Set the partition property. */
1674 RT_ZERO(Property);
1675 Property.ProcessorFeatures.AsUINT64 = pVM->nem.s.uCpuFeatures.u64;
1676 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorFeatures, &Property, sizeof(Property));
1677 if (FAILED(hrc))
1678 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1679 "Failed to set WHvPartitionPropertyCodeProcessorFeatures to %'#RX64: %Rhrc (Last=%#x/%u)",
1680 pVM->nem.s.uCpuFeatures.u64, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1681
1682 /*
1683 * Set up the partition.
1684 *
1685 * Seems like this is where the partition is actually instantiated and we get
1686 * a handle to it.
1687 */
1688 hrc = WHvSetupPartition(hPartition);
1689 if (FAILED(hrc))
1690 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1691 "Call to WHvSetupPartition failed: %Rhrc (Last=%#x/%u)",
1692 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1693
1694 /*
1695 * Hysterical raisins: Get the handle (could also fish this out via VID.DLL NtDeviceIoControlFile intercepting).
1696 */
1697 HANDLE hPartitionDevice;
1698 __try
1699 {
1700 hPartitionDevice = ((HANDLE *)hPartition)[1];
1701 if (!hPartitionDevice)
1702 hPartitionDevice = INVALID_HANDLE_VALUE;
1703 }
1704 __except(EXCEPTION_EXECUTE_HANDLER)
1705 {
1706 hrc = GetExceptionCode();
1707 hPartitionDevice = INVALID_HANDLE_VALUE;
1708 }
1709
1710 /* Test the handle. */
1711 HV_PARTITION_PROPERTY uValue = 0;
1712 if ( g_pfnVidGetPartitionProperty
1713 && hPartitionDevice != INVALID_HANDLE_VALUE
1714 && !g_pfnVidGetPartitionProperty(hPartitionDevice, HvPartitionPropertyProcessorVendor, &uValue))
1715 hPartitionDevice = INVALID_HANDLE_VALUE;
1716 LogRel(("NEM: HvPartitionPropertyProcessorVendor=%#llx (%lld)\n", uValue, uValue));
1717
1718 /*
1719 * More hysterical rasins: Get the partition ID if we can.
1720 */
1721 HV_PARTITION_ID idHvPartition = HV_PARTITION_ID_INVALID;
1722 if ( g_pfnVidGetHvPartitionId
1723 && hPartitionDevice != INVALID_HANDLE_VALUE
1724 && !g_pfnVidGetHvPartitionId(hPartitionDevice, &idHvPartition))
1725 {
1726 idHvPartition = HV_PARTITION_ID_INVALID;
1727 Log(("NEM: VidGetHvPartitionId failed: %#x\n", GetLastError()));
1728 }
1729 pVM->nem.s.hPartitionDevice = hPartitionDevice;
1730
1731 /*
1732 * Setup the EMTs.
1733 */
1734 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1735 {
1736 pVCpu = pVM->apCpusR3[idCpu];
1737
1738 hrc = WHvCreateVirtualProcessor(hPartition, idCpu, 0 /*fFlags*/);
1739 if (FAILED(hrc))
1740 {
1741 NTSTATUS const rcNtLast = RTNtLastStatusValue();
1742 DWORD const dwErrLast = RTNtLastErrorValue();
1743 while (idCpu-- > 0)
1744 {
1745 HRESULT hrc2 = WHvDeleteVirtualProcessor(hPartition, idCpu);
1746 AssertLogRelMsg(SUCCEEDED(hrc2), ("WHvDeleteVirtualProcessor(%p, %u) -> %Rhrc (Last=%#x/%u)\n",
1747 hPartition, idCpu, hrc2, RTNtLastStatusValue(),
1748 RTNtLastErrorValue()));
1749 }
1750 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1751 "Call to WHvCreateVirtualProcessor failed: %Rhrc (Last=%#x/%u)", hrc, rcNtLast, dwErrLast);
1752 }
1753 }
1754 pVM->nem.s.fCreatedEmts = true;
1755
1756 LogRel(("NEM: Successfully set up partition (device handle %p, partition ID %#llx)\n", hPartitionDevice, idHvPartition));
1757
1758 /*
1759 * Any hyper-v statistics we can get at now? HvCallMapStatsPage isn't accessible any more.
1760 */
1761 /** @todo stats */
1762
1763 /*
1764 * Adjust features.
1765 *
1766 * Note! We've already disabled X2APIC and MONITOR/MWAIT via CFGM during
1767 * the first init call.
1768 */
1769
1770 return VINF_SUCCESS;
1771}
1772
1773
1774int nemR3NativeInitCompleted(PVM pVM, VMINITCOMPLETED enmWhat)
1775{
1776 //BOOL fRet = SetThreadPriority(GetCurrentThread(), 0);
1777 //AssertLogRel(fRet);
1778
1779 NOREF(pVM); NOREF(enmWhat);
1780 return VINF_SUCCESS;
1781}
1782
1783
1784int nemR3NativeTerm(PVM pVM)
1785{
1786 /*
1787 * Delete the partition.
1788 */
1789 WHV_PARTITION_HANDLE hPartition = pVM->nem.s.hPartition;
1790 pVM->nem.s.hPartition = NULL;
1791 pVM->nem.s.hPartitionDevice = NULL;
1792 if (hPartition != NULL)
1793 {
1794 VMCPUID idCpu = pVM->nem.s.fCreatedEmts ? pVM->cCpus : 0;
1795 LogRel(("NEM: Destroying partition %p with its %u VCpus...\n", hPartition, idCpu));
1796 while (idCpu-- > 0)
1797 {
1798 PVMCPU pVCpu = pVM->apCpusR3[idCpu];
1799 pVCpu->nem.s.pvMsgSlotMapping = NULL;
1800 HRESULT hrc = WHvDeleteVirtualProcessor(hPartition, idCpu);
1801 AssertLogRelMsg(SUCCEEDED(hrc), ("WHvDeleteVirtualProcessor(%p, %u) -> %Rhrc (Last=%#x/%u)\n",
1802 hPartition, idCpu, hrc, RTNtLastStatusValue(),
1803 RTNtLastErrorValue()));
1804 }
1805 WHvDeletePartition(hPartition);
1806 }
1807 pVM->nem.s.fCreatedEmts = false;
1808 return VINF_SUCCESS;
1809}
1810
1811
1812/**
1813 * VM reset notification.
1814 *
1815 * @param pVM The cross context VM structure.
1816 */
1817void nemR3NativeReset(PVM pVM)
1818{
1819#if 0
1820 /* Unfix the A20 gate. */
1821 pVM->nem.s.fA20Fixed = false;
1822#else
1823 RT_NOREF(pVM);
1824#endif
1825}
1826
1827
1828/**
1829 * Reset CPU due to INIT IPI or hot (un)plugging.
1830 *
1831 * @param pVCpu The cross context virtual CPU structure of the CPU being
1832 * reset.
1833 * @param fInitIpi Whether this is the INIT IPI or hot (un)plugging case.
1834 */
1835void nemR3NativeResetCpu(PVMCPU pVCpu, bool fInitIpi)
1836{
1837#ifdef NEM_WIN_WITH_A20
1838 /* Lock the A20 gate if INIT IPI, make sure it's enabled. */
1839 if (fInitIpi && pVCpu->idCpu > 0)
1840 {
1841 PVM pVM = pVCpu->CTX_SUFF(pVM);
1842 if (!pVM->nem.s.fA20Enabled)
1843 nemR3NativeNotifySetA20(pVCpu, true);
1844 pVM->nem.s.fA20Enabled = true;
1845 pVM->nem.s.fA20Fixed = true;
1846 }
1847#else
1848 RT_NOREF(pVCpu, fInitIpi);
1849#endif
1850}
1851
1852
1853VBOXSTRICTRC nemR3NativeRunGC(PVM pVM, PVMCPU pVCpu)
1854{
1855 return nemHCWinRunGC(pVM, pVCpu);
1856}
1857
1858
1859VMMR3_INT_DECL(bool) NEMR3CanExecuteGuest(PVM pVM, PVMCPU pVCpu)
1860{
1861 Assert(VM_IS_NEM_ENABLED(pVM));
1862
1863#ifndef NEM_WIN_WITH_A20
1864 /*
1865 * Only execute when the A20 gate is enabled because this lovely Hyper-V
1866 * blackbox does not seem to have any way to enable or disable A20.
1867 */
1868 RT_NOREF(pVM);
1869 return PGMPhysIsA20Enabled(pVCpu);
1870#else
1871 RT_NOREF(pVM, pVCpu);
1872 return true;
1873#endif
1874}
1875
1876
1877bool nemR3NativeSetSingleInstruction(PVM pVM, PVMCPU pVCpu, bool fEnable)
1878{
1879 NOREF(pVM); NOREF(pVCpu); NOREF(fEnable);
1880 return false;
1881}
1882
1883
1884void nemR3NativeNotifyFF(PVM pVM, PVMCPU pVCpu, uint32_t fFlags)
1885{
1886 Log8(("nemR3NativeNotifyFF: canceling %u\n", pVCpu->idCpu));
1887 HRESULT hrc = WHvCancelRunVirtualProcessor(pVM->nem.s.hPartition, pVCpu->idCpu, 0);
1888 AssertMsg(SUCCEEDED(hrc), ("WHvCancelRunVirtualProcessor -> hrc=%Rhrc\n", hrc));
1889 RT_NOREF_PV(hrc);
1890 RT_NOREF_PV(fFlags);
1891}
1892
1893
1894DECLHIDDEN(bool) nemR3NativeNotifyDebugEventChanged(PVM pVM, bool fUseDebugLoop)
1895{
1896 RT_NOREF(pVM, fUseDebugLoop);
1897 return false;
1898}
1899
1900
1901DECLHIDDEN(bool) nemR3NativeNotifyDebugEventChangedPerCpu(PVM pVM, PVMCPU pVCpu, bool fUseDebugLoop)
1902{
1903 RT_NOREF(pVM, pVCpu, fUseDebugLoop);
1904 return false;
1905}
1906
1907
1908DECLINLINE(int) nemR3NativeGCPhys2R3PtrReadOnly(PVM pVM, RTGCPHYS GCPhys, const void **ppv)
1909{
1910 PGMPAGEMAPLOCK Lock;
1911 int rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, GCPhys, ppv, &Lock);
1912 if (RT_SUCCESS(rc))
1913 PGMPhysReleasePageMappingLock(pVM, &Lock);
1914 return rc;
1915}
1916
1917
1918DECLINLINE(int) nemR3NativeGCPhys2R3PtrWriteable(PVM pVM, RTGCPHYS GCPhys, void **ppv)
1919{
1920 PGMPAGEMAPLOCK Lock;
1921 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhys, ppv, &Lock);
1922 if (RT_SUCCESS(rc))
1923 PGMPhysReleasePageMappingLock(pVM, &Lock);
1924 return rc;
1925}
1926
1927
1928VMMR3_INT_DECL(int) NEMR3NotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvR3,
1929 uint8_t *pu2State, uint32_t *puNemRange)
1930{
1931 Log5(("NEMR3NotifyPhysRamRegister: %RGp LB %RGp, pvR3=%p pu2State=%p (%d) puNemRange=%p (%d)\n",
1932 GCPhys, cb, pvR3, pu2State, pu2State, puNemRange, *puNemRange));
1933
1934 *pu2State = UINT8_MAX;
1935 RT_NOREF(puNemRange);
1936
1937 if (pvR3)
1938 {
1939 STAM_REL_PROFILE_START(&pVM->nem.s.StatProfMapGpaRange, a);
1940 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, pvR3, GCPhys, cb,
1941 WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagWrite | WHvMapGpaRangeFlagExecute);
1942 STAM_REL_PROFILE_STOP(&pVM->nem.s.StatProfMapGpaRange, a);
1943 if (SUCCEEDED(hrc))
1944 *pu2State = NEM_WIN_PAGE_STATE_WRITABLE;
1945 else
1946 {
1947 LogRel(("NEMR3NotifyPhysRamRegister: GCPhys=%RGp LB %RGp pvR3=%p hrc=%Rhrc (%#x) Last=%#x/%u\n",
1948 GCPhys, cb, pvR3, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1949 STAM_REL_COUNTER_INC(&pVM->nem.s.StatMapPageFailed);
1950 return VERR_NEM_MAP_PAGES_FAILED;
1951 }
1952 }
1953 return VINF_SUCCESS;
1954}
1955
1956
1957VMMR3_INT_DECL(bool) NEMR3IsMmio2DirtyPageTrackingSupported(PVM pVM)
1958{
1959 RT_NOREF(pVM);
1960 return g_pfnWHvQueryGpaRangeDirtyBitmap != NULL;
1961}
1962
1963
1964VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExMapEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags,
1965 void *pvRam, void *pvMmio2, uint8_t *pu2State, uint32_t *puNemRange)
1966{
1967 Log5(("NEMR3NotifyPhysMmioExMapEarly: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p (%d) puNemRange=%p (%#x)\n",
1968 GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State, *pu2State, puNemRange, puNemRange ? *puNemRange : UINT32_MAX));
1969 RT_NOREF(puNemRange);
1970
1971 /*
1972 * Unmap the RAM we're replacing.
1973 */
1974 if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE)
1975 {
1976 STAM_REL_PROFILE_START(&pVM->nem.s.StatProfUnmapGpaRange, a);
1977 HRESULT hrc = WHvUnmapGpaRange(pVM->nem.s.hPartition, GCPhys, cb);
1978 STAM_REL_PROFILE_STOP(&pVM->nem.s.StatProfUnmapGpaRange, a);
1979 if (SUCCEEDED(hrc))
1980 { /* likely */ }
1981 else if (pvMmio2)
1982 LogRel(("NEMR3NotifyPhysMmioExMapEarly: GCPhys=%RGp LB %RGp fFlags=%#x: Unmap -> hrc=%Rhrc (%#x) Last=%#x/%u (ignored)\n",
1983 GCPhys, cb, fFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1984 else
1985 {
1986 LogRel(("NEMR3NotifyPhysMmioExMapEarly: GCPhys=%RGp LB %RGp fFlags=%#x: Unmap -> hrc=%Rhrc (%#x) Last=%#x/%u\n",
1987 GCPhys, cb, fFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1988 STAM_REL_COUNTER_INC(&pVM->nem.s.StatUnmapPageFailed);
1989 return VERR_NEM_UNMAP_PAGES_FAILED;
1990 }
1991 }
1992
1993 /*
1994 * Map MMIO2 if any.
1995 */
1996 if (pvMmio2)
1997 {
1998 Assert(fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2);
1999 WHV_MAP_GPA_RANGE_FLAGS fWHvFlags = WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagWrite | WHvMapGpaRangeFlagExecute;
2000 if ((fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_TRACK_DIRTY_PAGES) && g_pfnWHvQueryGpaRangeDirtyBitmap)
2001 fWHvFlags |= WHvMapGpaRangeFlagTrackDirtyPages;
2002 STAM_REL_PROFILE_START(&pVM->nem.s.StatProfMapGpaRange, a);
2003 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, pvMmio2, GCPhys, cb, fWHvFlags);
2004 STAM_REL_PROFILE_STOP(&pVM->nem.s.StatProfMapGpaRange, a);
2005 if (SUCCEEDED(hrc))
2006 *pu2State = NEM_WIN_PAGE_STATE_WRITABLE;
2007 else
2008 {
2009 LogRel(("NEMR3NotifyPhysMmioExMapEarly: GCPhys=%RGp LB %RGp fFlags=%#x pvMmio2=%p fWHvFlags=%#x: Map -> hrc=%Rhrc (%#x) Last=%#x/%u\n",
2010 GCPhys, cb, fFlags, pvMmio2, fWHvFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2011 STAM_REL_COUNTER_INC(&pVM->nem.s.StatMapPageFailed);
2012 return VERR_NEM_MAP_PAGES_FAILED;
2013 }
2014 }
2015 else
2016 {
2017 Assert(!(fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2));
2018 *pu2State = NEM_WIN_PAGE_STATE_UNMAPPED;
2019 }
2020 RT_NOREF(pvRam);
2021 return VINF_SUCCESS;
2022}
2023
2024
2025VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExMapLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags,
2026 void *pvRam, void *pvMmio2, uint32_t *puNemRange)
2027{
2028 RT_NOREF(pVM, GCPhys, cb, fFlags, pvRam, pvMmio2, puNemRange);
2029 return VINF_SUCCESS;
2030}
2031
2032
2033VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExUnmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags, void *pvRam,
2034 void *pvMmio2, uint8_t *pu2State, uint32_t *puNemRange)
2035{
2036 int rc = VINF_SUCCESS;
2037 Log5(("NEMR3NotifyPhysMmioExUnmap: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p uNemRange=%#x (%#x)\n",
2038 GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State, puNemRange, *puNemRange));
2039
2040 /*
2041 * Unmap the MMIO2 pages.
2042 */
2043 /** @todo If we implement aliasing (MMIO2 page aliased into MMIO range),
2044 * we may have more stuff to unmap even in case of pure MMIO... */
2045 if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2)
2046 {
2047 STAM_REL_PROFILE_START(&pVM->nem.s.StatProfUnmapGpaRange, a);
2048 HRESULT hrc = WHvUnmapGpaRange(pVM->nem.s.hPartition, GCPhys, cb);
2049 STAM_REL_PROFILE_STOP(&pVM->nem.s.StatProfUnmapGpaRange, a);
2050 if (FAILED(hrc))
2051 {
2052 LogRel2(("NEMR3NotifyPhysMmioExUnmap: GCPhys=%RGp LB %RGp fFlags=%#x: Unmap -> hrc=%Rhrc (%#x) Last=%#x/%u (ignored)\n",
2053 GCPhys, cb, fFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2054 rc = VERR_NEM_UNMAP_PAGES_FAILED;
2055 STAM_REL_COUNTER_INC(&pVM->nem.s.StatUnmapPageFailed);
2056 }
2057 }
2058
2059 /*
2060 * Restore the RAM we replaced.
2061 */
2062 if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE)
2063 {
2064 AssertPtr(pvRam);
2065 STAM_REL_PROFILE_START(&pVM->nem.s.StatProfMapGpaRange, a);
2066 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, pvRam, GCPhys, cb,
2067 WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagWrite | WHvMapGpaRangeFlagExecute);
2068 STAM_REL_PROFILE_STOP(&pVM->nem.s.StatProfMapGpaRange, a);
2069 if (SUCCEEDED(hrc))
2070 { /* likely */ }
2071 else
2072 {
2073 LogRel(("NEMR3NotifyPhysMmioExUnmap: GCPhys=%RGp LB %RGp pvMmio2=%p hrc=%Rhrc (%#x) Last=%#x/%u\n",
2074 GCPhys, cb, pvMmio2, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2075 rc = VERR_NEM_MAP_PAGES_FAILED;
2076 STAM_REL_COUNTER_INC(&pVM->nem.s.StatMapPageFailed);
2077 }
2078 if (pu2State)
2079 *pu2State = NEM_WIN_PAGE_STATE_WRITABLE;
2080 }
2081 /* Mark the pages as unmapped if relevant. */
2082 else if (pu2State)
2083 *pu2State = NEM_WIN_PAGE_STATE_UNMAPPED;
2084
2085 RT_NOREF(pvMmio2, puNemRange);
2086 return rc;
2087}
2088
2089
2090VMMR3_INT_DECL(int) NEMR3PhysMmio2QueryAndResetDirtyBitmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t uNemRange,
2091 void *pvBitmap, size_t cbBitmap)
2092{
2093 Assert(VM_IS_NEM_ENABLED(pVM));
2094 AssertReturn(g_pfnWHvQueryGpaRangeDirtyBitmap, VERR_INTERNAL_ERROR_2);
2095 Assert(cbBitmap == (uint32_t)cbBitmap);
2096 RT_NOREF(uNemRange);
2097
2098 /* This is being profiled by PGM, see /PGM/Mmio2QueryAndResetDirtyBitmap. */
2099 HRESULT hrc = WHvQueryGpaRangeDirtyBitmap(pVM->nem.s.hPartition, GCPhys, cb, (UINT64 *)pvBitmap, (uint32_t)cbBitmap);
2100 if (SUCCEEDED(hrc))
2101 return VINF_SUCCESS;
2102
2103 AssertLogRelMsgFailed(("GCPhys=%RGp LB %RGp pvBitmap=%p LB %#zx hrc=%Rhrc (%#x) Last=%#x/%u\n",
2104 GCPhys, cb, pvBitmap, cbBitmap, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2105 return VERR_NEM_QUERY_DIRTY_BITMAP_FAILED;
2106}
2107
2108
2109VMMR3_INT_DECL(int) NEMR3NotifyPhysRomRegisterEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages, uint32_t fFlags,
2110 uint8_t *pu2State, uint32_t *puNemRange)
2111{
2112 Log5(("nemR3NativeNotifyPhysRomRegisterEarly: %RGp LB %RGp pvPages=%p fFlags=%#x\n", GCPhys, cb, pvPages, fFlags));
2113 *pu2State = UINT8_MAX;
2114 *puNemRange = 0;
2115
2116#if 0 /* Let's not do this after all. We'll protection change notifications for each page and if not we'll map them lazily. */
2117 RTGCPHYS const cPages = cb >> X86_PAGE_SHIFT;
2118 for (RTGCPHYS iPage = 0; iPage < cPages; iPage++, GCPhys += X86_PAGE_SIZE)
2119 {
2120 const void *pvPage;
2121 int rc = nemR3NativeGCPhys2R3PtrReadOnly(pVM, GCPhys, &pvPage);
2122 if (RT_SUCCESS(rc))
2123 {
2124 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, (void *)pvPage, GCPhys, X86_PAGE_SIZE,
2125 WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagExecute);
2126 if (SUCCEEDED(hrc))
2127 { /* likely */ }
2128 else
2129 {
2130 LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp hrc=%Rhrc (%#x) Last=%#x/%u\n",
2131 GCPhys, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2132 return VERR_NEM_INIT_FAILED;
2133 }
2134 }
2135 else
2136 {
2137 LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp rc=%Rrc\n", GCPhys, rc));
2138 return rc;
2139 }
2140 }
2141 RT_NOREF_PV(fFlags);
2142#else
2143 RT_NOREF(pVM, GCPhys, cb, pvPages, fFlags);
2144#endif
2145 return VINF_SUCCESS;
2146}
2147
2148
2149VMMR3_INT_DECL(int) NEMR3NotifyPhysRomRegisterLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages,
2150 uint32_t fFlags, uint8_t *pu2State, uint32_t *puNemRange)
2151{
2152 Log5(("nemR3NativeNotifyPhysRomRegisterLate: %RGp LB %RGp pvPages=%p fFlags=%#x pu2State=%p (%d) puNemRange=%p (%#x)\n",
2153 GCPhys, cb, pvPages, fFlags, pu2State, *pu2State, puNemRange, *puNemRange));
2154 *pu2State = UINT8_MAX;
2155
2156 /*
2157 * (Re-)map readonly.
2158 */
2159 AssertPtrReturn(pvPages, VERR_INVALID_POINTER);
2160 STAM_REL_PROFILE_START(&pVM->nem.s.StatProfMapGpaRange, a);
2161 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, pvPages, GCPhys, cb, WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagExecute);
2162 STAM_REL_PROFILE_STOP(&pVM->nem.s.StatProfMapGpaRange, a);
2163 if (SUCCEEDED(hrc))
2164 *pu2State = NEM_WIN_PAGE_STATE_READABLE;
2165 else
2166 {
2167 LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp LB %RGp pvPages=%p fFlags=%#x hrc=%Rhrc (%#x) Last=%#x/%u\n",
2168 GCPhys, cb, pvPages, fFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2169 STAM_REL_COUNTER_INC(&pVM->nem.s.StatMapPageFailed);
2170 return VERR_NEM_MAP_PAGES_FAILED;
2171 }
2172 RT_NOREF(fFlags, puNemRange);
2173 return VINF_SUCCESS;
2174}
2175
2176#ifdef NEM_WIN_WITH_A20
2177
2178/**
2179 * @callback_method_impl{FNPGMPHYSNEMCHECKPAGE}
2180 */
2181static DECLCALLBACK(int) nemR3WinUnsetForA20CheckerCallback(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys,
2182 PPGMPHYSNEMPAGEINFO pInfo, void *pvUser)
2183{
2184 /* We'll just unmap the memory. */
2185 if (pInfo->u2NemState > NEM_WIN_PAGE_STATE_UNMAPPED)
2186 {
2187 HRESULT hrc = WHvUnmapGpaRange(pVM->nem.s.hPartition, GCPhys, X86_PAGE_SIZE);
2188 if (SUCCEEDED(hrc))
2189 {
2190 STAM_REL_COUNTER_INC(&pVM->nem.s.StatUnmapPage);
2191 uint32_t cMappedPages = ASMAtomicDecU32(&pVM->nem.s.cMappedPages); NOREF(cMappedPages);
2192 Log5(("NEM GPA unmapped/A20: %RGp (was %s, cMappedPages=%u)\n", GCPhys, g_apszPageStates[pInfo->u2NemState], cMappedPages));
2193 pInfo->u2NemState = NEM_WIN_PAGE_STATE_UNMAPPED;
2194 }
2195 else
2196 {
2197 STAM_REL_COUNTER_INC(&pVM->nem.s.StatUnmapPageFailed);
2198 LogRel(("nemR3WinUnsetForA20CheckerCallback/unmap: GCPhys=%RGp hrc=%Rhrc (%#x) Last=%#x/%u\n",
2199 GCPhys, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2200 return VERR_INTERNAL_ERROR_2;
2201 }
2202 }
2203 RT_NOREF(pVCpu, pvUser);
2204 return VINF_SUCCESS;
2205}
2206
2207
2208/**
2209 * Unmaps a page from Hyper-V for the purpose of emulating A20 gate behavior.
2210 *
2211 * @returns The PGMPhysNemQueryPageInfo result.
2212 * @param pVM The cross context VM structure.
2213 * @param pVCpu The cross context virtual CPU structure.
2214 * @param GCPhys The page to unmap.
2215 */
2216static int nemR3WinUnmapPageForA20Gate(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
2217{
2218 PGMPHYSNEMPAGEINFO Info;
2219 return PGMPhysNemPageInfoChecker(pVM, pVCpu, GCPhys, false /*fMakeWritable*/, &Info,
2220 nemR3WinUnsetForA20CheckerCallback, NULL);
2221}
2222
2223#endif /* NEM_WIN_WITH_A20 */
2224
2225VMMR3_INT_DECL(void) NEMR3NotifySetA20(PVMCPU pVCpu, bool fEnabled)
2226{
2227 Log(("nemR3NativeNotifySetA20: fEnabled=%RTbool\n", fEnabled));
2228 Assert(VM_IS_NEM_ENABLED(pVCpu->CTX_SUFF(pVM)));
2229#ifdef NEM_WIN_WITH_A20
2230 PVM pVM = pVCpu->CTX_SUFF(pVM);
2231 if (!pVM->nem.s.fA20Fixed)
2232 {
2233 pVM->nem.s.fA20Enabled = fEnabled;
2234 for (RTGCPHYS GCPhys = _1M; GCPhys < _1M + _64K; GCPhys += X86_PAGE_SIZE)
2235 nemR3WinUnmapPageForA20Gate(pVM, pVCpu, GCPhys);
2236 }
2237#else
2238 RT_NOREF(pVCpu, fEnabled);
2239#endif
2240}
2241
2242
2243/** @page pg_nem_win NEM/win - Native Execution Manager, Windows.
2244 *
2245 * On Windows the Hyper-V root partition (dom0 in Xen terminology) does not have
2246 * nested VT-x or AMD-V capabilities. Early on raw-mode worked inside it, but
2247 * for a while now we've been getting \#GPs when trying to modify CR4 in the
2248 * world switcher. So, when Hyper-V is active on Windows we have little choice
2249 * but to use Hyper-V to run our VMs.
2250 *
2251 *
2252 * @section sub_nem_win_whv The WinHvPlatform API
2253 *
2254 * Since Windows 10 build 17083 there is a documented API for managing Hyper-V
2255 * VMs: header file WinHvPlatform.h and implementation in WinHvPlatform.dll.
2256 * This interface is a wrapper around the undocumented Virtualization
2257 * Infrastructure Driver (VID) API - VID.DLL and VID.SYS. The wrapper is
2258 * written in C++, namespaced, early versions (at least) were using standard C++
2259 * container templates in several places.
2260 *
2261 * When creating a VM using WHvCreatePartition, it will only create the
2262 * WinHvPlatform structures for it, to which you get an abstract pointer. The
2263 * VID API that actually creates the partition is first engaged when you call
2264 * WHvSetupPartition after first setting a lot of properties using
2265 * WHvSetPartitionProperty. Since the VID API is just a very thin wrapper
2266 * around CreateFile and NtDeviceIoControlFile, it returns an actual HANDLE for
2267 * the partition to WinHvPlatform. We fish this HANDLE out of the WinHvPlatform
2268 * partition structures because we need to talk directly to VID for reasons
2269 * we'll get to in a bit. (Btw. we could also intercept the CreateFileW or
2270 * NtDeviceIoControlFile calls from VID.DLL to get the HANDLE should fishing in
2271 * the partition structures become difficult.)
2272 *
2273 * The WinHvPlatform API requires us to both set the number of guest CPUs before
2274 * setting up the partition and call WHvCreateVirtualProcessor for each of them.
2275 * The CPU creation function boils down to a VidMessageSlotMap call that sets up
2276 * and maps a message buffer into ring-3 for async communication with hyper-V
2277 * and/or the VID.SYS thread actually running the CPU thru
2278 * WinHvRunVpDispatchLoop(). When for instance a VMEXIT is encountered, hyper-V
2279 * sends a message that the WHvRunVirtualProcessor API retrieves (and later
2280 * acknowledges) via VidMessageSlotHandleAndGetNext. Since or about build
2281 * 17757 a register page is also mapped into user space when creating the
2282 * virtual CPU. It should be noted that WHvDeleteVirtualProcessor doesn't do
2283 * much as there seems to be no partner function to VidMessageSlotMap that
2284 * reverses what it did.
2285 *
2286 * Memory is managed thru calls to WHvMapGpaRange and WHvUnmapGpaRange (GPA does
2287 * not mean grade point average here, but rather guest physical address space),
2288 * which corresponds to VidCreateVaGpaRangeSpecifyUserVa and VidDestroyGpaRange
2289 * respectively. As 'UserVa' indicates, the functions works on user process
2290 * memory. The mappings are also subject to quota restrictions, so the number
2291 * of ranges are limited and probably their total size as well. Obviously
2292 * VID.SYS keeps track of the ranges, but so does WinHvPlatform, which means
2293 * there is a bit of overhead involved and quota restrictions make sense.
2294 *
2295 * Running guest code is done through the WHvRunVirtualProcessor function. It
2296 * asynchronously starts or resumes hyper-V CPU execution and then waits for an
2297 * VMEXIT message. Hyper-V / VID.SYS will return information about the message
2298 * in the message buffer mapping, and WHvRunVirtualProcessor will convert that
2299 * into its own WHV_RUN_VP_EXIT_CONTEXT format.
2300 *
2301 * Other threads can interrupt the execution by using WHvCancelVirtualProcessor,
2302 * which since or about build 17757 uses VidMessageSlotHandleAndGetNext to do
2303 * the work (earlier builds would open the waiting thread, do a dummy
2304 * QueueUserAPC on it, and let it upon return use VidStopVirtualProcessor to
2305 * do the actual stopping). While there is certainly a race between cancelation
2306 * and the CPU causing a natural VMEXIT, it is not known whether this still
2307 * causes extra work on subsequent WHvRunVirtualProcessor calls (it did in and
2308 * earlier than 17134).
2309 *
2310 * Registers are retrieved and set via WHvGetVirtualProcessorRegisters and
2311 * WHvSetVirtualProcessorRegisters. In addition, several VMEXITs include
2312 * essential register state in the exit context information, potentially making
2313 * it possible to emulate the instruction causing the exit without involving
2314 * WHvGetVirtualProcessorRegisters.
2315 *
2316 *
2317 * @subsection subsec_nem_win_whv_cons Issues & Feedback
2318 *
2319 * Here are some observations (mostly against build 17101):
2320 *
2321 * - The VMEXIT performance is dismal (build 17134).
2322 *
2323 * Our proof of concept implementation with a kernel runloop (i.e. not using
2324 * WHvRunVirtualProcessor and friends, but calling VID.SYS fast I/O control
2325 * entry point directly) delivers 9-10% of the port I/O performance and only
2326 * 6-7% of the MMIO performance that we have with our own hypervisor.
2327 *
2328 * When using the official WinHvPlatform API, the numbers are 3% for port I/O
2329 * and 5% for MMIO.
2330 *
2331 * While the tests we've done are using tight loops only doing port I/O
2332 * and MMIO, the problem is clearly visible when running regular guest OSes.
2333 * Anything that hammers the VGA device would be suffering, for example:
2334 *
2335 * - Windows 2000 boot screen animation overloads us with MMIO exits
2336 * and won't even boot because all the time is spent in interrupt
2337 * handlers and redrawing the screen.
2338 *
2339 * - DSL 4.4 and its bootmenu logo is slower than molasses in January.
2340 *
2341 * We have not found a workaround for this yet.
2342 *
2343 * Something that might improve the issue a little is to detect blocks with
2344 * excessive MMIO and port I/O exits and emulate instructions to cover
2345 * multiple exits before letting Hyper-V have a go at the guest execution
2346 * again. This will only improve the situation under some circumstances,
2347 * since emulating instructions without recompilation can be expensive, so
2348 * there will only be real gains if the exiting instructions are tightly
2349 * packed.
2350 *
2351 * Update: Security fixes during the summer of 2018 caused the performance to
2352 * drop even more.
2353 *
2354 * Update [build 17757]: Some performance improvements here, but they don't
2355 * yet make up for what was lost this summer.
2356 *
2357 *
2358 * - We need a way to directly modify the TSC offset (or bias if you like).
2359 *
2360 * The current approach of setting the WHvX64RegisterTsc register one by one
2361 * on each virtual CPU in sequence will introduce random inaccuracies,
2362 * especially if the thread doing the job is rescheduled at a bad time.
2363 *
2364 *
2365 * - Unable to access WHvX64RegisterMsrMtrrCap (build 17134).
2366 *
2367 *
2368 * - On AMD Ryzen grub/debian 9.0 ends up with an unrecoverable exception
2369 * when IA32_MTRR_PHYSMASK0 is written.
2370 *
2371 *
2372 * - The IA32_APIC_BASE register does not work right:
2373 *
2374 * - Attempts by the guest to clear bit 11 (EN) are ignored, both the
2375 * guest and the VMM reads back the old value.
2376 *
2377 * - Attempts to modify the base address (bits NN:12) seems to be ignored
2378 * in the same way.
2379 *
2380 * - The VMM can modify both the base address as well as the EN and
2381 * BSP bits, however this is useless if we cannot intercept the WRMSR.
2382 *
2383 * - Attempts by the guest to set the EXTD bit (X2APIC) result in \#GP(0),
2384 * while the VMM ends up with ERROR_HV_INVALID_PARAMETER. Seems
2385 * there is no way to support X2APIC.
2386 *
2387 *
2388 * - Not sure if this is a thing, but WHvCancelVirtualProcessor seems to
2389 * cause a lot more spurious WHvRunVirtualProcessor returns than what we get
2390 * with the replacement code. By spurious returns we mean that the
2391 * subsequent call to WHvRunVirtualProcessor would return immediately.
2392 *
2393 * Update [build 17757]: New cancelation code might have addressed this, but
2394 * haven't had time to test it yet.
2395 *
2396 *
2397 * - There is no API for modifying protection of a page within a GPA range.
2398 *
2399 * From what we can tell, the only way to modify the protection (like readonly
2400 * -> writable, or vice versa) is to first unmap the range and then remap it
2401 * with the new protection.
2402 *
2403 * We are for instance doing this quite a bit in order to track dirty VRAM
2404 * pages. VRAM pages starts out as readonly, when the guest writes to a page
2405 * we take an exit, notes down which page it is, makes it writable and restart
2406 * the instruction. After refreshing the display, we reset all the writable
2407 * pages to readonly again, bulk fashion.
2408 *
2409 * Now to work around this issue, we do page sized GPA ranges. In addition to
2410 * add a lot of tracking overhead to WinHvPlatform and VID.SYS, this also
2411 * causes us to exceed our quota before we've even mapped a default sized
2412 * (128MB) VRAM page-by-page. So, to work around this quota issue we have to
2413 * lazily map pages and actively restrict the number of mappings.
2414 *
2415 * Our best workaround thus far is bypassing WinHvPlatform and VID entirely
2416 * when it comes to guest memory management and instead use the underlying
2417 * hypercalls (HvCallMapGpaPages, HvCallUnmapGpaPages) to do it ourselves.
2418 * (This also maps a whole lot better into our own guest page management
2419 * infrastructure.)
2420 *
2421 * Update [build 17757]: Introduces a KVM like dirty logging API which could
2422 * help tracking dirty VGA pages, while being useless for shadow ROM and
2423 * devices trying to catch the guest updating descriptors and such.
2424 *
2425 *
2426 * - Observed problems doing WHvUnmapGpaRange immediately followed by
2427 * WHvMapGpaRange.
2428 *
2429 * As mentioned above, we've been forced to use this sequence when modifying
2430 * page protection. However, when transitioning from readonly to writable,
2431 * we've ended up looping forever with the same write to readonly memory
2432 * VMEXIT. We're wondering if this issue might be related to the lazy mapping
2433 * logic in WinHvPlatform.
2434 *
2435 * Workaround: Insert a WHvRunVirtualProcessor call and make sure to get a GPA
2436 * unmapped exit between the two calls. Not entirely great performance wise
2437 * (or the sanity of our code).
2438 *
2439 *
2440 * - Implementing A20 gate behavior is tedious, where as correctly emulating the
2441 * A20M# pin (present on 486 and later) is near impossible for SMP setups
2442 * (e.g. possibility of two CPUs with different A20 status).
2443 *
2444 * Workaround #1 (obsolete): Only do A20 on CPU 0, restricting the emulation
2445 * to HMA. We unmap all pages related to HMA (0x100000..0x10ffff) when the A20
2446 * state changes, lazily syncing the right pages back when accessed.
2447 *
2448 * Workaround #2 (used): Use IEM when the A20 gate is disabled.
2449 *
2450 *
2451 * - WHVRunVirtualProcessor wastes time converting VID/Hyper-V messages to its
2452 * own format (WHV_RUN_VP_EXIT_CONTEXT).
2453 *
2454 * We understand this might be because Microsoft wishes to remain free to
2455 * modify the VID/Hyper-V messages, but it's still rather silly and does slow
2456 * things down a little. We'd much rather just process the messages directly.
2457 *
2458 *
2459 * - WHVRunVirtualProcessor would've benefited from using a callback interface:
2460 *
2461 * - The potential size changes of the exit context structure wouldn't be
2462 * an issue, since the function could manage that itself.
2463 *
2464 * - State handling could probably be simplified (like cancelation).
2465 *
2466 *
2467 * - WHvGetVirtualProcessorRegisters and WHvSetVirtualProcessorRegisters
2468 * internally converts register names, probably using temporary heap buffers.
2469 *
2470 * From the looks of things, they are converting from WHV_REGISTER_NAME to
2471 * HV_REGISTER_NAME found in the "Virtual Processor Register Names" section in
2472 * the "Hypervisor Top-Level Functional Specification" document. This feels
2473 * like an awful waste of time.
2474 *
2475 * We simply cannot understand why HV_REGISTER_NAME isn't used directly here,
2476 * or at least the same values, making any conversion redundant. Restricting
2477 * access to certain registers could easily be implemented by scanning the
2478 * inputs.
2479 *
2480 * To avoid the heap + conversion overhead, we're currently using the
2481 * HvCallGetVpRegisters and HvCallSetVpRegisters calls directly, at least for
2482 * the ring-0 code.
2483 *
2484 * Update [build 17757]: Register translation has been very cleverly
2485 * optimized and made table driven (2 top level tables, 4 + 1 leaf tables).
2486 * Register information consists of the 32-bit HV register name, register page
2487 * offset, and flags (giving valid offset, size and more). Register
2488 * getting/settings seems to be done by hoping that the register page provides
2489 * it all, and falling back on the VidSetVirtualProcessorState if one or more
2490 * registers are not available there.
2491 *
2492 * Note! We have currently not updated our ring-0 code to take the register
2493 * page into account, so it's suffering a little compared to the ring-3 code
2494 * that now uses the official APIs for registers.
2495 *
2496 *
2497 * - The YMM and XCR0 registers are not yet named (17083). This probably
2498 * wouldn't be a problem if HV_REGISTER_NAME was used, see previous point.
2499 *
2500 * Update [build 17757]: XCR0 is added. YMM register values seems to be put
2501 * into a yet undocumented XsaveState interface. Approach is a little bulky,
2502 * but saves number of enums and dispenses with register translation. Also,
2503 * the underlying Vid setter API duplicates the input buffer on the heap,
2504 * adding a 16 byte header.
2505 *
2506 *
2507 * - Why does VID.SYS only query/set 32 registers at the time thru the
2508 * HvCallGetVpRegisters and HvCallSetVpRegisters hypercalls?
2509 *
2510 * We've no trouble getting/setting all the registers defined by
2511 * WHV_REGISTER_NAME in one hypercall (around 80). Some kind of stack
2512 * buffering or similar?
2513 *
2514 *
2515 * - To handle the VMMCALL / VMCALL instructions, it seems we need to intercept
2516 * \#UD exceptions and inspect the opcodes. A dedicated exit for hypercalls
2517 * would be more efficient, esp. for guests using \#UD for other purposes..
2518 *
2519 *
2520 * - Wrong instruction length in the VpContext with unmapped GPA memory exit
2521 * contexts on 17115/AMD.
2522 *
2523 * One byte "PUSH CS" was reported as 2 bytes, while a two byte
2524 * "MOV [EBX],EAX" was reported with a 1 byte instruction length. Problem
2525 * naturally present in untranslated hyper-v messages.
2526 *
2527 *
2528 * - The I/O port exit context information seems to be missing the address size
2529 * information needed for correct string I/O emulation.
2530 *
2531 * VT-x provides this information in bits 7:9 in the instruction information
2532 * field on newer CPUs. AMD-V in bits 7:9 in the EXITINFO1 field in the VMCB.
2533 *
2534 * We can probably work around this by scanning the instruction bytes for
2535 * address size prefixes. Haven't investigated it any further yet.
2536 *
2537 *
2538 * - Querying WHvCapabilityCodeExceptionExitBitmap returns zero even when
2539 * intercepts demonstrably works (17134).
2540 *
2541 *
2542 * - Querying HvPartitionPropertyDebugChannelId via HvCallGetPartitionProperty
2543 * (hypercall) hangs the host (17134).
2544 *
2545 * - CommonUtilities::GuidToString needs a 'static' before the hex digit array,
2546 * looks pointless to re-init a stack copy of it for each call (novice mistake).
2547 *
2548 *
2549 * Old concerns that have been addressed:
2550 *
2551 * - The WHvCancelVirtualProcessor API schedules a dummy usermode APC callback
2552 * in order to cancel any current or future alertable wait in VID.SYS during
2553 * the VidMessageSlotHandleAndGetNext call.
2554 *
2555 * IIRC this will make the kernel schedule the specified callback thru
2556 * NTDLL!KiUserApcDispatcher by modifying the thread context and quite
2557 * possibly the userland thread stack. When the APC callback returns to
2558 * KiUserApcDispatcher, it will call NtContinue to restore the old thread
2559 * context and resume execution from there. This naturally adds up to some
2560 * CPU cycles, ring transitions aren't for free, especially after Spectre &
2561 * Meltdown mitigations.
2562 *
2563 * Using NtAlertThread call could do the same without the thread context
2564 * modifications and the extra kernel call.
2565 *
2566 * Update: All concerns have been addressed in or about build 17757.
2567 *
2568 * The WHvCancelVirtualProcessor API is now implemented using a new
2569 * VidMessageSlotHandleAndGetNext() flag (4). Codepath is slightly longer
2570 * than NtAlertThread, but has the added benefit that spurious wakeups can be
2571 * more easily reduced.
2572 *
2573 *
2574 * - When WHvRunVirtualProcessor returns without a message, or on a terse
2575 * VID message like HLT, it will make a kernel call to get some registers.
2576 * This is potentially inefficient if the caller decides he needs more
2577 * register state.
2578 *
2579 * It would be better to just return what's available and let the caller fetch
2580 * what is missing from his point of view in a single kernel call.
2581 *
2582 * Update: All concerns have been addressed in or about build 17757. Selected
2583 * registers are now available via shared memory and thus HLT should (not
2584 * verified) no longer require a system call to compose the exit context data.
2585 *
2586 *
2587 * - The WHvRunVirtualProcessor implementation does lazy GPA range mappings when
2588 * an unmapped GPA message is received from hyper-V.
2589 *
2590 * Since MMIO is currently realized as unmapped GPA, this will slow down all
2591 * MMIO accesses a tiny little bit as WHvRunVirtualProcessor looks up the
2592 * guest physical address to check if it is a pending lazy mapping.
2593 *
2594 * The lazy mapping feature makes no sense to us. We as API user have all the
2595 * information and can do lazy mapping ourselves if we want/have to (see next
2596 * point).
2597 *
2598 * Update: All concerns have been addressed in or about build 17757.
2599 *
2600 *
2601 * - The WHvGetCapability function has a weird design:
2602 * - The CapabilityCode parameter is pointlessly duplicated in the output
2603 * structure (WHV_CAPABILITY).
2604 *
2605 * - API takes void pointer, but everyone will probably be using
2606 * WHV_CAPABILITY due to WHV_CAPABILITY::CapabilityCode making it
2607 * impractical to use anything else.
2608 *
2609 * - No output size.
2610 *
2611 * - See GetFileAttributesEx, GetFileInformationByHandleEx,
2612 * FindFirstFileEx, and others for typical pattern for generic
2613 * information getters.
2614 *
2615 * Update: All concerns have been addressed in build 17110.
2616 *
2617 *
2618 * - The WHvGetPartitionProperty function uses the same weird design as
2619 * WHvGetCapability, see above.
2620 *
2621 * Update: All concerns have been addressed in build 17110.
2622 *
2623 *
2624 * - The WHvSetPartitionProperty function has a totally weird design too:
2625 * - In contrast to its partner WHvGetPartitionProperty, the property code
2626 * is not a separate input parameter here but part of the input
2627 * structure.
2628 *
2629 * - The input structure is a void pointer rather than a pointer to
2630 * WHV_PARTITION_PROPERTY which everyone probably will be using because
2631 * of the WHV_PARTITION_PROPERTY::PropertyCode field.
2632 *
2633 * - Really, why use PVOID for the input when the function isn't accepting
2634 * minimal sizes. E.g. WHVPartitionPropertyCodeProcessorClFlushSize only
2635 * requires a 9 byte input, but the function insists on 16 bytes (17083).
2636 *
2637 * - See GetFileAttributesEx, SetFileInformationByHandle, FindFirstFileEx,
2638 * and others for typical pattern for generic information setters and
2639 * getters.
2640 *
2641 * Update: All concerns have been addressed in build 17110.
2642 *
2643 *
2644 * @section sec_nem_win_large_pages Large Pages
2645 *
2646 * We've got a standalone memory allocation and access testcase bs3-memalloc-1
2647 * which was run with 48GiB of guest RAM configured on a NUC 11 box running
2648 * Windows 11 GA. In the simplified NEM memory mode no exits should be
2649 * generated while the access tests are running.
2650 *
2651 * The bs3-memalloc-1 results kind of hints at some tiny speed-up if the guest
2652 * RAM is allocated using the MEM_LARGE_PAGES flag, but only in the 3rd access
2653 * check (typical 350 000 MiB/s w/o and around 400 000 MiB/s). The result for
2654 * the 2nd access varies a lot, perhaps hinting at some table optimizations
2655 * going on.
2656 *
2657 * The initial access where the memory is locked/whatever has absolutely horrid
2658 * results regardless of whether large pages are enabled or not. Typically
2659 * bobbing close to 500 MiB/s, non-large pages a little faster.
2660 *
2661 * NEM w/ simplified memory and MEM_LARGE_PAGES:
2662 * @verbatim
2663bs3-memalloc-1: TESTING...
2664bs3-memalloc-1: #0/0x0: 0x0000000000000000 LB 0x000000000009fc00 USABLE (1)
2665bs3-memalloc-1: #1/0x1: 0x000000000009fc00 LB 0x0000000000000400 RESERVED (2)
2666bs3-memalloc-1: #2/0x2: 0x00000000000f0000 LB 0x0000000000010000 RESERVED (2)
2667bs3-memalloc-1: #3/0x3: 0x0000000000100000 LB 0x00000000dfef0000 USABLE (1)
2668bs3-memalloc-1: #4/0x4: 0x00000000dfff0000 LB 0x0000000000010000 ACPI_RECLAIMABLE (3)
2669bs3-memalloc-1: #5/0x5: 0x00000000fec00000 LB 0x0000000000001000 RESERVED (2)
2670bs3-memalloc-1: #6/0x6: 0x00000000fee00000 LB 0x0000000000001000 RESERVED (2)
2671bs3-memalloc-1: #7/0x7: 0x00000000fffc0000 LB 0x0000000000040000 RESERVED (2)
2672bs3-memalloc-1: #8/0x9: 0x0000000100000000 LB 0x0000000b20000000 USABLE (1)
2673bs3-memalloc-1: Found 1 interesting entries covering 0xb20000000 bytes (44 GB).
2674bs3-memalloc-1: From 0x100000000 to 0xc20000000
2675bs3-memalloc-1: INT15h/E820 : PASSED
2676bs3-memalloc-1: Mapping memory above 4GB : PASSED
2677bs3-memalloc-1: Pages : 11 665 408 pages
2678bs3-memalloc-1: MiBs : 45 568 MB
2679bs3-memalloc-1: Alloc elapsed : 90 925 263 996 ns
2680bs3-memalloc-1: Alloc elapsed in ticks : 272 340 387 336 ticks
2681bs3-memalloc-1: Page alloc time : 7 794 ns/page
2682bs3-memalloc-1: Page alloc time in ticks : 23 345 ticks/page
2683bs3-memalloc-1: Alloc thruput : 128 296 pages/s
2684bs3-memalloc-1: Alloc thruput in MiBs : 501 MB/s
2685bs3-memalloc-1: Allocation speed : PASSED
2686bs3-memalloc-1: Access elapsed : 85 074 483 467 ns
2687bs3-memalloc-1: Access elapsed in ticks : 254 816 088 412 ticks
2688bs3-memalloc-1: Page access time : 7 292 ns/page
2689bs3-memalloc-1: Page access time in ticks : 21 843 ticks/page
2690bs3-memalloc-1: Access thruput : 137 119 pages/s
2691bs3-memalloc-1: Access thruput in MiBs : 535 MB/s
2692bs3-memalloc-1: 2nd access : PASSED
2693bs3-memalloc-1: Access elapsed : 112 963 925 ns
2694bs3-memalloc-1: Access elapsed in ticks : 338 284 436 ticks
2695bs3-memalloc-1: Page access time : 9 ns/page
2696bs3-memalloc-1: Page access time in ticks : 28 ticks/page
2697bs3-memalloc-1: Access thruput : 103 266 666 pages/s
2698bs3-memalloc-1: Access thruput in MiBs : 403 385 MB/s
2699bs3-memalloc-1: 3rd access : PASSED
2700bs3-memalloc-1: SUCCESS
2701 * @endverbatim
2702 *
2703 * NEM w/ simplified memory but no MEM_LARGE_PAGES:
2704 * @verbatim
2705bs3-memalloc-1: From 0x100000000 to 0xc20000000
2706bs3-memalloc-1: Pages : 11 665 408 pages
2707bs3-memalloc-1: MiBs : 45 568 MB
2708bs3-memalloc-1: Alloc elapsed : 90 062 027 900 ns
2709bs3-memalloc-1: Alloc elapsed in ticks : 269 754 826 466 ticks
2710bs3-memalloc-1: Page alloc time : 7 720 ns/page
2711bs3-memalloc-1: Page alloc time in ticks : 23 124 ticks/page
2712bs3-memalloc-1: Alloc thruput : 129 526 pages/s
2713bs3-memalloc-1: Alloc thruput in MiBs : 505 MB/s
2714bs3-memalloc-1: Allocation speed : PASSED
2715bs3-memalloc-1: Access elapsed : 3 596 017 220 ns
2716bs3-memalloc-1: Access elapsed in ticks : 10 770 732 620 ticks
2717bs3-memalloc-1: Page access time : 308 ns/page
2718bs3-memalloc-1: Page access time in ticks : 923 ticks/page
2719bs3-memalloc-1: Access thruput : 3 243 980 pages/s
2720bs3-memalloc-1: Access thruput in MiBs : 12 671 MB/s
2721bs3-memalloc-1: 2nd access : PASSED
2722bs3-memalloc-1: Access elapsed : 133 060 160 ns
2723bs3-memalloc-1: Access elapsed in ticks : 398 459 884 ticks
2724bs3-memalloc-1: Page access time : 11 ns/page
2725bs3-memalloc-1: Page access time in ticks : 34 ticks/page
2726bs3-memalloc-1: Access thruput : 87 670 178 pages/s
2727bs3-memalloc-1: Access thruput in MiBs : 342 461 MB/s
2728bs3-memalloc-1: 3rd access : PASSED
2729 * @endverbatim
2730 *
2731 * Same everything but native VT-x and VBox (stripped output a little):
2732 * @verbatim
2733bs3-memalloc-1: From 0x100000000 to 0xc20000000
2734bs3-memalloc-1: Pages : 11 665 408 pages
2735bs3-memalloc-1: MiBs : 45 568 MB
2736bs3-memalloc-1: Alloc elapsed : 776 111 427 ns
2737bs3-memalloc-1: Alloc elapsed in ticks : 2 323 267 035 ticks
2738bs3-memalloc-1: Page alloc time : 66 ns/page
2739bs3-memalloc-1: Page alloc time in ticks : 199 ticks/page
2740bs3-memalloc-1: Alloc thruput : 15 030 584 pages/s
2741bs3-memalloc-1: Alloc thruput in MiBs : 58 713 MB/s
2742bs3-memalloc-1: Allocation speed : PASSED
2743bs3-memalloc-1: Access elapsed : 112 141 904 ns
2744bs3-memalloc-1: Access elapsed in ticks : 335 751 077 ticks
2745bs3-memalloc-1: Page access time : 9 ns/page
2746bs3-memalloc-1: Page access time in ticks : 28 ticks/page
2747bs3-memalloc-1: Access thruput : 104 023 630 pages/s
2748bs3-memalloc-1: Access thruput in MiBs : 406 342 MB/s
2749bs3-memalloc-1: 2nd access : PASSED
2750bs3-memalloc-1: Access elapsed : 112 023 049 ns
2751bs3-memalloc-1: Access elapsed in ticks : 335 418 343 ticks
2752bs3-memalloc-1: Page access time : 9 ns/page
2753bs3-memalloc-1: Page access time in ticks : 28 ticks/page
2754bs3-memalloc-1: Access thruput : 104 133 998 pages/s
2755bs3-memalloc-1: Access thruput in MiBs : 406 773 MB/s
2756bs3-memalloc-1: 3rd access : PASSED
2757 * @endverbatim
2758 *
2759 * VBox with large pages disabled:
2760 * @verbatim
2761bs3-memalloc-1: From 0x100000000 to 0xc20000000
2762bs3-memalloc-1: Pages : 11 665 408 pages
2763bs3-memalloc-1: MiBs : 45 568 MB
2764bs3-memalloc-1: Alloc elapsed : 50 986 588 028 ns
2765bs3-memalloc-1: Alloc elapsed in ticks : 152 714 862 044 ticks
2766bs3-memalloc-1: Page alloc time : 4 370 ns/page
2767bs3-memalloc-1: Page alloc time in ticks : 13 091 ticks/page
2768bs3-memalloc-1: Alloc thruput : 228 793 pages/s
2769bs3-memalloc-1: Alloc thruput in MiBs : 893 MB/s
2770bs3-memalloc-1: Allocation speed : PASSED
2771bs3-memalloc-1: Access elapsed : 2 849 641 741 ns
2772bs3-memalloc-1: Access elapsed in ticks : 8 535 372 249 ticks
2773bs3-memalloc-1: Page access time : 244 ns/page
2774bs3-memalloc-1: Page access time in ticks : 731 ticks/page
2775bs3-memalloc-1: Access thruput : 4 093 640 pages/s
2776bs3-memalloc-1: Access thruput in MiBs : 15 990 MB/s
2777bs3-memalloc-1: 2nd access : PASSED
2778bs3-memalloc-1: Access elapsed : 2 866 960 770 ns
2779bs3-memalloc-1: Access elapsed in ticks : 8 587 097 799 ticks
2780bs3-memalloc-1: Page access time : 245 ns/page
2781bs3-memalloc-1: Page access time in ticks : 736 ticks/page
2782bs3-memalloc-1: Access thruput : 4 068 910 pages/s
2783bs3-memalloc-1: Access thruput in MiBs : 15 894 MB/s
2784bs3-memalloc-1: 3rd access : PASSED
2785 * @endverbatim
2786 *
2787 * Comparing large pages, there is an allocation speed difference of two orders
2788 * of magnitude. When disabling large pages in VBox the allocation numbers are
2789 * closer, and it is clear from the 2nd and 3rd access tests that VBox doesn't
2790 * spend as much memory on nested page tables as Hyper-V does. The similar 2nd
2791 * and 3rd access numbers of the two large page testruns seem to hint strongly at
2792 * Hyper-V eventually getting the large pages in place too, only that it sucks
2793 * hundredfold in the setting up phase.
2794 *
2795 *
2796 *
2797 * @section sec_nem_win_impl Our implementation.
2798 *
2799 * We set out with the goal of wanting to run as much as possible in ring-0,
2800 * reasoning that this would give us the best performance.
2801 *
2802 * This goal was approached gradually, starting out with a pure WinHvPlatform
2803 * implementation, gradually replacing parts: register access, guest memory
2804 * handling, running virtual processors. Then finally moving it all into
2805 * ring-0, while keeping most of it configurable so that we could make
2806 * comparisons (see NEMInternal.h and nemR3NativeRunGC()).
2807 *
2808 *
2809 * @subsection subsect_nem_win_impl_ioctl VID.SYS I/O control calls
2810 *
2811 * To run things in ring-0 we need to talk directly to VID.SYS thru its I/O
2812 * control interface. Looking at changes between like build 17083 and 17101 (if
2813 * memory serves) a set of the VID I/O control numbers shifted a little, which
2814 * means we need to determine them dynamically. We currently do this by hooking
2815 * the NtDeviceIoControlFile API call from VID.DLL and snooping up the
2816 * parameters when making dummy calls to relevant APIs. (We could also
2817 * disassemble the relevant APIs and try fish out the information from that, but
2818 * this is way simpler.)
2819 *
2820 * Issuing I/O control calls from ring-0 is facing a small challenge with
2821 * respect to direct buffering. When using direct buffering the device will
2822 * typically check that the buffer is actually in the user address space range
2823 * and reject kernel addresses. Fortunately, we've got the cross context VM
2824 * structure that is mapped into both kernel and user space, it's also locked
2825 * and safe to access from kernel space. So, we place the I/O control buffers
2826 * in the per-CPU part of it (NEMCPU::uIoCtlBuf) and give the driver the user
2827 * address if direct access buffering or kernel address if not.
2828 *
2829 * The I/O control calls are 'abstracted' in the support driver, see
2830 * SUPR0IoCtlSetupForHandle(), SUPR0IoCtlPerform() and SUPR0IoCtlCleanup().
2831 *
2832 *
2833 * @subsection subsect_nem_win_impl_cpumctx CPUMCTX
2834 *
2835 * Since the CPU state needs to live in Hyper-V when executing, we probably
2836 * should not transfer more than necessary when handling VMEXITs. To help us
2837 * manage this CPUMCTX got a new field CPUMCTX::fExtrn that indicates which
2838 * part of the state is currently externalized (== in Hyper-V).
2839 *
2840 *
2841 * @subsection sec_nem_win_benchmarks Benchmarks.
2842 *
2843 * @subsubsection subsect_nem_win_benchmarks_bs2t1 17134/2018-06-22: Bootsector2-test1
2844 *
2845 * This is ValidationKit/bootsectors/bootsector2-test1.asm as of 2018-06-22
2846 * (internal r123172) running a release build of VirtualBox from the same
2847 * source, though with exit optimizations disabled. Host is AMD Threadripper 1950X
2848 * running an up to date 64-bit Windows 10 build 17134.
2849 *
2850 * The base line column is using the official WinHv API for everything but physical
2851 * memory mapping. The 2nd column is the default NEM/win configuration where we
2852 * put the main execution loop in ring-0, using hypercalls when we can and VID for
2853 * managing execution. The 3rd column is regular VirtualBox using AMD-V directly,
2854 * hyper-V is disabled, main execution loop in ring-0.
2855 *
2856 * @verbatim
2857TESTING... WinHv API Hypercalls + VID VirtualBox AMD-V
2858 32-bit paged protected mode, CPUID : 108 874 ins/sec 113% / 123 602 1198% / 1 305 113
2859 32-bit pae protected mode, CPUID : 106 722 ins/sec 115% / 122 740 1232% / 1 315 201
2860 64-bit long mode, CPUID : 106 798 ins/sec 114% / 122 111 1198% / 1 280 404
2861 16-bit unpaged protected mode, CPUID : 106 835 ins/sec 114% / 121 994 1216% / 1 299 665
2862 32-bit unpaged protected mode, CPUID : 105 257 ins/sec 115% / 121 772 1235% / 1 300 860
2863 real mode, CPUID : 104 507 ins/sec 116% / 121 800 1228% / 1 283 848
2864CPUID EAX=1 : PASSED
2865 32-bit paged protected mode, RDTSC : 99 581 834 ins/sec 100% / 100 323 307 93% / 93 473 299
2866 32-bit pae protected mode, RDTSC : 99 620 585 ins/sec 100% / 99 960 952 84% / 83 968 839
2867 64-bit long mode, RDTSC : 100 540 009 ins/sec 100% / 100 946 372 93% / 93 652 826
2868 16-bit unpaged protected mode, RDTSC : 99 688 473 ins/sec 100% / 100 097 751 76% / 76 281 287
2869 32-bit unpaged protected mode, RDTSC : 98 385 857 ins/sec 102% / 100 510 404 94% / 93 379 536
2870 real mode, RDTSC : 100 087 967 ins/sec 101% / 101 386 138 93% / 93 234 999
2871RDTSC : PASSED
2872 32-bit paged protected mode, Read CR4 : 2 156 102 ins/sec 98% / 2 121 967 17114% / 369 009 009
2873 32-bit pae protected mode, Read CR4 : 2 163 820 ins/sec 98% / 2 133 804 17469% / 377 999 261
2874 64-bit long mode, Read CR4 : 2 164 822 ins/sec 98% / 2 128 698 18875% / 408 619 313
2875 16-bit unpaged protected mode, Read CR4 : 2 162 367 ins/sec 100% / 2 168 508 17132% / 370 477 568
2876 32-bit unpaged protected mode, Read CR4 : 2 163 189 ins/sec 100% / 2 169 808 16768% / 362 734 679
2877 real mode, Read CR4 : 2 162 436 ins/sec 100% / 2 164 914 15551% / 336 288 998
2878Read CR4 : PASSED
2879 real mode, 32-bit IN : 104 649 ins/sec 118% / 123 513 1028% / 1 075 831
2880 real mode, 32-bit OUT : 107 102 ins/sec 115% / 123 660 982% / 1 052 259
2881 real mode, 32-bit IN-to-ring-3 : 105 697 ins/sec 98% / 104 471 201% / 213 216
2882 real mode, 32-bit OUT-to-ring-3 : 105 830 ins/sec 98% / 104 598 198% / 210 495
2883 16-bit unpaged protected mode, 32-bit IN : 104 855 ins/sec 117% / 123 174 1029% / 1 079 591
2884 16-bit unpaged protected mode, 32-bit OUT : 107 529 ins/sec 115% / 124 250 992% / 1 067 053
2885 16-bit unpaged protected mode, 32-bit IN-to-ring-3 : 106 337 ins/sec 103% / 109 565 196% / 209 367
2886 16-bit unpaged protected mode, 32-bit OUT-to-ring-3 : 107 558 ins/sec 100% / 108 237 191% / 206 387
2887 32-bit unpaged protected mode, 32-bit IN : 106 351 ins/sec 116% / 123 584 1016% / 1 081 325
2888 32-bit unpaged protected mode, 32-bit OUT : 106 424 ins/sec 116% / 124 252 995% / 1 059 408
2889 32-bit unpaged protected mode, 32-bit IN-to-ring-3 : 104 035 ins/sec 101% / 105 305 202% / 210 750
2890 32-bit unpaged protected mode, 32-bit OUT-to-ring-3 : 103 831 ins/sec 102% / 106 919 205% / 213 198
2891 32-bit paged protected mode, 32-bit IN : 103 356 ins/sec 119% / 123 870 1041% / 1 076 463
2892 32-bit paged protected mode, 32-bit OUT : 107 177 ins/sec 115% / 124 302 998% / 1 069 655
2893 32-bit paged protected mode, 32-bit IN-to-ring-3 : 104 491 ins/sec 100% / 104 744 200% / 209 264
2894 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 106 603 ins/sec 97% / 103 849 197% / 210 219
2895 32-bit pae protected mode, 32-bit IN : 105 923 ins/sec 115% / 122 759 1041% / 1 103 261
2896 32-bit pae protected mode, 32-bit OUT : 107 083 ins/sec 117% / 126 057 1024% / 1 096 667
2897 32-bit pae protected mode, 32-bit IN-to-ring-3 : 106 114 ins/sec 97% / 103 496 199% / 211 312
2898 32-bit pae protected mode, 32-bit OUT-to-ring-3 : 105 675 ins/sec 96% / 102 096 198% / 209 890
2899 64-bit long mode, 32-bit IN : 105 800 ins/sec 113% / 120 006 1013% / 1 072 116
2900 64-bit long mode, 32-bit OUT : 105 635 ins/sec 113% / 120 375 997% / 1 053 655
2901 64-bit long mode, 32-bit IN-to-ring-3 : 105 274 ins/sec 95% / 100 763 197% / 208 026
2902 64-bit long mode, 32-bit OUT-to-ring-3 : 106 262 ins/sec 94% / 100 749 196% / 209 288
2903NOP I/O Port Access : PASSED
2904 32-bit paged protected mode, 32-bit read : 57 687 ins/sec 119% / 69 136 1197% / 690 548
2905 32-bit paged protected mode, 32-bit write : 57 957 ins/sec 118% / 68 935 1183% / 685 930
2906 32-bit paged protected mode, 32-bit read-to-ring-3 : 57 958 ins/sec 95% / 55 432 276% / 160 505
2907 32-bit paged protected mode, 32-bit write-to-ring-3 : 57 922 ins/sec 100% / 58 340 304% / 176 464
2908 32-bit pae protected mode, 32-bit read : 57 478 ins/sec 119% / 68 453 1141% / 656 159
2909 32-bit pae protected mode, 32-bit write : 57 226 ins/sec 118% / 68 097 1157% / 662 504
2910 32-bit pae protected mode, 32-bit read-to-ring-3 : 57 582 ins/sec 94% / 54 651 268% / 154 867
2911 32-bit pae protected mode, 32-bit write-to-ring-3 : 57 697 ins/sec 100% / 57 750 299% / 173 030
2912 64-bit long mode, 32-bit read : 57 128 ins/sec 118% / 67 779 1071% / 611 949
2913 64-bit long mode, 32-bit write : 57 127 ins/sec 118% / 67 632 1084% / 619 395
2914 64-bit long mode, 32-bit read-to-ring-3 : 57 181 ins/sec 94% / 54 123 265% / 151 937
2915 64-bit long mode, 32-bit write-to-ring-3 : 57 297 ins/sec 99% / 57 286 294% / 168 694
2916 16-bit unpaged protected mode, 32-bit read : 58 827 ins/sec 118% / 69 545 1185% / 697 602
2917 16-bit unpaged protected mode, 32-bit write : 58 678 ins/sec 118% / 69 442 1183% / 694 387
2918 16-bit unpaged protected mode, 32-bit read-to-ring-3 : 57 841 ins/sec 96% / 55 730 275% / 159 163
2919 16-bit unpaged protected mode, 32-bit write-to-ring-3 : 57 855 ins/sec 101% / 58 834 304% / 176 169
2920 32-bit unpaged protected mode, 32-bit read : 58 063 ins/sec 120% / 69 690 1233% / 716 444
2921 32-bit unpaged protected mode, 32-bit write : 57 936 ins/sec 120% / 69 633 1199% / 694 753
2922 32-bit unpaged protected mode, 32-bit read-to-ring-3 : 58 451 ins/sec 96% / 56 183 273% / 159 972
2923 32-bit unpaged protected mode, 32-bit write-to-ring-3 : 58 962 ins/sec 99% / 58 955 298% / 175 936
2924 real mode, 32-bit read : 58 571 ins/sec 118% / 69 478 1160% / 679 917
2925 real mode, 32-bit write : 58 418 ins/sec 118% / 69 320 1185% / 692 513
2926 real mode, 32-bit read-to-ring-3 : 58 072 ins/sec 96% / 55 751 274% / 159 145
2927 real mode, 32-bit write-to-ring-3 : 57 870 ins/sec 101% / 58 755 307% / 178 042
2928NOP MMIO Access : PASSED
2929SUCCESS
2930 * @endverbatim
2931 *
2932 * What we see here is:
2933 *
2934 * - The WinHv API approach is 10 to 12 times slower for exits we can
2935 * handle directly in ring-0 in the VBox AMD-V code.
2936 *
2937 * - The WinHv API approach is 2 to 3 times slower for exits we have to
2938 * go to ring-3 to handle with the VBox AMD-V code.
2939 *
2940 * - By using hypercalls and VID.SYS from ring-0 we gain between
2941 * 13% and 20% over the WinHv API on exits handled in ring-0.
2942 *
2943 * - Exits requiring ring-3 handling are between 6% slower and 3% faster
2944 * than the WinHv API.
2945 *
2946 *
2947 * As a side note, it looks like Hyper-V doesn't let the guest read CR4 but
2948 * triggers exits all the time. This isn't all that important these days since
2949 * OSes like Linux cache the CR4 value specifically to avoid these kinds of exits.
2950 *
2951 *
2952 * @subsubsection subsect_nem_win_benchmarks_bs2t1u1 17134/2018-10-02: Bootsector2-test1
2953 *
2954 * Update on 17134. While expectantly testing a couple of newer builds (17758,
2955 * 17763) hoping for some increases in performance, the numbers turned out
2956 * altogether worse than the June test run. So, we went back to the 1803
2957 * (17134) installation, made sure it was fully up to date (as per 2018-10-02)
2958 * and re-tested.
2959 *
2960 * The numbers had somehow turned significantly worse over the last 3-4 months,
2961 * dropping around 70% for the WinHv API test, more for Hypercalls + VID.
2962 *
2963 * @verbatim
2964TESTING... WinHv API Hypercalls + VID VirtualBox AMD-V *
2965 32-bit paged protected mode, CPUID : 33 270 ins/sec 33 154
2966 real mode, CPUID : 33 534 ins/sec 32 711
2967 [snip]
2968 32-bit paged protected mode, RDTSC : 102 216 011 ins/sec 98 225 419
2969 real mode, RDTSC : 102 492 243 ins/sec 98 225 419
2970 [snip]
2971 32-bit paged protected mode, Read CR4 : 2 096 165 ins/sec 2 123 815
2972 real mode, Read CR4 : 2 081 047 ins/sec 2 075 151
2973 [snip]
2974 32-bit paged protected mode, 32-bit IN : 32 739 ins/sec 33 655
2975 32-bit paged protected mode, 32-bit OUT : 32 702 ins/sec 33 777
2976 32-bit paged protected mode, 32-bit IN-to-ring-3 : 32 579 ins/sec 29 985
2977 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 32 750 ins/sec 29 757
2978 [snip]
2979 32-bit paged protected mode, 32-bit read : 20 042 ins/sec 21 489
2980 32-bit paged protected mode, 32-bit write : 20 036 ins/sec 21 493
2981 32-bit paged protected mode, 32-bit read-to-ring-3 : 19 985 ins/sec 19 143
2982 32-bit paged protected mode, 32-bit write-to-ring-3 : 19 972 ins/sec 19 595
2983
2984 * @endverbatim
2985 *
2986 * Suspects are security updates and/or microcode updates installed since then.
2987 * Given that the RDTSC and CR4 numbers are reasonably unchanged, it seems that
2988 * the Hyper-V core loop (in hvax64.exe) isn't affected. Our ring-0 runloop
2989 * is equally affected as the ring-3 based runloop, so it cannot be ring
2990 * switching as such (unless the ring-0 loop is borked and we didn't notice yet).
2991 *
2992 * The issue is probably in the thread / process switching area, could be
2993 * something special for hyper-V interrupt delivery or worker thread switching.
2994 *
2995 * Really wish this thread ping-pong going on in VID.SYS could be eliminated!
2996 *
2997 *
2998 * @subsubsection subsect_nem_win_benchmarks_bs2t1u2 17763: Bootsector2-test1
2999 *
3000 * Some preliminary numbers for build 17763 on the 3.4 GHz AMD 1950X, the second
3001 * column will improve when we get time to have a look at the register page.
3002 *
3003 * There is a 50% performance loss here compared to the June numbers with
3004 * build 17134. The RDTSC numbers hint that it isn't in the Hyper-V core
3005 * (hvax64.exe), but something on the NT side.
3006 *
3007 * Clearing bit 20 in nt!KiSpeculationFeatures speeds things up (i.e. changing
3008 * the dword from 0x00300065 to 0x00200065 in windbg). This is checked by
3009 * nt!KePrepareToDispatchVirtualProcessor, making it a no-op if the flag is
3010 * clear. winhvr!WinHvpVpDispatchLoop calls that function before making
3011 * hypercall 0xc2, which presumably does the heavy VCpu lifting in hvax64.exe.
3012 *
3013 * @verbatim
3014TESTING... WinHv API Hypercalls + VID clr(bit-20) + WinHv API
3015 32-bit paged protected mode, CPUID : 54 145 ins/sec 51 436 130 076
3016 real mode, CPUID : 54 178 ins/sec 51 713 130 449
3017 [snip]
3018 32-bit paged protected mode, RDTSC : 98 927 639 ins/sec 100 254 552 100 549 882
3019 real mode, RDTSC : 99 601 206 ins/sec 100 886 699 100 470 957
3020 [snip]
3021 32-bit paged protected mode, 32-bit IN : 54 621 ins/sec 51 524 128 294
3022 32-bit paged protected mode, 32-bit OUT : 54 870 ins/sec 51 671 129 397
3023 32-bit paged protected mode, 32-bit IN-to-ring-3 : 54 624 ins/sec 43 964 127 874
3024 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 54 803 ins/sec 44 087 129 443
3025 [snip]
3026 32-bit paged protected mode, 32-bit read : 28 230 ins/sec 34 042 48 113
3027 32-bit paged protected mode, 32-bit write : 27 962 ins/sec 34 050 48 069
3028 32-bit paged protected mode, 32-bit read-to-ring-3 : 27 841 ins/sec 28 397 48 146
3029 32-bit paged protected mode, 32-bit write-to-ring-3 : 27 896 ins/sec 29 455 47 970
3030 * @endverbatim
3031 *
3032 *
3033 * @subsubsection subsect_nem_win_benchmarks_w2k 17134/2018-06-22: Windows 2000 Boot & Shutdown
3034 *
3035 * Timing the startup and automatic shutdown of a Windows 2000 SP4 guest serves
3036 * as a real world benchmark and example of why exit performance is important. When
3037 * Windows 2000 boots up it is doing a lot of VGA redrawing of the boot animation,
3038 * which is very costly. Not having installed guest additions leaves it in a VGA
3039 * mode after the bootup sequence is done, keeping up the screen access expenses,
3040 * though the graphics driver is more economical than the bootvid code.
3041 *
3042 * The VM was configured to automatically logon. A startup script was installed
3043 * to perform the automatic shutting down and powering off the VM (thru
3044 * vts_shutdown.exe -f -p). An offline snapshot of the VM was taken and restored
3045 * before each test run. The test run time is calculated from the monotonic
3046 * VBox.log timestamps, starting with the state change to 'RUNNING' and stopping
3047 * at 'POWERING_OFF'.
3048 *
3049 * The host OS and VirtualBox build is the same as for the bootsector2-test1
3050 * scenario.
3051 *
3052 * Results:
3053 *
3054 * - WinHv API for all but physical page mappings:
3055 * 32 min 12.19 seconds
3056 *
3057 * - The default NEM/win configuration where we put the main execution loop
3058 * in ring-0, using hypercalls when we can and VID for managing execution:
3059 * 3 min 23.18 seconds
3060 *
3061 * - Regular VirtualBox using AMD-V directly, hyper-V is disabled, main
3062 * execution loop in ring-0:
3063 * 58.09 seconds
3064 *
3065 * - WinHv API with exit history based optimizations:
3066 * 58.66 seconds
3067 *
3068 * - Hypercall + VID.SYS with exit history based optimizations:
3069 * 58.94 seconds
3070 *
3071 * With a well above average machine needing over half an hour for booting a
3072 * nearly 20 year old guest kind of says it all. The 13%-20% exit performance
3073 * increase we get by using hypercalls and VID.SYS directly pays off a lot here.
3074 * The 3m23s is almost acceptable in comparison to the half an hour.
3075 *
3076 * The similarity between the last three results strongly hints at Windows 2000
3077 * doing a lot of waiting during boot and shutdown and isn't the best testcase
3078 * once a basic performance level is reached.
3079 *
3080 *
3081 * @subsubsection subsection_iem_win_benchmarks_deb9_nat Debian 9 NAT performance
3082 *
3083 * This benchmark is about network performance over NAT from a 64-bit Debian 9
3084 * VM with a single CPU. For network performance measurements, we use our own
3085 * NetPerf tool (ValidationKit/utils/network/NetPerf.cpp) to measure latency
3086 * and throughput.
3087 *
3088 * The setups, builds and configurations are as in the previous benchmarks
3089 * (release r123172 on 1950X running 64-bit W10/17134 (2016-06-xx)). Please note
3090 * that the exit optimizations haven't yet been tuned with NetPerf in mind.
3091 *
3092 * The NAT network setup was selected here since it's the default one and the
3093 * slowest one. There is quite a bit of IPC with worker threads and packet
3094 * processing involved.
3095 *
3096 * Latency test is first up. This is a classic back and forth between the two
3097 * NetPerf instances, where the key measurement is the roundtrip latency. The
3098 * values here are the lowest result over 3-6 runs.
3099 *
3100 * Against host system:
3101 * - 152 258 ns/roundtrip - 100% - regular VirtualBox SVM
3102 * - 271 059 ns/roundtrip - 178% - Hypercalls + VID.SYS in ring-0 with exit optimizations.
3103 * - 280 149 ns/roundtrip - 184% - Hypercalls + VID.SYS in ring-0
3104 * - 317 735 ns/roundtrip - 209% - Win HV API with exit optimizations.
3105 * - 342 440 ns/roundtrip - 225% - Win HV API
3106 *
3107 * Against a remote Windows 10 system over a 10Gbps link:
3108 * - 243 969 ns/roundtrip - 100% - regular VirtualBox SVM
3109 * - 384 427 ns/roundtrip - 158% - Win HV API with exit optimizations.
3110 * - 402 411 ns/roundtrip - 165% - Hypercalls + VID.SYS in ring-0
3111 * - 406 313 ns/roundtrip - 167% - Win HV API
3112 * - 413 160 ns/roundtrip - 169% - Hypercalls + VID.SYS in ring-0 with exit optimizations.
3113 *
3114 * What we see here is:
3115 *
3116 * - Consistent and significant latency increase using Hyper-V compared
3117 * to directly harnessing AMD-V ourselves.
3118 *
3119 * - When talking to the host, it's clear that the hypercalls + VID.SYS
3120 * in ring-0 method pays off.
3121 *
3122 * - When talking to a different host, the numbers are closer and it
3123 * is no longer clear which Hyper-V execution method is better.
3124 *
3125 *
3126 * Throughput benchmarks are performed by one side pushing data full throttle
3127 * for 10 seconds (minus 1 second at each end of the test), then reversing
3128 * the roles and measuring it in the other direction. The tests ran 3-5 times
3129 * and below are the highest and lowest results in each direction.
3130 *
3131 * Receiving from host system:
3132 * - Regular VirtualBox SVM:
3133 * Max: 96 907 549 bytes/s - 100%
3134 * Min: 86 912 095 bytes/s - 100%
3135 * - Hypercalls + VID.SYS in ring-0:
3136 * Max: 84 036 544 bytes/s - 87%
3137 * Min: 64 978 112 bytes/s - 75%
3138 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
3139 * Max: 77 760 699 bytes/s - 80%
3140 * Min: 72 677 171 bytes/s - 84%
3141 * - Win HV API with exit optimizations:
3142 * Max: 64 465 905 bytes/s - 67%
3143 * Min: 62 286 369 bytes/s - 72%
3144 * - Win HV API:
3145 * Max: 62 466 631 bytes/s - 64%
3146 * Min: 61 362 782 bytes/s - 70%
3147 *
3148 * Sending to the host system:
3149 * - Regular VirtualBox SVM:
3150 * Max: 87 728 652 bytes/s - 100%
3151 * Min: 86 923 198 bytes/s - 100%
3152 * - Hypercalls + VID.SYS in ring-0:
3153 * Max: 84 280 749 bytes/s - 96%
3154 * Min: 78 369 842 bytes/s - 90%
3155 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
3156 * Max: 84 119 932 bytes/s - 96%
3157 * Min: 77 396 811 bytes/s - 89%
3158 * - Win HV API:
3159 * Max: 81 714 377 bytes/s - 93%
3160 * Min: 78 697 419 bytes/s - 91%
3161 * - Win HV API with exit optimizations:
3162 * Max: 80 502 488 bytes/s - 91%
3163 * Min: 71 164 978 bytes/s - 82%
3164 *
3165 * Receiving from a remote Windows 10 system over a 10Gbps link:
3166 * - Hypercalls + VID.SYS in ring-0:
3167 * Max: 115 346 922 bytes/s - 136%
3168 * Min: 112 912 035 bytes/s - 137%
3169 * - Regular VirtualBox SVM:
3170 * Max: 84 517 504 bytes/s - 100%
3171 * Min: 82 597 049 bytes/s - 100%
3172 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
3173 * Max: 77 736 251 bytes/s - 92%
3174 * Min: 73 813 784 bytes/s - 89%
3175 * - Win HV API with exit optimizations:
3176 * Max: 63 035 587 bytes/s - 75%
3177 * Min: 57 538 380 bytes/s - 70%
3178 * - Win HV API:
3179 * Max: 62 279 185 bytes/s - 74%
3180 * Min: 56 813 866 bytes/s - 69%
3181 *
3182 * Sending to a remote Windows 10 system over a 10Gbps link:
3183 * - Win HV API with exit optimizations:
3184 * Max: 116 502 357 bytes/s - 103%
3185 * Min: 49 046 550 bytes/s - 59%
3186 * - Regular VirtualBox SVM:
3187 * Max: 113 030 991 bytes/s - 100%
3188 * Min: 83 059 511 bytes/s - 100%
3189 * - Hypercalls + VID.SYS in ring-0:
3190 * Max: 106 435 031 bytes/s - 94%
3191 * Min: 47 253 510 bytes/s - 57%
3192 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
3193 * Max: 94 842 287 bytes/s - 84%
3194 * Min: 68 362 172 bytes/s - 82%
3195 * - Win HV API:
3196 * Max: 65 165 225 bytes/s - 58%
3197 * Min: 47 246 573 bytes/s - 57%
3198 *
3199 * What we see here is:
3200 *
3201 * - Again consistent numbers when talking to the host. Showing that the
3202 * ring-0 approach is preferable to the ring-3 one.
3203 *
3204 * - Again when talking to a remote host, things get more difficult to
3205 * make sense of. The spread is larger and direct AMD-V gets beaten by
3206 * a different Hyper-V approach in each direction.
3207 *
3208 * - However, if we treat the first entry (remote host) as weird spikes, the
3209 * other entries are consistently worse compared to direct AMD-V. For the
3210 * send case we get really bad results for WinHV.
3211 *
3212 */
3213
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette