VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/NEMR3Native-win.cpp@ 106061

Last change on this file since 106061 was 106061, checked in by vboxsync, 2 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 153.0 KB
Line 
1/* $Id: NEMR3Native-win.cpp 106061 2024-09-16 14:03:52Z vboxsync $ */
2/** @file
3 * NEM - Native execution manager, native ring-3 Windows backend.
4 *
5 * Log group 2: Exit logging.
6 * Log group 3: Log context on exit.
7 * Log group 5: Ring-3 memory management
8 * Log group 6: Ring-0 memory management
9 * Log group 12: API intercepts.
10 */
11
12/*
13 * Copyright (C) 2018-2024 Oracle and/or its affiliates.
14 *
15 * This file is part of VirtualBox base platform packages, as
16 * available from https://www.virtualbox.org.
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation, in version 3 of the
21 * License.
22 *
23 * This program is distributed in the hope that it will be useful, but
24 * WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 * General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, see <https://www.gnu.org/licenses>.
30 *
31 * SPDX-License-Identifier: GPL-3.0-only
32 */
33
34
35/*********************************************************************************************************************************
36* Header Files *
37*********************************************************************************************************************************/
38#define LOG_GROUP LOG_GROUP_NEM
39#define VMCPU_INCL_CPUM_GST_CTX
40#include <iprt/nt/nt-and-windows.h>
41#include <iprt/nt/hyperv.h>
42#include <iprt/nt/vid.h>
43#include <WinHvPlatform.h>
44
45#ifndef _WIN32_WINNT_WIN10
46# error "Missing _WIN32_WINNT_WIN10"
47#endif
48#ifndef _WIN32_WINNT_WIN10_RS1 /* Missing define, causing trouble for us. */
49# define _WIN32_WINNT_WIN10_RS1 (_WIN32_WINNT_WIN10 + 1)
50#endif
51#include <sysinfoapi.h>
52#include <debugapi.h>
53#include <errhandlingapi.h>
54#include <fileapi.h>
55#include <winerror.h> /* no api header for this. */
56
57#include <VBox/vmm/nem.h>
58#include <VBox/vmm/iem.h>
59#include <VBox/vmm/em.h>
60#include <VBox/vmm/apic.h>
61#include <VBox/vmm/pdm.h>
62#include <VBox/vmm/dbgftrace.h>
63#include "NEMInternal.h"
64#include <VBox/vmm/vmcc.h>
65
66#include <iprt/ldr.h>
67#include <iprt/path.h>
68#include <iprt/string.h>
69#include <iprt/system.h>
70#include <iprt/utf16.h>
71
72#ifndef NTDDI_WIN10_VB /* Present in W10 2004 SDK, quite possibly earlier. */
73HRESULT WINAPI WHvQueryGpaRangeDirtyBitmap(WHV_PARTITION_HANDLE, WHV_GUEST_PHYSICAL_ADDRESS, UINT64, UINT64 *, UINT32);
74# define WHvMapGpaRangeFlagTrackDirtyPages ((WHV_MAP_GPA_RANGE_FLAGS)0x00000008)
75#endif
76
77
78/*********************************************************************************************************************************
79* Defined Constants And Macros *
80*********************************************************************************************************************************/
81#ifdef LOG_ENABLED
82# define NEM_WIN_INTERCEPT_NT_IO_CTLS
83#endif
84
85/** VID I/O control detection: Fake partition handle input. */
86#define NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE ((HANDLE)(uintptr_t)38479125)
87/** VID I/O control detection: Fake partition ID return. */
88#define NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID UINT64_C(0xfa1e000042424242)
89/** VID I/O control detection: The property we get via VidGetPartitionProperty. */
90#define NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_CODE HvPartitionPropertyProcessorVendor
91/** VID I/O control detection: Fake property value return. */
92#define NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_VALUE UINT64_C(0xf00dface01020304)
93/** VID I/O control detection: Fake CPU index input. */
94#define NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX UINT32_C(42)
95/** VID I/O control detection: Fake timeout input. */
96#define NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT UINT32_C(0x00080286)
97
98
99/*********************************************************************************************************************************
100* Global Variables *
101*********************************************************************************************************************************/
102/** @name APIs imported from WinHvPlatform.dll
103 * @{ */
104static decltype(WHvGetCapability) * g_pfnWHvGetCapability;
105static decltype(WHvCreatePartition) * g_pfnWHvCreatePartition;
106static decltype(WHvSetupPartition) * g_pfnWHvSetupPartition;
107static decltype(WHvDeletePartition) * g_pfnWHvDeletePartition;
108static decltype(WHvGetPartitionProperty) * g_pfnWHvGetPartitionProperty;
109static decltype(WHvSetPartitionProperty) * g_pfnWHvSetPartitionProperty;
110static decltype(WHvMapGpaRange) * g_pfnWHvMapGpaRange;
111static decltype(WHvUnmapGpaRange) * g_pfnWHvUnmapGpaRange;
112static decltype(WHvTranslateGva) * g_pfnWHvTranslateGva;
113static decltype(WHvQueryGpaRangeDirtyBitmap) * g_pfnWHvQueryGpaRangeDirtyBitmap;
114static decltype(WHvCreateVirtualProcessor) * g_pfnWHvCreateVirtualProcessor;
115static decltype(WHvDeleteVirtualProcessor) * g_pfnWHvDeleteVirtualProcessor;
116static decltype(WHvRunVirtualProcessor) * g_pfnWHvRunVirtualProcessor;
117static decltype(WHvCancelRunVirtualProcessor) * g_pfnWHvCancelRunVirtualProcessor;
118static decltype(WHvGetVirtualProcessorRegisters) * g_pfnWHvGetVirtualProcessorRegisters;
119static decltype(WHvSetVirtualProcessorRegisters) * g_pfnWHvSetVirtualProcessorRegisters;
120/** @} */
121
122/** @name APIs imported from Vid.dll
123 * @{ */
124static decltype(VidGetHvPartitionId) *g_pfnVidGetHvPartitionId;
125static decltype(VidGetPartitionProperty) *g_pfnVidGetPartitionProperty;
126#ifdef LOG_ENABLED
127static decltype(VidStartVirtualProcessor) *g_pfnVidStartVirtualProcessor;
128static decltype(VidStopVirtualProcessor) *g_pfnVidStopVirtualProcessor;
129static decltype(VidMessageSlotMap) *g_pfnVidMessageSlotMap;
130static decltype(VidMessageSlotHandleAndGetNext) *g_pfnVidMessageSlotHandleAndGetNext;
131static decltype(VidGetVirtualProcessorState) *g_pfnVidGetVirtualProcessorState;
132static decltype(VidSetVirtualProcessorState) *g_pfnVidSetVirtualProcessorState;
133static decltype(VidGetVirtualProcessorRunningStatus) *g_pfnVidGetVirtualProcessorRunningStatus;
134#endif
135/** @} */
136
137/** The Windows build number. */
138static uint32_t g_uBuildNo = 17134;
139
140
141
142/**
143 * Import instructions.
 *
 * One entry per API that nemR3WinInitProbeAndLoad resolves via RTLdrGetSymbol.
 * The resolved address is written to the variable that ppfn points to; if the
 * lookup fails that variable is set to NULL instead.  A failed lookup is only
 * turned into an error status when fOptional is clear.
144 */
145static const struct
146{
147 uint8_t idxDll; /**< 0 for WinHvPlatform.dll, 1 for vid.dll. */
148 bool fOptional; /**< Set if import is optional. */
149 PFNRT *ppfn; /**< The function pointer variable. */
150 const char *pszName; /**< The function name. */
151} g_aImports[] =
152{
/* Builds one table entry: takes the address of the g_pfn<a_Name> variable and
   stringifies a_Name to get the symbol name to look up. */
153#define NEM_WIN_IMPORT(a_idxDll, a_fOptional, a_Name) { (a_idxDll), (a_fOptional), (PFNRT *)&RT_CONCAT(g_pfn,a_Name), #a_Name }
154 NEM_WIN_IMPORT(0, false, WHvGetCapability),
155 NEM_WIN_IMPORT(0, false, WHvCreatePartition),
156 NEM_WIN_IMPORT(0, false, WHvSetupPartition),
157 NEM_WIN_IMPORT(0, false, WHvDeletePartition),
158 NEM_WIN_IMPORT(0, false, WHvGetPartitionProperty),
159 NEM_WIN_IMPORT(0, false, WHvSetPartitionProperty),
160 NEM_WIN_IMPORT(0, false, WHvMapGpaRange),
161 NEM_WIN_IMPORT(0, false, WHvUnmapGpaRange),
162 NEM_WIN_IMPORT(0, false, WHvTranslateGva),
163 NEM_WIN_IMPORT(0, true, WHvQueryGpaRangeDirtyBitmap),
164 NEM_WIN_IMPORT(0, false, WHvCreateVirtualProcessor),
165 NEM_WIN_IMPORT(0, false, WHvDeleteVirtualProcessor),
166 NEM_WIN_IMPORT(0, false, WHvRunVirtualProcessor),
167 NEM_WIN_IMPORT(0, false, WHvCancelRunVirtualProcessor),
168 NEM_WIN_IMPORT(0, false, WHvGetVirtualProcessorRegisters),
169 NEM_WIN_IMPORT(0, false, WHvSetVirtualProcessorRegisters),
170
171 NEM_WIN_IMPORT(1, true, VidGetHvPartitionId),
172 NEM_WIN_IMPORT(1, true, VidGetPartitionProperty),
/* The remaining vid.dll imports are mandatory, but are only part of the table
   when LOG_ENABLED is defined. */
173#ifdef LOG_ENABLED
174 NEM_WIN_IMPORT(1, false, VidMessageSlotMap),
175 NEM_WIN_IMPORT(1, false, VidMessageSlotHandleAndGetNext),
176 NEM_WIN_IMPORT(1, false, VidStartVirtualProcessor),
177 NEM_WIN_IMPORT(1, false, VidStopVirtualProcessor),
178 NEM_WIN_IMPORT(1, false, VidGetVirtualProcessorState),
179 NEM_WIN_IMPORT(1, false, VidSetVirtualProcessorState),
180 NEM_WIN_IMPORT(1, false, VidGetVirtualProcessorRunningStatus),
181#endif
182#undef NEM_WIN_IMPORT
183};
184
185
186/** The real NtDeviceIoControlFile API in NTDLL. */
187static decltype(NtDeviceIoControlFile) *g_pfnNtDeviceIoControlFile;
188/** Pointer to the NtDeviceIoControlFile import table entry. */
189static decltype(NtDeviceIoControlFile) **g_ppfnVidNtDeviceIoControlFile;
190#ifdef LOG_ENABLED
191/** Info about the VidGetHvPartitionId I/O control interface. */
192static NEMWINIOCTL g_IoCtlGetHvPartitionId;
193/** Info about the VidGetPartitionProperty I/O control interface. */
194static NEMWINIOCTL g_IoCtlGetPartitionProperty;
195/** Info about the VidStartVirtualProcessor I/O control interface. */
196static NEMWINIOCTL g_IoCtlStartVirtualProcessor;
197/** Info about the VidStopVirtualProcessor I/O control interface. */
198static NEMWINIOCTL g_IoCtlStopVirtualProcessor;
199/** Info about the VidMessageSlotHandleAndGetNext I/O control interface. */
200static NEMWINIOCTL g_IoCtlMessageSlotHandleAndGetNext;
201/** Info about the VidMessageSlotMap I/O control interface - for logging. */
202static NEMWINIOCTL g_IoCtlMessageSlotMap;
203/** Info about the VidGetVirtualProcessorState I/O control interface - for logging. */
204static NEMWINIOCTL g_IoCtlGetVirtualProcessorState;
205/** Info about the VidSetVirtualProcessorState I/O control interface - for logging. */
206static NEMWINIOCTL g_IoCtlSetVirtualProcessorState;
207/** Pointer to what nemR3WinIoctlDetector_ForLogging should fill in. */
208static NEMWINIOCTL *g_pIoCtlDetectForLogging;
209#endif
210
211#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
212/** Mapping slot for CPU #0.
213 * @{ */
214static VID_MESSAGE_MAPPING_HEADER *g_pMsgSlotMapping = NULL;
215static const HV_MESSAGE_HEADER *g_pHvMsgHdr;
216static const HV_X64_INTERCEPT_MESSAGE_HEADER *g_pX64MsgHdr;
217/** @} */
218#endif
219
220
221/*
222 * Let the preprocessor alias the APIs to import variables for better autocompletion.
 *
 * After this point a call written as WHvXxx(...) or VidXxx(...) goes through
 * the corresponding g_pfnXxx function pointer.  The aliases are not defined
 * when IN_SLICKEDIT is defined.
 *
 * NOTE(review): WHvGetRunExitContextSize is aliased to
 * g_pfnWHvGetRunExitContextSize, but no such variable is declared in the
 * visible part of this file - confirm whether the alias is stale.
 *
 * NOTE(review): the three Vid* aliases refer to function pointers that are
 * only declared when LOG_ENABLED is defined, so they presumably must only be
 * used from logging-build code - confirm.
223 */
224#ifndef IN_SLICKEDIT
225# define WHvGetCapability g_pfnWHvGetCapability
226# define WHvCreatePartition g_pfnWHvCreatePartition
227# define WHvSetupPartition g_pfnWHvSetupPartition
228# define WHvDeletePartition g_pfnWHvDeletePartition
229# define WHvGetPartitionProperty g_pfnWHvGetPartitionProperty
230# define WHvSetPartitionProperty g_pfnWHvSetPartitionProperty
231# define WHvMapGpaRange g_pfnWHvMapGpaRange
232# define WHvUnmapGpaRange g_pfnWHvUnmapGpaRange
233# define WHvTranslateGva g_pfnWHvTranslateGva
234# define WHvQueryGpaRangeDirtyBitmap g_pfnWHvQueryGpaRangeDirtyBitmap
235# define WHvCreateVirtualProcessor g_pfnWHvCreateVirtualProcessor
236# define WHvDeleteVirtualProcessor g_pfnWHvDeleteVirtualProcessor
237# define WHvRunVirtualProcessor g_pfnWHvRunVirtualProcessor
238# define WHvGetRunExitContextSize g_pfnWHvGetRunExitContextSize
239# define WHvCancelRunVirtualProcessor g_pfnWHvCancelRunVirtualProcessor
240# define WHvGetVirtualProcessorRegisters g_pfnWHvGetVirtualProcessorRegisters
241# define WHvSetVirtualProcessorRegisters g_pfnWHvSetVirtualProcessorRegisters
242
243# define VidMessageSlotHandleAndGetNext g_pfnVidMessageSlotHandleAndGetNext
244# define VidStartVirtualProcessor g_pfnVidStartVirtualProcessor
245# define VidStopVirtualProcessor g_pfnVidStopVirtualProcessor
246
247#endif
248
249/** WHV_MEMORY_ACCESS_TYPE names */
250static const char * const g_apszWHvMemAccesstypes[4] = { "read", "write", "exec", "!undefined!" };
251
252
253/*********************************************************************************************************************************
254* Internal Functions *
255*********************************************************************************************************************************/
256DECLINLINE(int) nemR3NativeGCPhys2R3PtrReadOnly(PVM pVM, RTGCPHYS GCPhys, const void **ppv);
257DECLINLINE(int) nemR3NativeGCPhys2R3PtrWriteable(PVM pVM, RTGCPHYS GCPhys, void **ppv);
258
259/*
260 * Instantiate the code we used to share with ring-0.
261 */
262#include "../VMMAll/NEMAllNativeTemplate-win.cpp.h"
263
264
265
266#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
267/**
268 * Wrapper that logs the call from VID.DLL.
269 *
270 * This is very handy for figuring out why an API call fails.
271 */
272static NTSTATUS WINAPI
273nemR3WinLogWrapper_NtDeviceIoControlFile(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
274 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
275 PVOID pvOutput, ULONG cbOutput)
276{
277
278 char szFunction[32];
279 const char *pszFunction;
280 if (uFunction == g_IoCtlMessageSlotHandleAndGetNext.uFunction)
281 pszFunction = "VidMessageSlotHandleAndGetNext";
282 else if (uFunction == g_IoCtlStartVirtualProcessor.uFunction)
283 pszFunction = "VidStartVirtualProcessor";
284 else if (uFunction == g_IoCtlStopVirtualProcessor.uFunction)
285 pszFunction = "VidStopVirtualProcessor";
286 else if (uFunction == g_IoCtlMessageSlotMap.uFunction)
287 pszFunction = "VidMessageSlotMap";
288 else if (uFunction == g_IoCtlGetVirtualProcessorState.uFunction)
289 pszFunction = "VidGetVirtualProcessorState";
290 else if (uFunction == g_IoCtlSetVirtualProcessorState.uFunction)
291 pszFunction = "VidSetVirtualProcessorState";
292 else
293 {
294 RTStrPrintf(szFunction, sizeof(szFunction), "%#x", uFunction);
295 pszFunction = szFunction;
296 }
297
298 if (cbInput > 0 && pvInput)
299 Log12(("VID!NtDeviceIoControlFile: %s/input: %.*Rhxs\n", pszFunction, RT_MIN(cbInput, 32), pvInput));
300 NTSTATUS rcNt = g_pfnNtDeviceIoControlFile(hFile, hEvt, pfnApcCallback, pvApcCtx, pIos, uFunction,
301 pvInput, cbInput, pvOutput, cbOutput);
302 if (!hEvt && !pfnApcCallback && !pvApcCtx)
303 Log12(("VID!NtDeviceIoControlFile: hFile=%#zx pIos=%p->{s:%#x, i:%#zx} uFunction=%s Input=%p LB %#x Output=%p LB %#x) -> %#x; Caller=%p\n",
304 hFile, pIos, pIos->Status, pIos->Information, pszFunction, pvInput, cbInput, pvOutput, cbOutput, rcNt, ASMReturnAddress()));
305 else
306 Log12(("VID!NtDeviceIoControlFile: hFile=%#zx hEvt=%#zx Apc=%p/%p pIos=%p->{s:%#x, i:%#zx} uFunction=%s Input=%p LB %#x Output=%p LB %#x) -> %#x; Caller=%p\n",
307 hFile, hEvt, RT_CB_LOG_CAST(pfnApcCallback), pvApcCtx, pIos, pIos->Status, pIos->Information, pszFunction,
308 pvInput, cbInput, pvOutput, cbOutput, rcNt, ASMReturnAddress()));
309 if (cbOutput > 0 && pvOutput)
310 {
311 Log12(("VID!NtDeviceIoControlFile: %s/output: %.*Rhxs\n", pszFunction, RT_MIN(cbOutput, 32), pvOutput));
312 if (uFunction == 0x2210cc && g_pMsgSlotMapping == NULL && cbOutput >= sizeof(void *))
313 {
314 g_pMsgSlotMapping = *(VID_MESSAGE_MAPPING_HEADER **)pvOutput;
315 g_pHvMsgHdr = (const HV_MESSAGE_HEADER *)(g_pMsgSlotMapping + 1);
316 g_pX64MsgHdr = (const HV_X64_INTERCEPT_MESSAGE_HEADER *)(g_pHvMsgHdr + 1);
317 Log12(("VID!NtDeviceIoControlFile: Message slot mapping: %p\n", g_pMsgSlotMapping));
318 }
319 }
320 if ( g_pMsgSlotMapping
321 && ( uFunction == g_IoCtlMessageSlotHandleAndGetNext.uFunction
322 || uFunction == g_IoCtlStopVirtualProcessor.uFunction
323 || uFunction == g_IoCtlMessageSlotMap.uFunction
324 ))
325 Log12(("VID!NtDeviceIoControlFile: enmVidMsgType=%#x cb=%#x msg=%#x payload=%u cs:rip=%04x:%08RX64 (%s)\n",
326 g_pMsgSlotMapping->enmVidMsgType, g_pMsgSlotMapping->cbMessage,
327 g_pHvMsgHdr->MessageType, g_pHvMsgHdr->PayloadSize,
328 g_pX64MsgHdr->CsSegment.Selector, g_pX64MsgHdr->Rip, pszFunction));
329
330 return rcNt;
331}
332#endif /* NEM_WIN_INTERCEPT_NT_IO_CTLS */
333
334
335/**
336 * Patches the call table of VID.DLL so we can intercept NtDeviceIoControlFile.
337 *
338 * This is for used to figure out the I/O control codes and in logging builds
339 * for logging API calls that WinHvPlatform.dll does.
340 *
341 * @returns VBox status code.
342 * @param hLdrModVid The VID module handle.
343 * @param pErrInfo Where to return additional error information.
344 */
345static int nemR3WinInitVidIntercepts(RTLDRMOD hLdrModVid, PRTERRINFO pErrInfo)
346{
347 /*
348 * Locate the real API.
349 */
350 g_pfnNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) *)RTLdrGetSystemSymbol("NTDLL.DLL", "NtDeviceIoControlFile");
351 AssertReturn(g_pfnNtDeviceIoControlFile != NULL,
352 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Failed to resolve NtDeviceIoControlFile from NTDLL.DLL"));
353
354 /*
355 * Locate the PE header and get what we need from it.
356 */
357 uint8_t const *pbImage = (uint8_t const *)RTLdrGetNativeHandle(hLdrModVid);
358 IMAGE_DOS_HEADER const *pMzHdr = (IMAGE_DOS_HEADER const *)pbImage;
359 AssertReturn(pMzHdr->e_magic == IMAGE_DOS_SIGNATURE,
360 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL mapping doesn't start with MZ signature: %#x", pMzHdr->e_magic));
361 IMAGE_NT_HEADERS const *pNtHdrs = (IMAGE_NT_HEADERS const *)&pbImage[pMzHdr->e_lfanew];
362 AssertReturn(pNtHdrs->Signature == IMAGE_NT_SIGNATURE,
363 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL has invalid PE signaturre: %#x @%#x",
364 pNtHdrs->Signature, pMzHdr->e_lfanew));
365
366 uint32_t const cbImage = pNtHdrs->OptionalHeader.SizeOfImage;
367 IMAGE_DATA_DIRECTORY const ImportDir = pNtHdrs->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT];
368
369 /*
370 * Walk the import descriptor table looking for NTDLL.DLL.
371 */
372 AssertReturn( ImportDir.Size > 0
373 && ImportDir.Size < cbImage,
374 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory size: %#x", ImportDir.Size));
375 AssertReturn( ImportDir.VirtualAddress > 0
376 && ImportDir.VirtualAddress <= cbImage - ImportDir.Size,
377 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory RVA: %#x", ImportDir.VirtualAddress));
378
379 for (PIMAGE_IMPORT_DESCRIPTOR pImps = (PIMAGE_IMPORT_DESCRIPTOR)&pbImage[ImportDir.VirtualAddress];
380 pImps->Name != 0 && pImps->FirstThunk != 0;
381 pImps++)
382 {
383 AssertReturn(pImps->Name < cbImage,
384 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad import directory entry name: %#x", pImps->Name));
385 const char *pszModName = (const char *)&pbImage[pImps->Name];
386 if (RTStrICmpAscii(pszModName, "ntdll.dll"))
387 continue;
388 AssertReturn(pImps->FirstThunk < cbImage,
389 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad FirstThunk: %#x", pImps->FirstThunk));
390 AssertReturn(pImps->OriginalFirstThunk < cbImage,
391 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad FirstThunk: %#x", pImps->FirstThunk));
392
393 /*
394 * Walk the thunks table(s) looking for NtDeviceIoControlFile.
395 */
396 uintptr_t *puFirstThunk = (uintptr_t *)&pbImage[pImps->FirstThunk]; /* update this. */
397 if ( pImps->OriginalFirstThunk != 0
398 && pImps->OriginalFirstThunk != pImps->FirstThunk)
399 {
400 uintptr_t const *puOrgThunk = (uintptr_t const *)&pbImage[pImps->OriginalFirstThunk]; /* read from this. */
401 uintptr_t cLeft = (cbImage - (RT_MAX(pImps->FirstThunk, pImps->OriginalFirstThunk)))
402 / sizeof(*puFirstThunk);
403 while (cLeft-- > 0 && *puOrgThunk != 0)
404 {
405 if (!(*puOrgThunk & IMAGE_ORDINAL_FLAG64)) /* ASSUMES 64-bit */
406 {
407 AssertReturn(*puOrgThunk > 0 && *puOrgThunk < cbImage,
408 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "VID.DLL bad thunk entry: %#x", *puOrgThunk));
409
410 const char *pszSymbol = (const char *)&pbImage[*puOrgThunk + 2];
411 if (strcmp(pszSymbol, "NtDeviceIoControlFile") == 0)
412 g_ppfnVidNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) **)puFirstThunk;
413 }
414
415 puOrgThunk++;
416 puFirstThunk++;
417 }
418 }
419 else
420 {
421 /* No original thunk table, so scan the resolved symbols for a match
422 with the NtDeviceIoControlFile address. */
423 uintptr_t const uNeedle = (uintptr_t)g_pfnNtDeviceIoControlFile;
424 uintptr_t cLeft = (cbImage - pImps->FirstThunk) / sizeof(*puFirstThunk);
425 while (cLeft-- > 0 && *puFirstThunk != 0)
426 {
427 if (*puFirstThunk == uNeedle)
428 g_ppfnVidNtDeviceIoControlFile = (decltype(NtDeviceIoControlFile) **)puFirstThunk;
429 puFirstThunk++;
430 }
431 }
432 }
433
434 if (g_ppfnVidNtDeviceIoControlFile != NULL)
435 {
436 /* Make the thunk writable we can freely modify it. */
437 DWORD fOldProt = PAGE_READONLY;
438 VirtualProtect((void *)(uintptr_t)g_ppfnVidNtDeviceIoControlFile, sizeof(uintptr_t), PAGE_EXECUTE_READWRITE, &fOldProt);
439
440#ifdef NEM_WIN_INTERCEPT_NT_IO_CTLS
441 *g_ppfnVidNtDeviceIoControlFile = nemR3WinLogWrapper_NtDeviceIoControlFile;
442#endif
443 return VINF_SUCCESS;
444 }
445 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Failed to patch NtDeviceIoControlFile import in VID.DLL!");
446}
447
448
449/**
450 * Worker for nemR3NativeInit that probes and load the native API.
451 *
452 * @returns VBox status code.
453 * @param fForced Whether the HMForced flag is set and we should
454 * fail if we cannot initialize.
455 * @param pErrInfo Where to always return error info.
456 */
457static int nemR3WinInitProbeAndLoad(bool fForced, PRTERRINFO pErrInfo)
458{
459 /*
460 * Check that the DLL files we need are present, but without loading them.
461 * We'd like to avoid loading them unnecessarily.
462 */
463 WCHAR wszPath[MAX_PATH + 64];
464 UINT cwcPath = GetSystemDirectoryW(wszPath, MAX_PATH);
465 if (cwcPath >= MAX_PATH || cwcPath < 2)
466 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "GetSystemDirectoryW failed (%#x / %u)", cwcPath, GetLastError());
467
468 if (wszPath[cwcPath - 1] != '\\' || wszPath[cwcPath - 1] != '/')
469 wszPath[cwcPath++] = '\\';
470 RTUtf16CopyAscii(&wszPath[cwcPath], RT_ELEMENTS(wszPath) - cwcPath, "WinHvPlatform.dll");
471 if (GetFileAttributesW(wszPath) == INVALID_FILE_ATTRIBUTES)
472 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "The native API dll was not found (%ls)", wszPath);
473
474 /*
475 * Check that we're in a VM and that the hypervisor identifies itself as Hyper-V.
476 */
477 if (!ASMHasCpuId())
478 return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "No CPUID support");
479 if (!RTX86IsValidStdRange(ASMCpuId_EAX(0)))
480 return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "No CPUID leaf #1");
481 if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_HVP))
482 return RTErrInfoSet(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Not in a hypervisor partition (HVP=0)");
483
484 uint32_t cMaxHyperLeaf = 0;
485 uint32_t uEbx = 0;
486 uint32_t uEcx = 0;
487 uint32_t uEdx = 0;
488 ASMCpuIdExSlow(0x40000000, 0, 0, 0, &cMaxHyperLeaf, &uEbx, &uEcx, &uEdx);
489 if (!RTX86IsValidHypervisorRange(cMaxHyperLeaf))
490 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Invalid hypervisor CPUID range (%#x %#x %#x %#x)",
491 cMaxHyperLeaf, uEbx, uEcx, uEdx);
492 if ( uEbx != UINT32_C(0x7263694d) /* Micr */
493 || uEcx != UINT32_C(0x666f736f) /* osof */
494 || uEdx != UINT32_C(0x76482074) /* t Hv */)
495 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE,
496 "Not Hyper-V CPUID signature: %#x %#x %#x (expected %#x %#x %#x)",
497 uEbx, uEcx, uEdx, UINT32_C(0x7263694d), UINT32_C(0x666f736f), UINT32_C(0x76482074));
498 if (cMaxHyperLeaf < UINT32_C(0x40000005))
499 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "Too narrow hypervisor CPUID range (%#x)", cMaxHyperLeaf);
500
501 /** @todo would be great if we could recognize a root partition from the
502 * CPUID info, but I currently don't dare do that. */
503
504 /*
505 * Now try load the DLLs and resolve the APIs.
506 */
507 static const char * const s_apszDllNames[2] = { "WinHvPlatform.dll", "vid.dll" };
508 RTLDRMOD ahMods[2] = { NIL_RTLDRMOD, NIL_RTLDRMOD };
509 int rc = VINF_SUCCESS;
510 for (unsigned i = 0; i < RT_ELEMENTS(s_apszDllNames); i++)
511 {
512 int rc2 = RTLdrLoadSystem(s_apszDllNames[i], true /*fNoUnload*/, &ahMods[i]);
513 if (RT_FAILURE(rc2))
514 {
515 if (!RTErrInfoIsSet(pErrInfo))
516 RTErrInfoSetF(pErrInfo, rc2, "Failed to load API DLL: %s: %Rrc", s_apszDllNames[i], rc2);
517 else
518 RTErrInfoAddF(pErrInfo, rc2, "; %s: %Rrc", s_apszDllNames[i], rc2);
519 ahMods[i] = NIL_RTLDRMOD;
520 rc = VERR_NEM_INIT_FAILED;
521 }
522 }
523 if (RT_SUCCESS(rc))
524 rc = nemR3WinInitVidIntercepts(ahMods[1], pErrInfo);
525 if (RT_SUCCESS(rc))
526 {
527 for (unsigned i = 0; i < RT_ELEMENTS(g_aImports); i++)
528 {
529 int rc2 = RTLdrGetSymbol(ahMods[g_aImports[i].idxDll], g_aImports[i].pszName, (void **)g_aImports[i].ppfn);
530 if (RT_SUCCESS(rc2))
531 {
532 if (g_aImports[i].fOptional)
533 LogRel(("NEM: info: Found optional import %s!%s.\n",
534 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName));
535 }
536 else
537 {
538 *g_aImports[i].ppfn = NULL;
539
540 LogRel(("NEM: %s: Failed to import %s!%s: %Rrc",
541 g_aImports[i].fOptional ? "info" : fForced ? "fatal" : "error",
542 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName, rc2));
543 if (!g_aImports[i].fOptional)
544 {
545 if (RTErrInfoIsSet(pErrInfo))
546 RTErrInfoAddF(pErrInfo, rc2, ", %s!%s",
547 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName);
548 else
549 rc = RTErrInfoSetF(pErrInfo, rc2, "Failed to import: %s!%s",
550 s_apszDllNames[g_aImports[i].idxDll], g_aImports[i].pszName);
551 Assert(RT_FAILURE(rc));
552 }
553 }
554 }
555 if (RT_SUCCESS(rc))
556 {
557 Assert(!RTErrInfoIsSet(pErrInfo));
558 }
559 }
560
561 for (unsigned i = 0; i < RT_ELEMENTS(ahMods); i++)
562 RTLdrClose(ahMods[i]);
563 return rc;
564}
565
566
567/**
568 * Wrapper for different WHvGetCapability signatures.
569 */
570DECLINLINE(HRESULT) WHvGetCapabilityWrapper(WHV_CAPABILITY_CODE enmCap, WHV_CAPABILITY *pOutput, uint32_t cbOutput)
571{
572 return g_pfnWHvGetCapability(enmCap, pOutput, cbOutput, NULL);
573}
574
575
576/**
577 * Worker for nemR3NativeInit that gets the hypervisor capabilities.
578 *
579 * @returns VBox status code.
580 * @param pVM The cross context VM structure.
581 * @param pErrInfo Where to always return error info.
582 */
583static int nemR3WinInitCheckCapabilities(PVM pVM, PRTERRINFO pErrInfo)
584{
585#define NEM_LOG_REL_CAP_EX(a_szField, a_szFmt, a_Value) LogRel(("NEM: %-38s= " a_szFmt "\n", a_szField, a_Value))
586#define NEM_LOG_REL_CAP_SUB_EX(a_szField, a_szFmt, a_Value) LogRel(("NEM: %36s: " a_szFmt "\n", a_szField, a_Value))
587#define NEM_LOG_REL_CAP_SUB(a_szField, a_Value) NEM_LOG_REL_CAP_SUB_EX(a_szField, "%d", a_Value)
588
589 /*
590 * Is the hypervisor present with the desired capability?
591 *
592 * In build 17083 this translates into:
593 * - CPUID[0x00000001].HVP is set
594 * - CPUID[0x40000000] == "Microsoft Hv"
595 * - CPUID[0x40000001].eax == "Hv#1"
596 * - CPUID[0x40000003].ebx[12] is set.
597 * - VidGetExoPartitionProperty(INVALID_HANDLE_VALUE, 0x60000, &Ignored) returns
598 * a non-zero value.
599 */
600 /**
601 * @todo Someone at Microsoft please explain weird API design:
602 * 1. Pointless CapabilityCode duplication int the output;
603 * 2. No output size.
604 */
605 WHV_CAPABILITY Caps;
606 RT_ZERO(Caps);
607 SetLastError(0);
608 HRESULT hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeHypervisorPresent, &Caps, sizeof(Caps));
609 DWORD rcWin = GetLastError();
610 if (FAILED(hrc))
611 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
612 "WHvGetCapability/WHvCapabilityCodeHypervisorPresent failed: %Rhrc (Last=%#x/%u)",
613 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
614 if (!Caps.HypervisorPresent)
615 {
616 if (!RTPathExists(RTPATH_NT_PASSTHRU_PREFIX "Device\\VidExo"))
617 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE,
618 "WHvCapabilityCodeHypervisorPresent is FALSE! Make sure you have enabled the 'Windows Hypervisor Platform' feature.");
619 return RTErrInfoSetF(pErrInfo, VERR_NEM_NOT_AVAILABLE, "WHvCapabilityCodeHypervisorPresent is FALSE! (%u)", rcWin);
620 }
621 LogRel(("NEM: WHvCapabilityCodeHypervisorPresent is TRUE, so this might work...\n"));
622
623
624 /*
625 * Check what extended VM exits are supported.
626 */
627 RT_ZERO(Caps);
628 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeExtendedVmExits, &Caps, sizeof(Caps));
629 if (FAILED(hrc))
630 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
631 "WHvGetCapability/WHvCapabilityCodeExtendedVmExits failed: %Rhrc (Last=%#x/%u)",
632 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
633 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeExtendedVmExits", "%'#018RX64", Caps.ExtendedVmExits.AsUINT64);
634 pVM->nem.s.fExtendedMsrExit = RT_BOOL(Caps.ExtendedVmExits.X64MsrExit);
635 pVM->nem.s.fExtendedCpuIdExit = RT_BOOL(Caps.ExtendedVmExits.X64CpuidExit);
636 pVM->nem.s.fExtendedXcptExit = RT_BOOL(Caps.ExtendedVmExits.ExceptionExit);
637 NEM_LOG_REL_CAP_SUB("fExtendedMsrExit", pVM->nem.s.fExtendedMsrExit);
638 NEM_LOG_REL_CAP_SUB("fExtendedCpuIdExit", pVM->nem.s.fExtendedCpuIdExit);
639 NEM_LOG_REL_CAP_SUB("fExtendedXcptExit", pVM->nem.s.fExtendedXcptExit);
640 if (Caps.ExtendedVmExits.AsUINT64 & ~(uint64_t)7)
641 LogRel(("NEM: Warning! Unknown VM exit definitions: %#RX64\n", Caps.ExtendedVmExits.AsUINT64));
642 /** @todo RECHECK: WHV_EXTENDED_VM_EXITS typedef. */
643
644 /*
645 * Check features in case they end up defining any.
646 */
647 RT_ZERO(Caps);
648 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeFeatures, &Caps, sizeof(Caps));
649 if (FAILED(hrc))
650 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
651 "WHvGetCapability/WHvCapabilityCodeFeatures failed: %Rhrc (Last=%#x/%u)",
652 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
653 if (Caps.Features.AsUINT64 & ~(uint64_t)0)
654 LogRel(("NEM: Warning! Unknown feature definitions: %#RX64\n", Caps.Features.AsUINT64));
655 /** @todo RECHECK: WHV_CAPABILITY_FEATURES typedef. */
656
657 /*
658 * Check supported exception exit bitmap bits.
659 * We don't currently require this, so we just log failure.
660 */
661 RT_ZERO(Caps);
662 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeExceptionExitBitmap, &Caps, sizeof(Caps));
663 if (SUCCEEDED(hrc))
664 LogRel(("NEM: Supported exception exit bitmap: %#RX64\n", Caps.ExceptionExitBitmap));
665 else
666 LogRel(("NEM: Warning! WHvGetCapability/WHvCapabilityCodeExceptionExitBitmap failed: %Rhrc (Last=%#x/%u)",
667 hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
668
669 /*
670 * Check that the CPU vendor is supported.
671 */
672 RT_ZERO(Caps);
673 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorVendor, &Caps, sizeof(Caps));
674 if (FAILED(hrc))
675 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
676 "WHvGetCapability/WHvCapabilityCodeProcessorVendor failed: %Rhrc (Last=%#x/%u)",
677 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
678 switch (Caps.ProcessorVendor)
679 {
680 /** @todo RECHECK: WHV_PROCESSOR_VENDOR typedef. */
681 case WHvProcessorVendorIntel:
682 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d - Intel", Caps.ProcessorVendor);
683 pVM->nem.s.enmCpuVendor = CPUMCPUVENDOR_INTEL;
684 break;
685 case WHvProcessorVendorAmd:
686 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d - AMD", Caps.ProcessorVendor);
687 pVM->nem.s.enmCpuVendor = CPUMCPUVENDOR_AMD;
688 break;
689 default:
690 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorVendor", "%d", Caps.ProcessorVendor);
691 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Unknown processor vendor: %d", Caps.ProcessorVendor);
692 }
693
694 /*
695 * CPU features, guessing these are virtual CPU features?
696 */
697 RT_ZERO(Caps);
698 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorFeatures, &Caps, sizeof(Caps));
699 if (FAILED(hrc))
700 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
701 "WHvGetCapability/WHvCapabilityCodeProcessorFeatures failed: %Rhrc (Last=%#x/%u)",
702 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
703 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorFeatures", "%'#018RX64", Caps.ProcessorFeatures.AsUINT64);
704#define NEM_LOG_REL_CPU_FEATURE(a_Field) NEM_LOG_REL_CAP_SUB(#a_Field, Caps.ProcessorFeatures.a_Field)
705 NEM_LOG_REL_CPU_FEATURE(Sse3Support);
706 NEM_LOG_REL_CPU_FEATURE(LahfSahfSupport);
707 NEM_LOG_REL_CPU_FEATURE(Ssse3Support);
708 NEM_LOG_REL_CPU_FEATURE(Sse4_1Support);
709 NEM_LOG_REL_CPU_FEATURE(Sse4_2Support);
710 NEM_LOG_REL_CPU_FEATURE(Sse4aSupport);
711 NEM_LOG_REL_CPU_FEATURE(XopSupport);
712 NEM_LOG_REL_CPU_FEATURE(PopCntSupport);
713 NEM_LOG_REL_CPU_FEATURE(Cmpxchg16bSupport);
714 NEM_LOG_REL_CPU_FEATURE(Altmovcr8Support);
715 NEM_LOG_REL_CPU_FEATURE(LzcntSupport);
716 NEM_LOG_REL_CPU_FEATURE(MisAlignSseSupport);
717 NEM_LOG_REL_CPU_FEATURE(MmxExtSupport);
718 NEM_LOG_REL_CPU_FEATURE(Amd3DNowSupport);
719 NEM_LOG_REL_CPU_FEATURE(ExtendedAmd3DNowSupport);
720 NEM_LOG_REL_CPU_FEATURE(Page1GbSupport);
721 NEM_LOG_REL_CPU_FEATURE(AesSupport);
722 NEM_LOG_REL_CPU_FEATURE(PclmulqdqSupport);
723 NEM_LOG_REL_CPU_FEATURE(PcidSupport);
724 NEM_LOG_REL_CPU_FEATURE(Fma4Support);
725 NEM_LOG_REL_CPU_FEATURE(F16CSupport);
726 NEM_LOG_REL_CPU_FEATURE(RdRandSupport);
727 NEM_LOG_REL_CPU_FEATURE(RdWrFsGsSupport);
728 NEM_LOG_REL_CPU_FEATURE(SmepSupport);
729 NEM_LOG_REL_CPU_FEATURE(EnhancedFastStringSupport);
730 NEM_LOG_REL_CPU_FEATURE(Bmi1Support);
731 NEM_LOG_REL_CPU_FEATURE(Bmi2Support);
732 /* two reserved bits here, see below */
733 NEM_LOG_REL_CPU_FEATURE(MovbeSupport);
734 NEM_LOG_REL_CPU_FEATURE(Npiep1Support);
735 NEM_LOG_REL_CPU_FEATURE(DepX87FPUSaveSupport);
736 NEM_LOG_REL_CPU_FEATURE(RdSeedSupport);
737 NEM_LOG_REL_CPU_FEATURE(AdxSupport);
738 NEM_LOG_REL_CPU_FEATURE(IntelPrefetchSupport);
739 NEM_LOG_REL_CPU_FEATURE(SmapSupport);
740 NEM_LOG_REL_CPU_FEATURE(HleSupport);
741 NEM_LOG_REL_CPU_FEATURE(RtmSupport);
742 NEM_LOG_REL_CPU_FEATURE(RdtscpSupport);
743 NEM_LOG_REL_CPU_FEATURE(ClflushoptSupport);
744 NEM_LOG_REL_CPU_FEATURE(ClwbSupport);
745 NEM_LOG_REL_CPU_FEATURE(ShaSupport);
746 NEM_LOG_REL_CPU_FEATURE(X87PointersSavedSupport);
747#undef NEM_LOG_REL_CPU_FEATURE
748 if (Caps.ProcessorFeatures.AsUINT64 & (~(RT_BIT_64(43) - 1) | RT_BIT_64(27) | RT_BIT_64(28)))
749 LogRel(("NEM: Warning! Unknown CPU features: %#RX64\n", Caps.ProcessorFeatures.AsUINT64));
750 pVM->nem.s.uCpuFeatures.u64 = Caps.ProcessorFeatures.AsUINT64;
751 /** @todo RECHECK: WHV_PROCESSOR_FEATURES typedef. */
752
753 /*
754 * The cache line flush size.
755 */
756 RT_ZERO(Caps);
757 hrc = WHvGetCapabilityWrapper(WHvCapabilityCodeProcessorClFlushSize, &Caps, sizeof(Caps));
758 if (FAILED(hrc))
759 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
760 "WHvGetCapability/WHvCapabilityCodeProcessorClFlushSize failed: %Rhrc (Last=%#x/%u)",
761 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
762 NEM_LOG_REL_CAP_EX("WHvCapabilityCodeProcessorClFlushSize", "2^%u", Caps.ProcessorClFlushSize);
763 if (Caps.ProcessorClFlushSize < 8 && Caps.ProcessorClFlushSize > 9)
764 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Unsupported cache line flush size: %u", Caps.ProcessorClFlushSize);
765 pVM->nem.s.cCacheLineFlushShift = Caps.ProcessorClFlushSize;
766
767 /*
768 * See if they've added more properties that we're not aware of.
769 */
770 /** @todo RECHECK: WHV_CAPABILITY_CODE typedef. */
771 if (!IsDebuggerPresent()) /* Too noisy when in debugger, so skip. */
772 {
773 static const struct
774 {
775 uint32_t iMin, iMax; } s_aUnknowns[] =
776 {
777 { 0x0004, 0x000f },
778 { 0x1003, 0x100f },
779 { 0x2000, 0x200f },
780 { 0x3000, 0x300f },
781 { 0x4000, 0x400f },
782 };
783 for (uint32_t j = 0; j < RT_ELEMENTS(s_aUnknowns); j++)
784 for (uint32_t i = s_aUnknowns[j].iMin; i <= s_aUnknowns[j].iMax; i++)
785 {
786 RT_ZERO(Caps);
787 hrc = WHvGetCapabilityWrapper((WHV_CAPABILITY_CODE)i, &Caps, sizeof(Caps));
788 if (SUCCEEDED(hrc))
789 LogRel(("NEM: Warning! Unknown capability %#x returning: %.*Rhxs\n", i, sizeof(Caps), &Caps));
790 }
791 }
792
793 /*
794 * For proper operation, we require CPUID exits.
795 */
796 if (!pVM->nem.s.fExtendedCpuIdExit)
797 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended CPUID exit support");
798 if (!pVM->nem.s.fExtendedMsrExit)
799 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended MSR exit support");
800 if (!pVM->nem.s.fExtendedXcptExit)
801 return RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED, "Missing required extended exception exit support");
802
803#undef NEM_LOG_REL_CAP_EX
804#undef NEM_LOG_REL_CAP_SUB_EX
805#undef NEM_LOG_REL_CAP_SUB
806 return VINF_SUCCESS;
807}
808
809#ifdef LOG_ENABLED
810
811/**
812 * Used to fill in g_IoCtlGetHvPartitionId.
813 */
814static NTSTATUS WINAPI
815nemR3WinIoctlDetector_GetHvPartitionId(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
816 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
817 PVOID pvOutput, ULONG cbOutput)
818{
819 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
820 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
821 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
822 AssertLogRelMsgReturn(cbInput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
823 RT_NOREF(pvInput);
824
825 AssertLogRelMsgReturn(RT_VALID_PTR(pvOutput), ("pvOutput=%p\n", pvOutput), STATUS_INVALID_PARAMETER_9);
826 AssertLogRelMsgReturn(cbOutput == sizeof(HV_PARTITION_ID), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
827 *(HV_PARTITION_ID *)pvOutput = NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID;
828
829 g_IoCtlGetHvPartitionId.cbInput = cbInput;
830 g_IoCtlGetHvPartitionId.cbOutput = cbOutput;
831 g_IoCtlGetHvPartitionId.uFunction = uFunction;
832
833 return STATUS_SUCCESS;
834}
835
836
837/**
838 * Used to fill in g_IoCtlGetHvPartitionId.
839 */
840static NTSTATUS WINAPI
841nemR3WinIoctlDetector_GetPartitionProperty(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
842 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
843 PVOID pvOutput, ULONG cbOutput)
844{
845 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
846 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
847 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
848 AssertLogRelMsgReturn(cbInput == sizeof(VID_PARTITION_PROPERTY_CODE), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
849 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
850 AssertLogRelMsgReturn(*(VID_PARTITION_PROPERTY_CODE *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_CODE,
851 ("*pvInput=%#x, expected %#x\n", *(HV_PARTITION_PROPERTY_CODE *)pvInput,
852 NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_CODE), STATUS_INVALID_PARAMETER_9);
853 AssertLogRelMsgReturn(RT_VALID_PTR(pvOutput), ("pvOutput=%p\n", pvOutput), STATUS_INVALID_PARAMETER_9);
854 AssertLogRelMsgReturn(cbOutput == sizeof(HV_PARTITION_PROPERTY), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
855 *(HV_PARTITION_PROPERTY *)pvOutput = NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_VALUE;
856
857 g_IoCtlGetPartitionProperty.cbInput = cbInput;
858 g_IoCtlGetPartitionProperty.cbOutput = cbOutput;
859 g_IoCtlGetPartitionProperty.uFunction = uFunction;
860
861 return STATUS_SUCCESS;
862}
863
864
865/**
866 * Used to fill in g_IoCtlStartVirtualProcessor.
867 */
868static NTSTATUS WINAPI
869nemR3WinIoctlDetector_StartVirtualProcessor(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
870 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
871 PVOID pvOutput, ULONG cbOutput)
872{
873 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
874 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
875 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
876 AssertLogRelMsgReturn(cbInput == sizeof(HV_VP_INDEX), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
877 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
878 AssertLogRelMsgReturn(*(HV_VP_INDEX *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
879 ("*piCpu=%u\n", *(HV_VP_INDEX *)pvInput), STATUS_INVALID_PARAMETER_9);
880 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
881 RT_NOREF(pvOutput);
882
883 g_IoCtlStartVirtualProcessor.cbInput = cbInput;
884 g_IoCtlStartVirtualProcessor.cbOutput = cbOutput;
885 g_IoCtlStartVirtualProcessor.uFunction = uFunction;
886
887 return STATUS_SUCCESS;
888}
889
890
891/**
892 * Used to fill in g_IoCtlStartVirtualProcessor.
893 */
894static NTSTATUS WINAPI
895nemR3WinIoctlDetector_StopVirtualProcessor(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
896 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
897 PVOID pvOutput, ULONG cbOutput)
898{
899 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
900 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
901 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
902 AssertLogRelMsgReturn(cbInput == sizeof(HV_VP_INDEX), ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_8);
903 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
904 AssertLogRelMsgReturn(*(HV_VP_INDEX *)pvInput == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
905 ("*piCpu=%u\n", *(HV_VP_INDEX *)pvInput), STATUS_INVALID_PARAMETER_9);
906 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
907 RT_NOREF(pvOutput);
908
909 g_IoCtlStopVirtualProcessor.cbInput = cbInput;
910 g_IoCtlStopVirtualProcessor.cbOutput = cbOutput;
911 g_IoCtlStopVirtualProcessor.uFunction = uFunction;
912
913 return STATUS_SUCCESS;
914}
915
916
917/**
918 * Used to fill in g_IoCtlMessageSlotHandleAndGetNext
919 */
920static NTSTATUS WINAPI
921nemR3WinIoctlDetector_MessageSlotHandleAndGetNext(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
922 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
923 PVOID pvOutput, ULONG cbOutput)
924{
925 AssertLogRelMsgReturn(hFile == NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, ("hFile=%p\n", hFile), STATUS_INVALID_PARAMETER_1);
926 RT_NOREF(hEvt); RT_NOREF(pfnApcCallback); RT_NOREF(pvApcCtx);
927 AssertLogRelMsgReturn(RT_VALID_PTR(pIos), ("pIos=%p\n", pIos), STATUS_INVALID_PARAMETER_5);
928
929 if (g_uBuildNo >= 17758)
930 {
931 /* No timeout since about build 17758, it's now always an infinite wait. So, a somewhat compatible change. */
932 AssertLogRelMsgReturn(cbInput == RT_UOFFSETOF(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT, cMillies),
933 ("cbInput=%#x\n", cbInput),
934 STATUS_INVALID_PARAMETER_8);
935 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
936 PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT pVidIn = (PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT)pvInput;
937 AssertLogRelMsgReturn( pVidIn->iCpu == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX
938 && pVidIn->fFlags == VID_MSHAGN_F_HANDLE_MESSAGE,
939 ("iCpu=%u fFlags=%#x cMillies=%#x\n", pVidIn->iCpu, pVidIn->fFlags, pVidIn->cMillies),
940 STATUS_INVALID_PARAMETER_9);
941 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
942 }
943 else
944 {
945 AssertLogRelMsgReturn(cbInput == sizeof(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT), ("cbInput=%#x\n", cbInput),
946 STATUS_INVALID_PARAMETER_8);
947 AssertLogRelMsgReturn(RT_VALID_PTR(pvInput), ("pvInput=%p\n", pvInput), STATUS_INVALID_PARAMETER_9);
948 PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT pVidIn = (PCVID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT)pvInput;
949 AssertLogRelMsgReturn( pVidIn->iCpu == NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX
950 && pVidIn->fFlags == VID_MSHAGN_F_HANDLE_MESSAGE
951 && pVidIn->cMillies == NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT,
952 ("iCpu=%u fFlags=%#x cMillies=%#x\n", pVidIn->iCpu, pVidIn->fFlags, pVidIn->cMillies),
953 STATUS_INVALID_PARAMETER_9);
954 AssertLogRelMsgReturn(cbOutput == 0, ("cbInput=%#x\n", cbInput), STATUS_INVALID_PARAMETER_10);
955 RT_NOREF(pvOutput);
956 }
957
958 g_IoCtlMessageSlotHandleAndGetNext.cbInput = cbInput;
959 g_IoCtlMessageSlotHandleAndGetNext.cbOutput = cbOutput;
960 g_IoCtlMessageSlotHandleAndGetNext.uFunction = uFunction;
961
962 return STATUS_SUCCESS;
963}
964
965/**
966 * Used to fill in what g_pIoCtlDetectForLogging points to.
967 */
968static NTSTATUS WINAPI nemR3WinIoctlDetector_ForLogging(HANDLE hFile, HANDLE hEvt, PIO_APC_ROUTINE pfnApcCallback, PVOID pvApcCtx,
969 PIO_STATUS_BLOCK pIos, ULONG uFunction, PVOID pvInput, ULONG cbInput,
970 PVOID pvOutput, ULONG cbOutput)
971{
972 RT_NOREF(hFile, hEvt, pfnApcCallback, pvApcCtx, pIos, pvInput, pvOutput);
973
974 g_pIoCtlDetectForLogging->cbInput = cbInput;
975 g_pIoCtlDetectForLogging->cbOutput = cbOutput;
976 g_pIoCtlDetectForLogging->uFunction = uFunction;
977
978 return STATUS_SUCCESS;
979}
980
981#endif /* LOG_ENABLED */
982
983/**
984 * Worker for nemR3NativeInit that detect I/O control function numbers for VID.
985 *
986 * We use the function numbers directly in ring-0 and to name functions when
987 * logging NtDeviceIoControlFile calls.
988 *
989 * @note We could alternatively do this by disassembling the respective
990 * functions, but hooking NtDeviceIoControlFile and making fake calls
991 * more easily provides the desired information.
992 *
993 * @returns VBox status code.
994 * @param pVM The cross context VM structure. Will set I/O
995 * control info members.
996 * @param pErrInfo Where to always return error info.
997 */
998static int nemR3WinInitDiscoverIoControlProperties(PVM pVM, PRTERRINFO pErrInfo)
999{
1000 RT_NOREF(pVM, pErrInfo);
1001
1002 /*
1003 * Probe the I/O control information for select VID APIs so we can use
1004 * them directly from ring-0 and better log them.
1005 *
1006 */
1007#ifdef LOG_ENABLED
1008 decltype(NtDeviceIoControlFile) * const pfnOrg = *g_ppfnVidNtDeviceIoControlFile;
1009
1010 /* VidGetHvPartitionId - must work due to our memory management. */
1011 BOOL fRet;
1012 if (g_pfnVidGetHvPartitionId)
1013 {
1014 HV_PARTITION_ID idHvPartition = HV_PARTITION_ID_INVALID;
1015 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_GetHvPartitionId;
1016 fRet = g_pfnVidGetHvPartitionId(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, &idHvPartition);
1017 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1018 AssertReturn(fRet && idHvPartition == NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_ID && g_IoCtlGetHvPartitionId.uFunction != 0,
1019 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
1020 "Problem figuring out VidGetHvPartitionId: fRet=%u idHvPartition=%#x dwErr=%u",
1021 fRet, idHvPartition, GetLastError()) );
1022 LogRel(("NEM: VidGetHvPartitionId -> fun:%#x in:%#x out:%#x\n",
1023 g_IoCtlGetHvPartitionId.uFunction, g_IoCtlGetHvPartitionId.cbInput, g_IoCtlGetHvPartitionId.cbOutput));
1024 }
1025
1026 /* VidGetPartitionProperty - must work as it's fallback for VidGetHvPartitionId. */
1027 if (g_ppfnVidNtDeviceIoControlFile)
1028 {
1029 HV_PARTITION_PROPERTY uPropValue = ~NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_VALUE;
1030 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_GetPartitionProperty;
1031 fRet = g_pfnVidGetPartitionProperty(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_CODE,
1032 &uPropValue);
1033 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1034 AssertReturn( fRet
1035 && uPropValue == NEM_WIN_IOCTL_DETECTOR_FAKE_PARTITION_PROPERTY_VALUE
1036 && g_IoCtlGetHvPartitionId.uFunction != 0,
1037 RTErrInfoSetF(pErrInfo, VERR_NEM_INIT_FAILED,
1038 "Problem figuring out VidGetPartitionProperty: fRet=%u uPropValue=%#x dwErr=%u",
1039 fRet, uPropValue, GetLastError()) );
1040 LogRel(("NEM: VidGetPartitionProperty -> fun:%#x in:%#x out:%#x\n",
1041 g_IoCtlGetPartitionProperty.uFunction, g_IoCtlGetPartitionProperty.cbInput, g_IoCtlGetPartitionProperty.cbOutput));
1042 }
1043
1044 /* VidStartVirtualProcessor */
1045 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_StartVirtualProcessor;
1046 fRet = g_pfnVidStartVirtualProcessor(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX);
1047 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1048 AssertStmt(fRet && g_IoCtlStartVirtualProcessor.uFunction != 0,
1049 RTERRINFO_LOG_REL_SET_F(pErrInfo, VERR_NEM_RING3_ONLY,
1050 "Problem figuring out VidStartVirtualProcessor: fRet=%u dwErr=%u", fRet, GetLastError()) );
1051 LogRel(("NEM: VidStartVirtualProcessor -> fun:%#x in:%#x out:%#x\n", g_IoCtlStartVirtualProcessor.uFunction,
1052 g_IoCtlStartVirtualProcessor.cbInput, g_IoCtlStartVirtualProcessor.cbOutput));
1053
1054 /* VidStopVirtualProcessor */
1055 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_StopVirtualProcessor;
1056 fRet = g_pfnVidStopVirtualProcessor(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX);
1057 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1058 AssertStmt(fRet && g_IoCtlStopVirtualProcessor.uFunction != 0,
1059 RTERRINFO_LOG_REL_SET_F(pErrInfo, VERR_NEM_RING3_ONLY,
1060 "Problem figuring out VidStopVirtualProcessor: fRet=%u dwErr=%u", fRet, GetLastError()) );
1061 LogRel(("NEM: VidStopVirtualProcessor -> fun:%#x in:%#x out:%#x\n", g_IoCtlStopVirtualProcessor.uFunction,
1062 g_IoCtlStopVirtualProcessor.cbInput, g_IoCtlStopVirtualProcessor.cbOutput));
1063
1064 /* VidMessageSlotHandleAndGetNext */
1065 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_MessageSlotHandleAndGetNext;
1066 fRet = g_pfnVidMessageSlotHandleAndGetNext(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE,
1067 NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX, VID_MSHAGN_F_HANDLE_MESSAGE,
1068 NEM_WIN_IOCTL_DETECTOR_FAKE_TIMEOUT);
1069 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1070 AssertStmt(fRet && g_IoCtlMessageSlotHandleAndGetNext.uFunction != 0,
1071 RTERRINFO_LOG_REL_SET_F(pErrInfo, VERR_NEM_RING3_ONLY,
1072 "Problem figuring out VidMessageSlotHandleAndGetNext: fRet=%u dwErr=%u",
1073 fRet, GetLastError()) );
1074 LogRel(("NEM: VidMessageSlotHandleAndGetNext -> fun:%#x in:%#x out:%#x\n",
1075 g_IoCtlMessageSlotHandleAndGetNext.uFunction, g_IoCtlMessageSlotHandleAndGetNext.cbInput,
1076 g_IoCtlMessageSlotHandleAndGetNext.cbOutput));
1077
1078 /* The following are only for logging: */
1079 union
1080 {
1081 VID_MAPPED_MESSAGE_SLOT MapSlot;
1082 HV_REGISTER_NAME Name;
1083 HV_REGISTER_VALUE Value;
1084 } uBuf;
1085
1086 /* VidMessageSlotMap */
1087 g_pIoCtlDetectForLogging = &g_IoCtlMessageSlotMap;
1088 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging;
1089 fRet = g_pfnVidMessageSlotMap(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, &uBuf.MapSlot, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX);
1090 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1091 Assert(fRet);
1092 LogRel(("NEM: VidMessageSlotMap -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction,
1093 g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput));
1094
1095 /* VidGetVirtualProcessorState */
1096 uBuf.Name = HvRegisterExplicitSuspend;
1097 g_pIoCtlDetectForLogging = &g_IoCtlGetVirtualProcessorState;
1098 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging;
1099 fRet = g_pfnVidGetVirtualProcessorState(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
1100 &uBuf.Name, 1, &uBuf.Value);
1101 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1102 Assert(fRet);
1103 LogRel(("NEM: VidGetVirtualProcessorState -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction,
1104 g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput));
1105
1106 /* VidSetVirtualProcessorState */
1107 uBuf.Name = HvRegisterExplicitSuspend;
1108 g_pIoCtlDetectForLogging = &g_IoCtlSetVirtualProcessorState;
1109 *g_ppfnVidNtDeviceIoControlFile = nemR3WinIoctlDetector_ForLogging;
1110 fRet = g_pfnVidSetVirtualProcessorState(NEM_WIN_IOCTL_DETECTOR_FAKE_HANDLE, NEM_WIN_IOCTL_DETECTOR_FAKE_VP_INDEX,
1111 &uBuf.Name, 1, &uBuf.Value);
1112 *g_ppfnVidNtDeviceIoControlFile = pfnOrg;
1113 Assert(fRet);
1114 LogRel(("NEM: VidSetVirtualProcessorState -> fun:%#x in:%#x out:%#x\n", g_pIoCtlDetectForLogging->uFunction,
1115 g_pIoCtlDetectForLogging->cbInput, g_pIoCtlDetectForLogging->cbOutput));
1116
1117 g_pIoCtlDetectForLogging = NULL;
1118#endif /* LOG_ENABLED */
1119
1120 return VINF_SUCCESS;
1121}
1122
1123
1124/**
1125 * Creates and sets up a Hyper-V (exo) partition.
1126 *
1127 * @returns VBox status code.
1128 * @param pVM The cross context VM structure.
1129 * @param pErrInfo Where to always return error info.
1130 */
1131static int nemR3WinInitCreatePartition(PVM pVM, PRTERRINFO pErrInfo)
1132{
1133 AssertReturn(!pVM->nem.s.hPartition, RTErrInfoSet(pErrInfo, VERR_WRONG_ORDER, "Wrong initalization order"));
1134 AssertReturn(!pVM->nem.s.hPartitionDevice, RTErrInfoSet(pErrInfo, VERR_WRONG_ORDER, "Wrong initalization order"));
1135
1136 /*
1137 * Create the partition.
1138 */
1139 WHV_PARTITION_HANDLE hPartition;
1140 HRESULT hrc = WHvCreatePartition(&hPartition);
1141 if (FAILED(hrc))
1142 return RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED, "WHvCreatePartition failed with %Rhrc (Last=%#x/%u)",
1143 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1144
1145 int rc;
1146
1147 /*
1148 * Set partition properties, most importantly the CPU count.
1149 */
1150 /**
1151 * @todo Someone at Microsoft please explain another weird API:
1152 * - Why this API doesn't take the WHV_PARTITION_PROPERTY_CODE value as an
1153 * argument rather than as part of the struct. That is so weird if you've
1154 * used any other NT or windows API, including WHvGetCapability().
1155 * - Why use PVOID when WHV_PARTITION_PROPERTY is what's expected. We
1156 * technically only need 9 bytes for setting/getting
1157 * WHVPartitionPropertyCodeProcessorClFlushSize, but the API insists on 16. */
1158 WHV_PARTITION_PROPERTY Property;
1159 RT_ZERO(Property);
1160 Property.ProcessorCount = pVM->cCpus;
1161 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorCount, &Property, sizeof(Property));
1162 if (SUCCEEDED(hrc))
1163 {
1164 RT_ZERO(Property);
1165 Property.ExtendedVmExits.X64CpuidExit = pVM->nem.s.fExtendedCpuIdExit; /** @todo Register fixed results and restrict cpuid exits */
1166 Property.ExtendedVmExits.X64MsrExit = pVM->nem.s.fExtendedMsrExit;
1167 Property.ExtendedVmExits.ExceptionExit = pVM->nem.s.fExtendedXcptExit;
1168 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeExtendedVmExits, &Property, sizeof(Property));
1169 if (SUCCEEDED(hrc))
1170 {
1171 /*
1172 * We'll continue setup in nemR3NativeInitAfterCPUM.
1173 */
1174 pVM->nem.s.fCreatedEmts = false;
1175 pVM->nem.s.hPartition = hPartition;
1176 LogRel(("NEM: Created partition %p.\n", hPartition));
1177 return VINF_SUCCESS;
1178 }
1179
1180 rc = RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED,
1181 "Failed setting WHvPartitionPropertyCodeExtendedVmExits to %'#RX64: %Rhrc",
1182 Property.ExtendedVmExits.AsUINT64, hrc);
1183 }
1184 else
1185 rc = RTErrInfoSetF(pErrInfo, VERR_NEM_VM_CREATE_FAILED,
1186 "Failed setting WHvPartitionPropertyCodeProcessorCount to %u: %Rhrc (Last=%#x/%u)",
1187 pVM->cCpus, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1188 WHvDeletePartition(hPartition);
1189
1190 Assert(!pVM->nem.s.hPartitionDevice);
1191 Assert(!pVM->nem.s.hPartition);
1192 return rc;
1193}
1194
1195
1196/**
1197 * Makes sure APIC and firmware will not allow X2APIC mode.
1198 *
1199 * This is rather ugly.
1200 *
1201 * @returns VBox status code
1202 * @param pVM The cross context VM structure.
1203 */
1204static int nemR3WinDisableX2Apic(PVM pVM)
1205{
1206 /*
1207 * First make sure the 'Mode' config value of the APIC isn't set to X2APIC.
1208 * This defaults to APIC, so no need to change unless it's X2APIC.
1209 */
1210 PCFGMNODE pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices/apic/0/Config");
1211 if (pCfg)
1212 {
1213 uint8_t bMode = 0;
1214 int rc = CFGMR3QueryU8(pCfg, "Mode", &bMode);
1215 AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_CFGM_VALUE_NOT_FOUND, ("%Rrc\n", rc), rc);
1216 if (RT_SUCCESS(rc) && bMode == PDMAPICMODE_X2APIC)
1217 {
1218 LogRel(("NEM: Adjusting APIC configuration from X2APIC to APIC max mode. X2APIC is not supported by the WinHvPlatform API!\n"));
1219 LogRel(("NEM: Disable Hyper-V if you need X2APIC for your guests!\n"));
1220 rc = CFGMR3RemoveValue(pCfg, "Mode");
1221 rc = CFGMR3InsertInteger(pCfg, "Mode", PDMAPICMODE_APIC);
1222 AssertLogRelRCReturn(rc, rc);
1223 }
1224 }
1225
1226 /*
1227 * Now the firmwares.
1228 * These also defaults to APIC and only needs adjusting if configured to X2APIC (2).
1229 */
1230 static const char * const s_apszFirmwareConfigs[] =
1231 {
1232 "/Devices/efi/0/Config",
1233 "/Devices/pcbios/0/Config",
1234 };
1235 for (unsigned i = 0; i < RT_ELEMENTS(s_apszFirmwareConfigs); i++)
1236 {
1237 pCfg = CFGMR3GetChild(CFGMR3GetRoot(pVM), "/Devices/APIC/0/Config");
1238 if (pCfg)
1239 {
1240 uint8_t bMode = 0;
1241 int rc = CFGMR3QueryU8(pCfg, "APIC", &bMode);
1242 AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_CFGM_VALUE_NOT_FOUND, ("%Rrc\n", rc), rc);
1243 if (RT_SUCCESS(rc) && bMode == 2)
1244 {
1245 LogRel(("NEM: Adjusting %s/Mode from 2 (X2APIC) to 1 (APIC).\n", s_apszFirmwareConfigs[i]));
1246 rc = CFGMR3RemoveValue(pCfg, "APIC");
1247 rc = CFGMR3InsertInteger(pCfg, "APIC", 1);
1248 AssertLogRelRCReturn(rc, rc);
1249 }
1250 }
1251 }
1252
1253 return VINF_SUCCESS;
1254}
1255
1256
1257/**
1258 * Try initialize the native API.
1259 *
1260 * This may only do part of the job, more can be done in
1261 * nemR3NativeInitAfterCPUM() and nemR3NativeInitCompleted().
1262 *
1263 * @returns VBox status code.
1264 * @param pVM The cross context VM structure.
1265 * @param fFallback Whether we're in fallback mode or use-NEM mode. In
1266 * the latter we'll fail if we cannot initialize.
1267 * @param fForced Whether the HMForced flag is set and we should
1268 * fail if we cannot initialize.
1269 */
1270int nemR3NativeInit(PVM pVM, bool fFallback, bool fForced)
1271{
1272 g_uBuildNo = RTSystemGetNtBuildNo();
1273
1274 /*
1275 * Some state init.
1276 */
1277#ifdef NEM_WIN_WITH_A20
1278 pVM->nem.s.fA20Enabled = true;
1279#endif
1280#if 0
1281 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1282 {
1283 PNEMCPU pNemCpu = &pVM->apCpusR3[idCpu]->nem.s;
1284 }
1285#endif
1286
1287 /*
1288 * Error state.
1289 * The error message will be non-empty on failure and 'rc' will be set too.
1290 */
1291 RTERRINFOSTATIC ErrInfo;
1292 PRTERRINFO pErrInfo = RTErrInfoInitStatic(&ErrInfo);
1293 int rc = nemR3WinInitProbeAndLoad(fForced, pErrInfo);
1294 if (RT_SUCCESS(rc))
1295 {
1296 /*
1297 * Check the capabilties of the hypervisor, starting with whether it's present.
1298 */
1299 rc = nemR3WinInitCheckCapabilities(pVM, pErrInfo);
1300 if (RT_SUCCESS(rc))
1301 {
1302 /*
1303 * Discover the VID I/O control function numbers we need (for interception
1304 * only these days).
1305 */
1306 rc = nemR3WinInitDiscoverIoControlProperties(pVM, pErrInfo);
1307 if (RT_SUCCESS(rc))
1308 {
1309 /*
1310 * Create and initialize a partition.
1311 */
1312 rc = nemR3WinInitCreatePartition(pVM, pErrInfo);
1313 if (RT_SUCCESS(rc))
1314 {
1315 /*
1316 * Set ourselves as the execution engine and make config adjustments.
1317 */
1318 VM_SET_MAIN_EXECUTION_ENGINE(pVM, VM_EXEC_ENGINE_NATIVE_API);
1319 Log(("NEM: Marked active!\n"));
1320 nemR3WinDisableX2Apic(pVM);
1321 nemR3DisableCpuIsaExt(pVM, "MONITOR"); /* MONITOR is not supported by Hyper-V (MWAIT is sometimes). */
1322 PGMR3EnableNemMode(pVM);
1323
1324 /*
1325 * Register release statistics
1326 */
1327 STAMR3Register(pVM, (void *)&pVM->nem.s.cMappedPages, STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1328 "/NEM/PagesCurrentlyMapped", STAMUNIT_PAGES, "Number guest pages currently mapped by the VM");
1329 STAMR3Register(pVM, (void *)&pVM->nem.s.StatMapPage, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1330 "/NEM/PagesMapCalls", STAMUNIT_PAGES, "Calls to WHvMapGpaRange/HvCallMapGpaPages");
1331 STAMR3Register(pVM, (void *)&pVM->nem.s.StatMapPageFailed, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1332 "/NEM/PagesMapFails", STAMUNIT_PAGES, "Calls to WHvMapGpaRange/HvCallMapGpaPages that failed");
1333 STAMR3Register(pVM, (void *)&pVM->nem.s.StatUnmapPage, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1334 "/NEM/PagesUnmapCalls", STAMUNIT_PAGES, "Calls to WHvUnmapGpaRange/HvCallUnmapGpaPages");
1335 STAMR3Register(pVM, (void *)&pVM->nem.s.StatUnmapPageFailed, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1336 "/NEM/PagesUnmapFails", STAMUNIT_PAGES, "Calls to WHvUnmapGpaRange/HvCallUnmapGpaPages that failed");
1337 STAMR3Register(pVM, &pVM->nem.s.StatProfMapGpaRange, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
1338 "/NEM/PagesMapGpaRange", STAMUNIT_TICKS_PER_CALL, "Profiling calls to WHvMapGpaRange for bigger stuff");
1339 STAMR3Register(pVM, &pVM->nem.s.StatProfUnmapGpaRange, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
1340 "/NEM/PagesUnmapGpaRange", STAMUNIT_TICKS_PER_CALL, "Profiling calls to WHvUnmapGpaRange for bigger stuff");
1341 STAMR3Register(pVM, &pVM->nem.s.StatProfMapGpaRangePage, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
1342 "/NEM/PagesMapGpaRangePage", STAMUNIT_TICKS_PER_CALL, "Profiling calls to WHvMapGpaRange for single pages");
1343 STAMR3Register(pVM, &pVM->nem.s.StatProfUnmapGpaRangePage, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS,
1344 "/NEM/PagesUnmapGpaRangePage", STAMUNIT_TICKS_PER_CALL, "Profiling calls to WHvUnmapGpaRange for single pages");
1345
1346 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1347 {
1348 PNEMCPU pNemCpu = &pVM->apCpusR3[idCpu]->nem.s;
1349 STAMR3RegisterF(pVM, &pNemCpu->StatExitPortIo, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of port I/O exits", "/NEM/CPU%u/ExitPortIo", idCpu);
1350 STAMR3RegisterF(pVM, &pNemCpu->StatExitMemUnmapped, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of unmapped memory exits", "/NEM/CPU%u/ExitMemUnmapped", idCpu);
1351 STAMR3RegisterF(pVM, &pNemCpu->StatExitMemIntercept, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of intercepted memory exits", "/NEM/CPU%u/ExitMemIntercept", idCpu);
1352 STAMR3RegisterF(pVM, &pNemCpu->StatExitHalt, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of HLT exits", "/NEM/CPU%u/ExitHalt", idCpu);
1353 STAMR3RegisterF(pVM, &pNemCpu->StatExitInterruptWindow, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of interrupt window exits", "/NEM/CPU%u/ExitInterruptWindow", idCpu);
1354 STAMR3RegisterF(pVM, &pNemCpu->StatExitCpuId, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of CPUID exits", "/NEM/CPU%u/ExitCpuId", idCpu);
1355 STAMR3RegisterF(pVM, &pNemCpu->StatExitMsr, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of MSR access exits", "/NEM/CPU%u/ExitMsr", idCpu);
1356 STAMR3RegisterF(pVM, &pNemCpu->StatExitException, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of exception exits", "/NEM/CPU%u/ExitException", idCpu);
1357 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionBp, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #BP exits", "/NEM/CPU%u/ExitExceptionBp", idCpu);
1358 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionDb, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #DB exits", "/NEM/CPU%u/ExitExceptionDb", idCpu);
1359 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionGp, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #GP exits", "/NEM/CPU%u/ExitExceptionGp", idCpu);
1360 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionGpMesa, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #GP exits from mesa driver", "/NEM/CPU%u/ExitExceptionGpMesa", idCpu);
1361 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionUd, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of #UD exits", "/NEM/CPU%u/ExitExceptionUd", idCpu);
1362 STAMR3RegisterF(pVM, &pNemCpu->StatExitExceptionUdHandled, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of handled #UD exits", "/NEM/CPU%u/ExitExceptionUdHandled", idCpu);
1363 STAMR3RegisterF(pVM, &pNemCpu->StatExitUnrecoverable, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of unrecoverable exits", "/NEM/CPU%u/ExitUnrecoverable", idCpu);
1364 STAMR3RegisterF(pVM, &pNemCpu->StatGetMsgTimeout, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of get message timeouts/alerts", "/NEM/CPU%u/GetMsgTimeout", idCpu);
1365 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuSuccess, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of successful CPU stops", "/NEM/CPU%u/StopCpuSuccess", idCpu);
1366 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPending, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pending CPU stops", "/NEM/CPU%u/StopCpuPending", idCpu);
1367 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPendingAlerts,STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pending CPU stop alerts", "/NEM/CPU%u/StopCpuPendingAlerts", idCpu);
1368 STAMR3RegisterF(pVM, &pNemCpu->StatStopCpuPendingOdd, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of odd pending CPU stops (see code)", "/NEM/CPU%u/StopCpuPendingOdd", idCpu);
1369 STAMR3RegisterF(pVM, &pNemCpu->StatCancelChangedState, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel changed state", "/NEM/CPU%u/CancelChangedState", idCpu);
1370 STAMR3RegisterF(pVM, &pNemCpu->StatCancelAlertedThread, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel alerted EMT", "/NEM/CPU%u/CancelAlertedEMT", idCpu);
1371 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnFFPre, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of pre execution FF breaks", "/NEM/CPU%u/BreakOnFFPre", idCpu);
1372 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnFFPost, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of post execution FF breaks", "/NEM/CPU%u/BreakOnFFPost", idCpu);
1373 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnCancel, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of cancel execution breaks", "/NEM/CPU%u/BreakOnCancel", idCpu);
1374 STAMR3RegisterF(pVM, &pNemCpu->StatBreakOnStatus, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of status code breaks", "/NEM/CPU%u/BreakOnStatus", idCpu);
1375 STAMR3RegisterF(pVM, &pNemCpu->StatImportOnDemand, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of on-demand state imports", "/NEM/CPU%u/ImportOnDemand", idCpu);
1376 STAMR3RegisterF(pVM, &pNemCpu->StatImportOnReturn, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of state imports on loop return", "/NEM/CPU%u/ImportOnReturn", idCpu);
1377 STAMR3RegisterF(pVM, &pNemCpu->StatImportOnReturnSkipped, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of skipped state imports on loop return", "/NEM/CPU%u/ImportOnReturnSkipped", idCpu);
1378 STAMR3RegisterF(pVM, &pNemCpu->StatQueryCpuTick, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES, "Number of TSC queries", "/NEM/CPU%u/QueryCpuTick", idCpu);
1379 }
1380
1381 if (!SUPR3IsDriverless())
1382 {
1383 PUVM pUVM = pVM->pUVM;
1384 STAMR3RegisterRefresh(pUVM, &pVM->nem.s.R0Stats.cPagesAvailable, STAMTYPE_U64, STAMVISIBILITY_ALWAYS,
1385 STAMUNIT_PAGES, STAM_REFRESH_GRP_NEM, "Free pages available to the hypervisor",
1386 "/NEM/R0Stats/cPagesAvailable");
1387 STAMR3RegisterRefresh(pUVM, &pVM->nem.s.R0Stats.cPagesInUse, STAMTYPE_U64, STAMVISIBILITY_ALWAYS,
1388 STAMUNIT_PAGES, STAM_REFRESH_GRP_NEM, "Pages in use by hypervisor",
1389 "/NEM/R0Stats/cPagesInUse");
1390 }
1391
1392 }
1393 }
1394 }
1395 }
1396
1397 /*
1398 * We only fail if in forced mode, otherwise just log the complaint and return.
1399 */
1400 Assert(pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API || RTErrInfoIsSet(pErrInfo));
1401 if ( (fForced || !fFallback)
1402 && pVM->bMainExecutionEngine != VM_EXEC_ENGINE_NATIVE_API)
1403 return VMSetError(pVM, RT_SUCCESS_NP(rc) ? VERR_NEM_NOT_AVAILABLE : rc, RT_SRC_POS, "%s", pErrInfo->pszMsg);
1404
1405 if (RTErrInfoIsSet(pErrInfo))
1406 LogRel(("NEM: Not available: %s\n", pErrInfo->pszMsg));
1407 return VINF_SUCCESS;
1408}
1409
1410
1411/**
1412 * This is called after CPUMR3Init is done.
1413 *
1414 * @returns VBox status code.
1415 * @param pVM The VM handle..
1416 */
1417int nemR3NativeInitAfterCPUM(PVM pVM)
1418{
1419 /*
1420 * Validate sanity.
1421 */
1422 WHV_PARTITION_HANDLE hPartition = pVM->nem.s.hPartition;
1423 AssertReturn(hPartition != NULL, VERR_WRONG_ORDER);
1424 AssertReturn(!pVM->nem.s.hPartitionDevice, VERR_WRONG_ORDER);
1425 AssertReturn(!pVM->nem.s.fCreatedEmts, VERR_WRONG_ORDER);
1426 AssertReturn(pVM->bMainExecutionEngine == VM_EXEC_ENGINE_NATIVE_API, VERR_WRONG_ORDER);
1427
1428 /*
1429 * Continue setting up the partition now that we've got most of the CPUID feature stuff.
1430 */
1431 WHV_PARTITION_PROPERTY Property;
1432 HRESULT hrc;
1433
1434#if 0
1435 /* Not sure if we really need to set the vendor.
1436 Update: Apparently we don't. WHvPartitionPropertyCodeProcessorVendor was removed in 17110. */
1437 RT_ZERO(Property);
1438 Property.ProcessorVendor = pVM->nem.s.enmCpuVendor == CPUMCPUVENDOR_AMD ? WHvProcessorVendorAmd
1439 : WHvProcessorVendorIntel;
1440 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorVendor, &Property, sizeof(Property));
1441 if (FAILED(hrc))
1442 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1443 "Failed to set WHvPartitionPropertyCodeProcessorVendor to %u: %Rhrc (Last=%#x/%u)",
1444 Property.ProcessorVendor, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1445#endif
1446
1447 /* Not sure if we really need to set the cache line flush size. */
1448 RT_ZERO(Property);
1449 Property.ProcessorClFlushSize = pVM->nem.s.cCacheLineFlushShift;
1450 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorClFlushSize, &Property, sizeof(Property));
1451 if (FAILED(hrc))
1452 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1453 "Failed to set WHvPartitionPropertyCodeProcessorClFlushSize to %u: %Rhrc (Last=%#x/%u)",
1454 pVM->nem.s.cCacheLineFlushShift, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1455
1456 /* Intercept #DB, #BP and #UD exceptions. */
1457 RT_ZERO(Property);
1458 Property.ExceptionExitBitmap = RT_BIT_64(WHvX64ExceptionTypeDebugTrapOrFault)
1459 | RT_BIT_64(WHvX64ExceptionTypeBreakpointTrap)
1460 | RT_BIT_64(WHvX64ExceptionTypeInvalidOpcodeFault);
1461
1462 /* Intercept #GP to workaround the buggy mesa vmwgfx driver. */
1463 PVMCPU pVCpu = pVM->apCpusR3[0]; /** @todo In theory per vCPU, in practice same for all. */
1464 if (pVCpu->nem.s.fTrapXcptGpForLovelyMesaDrv)
1465 Property.ExceptionExitBitmap |= RT_BIT_64(WHvX64ExceptionTypeGeneralProtectionFault);
1466
1467 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeExceptionExitBitmap, &Property, sizeof(Property));
1468 if (FAILED(hrc))
1469 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1470 "Failed to set WHvPartitionPropertyCodeExceptionExitBitmap to %#RX64: %Rhrc (Last=%#x/%u)",
1471 Property.ExceptionExitBitmap, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1472
1473
1474 /*
1475 * Sync CPU features with CPUM.
1476 */
1477 /** @todo sync CPU features with CPUM. */
1478
1479 /* Set the partition property. */
1480 RT_ZERO(Property);
1481 Property.ProcessorFeatures.AsUINT64 = pVM->nem.s.uCpuFeatures.u64;
1482 hrc = WHvSetPartitionProperty(hPartition, WHvPartitionPropertyCodeProcessorFeatures, &Property, sizeof(Property));
1483 if (FAILED(hrc))
1484 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1485 "Failed to set WHvPartitionPropertyCodeProcessorFeatures to %'#RX64: %Rhrc (Last=%#x/%u)",
1486 pVM->nem.s.uCpuFeatures.u64, hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1487
1488 /*
1489 * Set up the partition.
1490 *
1491 * Seems like this is where the partition is actually instantiated and we get
1492 * a handle to it.
1493 */
1494 hrc = WHvSetupPartition(hPartition);
1495 if (FAILED(hrc))
1496 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1497 "Call to WHvSetupPartition failed: %Rhrc (Last=%#x/%u)",
1498 hrc, RTNtLastStatusValue(), RTNtLastErrorValue());
1499
1500 /*
1501 * Hysterical raisins: Get the handle (could also fish this out via VID.DLL NtDeviceIoControlFile intercepting).
1502 */
1503 HANDLE hPartitionDevice;
1504 __try
1505 {
1506 hPartitionDevice = ((HANDLE *)hPartition)[1];
1507 if (!hPartitionDevice)
1508 hPartitionDevice = INVALID_HANDLE_VALUE;
1509 }
1510 __except(EXCEPTION_EXECUTE_HANDLER)
1511 {
1512 hrc = GetExceptionCode();
1513 hPartitionDevice = INVALID_HANDLE_VALUE;
1514 }
1515
1516 /* Test the handle. */
1517 HV_PARTITION_PROPERTY uValue = 0;
1518 if ( g_pfnVidGetPartitionProperty
1519 && hPartitionDevice != INVALID_HANDLE_VALUE
1520 && !g_pfnVidGetPartitionProperty(hPartitionDevice, HvPartitionPropertyProcessorVendor, &uValue))
1521 hPartitionDevice = INVALID_HANDLE_VALUE;
1522 LogRel(("NEM: HvPartitionPropertyProcessorVendor=%#llx (%lld)\n", uValue, uValue));
1523
1524 /*
1525 * More hysterical rasins: Get the partition ID if we can.
1526 */
1527 HV_PARTITION_ID idHvPartition = HV_PARTITION_ID_INVALID;
1528 if ( g_pfnVidGetHvPartitionId
1529 && hPartitionDevice != INVALID_HANDLE_VALUE
1530 && !g_pfnVidGetHvPartitionId(hPartitionDevice, &idHvPartition))
1531 {
1532 idHvPartition = HV_PARTITION_ID_INVALID;
1533 Log(("NEM: VidGetHvPartitionId failed: %#x\n", GetLastError()));
1534 }
1535 pVM->nem.s.hPartitionDevice = hPartitionDevice;
1536
1537 /*
1538 * Setup the EMTs.
1539 */
1540 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1541 {
1542 pVCpu = pVM->apCpusR3[idCpu];
1543
1544 hrc = WHvCreateVirtualProcessor(hPartition, idCpu, 0 /*fFlags*/);
1545 if (FAILED(hrc))
1546 {
1547 NTSTATUS const rcNtLast = RTNtLastStatusValue();
1548 DWORD const dwErrLast = RTNtLastErrorValue();
1549 while (idCpu-- > 0)
1550 {
1551 HRESULT hrc2 = WHvDeleteVirtualProcessor(hPartition, idCpu);
1552 AssertLogRelMsg(SUCCEEDED(hrc2), ("WHvDeleteVirtualProcessor(%p, %u) -> %Rhrc (Last=%#x/%u)\n",
1553 hPartition, idCpu, hrc2, RTNtLastStatusValue(),
1554 RTNtLastErrorValue()));
1555 }
1556 return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS,
1557 "Call to WHvCreateVirtualProcessor failed: %Rhrc (Last=%#x/%u)", hrc, rcNtLast, dwErrLast);
1558 }
1559 }
1560 pVM->nem.s.fCreatedEmts = true;
1561
1562 LogRel(("NEM: Successfully set up partition (device handle %p, partition ID %#llx)\n", hPartitionDevice, idHvPartition));
1563
1564 /*
1565 * Any hyper-v statistics we can get at now? HvCallMapStatsPage isn't accessible any more.
1566 */
1567 /** @todo stats */
1568
1569 /*
1570 * Adjust features.
1571 *
1572 * Note! We've already disabled X2APIC and MONITOR/MWAIT via CFGM during
1573 * the first init call.
1574 */
1575
1576 return VINF_SUCCESS;
1577}
1578
1579
1580int nemR3NativeInitCompleted(PVM pVM, VMINITCOMPLETED enmWhat)
1581{
1582 //BOOL fRet = SetThreadPriority(GetCurrentThread(), 0);
1583 //AssertLogRel(fRet);
1584
1585 NOREF(pVM); NOREF(enmWhat);
1586 return VINF_SUCCESS;
1587}
1588
1589
1590int nemR3NativeTerm(PVM pVM)
1591{
1592 /*
1593 * Delete the partition.
1594 */
1595 WHV_PARTITION_HANDLE hPartition = pVM->nem.s.hPartition;
1596 pVM->nem.s.hPartition = NULL;
1597 pVM->nem.s.hPartitionDevice = NULL;
1598 if (hPartition != NULL)
1599 {
1600 VMCPUID idCpu = pVM->nem.s.fCreatedEmts ? pVM->cCpus : 0;
1601 LogRel(("NEM: Destroying partition %p with its %u VCpus...\n", hPartition, idCpu));
1602 while (idCpu-- > 0)
1603 {
1604 PVMCPU pVCpu = pVM->apCpusR3[idCpu];
1605 pVCpu->nem.s.pvMsgSlotMapping = NULL;
1606 HRESULT hrc = WHvDeleteVirtualProcessor(hPartition, idCpu);
1607 AssertLogRelMsg(SUCCEEDED(hrc), ("WHvDeleteVirtualProcessor(%p, %u) -> %Rhrc (Last=%#x/%u)\n",
1608 hPartition, idCpu, hrc, RTNtLastStatusValue(),
1609 RTNtLastErrorValue()));
1610 }
1611 WHvDeletePartition(hPartition);
1612 }
1613 pVM->nem.s.fCreatedEmts = false;
1614 return VINF_SUCCESS;
1615}
1616
1617
1618/**
1619 * VM reset notification.
1620 *
1621 * @param pVM The cross context VM structure.
1622 */
1623void nemR3NativeReset(PVM pVM)
1624{
1625#if 0
1626 /* Unfix the A20 gate. */
1627 pVM->nem.s.fA20Fixed = false;
1628#else
1629 RT_NOREF(pVM);
1630#endif
1631}
1632
1633
1634/**
1635 * Reset CPU due to INIT IPI or hot (un)plugging.
1636 *
1637 * @param pVCpu The cross context virtual CPU structure of the CPU being
1638 * reset.
1639 * @param fInitIpi Whether this is the INIT IPI or hot (un)plugging case.
1640 */
1641void nemR3NativeResetCpu(PVMCPU pVCpu, bool fInitIpi)
1642{
1643#ifdef NEM_WIN_WITH_A20
1644 /* Lock the A20 gate if INIT IPI, make sure it's enabled. */
1645 if (fInitIpi && pVCpu->idCpu > 0)
1646 {
1647 PVM pVM = pVCpu->CTX_SUFF(pVM);
1648 if (!pVM->nem.s.fA20Enabled)
1649 nemR3NativeNotifySetA20(pVCpu, true);
1650 pVM->nem.s.fA20Enabled = true;
1651 pVM->nem.s.fA20Fixed = true;
1652 }
1653#else
1654 RT_NOREF(pVCpu, fInitIpi);
1655#endif
1656}
1657
1658
1659VBOXSTRICTRC nemR3NativeRunGC(PVM pVM, PVMCPU pVCpu)
1660{
1661 return nemHCWinRunGC(pVM, pVCpu);
1662}
1663
1664
1665VMMR3_INT_DECL(bool) NEMR3CanExecuteGuest(PVM pVM, PVMCPU pVCpu)
1666{
1667 Assert(VM_IS_NEM_ENABLED(pVM));
1668
1669#ifndef NEM_WIN_WITH_A20
1670 /*
1671 * Only execute when the A20 gate is enabled because this lovely Hyper-V
1672 * blackbox does not seem to have any way to enable or disable A20.
1673 */
1674 RT_NOREF(pVM);
1675 return PGMPhysIsA20Enabled(pVCpu);
1676#else
1677 RT_NOREF(pVM, pVCpu);
1678 return true;
1679#endif
1680}
1681
1682
1683bool nemR3NativeSetSingleInstruction(PVM pVM, PVMCPU pVCpu, bool fEnable)
1684{
1685 NOREF(pVM); NOREF(pVCpu); NOREF(fEnable);
1686 return false;
1687}
1688
1689
1690void nemR3NativeNotifyFF(PVM pVM, PVMCPU pVCpu, uint32_t fFlags)
1691{
1692 Log8(("nemR3NativeNotifyFF: canceling %u\n", pVCpu->idCpu));
1693 HRESULT hrc = WHvCancelRunVirtualProcessor(pVM->nem.s.hPartition, pVCpu->idCpu, 0);
1694 AssertMsg(SUCCEEDED(hrc), ("WHvCancelRunVirtualProcessor -> hrc=%Rhrc\n", hrc));
1695 RT_NOREF_PV(hrc);
1696 RT_NOREF_PV(fFlags);
1697}
1698
1699
1700DECLHIDDEN(bool) nemR3NativeNotifyDebugEventChanged(PVM pVM, bool fUseDebugLoop)
1701{
1702 RT_NOREF(pVM, fUseDebugLoop);
1703 return false;
1704}
1705
1706
1707DECLHIDDEN(bool) nemR3NativeNotifyDebugEventChangedPerCpu(PVM pVM, PVMCPU pVCpu, bool fUseDebugLoop)
1708{
1709 RT_NOREF(pVM, pVCpu, fUseDebugLoop);
1710 return false;
1711}
1712
1713
1714DECLINLINE(int) nemR3NativeGCPhys2R3PtrReadOnly(PVM pVM, RTGCPHYS GCPhys, const void **ppv)
1715{
1716 PGMPAGEMAPLOCK Lock;
1717 int rc = PGMPhysGCPhys2CCPtrReadOnly(pVM, GCPhys, ppv, &Lock);
1718 if (RT_SUCCESS(rc))
1719 PGMPhysReleasePageMappingLock(pVM, &Lock);
1720 return rc;
1721}
1722
1723
1724DECLINLINE(int) nemR3NativeGCPhys2R3PtrWriteable(PVM pVM, RTGCPHYS GCPhys, void **ppv)
1725{
1726 PGMPAGEMAPLOCK Lock;
1727 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhys, ppv, &Lock);
1728 if (RT_SUCCESS(rc))
1729 PGMPhysReleasePageMappingLock(pVM, &Lock);
1730 return rc;
1731}
1732
1733
1734VMMR3_INT_DECL(int) NEMR3NotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvR3,
1735 uint8_t *pu2State, uint32_t *puNemRange)
1736{
1737 Log5(("NEMR3NotifyPhysRamRegister: %RGp LB %RGp, pvR3=%p pu2State=%p (%d) puNemRange=%p (%d)\n",
1738 GCPhys, cb, pvR3, pu2State, pu2State, puNemRange, *puNemRange));
1739
1740 *pu2State = UINT8_MAX;
1741 RT_NOREF(puNemRange);
1742
1743 if (pvR3)
1744 {
1745 STAM_REL_PROFILE_START(&pVM->nem.s.StatProfMapGpaRange, a);
1746 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, pvR3, GCPhys, cb,
1747 WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagWrite | WHvMapGpaRangeFlagExecute);
1748 STAM_REL_PROFILE_STOP(&pVM->nem.s.StatProfMapGpaRange, a);
1749 if (SUCCEEDED(hrc))
1750 *pu2State = NEM_WIN_PAGE_STATE_WRITABLE;
1751 else
1752 {
1753 LogRel(("NEMR3NotifyPhysRamRegister: GCPhys=%RGp LB %RGp pvR3=%p hrc=%Rhrc (%#x) Last=%#x/%u\n",
1754 GCPhys, cb, pvR3, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1755 STAM_REL_COUNTER_INC(&pVM->nem.s.StatMapPageFailed);
1756 return VERR_NEM_MAP_PAGES_FAILED;
1757 }
1758 }
1759 return VINF_SUCCESS;
1760}
1761
1762
1763VMMR3_INT_DECL(bool) NEMR3IsMmio2DirtyPageTrackingSupported(PVM pVM)
1764{
1765 RT_NOREF(pVM);
1766 return g_pfnWHvQueryGpaRangeDirtyBitmap != NULL;
1767}
1768
1769
1770VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExMapEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags,
1771 void *pvRam, void *pvMmio2, uint8_t *pu2State, uint32_t *puNemRange)
1772{
1773 Log5(("NEMR3NotifyPhysMmioExMapEarly: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p (%d) puNemRange=%p (%#x)\n",
1774 GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State, *pu2State, puNemRange, puNemRange ? *puNemRange : UINT32_MAX));
1775 RT_NOREF(puNemRange);
1776
1777 /*
1778 * Unmap the RAM we're replacing.
1779 */
1780 if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE)
1781 {
1782 STAM_REL_PROFILE_START(&pVM->nem.s.StatProfUnmapGpaRange, a);
1783 HRESULT hrc = WHvUnmapGpaRange(pVM->nem.s.hPartition, GCPhys, cb);
1784 STAM_REL_PROFILE_STOP(&pVM->nem.s.StatProfUnmapGpaRange, a);
1785 if (SUCCEEDED(hrc))
1786 { /* likely */ }
1787 else if (pvMmio2)
1788 LogRel(("NEMR3NotifyPhysMmioExMapEarly: GCPhys=%RGp LB %RGp fFlags=%#x: Unmap -> hrc=%Rhrc (%#x) Last=%#x/%u (ignored)\n",
1789 GCPhys, cb, fFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1790 else
1791 {
1792 LogRel(("NEMR3NotifyPhysMmioExMapEarly: GCPhys=%RGp LB %RGp fFlags=%#x: Unmap -> hrc=%Rhrc (%#x) Last=%#x/%u\n",
1793 GCPhys, cb, fFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1794 STAM_REL_COUNTER_INC(&pVM->nem.s.StatUnmapPageFailed);
1795 return VERR_NEM_UNMAP_PAGES_FAILED;
1796 }
1797 }
1798
1799 /*
1800 * Map MMIO2 if any.
1801 */
1802 if (pvMmio2)
1803 {
1804 Assert(fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2);
1805 WHV_MAP_GPA_RANGE_FLAGS fWHvFlags = WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagWrite | WHvMapGpaRangeFlagExecute;
1806 if ((fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_TRACK_DIRTY_PAGES) && g_pfnWHvQueryGpaRangeDirtyBitmap)
1807 fWHvFlags |= WHvMapGpaRangeFlagTrackDirtyPages;
1808 STAM_REL_PROFILE_START(&pVM->nem.s.StatProfMapGpaRange, a);
1809 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, pvMmio2, GCPhys, cb, fWHvFlags);
1810 STAM_REL_PROFILE_STOP(&pVM->nem.s.StatProfMapGpaRange, a);
1811 if (SUCCEEDED(hrc))
1812 *pu2State = NEM_WIN_PAGE_STATE_WRITABLE;
1813 else
1814 {
1815 LogRel(("NEMR3NotifyPhysMmioExMapEarly: GCPhys=%RGp LB %RGp fFlags=%#x pvMmio2=%p fWHvFlags=%#x: Map -> hrc=%Rhrc (%#x) Last=%#x/%u\n",
1816 GCPhys, cb, fFlags, pvMmio2, fWHvFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1817 STAM_REL_COUNTER_INC(&pVM->nem.s.StatMapPageFailed);
1818 return VERR_NEM_MAP_PAGES_FAILED;
1819 }
1820 }
1821 else
1822 {
1823 Assert(!(fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2));
1824 *pu2State = NEM_WIN_PAGE_STATE_UNMAPPED;
1825 }
1826 RT_NOREF(pvRam);
1827 return VINF_SUCCESS;
1828}
1829
1830
1831VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExMapLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags,
1832 void *pvRam, void *pvMmio2, uint32_t *puNemRange)
1833{
1834 RT_NOREF(pVM, GCPhys, cb, fFlags, pvRam, pvMmio2, puNemRange);
1835 return VINF_SUCCESS;
1836}
1837
1838
1839VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExUnmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags, void *pvRam,
1840 void *pvMmio2, uint8_t *pu2State, uint32_t *puNemRange)
1841{
1842 int rc = VINF_SUCCESS;
1843 Log5(("NEMR3NotifyPhysMmioExUnmap: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p uNemRange=%#x (%#x)\n",
1844 GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State, puNemRange, *puNemRange));
1845
1846 /*
1847 * Unmap the MMIO2 pages.
1848 */
1849 /** @todo If we implement aliasing (MMIO2 page aliased into MMIO range),
1850 * we may have more stuff to unmap even in case of pure MMIO... */
1851 if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2)
1852 {
1853 STAM_REL_PROFILE_START(&pVM->nem.s.StatProfUnmapGpaRange, a);
1854 HRESULT hrc = WHvUnmapGpaRange(pVM->nem.s.hPartition, GCPhys, cb);
1855 STAM_REL_PROFILE_STOP(&pVM->nem.s.StatProfUnmapGpaRange, a);
1856 if (FAILED(hrc))
1857 {
1858 LogRel2(("NEMR3NotifyPhysMmioExUnmap: GCPhys=%RGp LB %RGp fFlags=%#x: Unmap -> hrc=%Rhrc (%#x) Last=%#x/%u (ignored)\n",
1859 GCPhys, cb, fFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1860 rc = VERR_NEM_UNMAP_PAGES_FAILED;
1861 STAM_REL_COUNTER_INC(&pVM->nem.s.StatUnmapPageFailed);
1862 }
1863 }
1864
1865 /*
1866 * Restore the RAM we replaced.
1867 */
1868 if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE)
1869 {
1870 AssertPtr(pvRam);
1871 STAM_REL_PROFILE_START(&pVM->nem.s.StatProfMapGpaRange, a);
1872 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, pvRam, GCPhys, cb,
1873 WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagWrite | WHvMapGpaRangeFlagExecute);
1874 STAM_REL_PROFILE_STOP(&pVM->nem.s.StatProfMapGpaRange, a);
1875 if (SUCCEEDED(hrc))
1876 { /* likely */ }
1877 else
1878 {
1879 LogRel(("NEMR3NotifyPhysMmioExUnmap: GCPhys=%RGp LB %RGp pvMmio2=%p hrc=%Rhrc (%#x) Last=%#x/%u\n",
1880 GCPhys, cb, pvMmio2, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1881 rc = VERR_NEM_MAP_PAGES_FAILED;
1882 STAM_REL_COUNTER_INC(&pVM->nem.s.StatMapPageFailed);
1883 }
1884 if (pu2State)
1885 *pu2State = NEM_WIN_PAGE_STATE_WRITABLE;
1886 }
1887 /* Mark the pages as unmapped if relevant. */
1888 else if (pu2State)
1889 *pu2State = NEM_WIN_PAGE_STATE_UNMAPPED;
1890
1891 RT_NOREF(pvMmio2, puNemRange);
1892 return rc;
1893}
1894
1895
1896VMMR3_INT_DECL(int) NEMR3PhysMmio2QueryAndResetDirtyBitmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t uNemRange,
1897 void *pvBitmap, size_t cbBitmap)
1898{
1899 Assert(VM_IS_NEM_ENABLED(pVM));
1900 AssertReturn(g_pfnWHvQueryGpaRangeDirtyBitmap, VERR_INTERNAL_ERROR_2);
1901 Assert(cbBitmap == (uint32_t)cbBitmap);
1902 RT_NOREF(uNemRange);
1903
1904 /* This is being profiled by PGM, see /PGM/Mmio2QueryAndResetDirtyBitmap. */
1905 HRESULT hrc = WHvQueryGpaRangeDirtyBitmap(pVM->nem.s.hPartition, GCPhys, cb, (UINT64 *)pvBitmap, (uint32_t)cbBitmap);
1906 if (SUCCEEDED(hrc))
1907 return VINF_SUCCESS;
1908
1909 AssertLogRelMsgFailed(("GCPhys=%RGp LB %RGp pvBitmap=%p LB %#zx hrc=%Rhrc (%#x) Last=%#x/%u\n",
1910 GCPhys, cb, pvBitmap, cbBitmap, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1911 return VERR_NEM_QUERY_DIRTY_BITMAP_FAILED;
1912}
1913
1914
1915VMMR3_INT_DECL(int) NEMR3NotifyPhysRomRegisterEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages, uint32_t fFlags,
1916 uint8_t *pu2State, uint32_t *puNemRange)
1917{
1918 Log5(("nemR3NativeNotifyPhysRomRegisterEarly: %RGp LB %RGp pvPages=%p fFlags=%#x\n", GCPhys, cb, pvPages, fFlags));
1919 *pu2State = UINT8_MAX;
1920 *puNemRange = 0;
1921
1922#if 0 /* Let's not do this after all. We'll protection change notifications for each page and if not we'll map them lazily. */
1923 RTGCPHYS const cPages = cb >> X86_PAGE_SHIFT;
1924 for (RTGCPHYS iPage = 0; iPage < cPages; iPage++, GCPhys += X86_PAGE_SIZE)
1925 {
1926 const void *pvPage;
1927 int rc = nemR3NativeGCPhys2R3PtrReadOnly(pVM, GCPhys, &pvPage);
1928 if (RT_SUCCESS(rc))
1929 {
1930 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, (void *)pvPage, GCPhys, X86_PAGE_SIZE,
1931 WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagExecute);
1932 if (SUCCEEDED(hrc))
1933 { /* likely */ }
1934 else
1935 {
1936 LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp hrc=%Rhrc (%#x) Last=%#x/%u\n",
1937 GCPhys, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1938 return VERR_NEM_INIT_FAILED;
1939 }
1940 }
1941 else
1942 {
1943 LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp rc=%Rrc\n", GCPhys, rc));
1944 return rc;
1945 }
1946 }
1947 RT_NOREF_PV(fFlags);
1948#else
1949 RT_NOREF(pVM, GCPhys, cb, pvPages, fFlags);
1950#endif
1951 return VINF_SUCCESS;
1952}
1953
1954
1955VMMR3_INT_DECL(int) NEMR3NotifyPhysRomRegisterLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages,
1956 uint32_t fFlags, uint8_t *pu2State, uint32_t *puNemRange)
1957{
1958 Log5(("nemR3NativeNotifyPhysRomRegisterLate: %RGp LB %RGp pvPages=%p fFlags=%#x pu2State=%p (%d) puNemRange=%p (%#x)\n",
1959 GCPhys, cb, pvPages, fFlags, pu2State, *pu2State, puNemRange, *puNemRange));
1960 *pu2State = UINT8_MAX;
1961
1962 /*
1963 * (Re-)map readonly.
1964 */
1965 AssertPtrReturn(pvPages, VERR_INVALID_POINTER);
1966 STAM_REL_PROFILE_START(&pVM->nem.s.StatProfMapGpaRange, a);
1967 HRESULT hrc = WHvMapGpaRange(pVM->nem.s.hPartition, pvPages, GCPhys, cb, WHvMapGpaRangeFlagRead | WHvMapGpaRangeFlagExecute);
1968 STAM_REL_PROFILE_STOP(&pVM->nem.s.StatProfMapGpaRange, a);
1969 if (SUCCEEDED(hrc))
1970 *pu2State = NEM_WIN_PAGE_STATE_READABLE;
1971 else
1972 {
1973 LogRel(("nemR3NativeNotifyPhysRomRegisterEarly: GCPhys=%RGp LB %RGp pvPages=%p fFlags=%#x hrc=%Rhrc (%#x) Last=%#x/%u\n",
1974 GCPhys, cb, pvPages, fFlags, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
1975 STAM_REL_COUNTER_INC(&pVM->nem.s.StatMapPageFailed);
1976 return VERR_NEM_MAP_PAGES_FAILED;
1977 }
1978 RT_NOREF(fFlags, puNemRange);
1979 return VINF_SUCCESS;
1980}
1981
1982#ifdef NEM_WIN_WITH_A20
1983
1984/**
1985 * @callback_method_impl{FNPGMPHYSNEMCHECKPAGE}
1986 */
1987static DECLCALLBACK(int) nemR3WinUnsetForA20CheckerCallback(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys,
1988 PPGMPHYSNEMPAGEINFO pInfo, void *pvUser)
1989{
1990 /* We'll just unmap the memory. */
1991 if (pInfo->u2NemState > NEM_WIN_PAGE_STATE_UNMAPPED)
1992 {
1993 HRESULT hrc = WHvUnmapGpaRange(pVM->nem.s.hPartition, GCPhys, X86_PAGE_SIZE);
1994 if (SUCCEEDED(hrc))
1995 {
1996 STAM_REL_COUNTER_INC(&pVM->nem.s.StatUnmapPage);
1997 uint32_t cMappedPages = ASMAtomicDecU32(&pVM->nem.s.cMappedPages); NOREF(cMappedPages);
1998 Log5(("NEM GPA unmapped/A20: %RGp (was %s, cMappedPages=%u)\n", GCPhys, g_apszPageStates[pInfo->u2NemState], cMappedPages));
1999 pInfo->u2NemState = NEM_WIN_PAGE_STATE_UNMAPPED;
2000 }
2001 else
2002 {
2003 STAM_REL_COUNTER_INC(&pVM->nem.s.StatUnmapPageFailed);
2004 LogRel(("nemR3WinUnsetForA20CheckerCallback/unmap: GCPhys=%RGp hrc=%Rhrc (%#x) Last=%#x/%u\n",
2005 GCPhys, hrc, hrc, RTNtLastStatusValue(), RTNtLastErrorValue()));
2006 return VERR_INTERNAL_ERROR_2;
2007 }
2008 }
2009 RT_NOREF(pVCpu, pvUser);
2010 return VINF_SUCCESS;
2011}
2012
2013
2014/**
2015 * Unmaps a page from Hyper-V for the purpose of emulating A20 gate behavior.
2016 *
2017 * @returns The PGMPhysNemQueryPageInfo result.
2018 * @param pVM The cross context VM structure.
2019 * @param pVCpu The cross context virtual CPU structure.
2020 * @param GCPhys The page to unmap.
2021 */
2022static int nemR3WinUnmapPageForA20Gate(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
2023{
2024 PGMPHYSNEMPAGEINFO Info;
2025 return PGMPhysNemPageInfoChecker(pVM, pVCpu, GCPhys, false /*fMakeWritable*/, &Info,
2026 nemR3WinUnsetForA20CheckerCallback, NULL);
2027}
2028
2029#endif /* NEM_WIN_WITH_A20 */
2030
2031VMMR3_INT_DECL(void) NEMR3NotifySetA20(PVMCPU pVCpu, bool fEnabled)
2032{
2033 Log(("nemR3NativeNotifySetA20: fEnabled=%RTbool\n", fEnabled));
2034 Assert(VM_IS_NEM_ENABLED(pVCpu->CTX_SUFF(pVM)));
2035#ifdef NEM_WIN_WITH_A20
2036 PVM pVM = pVCpu->CTX_SUFF(pVM);
2037 if (!pVM->nem.s.fA20Fixed)
2038 {
2039 pVM->nem.s.fA20Enabled = fEnabled;
2040 for (RTGCPHYS GCPhys = _1M; GCPhys < _1M + _64K; GCPhys += X86_PAGE_SIZE)
2041 nemR3WinUnmapPageForA20Gate(pVM, pVCpu, GCPhys);
2042 }
2043#else
2044 RT_NOREF(pVCpu, fEnabled);
2045#endif
2046}
2047
2048
2049/** @page pg_nem_win NEM/win - Native Execution Manager, Windows.
2050 *
2051 * On Windows the Hyper-V root partition (dom0 in Xen terminology) does not have
2052 * nested VT-x or AMD-V capabilities. Early on raw-mode worked inside it, but
2053 * for a while now we've been getting \#GPs when trying to modify CR4 in the
2054 * world switcher. So, when Hyper-V is active on Windows we have little choice
2055 * but to use Hyper-V to run our VMs.
2056 *
2057 *
2058 * @section sub_nem_win_whv The WinHvPlatform API
2059 *
2060 * Since Windows 10 build 17083 there is a documented API for managing Hyper-V
2061 * VMs: header file WinHvPlatform.h and implementation in WinHvPlatform.dll.
2062 * This interface is a wrapper around the undocumented Virtualization
2063 * Infrastructure Driver (VID) API - VID.DLL and VID.SYS. The wrapper is
2064 * written in C++, namespaced, and early versions (at least) were using standard C++
2065 * container templates in several places.
2066 *
2067 * When creating a VM using WHvCreatePartition, it will only create the
2068 * WinHvPlatform structures for it, to which you get an abstract pointer. The
2069 * VID API that actually creates the partition is first engaged when you call
2070 * WHvSetupPartition after first setting a lot of properties using
2071 * WHvSetPartitionProperty. Since the VID API is just a very thin wrapper
2072 * around CreateFile and NtDeviceIoControlFile, it returns an actual HANDLE for
2073 * the partition to WinHvPlatform. We fish this HANDLE out of the WinHvPlatform
2074 * partition structures because we need to talk directly to VID for reasons
2075 * we'll get to in a bit. (Btw. we could also intercept the CreateFileW or
2076 * NtDeviceIoControlFile calls from VID.DLL to get the HANDLE should fishing in
2077 * the partition structures become difficult.)
2078 *
2079 * The WinHvPlatform API requires us to both set the number of guest CPUs before
2080 * setting up the partition and call WHvCreateVirtualProcessor for each of them.
2081 * The CPU creation function boils down to a VidMessageSlotMap call that sets up
2082 * and maps a message buffer into ring-3 for async communication with hyper-V
2083 * and/or the VID.SYS thread actually running the CPU thru
2084 * WinHvRunVpDispatchLoop(). When for instance a VMEXIT is encountered, hyper-V
2085 * sends a message that the WHvRunVirtualProcessor API retrieves (and later
2086 * acknowledges) via VidMessageSlotHandleAndGetNext. Since or about build
2087 * 17757 a register page is also mapped into user space when creating the
2088 * virtual CPU. It should be noted that WHvDeleteVirtualProcessor doesn't do
2089 * much as there seems to be no partner function to VidMessageSlotMap that
2090 * reverses what it did.
2091 *
2092 * Memory is managed thru calls to WHvMapGpaRange and WHvUnmapGpaRange (GPA does
2093 * not mean grade point average here, but rather guest physical address space),
2094 * which corresponds to VidCreateVaGpaRangeSpecifyUserVa and VidDestroyGpaRange
2095 * respectively. As 'UserVa' indicates, the functions works on user process
2096 * memory. The mappings are also subject to quota restrictions, so the number
2097 * of ranges are limited and probably their total size as well. Obviously
2098 * VID.SYS keeps track of the ranges, but so does WinHvPlatform, which means
2099 * there is a bit of overhead involved and quota restrictions make sense.
2100 *
2101 * Running guest code is done through the WHvRunVirtualProcessor function. It
2102 * asynchronously starts or resumes hyper-V CPU execution and then waits for an
2103 * VMEXIT message. Hyper-V / VID.SYS will return information about the message
2104 * in the message buffer mapping, and WHvRunVirtualProcessor will convert that
2105 * into its own WHV_RUN_VP_EXIT_CONTEXT format.
2106 *
2107 * Other threads can interrupt the execution by using WHvCancelRunVirtualProcessor,
2108 * which since or about build 17757 uses VidMessageSlotHandleAndGetNext to do
2109 * the work (earlier builds would open the waiting thread, do a dummy
2110 * QueueUserAPC on it, and let it upon return use VidStopVirtualProcessor to
2111 * do the actual stopping). While there is certainly a race between cancelation
2112 * and the CPU causing a natural VMEXIT, it is not known whether this still
2113 * causes extra work on subsequent WHvRunVirtualProcessor calls (it did in and
2114 * earlier than 17134).
2115 *
2116 * Registers are retrieved and set via WHvGetVirtualProcessorRegisters and
2117 * WHvSetVirtualProcessorRegisters. In addition, several VMEXITs include
2118 * essential register state in the exit context information, potentially making
2119 * it possible to emulate the instruction causing the exit without involving
2120 * WHvGetVirtualProcessorRegisters.
2121 *
2122 *
2123 * @subsection subsec_nem_win_whv_cons Issues & Feedback
2124 *
2125 * Here are some observations (mostly against build 17101):
2126 *
2127 * - The VMEXIT performance is dismal (build 17134).
2128 *
2129 * Our proof of concept implementation with a kernel runloop (i.e. not using
2130 * WHvRunVirtualProcessor and friends, but calling VID.SYS fast I/O control
2131 * entry point directly) delivers 9-10% of the port I/O performance and only
2132 * 6-7% of the MMIO performance that we have with our own hypervisor.
2133 *
2134 * When using the official WinHvPlatform API, the numbers are 3% for port I/O
2135 * and 5% for MMIO.
2136 *
2137 * While the tests we've done are using tight loops only doing port I/O
2138 * and MMIO, the problem is clearly visible when running regular guest OSes.
2139 * Anything that hammers the VGA device would be suffering, for example:
2140 *
2141 * - Windows 2000 boot screen animation overloads us with MMIO exits
2142 * and won't even boot because all the time is spent in interrupt
2143 * handlers and redrawing the screen.
2144 *
2145 * - DSL 4.4 and its bootmenu logo is slower than molasses in January.
2146 *
2147 * We have not found a workaround for this yet.
2148 *
2149 * Something that might improve the issue a little is to detect blocks with
2150 * excessive MMIO and port I/O exits and emulate instructions to cover
2151 * multiple exits before letting Hyper-V have a go at the guest execution
2152 * again. This will only improve the situation under some circumstances,
2153 * since emulating instructions without recompilation can be expensive, so
2154 * there will only be real gains if the exiting instructions are tightly
2155 * packed.
2156 *
2157 * Update: Security fixes during the summer of 2018 caused the performance to
2158 * drop even more.
2159 *
2160 * Update [build 17757]: Some performance improvements here, but they don't
2161 * yet make up for what was lost this summer.
2162 *
2163 *
2164 * - We need a way to directly modify the TSC offset (or bias if you like).
2165 *
2166 * The current approach of setting the WHvX64RegisterTsc register one by one
2167 * on each virtual CPU in sequence will introduce random inaccuracies,
2168 * especially if the thread doing the job is rescheduled at a bad time.
2169 *
2170 *
2171 * - Unable to access WHvX64RegisterMsrMtrrCap (build 17134).
2172 *
2173 *
2174 * - On AMD Ryzen grub/debian 9.0 ends up with an unrecoverable exception
2175 * when IA32_MTRR_PHYSMASK0 is written.
2176 *
2177 *
2178 * - The IA32_APIC_BASE register does not work right:
2179 *
2180 * - Attempts by the guest to clear bit 11 (EN) are ignored, both the
2181 * guest and the VMM read back the old value.
2182 *
2183 * - Attempts to modify the base address (bits NN:12) seem to be ignored
2184 * in the same way.
2185 *
2186 * - The VMM can modify both the base address as well as the EN and
2187 * BSP bits, however this is useless if we cannot intercept the WRMSR.
2188 *
2189 * - Attempts by the guest to set the EXTD bit (X2APIC) result in \#GP(0),
2190 * while the VMM ends up with ERROR_HV_INVALID_PARAMETER. Seems
2191 * there is no way to support X2APIC.
2192 *
2193 *
2194 * - Not sure if this is a thing, but WHvCancelVirtualProcessor seems to cause
2195 * a lot more spurious WHvRunVirtualProcessor returns than what we get
2196 * with the replacement code. By spurious returns we mean that the
2197 * subsequent call to WHvRunVirtualProcessor would return immediately.
2198 *
2199 * Update [build 17757]: New cancelation code might have addressed this, but
2200 * haven't had time to test it yet.
2201 *
2202 *
2203 * - There is no API for modifying protection of a page within a GPA range.
2204 *
2205 * From what we can tell, the only way to modify the protection (like readonly
2206 * -> writable, or vice versa) is to first unmap the range and then remap it
2207 * with the new protection.
2208 *
2209 * We are for instance doing this quite a bit in order to track dirty VRAM
2210 * pages. VRAM pages start out as readonly; when the guest writes to a page
2211 * we take an exit, note down which page it is, make it writable and restart
2212 * the instruction. After refreshing the display, we reset all the writable
2213 * pages to readonly again, bulk fashion.
2214 *
2215 * Now to work around this issue, we do page sized GPA ranges. In addition to
2216 * adding a lot of tracking overhead to WinHvPlatform and VID.SYS, this also
2217 * causes us to exceed our quota before we've even mapped a default sized
2218 * (128MB) VRAM page-by-page. So, to work around this quota issue we have to
2219 * lazily map pages and actively restrict the number of mappings.
2220 *
2221 * Our best workaround thus far is bypassing WinHvPlatform and VID entirely
2222 * when it comes to guest memory management and instead use the underlying
2223 * hypercalls (HvCallMapGpaPages, HvCallUnmapGpaPages) to do it ourselves.
2224 * (This also maps a whole lot better into our own guest page management
2225 * infrastructure.)
2226 *
2227 * Update [build 17757]: Introduces a KVM like dirty logging API which could
2228 * help tracking dirty VGA pages, while being useless for shadow ROM and
2229 * devices trying to catch the guest updating descriptors and such.
2230 *
2231 *
2232 * - Observed problems doing WHvUnmapGpaRange immediately followed by
2233 * WHvMapGpaRange.
2234 *
2235 * As mentioned above, we've been forced to use this sequence when modifying
2236 * page protection. However, when transitioning from readonly to writable,
2237 * we've ended up looping forever with the same write to readonly memory
2238 * VMEXIT. We're wondering if this issue might be related to the lazy mapping
2239 * logic in WinHvPlatform.
2240 *
2241 * Workaround: Insert a WHvRunVirtualProcessor call and make sure to get a GPA
2242 * unmapped exit between the two calls. Not entirely great performance wise
2243 * (or the sanity of our code).
2244 *
2245 *
2246 * - Implementing A20 gate behavior is tedious, whereas correctly emulating the
2247 * A20M# pin (present on 486 and later) is near impossible for SMP setups
2248 * (e.g. possibility of two CPUs with different A20 status).
2249 *
2250 * Workaround #1 (obsolete): Only do A20 on CPU 0, restricting the emulation
2251 * to HMA. We unmap all pages related to HMA (0x100000..0x10ffff) when the A20
2252 * state changes, lazily syncing the right pages back when accessed.
2253 *
2254 * Workaround #2 (used): Use IEM when the A20 gate is disabled.
2255 *
2256 *
2257 * - WHvRunVirtualProcessor wastes time converting VID/Hyper-V messages to its
2258 * own format (WHV_RUN_VP_EXIT_CONTEXT).
2259 *
2260 * We understand this might be because Microsoft wishes to remain free to
2261 * modify the VID/Hyper-V messages, but it's still rather silly and does slow
2262 * things down a little. We'd much rather just process the messages directly.
2263 *
2264 *
2265 * - WHvRunVirtualProcessor would've benefited from using a callback interface:
2266 *
2267 * - The potential size changes of the exit context structure wouldn't be
2268 * an issue, since the function could manage that itself.
2269 *
2270 * - State handling could probably be simplified (like cancelation).
2271 *
2272 *
2273 * - WHvGetVirtualProcessorRegisters and WHvSetVirtualProcessorRegisters
2274 * internally convert register names, probably using temporary heap buffers.
2275 *
2276 * From the looks of things, they are converting from WHV_REGISTER_NAME to
2277 * HV_REGISTER_NAME from the "Virtual Processor Register Names" section in
2278 * the "Hypervisor Top-Level Functional Specification" document. This feels
2279 * like an awful waste of time.
2280 *
2281 * We simply cannot understand why HV_REGISTER_NAME isn't used directly here,
2282 * or at least the same values, making any conversion redundant. Restricting
2283 * access to certain registers could easily be implemented by scanning the
2284 * inputs.
2285 *
2286 * To avoid the heap + conversion overhead, we're currently using the
2287 * HvCallGetVpRegisters and HvCallSetVpRegisters calls directly, at least for
2288 * the ring-0 code.
2289 *
2290 * Update [build 17757]: Register translation has been very cleverly
2291 * optimized and made table driven (2 top level tables, 4 + 1 leaf tables).
2292 * Register information consists of the 32-bit HV register name, register page
2293 * offset, and flags (giving valid offset, size and more). Register
2294 * getting/setting seems to be done by hoping that the register page provides
2295 * it all, and falling back on the VidSetVirtualProcessorState if one or more
2296 * registers are not available there.
2297 *
2298 * Note! We have currently not updated our ring-0 code to take the register
2299 * page into account, so it's suffering a little compared to the ring-3 code
2300 * that now uses the official APIs for registers.
2301 *
2302 *
2303 * - The YMM and XCR0 registers are not yet named (17083). This probably
2304 * wouldn't be a problem if HV_REGISTER_NAME was used, see previous point.
2305 *
2306 * Update [build 17757]: XCR0 is added. YMM register values seem to be put
2307 * into a yet undocumented XsaveState interface. Approach is a little bulky,
2308 * but saves a number of enums and dispenses with register translation. Also,
2309 * the underlying Vid setter API duplicates the input buffer on the heap,
2310 * adding a 16 byte header.
2311 *
2312 *
2313 * - Why does VID.SYS only query/set 32 registers at a time thru the
2314 * HvCallGetVpRegisters and HvCallSetVpRegisters hypercalls?
2315 *
2316 * We've no trouble getting/setting all the registers defined by
2317 * WHV_REGISTER_NAME in one hypercall (around 80). Some kind of stack
2318 * buffering or similar?
2319 *
2320 *
2321 * - To handle the VMMCALL / VMCALL instructions, it seems we need to intercept
2322 * \#UD exceptions and inspect the opcodes. A dedicated exit for hypercalls
2323 * would be more efficient, esp. for guests using \#UD for other purposes.
2324 *
2325 *
2326 * - Wrong instruction length in the VpContext with unmapped GPA memory exit
2327 * contexts on 17115/AMD.
2328 *
2329 * One byte "PUSH CS" was reported as 2 bytes, while a two byte
2330 * "MOV [EBX],EAX" was reported with a 1 byte instruction length. Problem
2331 * naturally present in untranslated hyper-v messages.
2332 *
2333 *
2334 * - The I/O port exit context information seems to be missing the address size
2335 * information needed for correct string I/O emulation.
2336 *
2337 * VT-x provides this information in bits 7:9 in the instruction information
2338 * field on newer CPUs. AMD-V in bits 7:9 in the EXITINFO1 field in the VMCB.
2339 *
2340 * We can probably work around this by scanning the instruction bytes for
2341 * address size prefixes. Haven't investigated it any further yet.
2342 *
2343 *
2344 * - Querying WHvCapabilityCodeExceptionExitBitmap returns zero even when
2345 * intercepts demonstrably work (17134).
2346 *
2347 *
2348 * - Querying HvPartitionPropertyDebugChannelId via HvCallGetPartitionProperty
2349 * (hypercall) hangs the host (17134).
2350 *
2351 * - CommonUtilities::GuidToString needs a 'static' before the hex digit array,
2352 * looks pointless to re-init a stack copy of it for each call (novice mistake).
2353 *
2354 *
2355 * Old concerns that have been addressed:
2356 *
2357 * - The WHvCancelVirtualProcessor API schedules a dummy usermode APC callback
2358 * in order to cancel any current or future alertable wait in VID.SYS during
2359 * the VidMessageSlotHandleAndGetNext call.
2360 *
2361 * IIRC this will make the kernel schedule the specified callback thru
2362 * NTDLL!KiUserApcDispatcher by modifying the thread context and quite
2363 * possibly the userland thread stack. When the APC callback returns to
2364 * KiUserApcDispatcher, it will call NtContinue to restore the old thread
2365 * context and resume execution from there. This naturally adds up to some
2366 * CPU cycles, ring transitions aren't for free, especially after Spectre &
2367 * Meltdown mitigations.
2368 *
2369 * Using an NtAlertThread call could do the same without the thread context
2370 * modifications and the extra kernel call.
2371 *
2372 * Update: All concerns have been addressed in or about build 17757.
2373 *
2374 * The WHvCancelVirtualProcessor API is now implemented using a new
2375 * VidMessageSlotHandleAndGetNext() flag (4). Codepath is slightly longer
2376 * than NtAlertThread, but has the added benefit that spurious wakeups can be
2377 * more easily reduced.
2378 *
2379 *
2380 * - When WHvRunVirtualProcessor returns without a message, or on a terse
2381 * VID message like HLT, it will make a kernel call to get some registers.
2382 * This is potentially inefficient if the caller decides he needs more
2383 * register state.
2384 *
2385 * It would be better to just return what's available and let the caller fetch
2386 * what is missing from his point of view in a single kernel call.
2387 *
2388 * Update: All concerns have been addressed in or about build 17757. Selected
2389 * registers are now available via shared memory and thus HLT should (not
2390 * verified) no longer require a system call to compose the exit context data.
2391 *
2392 *
2393 * - The WHvRunVirtualProcessor implementation does lazy GPA range mappings when
2394 * an unmapped GPA message is received from hyper-V.
2395 *
2396 * Since MMIO is currently realized as unmapped GPA, this will slow down all
2397 * MMIO accesses a tiny little bit as WHvRunVirtualProcessor looks up the
2398 * guest physical address to check if it is a pending lazy mapping.
2399 *
2400 * The lazy mapping feature makes no sense to us. We as API user have all the
2401 * information and can do lazy mapping ourselves if we want/have to (see next
2402 * point).
2403 *
2404 * Update: All concerns have been addressed in or about build 17757.
2405 *
2406 *
2407 * - The WHvGetCapability function has a weird design:
2408 * - The CapabilityCode parameter is pointlessly duplicated in the output
2409 * structure (WHV_CAPABILITY).
2410 *
2411 * - API takes void pointer, but everyone will probably be using
2412 * WHV_CAPABILITY due to WHV_CAPABILITY::CapabilityCode making it
2413 * impractical to use anything else.
2414 *
2415 * - No output size.
2416 *
2417 * - See GetFileAttributesEx, GetFileInformationByHandleEx,
2418 * FindFirstFileEx, and others for typical pattern for generic
2419 * information getters.
2420 *
2421 * Update: All concerns have been addressed in build 17110.
2422 *
2423 *
2424 * - The WHvGetPartitionProperty function uses the same weird design as
2425 * WHvGetCapability, see above.
2426 *
2427 * Update: All concerns have been addressed in build 17110.
2428 *
2429 *
2430 * - The WHvSetPartitionProperty function has a totally weird design too:
2431 * - In contrast to its partner WHvGetPartitionProperty, the property code
2432 * is not a separate input parameter here but part of the input
2433 * structure.
2434 *
2435 * - The input structure is a void pointer rather than a pointer to
2436 * WHV_PARTITION_PROPERTY which everyone probably will be using because
2437 * of the WHV_PARTITION_PROPERTY::PropertyCode field.
2438 *
2439 * - Really, why use PVOID for the input when the function isn't accepting
2440 * minimal sizes. E.g. WHVPartitionPropertyCodeProcessorClFlushSize only
2441 * requires a 9 byte input, but the function insists on 16 bytes (17083).
2442 *
2443 * - See GetFileAttributesEx, SetFileInformationByHandle, FindFirstFileEx,
2444 * and others for typical pattern for generic information setters and
2445 * getters.
2446 *
2447 * Update: All concerns have been addressed in build 17110.
2448 *
2449 *
2450 * @section sec_nem_win_large_pages Large Pages
2451 *
2452 * We've got a standalone memory allocation and access testcase bs3-memalloc-1
2453 * which was run with 48GiB of guest RAM configured on a NUC 11 box running
2454 * Windows 11 GA. In the simplified NEM memory mode no exits should be
2455 * generated while the access tests are running.
2456 *
2457 * The bs3-memalloc-1 results kind of hints at some tiny speed-up if the guest
2458 * RAM is allocated using the MEM_LARGE_PAGES flag, but only in the 3rd access
2459 * check (typical 350 000 MiB/s w/o and around 400 000 MiB/s). The result for
2460 * the 2nd access varies a lot, perhaps hinting at some table optimizations
2461 * going on.
2462 *
2463 * The initial access where the memory is locked/whatever has absolutely horrid
2464 * results regardless of whether large pages are enabled or not. Typically
2465 * bobbing close to 500 MiB/s, non-large pages a little faster.
2466 *
2467 * NEM w/ simplified memory and MEM_LARGE_PAGES:
2468 * @verbatim
2469bs3-memalloc-1: TESTING...
2470bs3-memalloc-1: #0/0x0: 0x0000000000000000 LB 0x000000000009fc00 USABLE (1)
2471bs3-memalloc-1: #1/0x1: 0x000000000009fc00 LB 0x0000000000000400 RESERVED (2)
2472bs3-memalloc-1: #2/0x2: 0x00000000000f0000 LB 0x0000000000010000 RESERVED (2)
2473bs3-memalloc-1: #3/0x3: 0x0000000000100000 LB 0x00000000dfef0000 USABLE (1)
2474bs3-memalloc-1: #4/0x4: 0x00000000dfff0000 LB 0x0000000000010000 ACPI_RECLAIMABLE (3)
2475bs3-memalloc-1: #5/0x5: 0x00000000fec00000 LB 0x0000000000001000 RESERVED (2)
2476bs3-memalloc-1: #6/0x6: 0x00000000fee00000 LB 0x0000000000001000 RESERVED (2)
2477bs3-memalloc-1: #7/0x7: 0x00000000fffc0000 LB 0x0000000000040000 RESERVED (2)
2478bs3-memalloc-1: #8/0x9: 0x0000000100000000 LB 0x0000000b20000000 USABLE (1)
2479bs3-memalloc-1: Found 1 interesting entries covering 0xb20000000 bytes (44 GB).
2480bs3-memalloc-1: From 0x100000000 to 0xc20000000
2481bs3-memalloc-1: INT15h/E820 : PASSED
2482bs3-memalloc-1: Mapping memory above 4GB : PASSED
2483bs3-memalloc-1: Pages : 11 665 408 pages
2484bs3-memalloc-1: MiBs : 45 568 MB
2485bs3-memalloc-1: Alloc elapsed : 90 925 263 996 ns
2486bs3-memalloc-1: Alloc elapsed in ticks : 272 340 387 336 ticks
2487bs3-memalloc-1: Page alloc time : 7 794 ns/page
2488bs3-memalloc-1: Page alloc time in ticks : 23 345 ticks/page
2489bs3-memalloc-1: Alloc thruput : 128 296 pages/s
2490bs3-memalloc-1: Alloc thruput in MiBs : 501 MB/s
2491bs3-memalloc-1: Allocation speed : PASSED
2492bs3-memalloc-1: Access elapsed : 85 074 483 467 ns
2493bs3-memalloc-1: Access elapsed in ticks : 254 816 088 412 ticks
2494bs3-memalloc-1: Page access time : 7 292 ns/page
2495bs3-memalloc-1: Page access time in ticks : 21 843 ticks/page
2496bs3-memalloc-1: Access thruput : 137 119 pages/s
2497bs3-memalloc-1: Access thruput in MiBs : 535 MB/s
2498bs3-memalloc-1: 2nd access : PASSED
2499bs3-memalloc-1: Access elapsed : 112 963 925 ns
2500bs3-memalloc-1: Access elapsed in ticks : 338 284 436 ticks
2501bs3-memalloc-1: Page access time : 9 ns/page
2502bs3-memalloc-1: Page access time in ticks : 28 ticks/page
2503bs3-memalloc-1: Access thruput : 103 266 666 pages/s
2504bs3-memalloc-1: Access thruput in MiBs : 403 385 MB/s
2505bs3-memalloc-1: 3rd access : PASSED
2506bs3-memalloc-1: SUCCESS
2507 * @endverbatim
2508 *
2509 * NEM w/ simplified memory but no MEM_LARGE_PAGES:
2510 * @verbatim
2511bs3-memalloc-1: From 0x100000000 to 0xc20000000
2512bs3-memalloc-1: Pages : 11 665 408 pages
2513bs3-memalloc-1: MiBs : 45 568 MB
2514bs3-memalloc-1: Alloc elapsed : 90 062 027 900 ns
2515bs3-memalloc-1: Alloc elapsed in ticks : 269 754 826 466 ticks
2516bs3-memalloc-1: Page alloc time : 7 720 ns/page
2517bs3-memalloc-1: Page alloc time in ticks : 23 124 ticks/page
2518bs3-memalloc-1: Alloc thruput : 129 526 pages/s
2519bs3-memalloc-1: Alloc thruput in MiBs : 505 MB/s
2520bs3-memalloc-1: Allocation speed : PASSED
2521bs3-memalloc-1: Access elapsed : 3 596 017 220 ns
2522bs3-memalloc-1: Access elapsed in ticks : 10 770 732 620 ticks
2523bs3-memalloc-1: Page access time : 308 ns/page
2524bs3-memalloc-1: Page access time in ticks : 923 ticks/page
2525bs3-memalloc-1: Access thruput : 3 243 980 pages/s
2526bs3-memalloc-1: Access thruput in MiBs : 12 671 MB/s
2527bs3-memalloc-1: 2nd access : PASSED
2528bs3-memalloc-1: Access elapsed : 133 060 160 ns
2529bs3-memalloc-1: Access elapsed in ticks : 398 459 884 ticks
2530bs3-memalloc-1: Page access time : 11 ns/page
2531bs3-memalloc-1: Page access time in ticks : 34 ticks/page
2532bs3-memalloc-1: Access thruput : 87 670 178 pages/s
2533bs3-memalloc-1: Access thruput in MiBs : 342 461 MB/s
2534bs3-memalloc-1: 3rd access : PASSED
2535 * @endverbatim
2536 *
2537 * Same everything but native VT-x and VBox (stripped output a little):
2538 * @verbatim
2539bs3-memalloc-1: From 0x100000000 to 0xc20000000
2540bs3-memalloc-1: Pages : 11 665 408 pages
2541bs3-memalloc-1: MiBs : 45 568 MB
2542bs3-memalloc-1: Alloc elapsed : 776 111 427 ns
2543bs3-memalloc-1: Alloc elapsed in ticks : 2 323 267 035 ticks
2544bs3-memalloc-1: Page alloc time : 66 ns/page
2545bs3-memalloc-1: Page alloc time in ticks : 199 ticks/page
2546bs3-memalloc-1: Alloc thruput : 15 030 584 pages/s
2547bs3-memalloc-1: Alloc thruput in MiBs : 58 713 MB/s
2548bs3-memalloc-1: Allocation speed : PASSED
2549bs3-memalloc-1: Access elapsed : 112 141 904 ns
2550bs3-memalloc-1: Access elapsed in ticks : 335 751 077 ticks
2551bs3-memalloc-1: Page access time : 9 ns/page
2552bs3-memalloc-1: Page access time in ticks : 28 ticks/page
2553bs3-memalloc-1: Access thruput : 104 023 630 pages/s
2554bs3-memalloc-1: Access thruput in MiBs : 406 342 MB/s
2555bs3-memalloc-1: 2nd access : PASSED
2556bs3-memalloc-1: Access elapsed : 112 023 049 ns
2557bs3-memalloc-1: Access elapsed in ticks : 335 418 343 ticks
2558bs3-memalloc-1: Page access time : 9 ns/page
2559bs3-memalloc-1: Page access time in ticks : 28 ticks/page
2560bs3-memalloc-1: Access thruput : 104 133 998 pages/s
2561bs3-memalloc-1: Access thruput in MiBs : 406 773 MB/s
2562bs3-memalloc-1: 3rd access : PASSED
2563 * @endverbatim
2564 *
2565 * VBox with large pages disabled:
2566 * @verbatim
2567bs3-memalloc-1: From 0x100000000 to 0xc20000000
2568bs3-memalloc-1: Pages : 11 665 408 pages
2569bs3-memalloc-1: MiBs : 45 568 MB
2570bs3-memalloc-1: Alloc elapsed : 50 986 588 028 ns
2571bs3-memalloc-1: Alloc elapsed in ticks : 152 714 862 044 ticks
2572bs3-memalloc-1: Page alloc time : 4 370 ns/page
2573bs3-memalloc-1: Page alloc time in ticks : 13 091 ticks/page
2574bs3-memalloc-1: Alloc thruput : 228 793 pages/s
2575bs3-memalloc-1: Alloc thruput in MiBs : 893 MB/s
2576bs3-memalloc-1: Allocation speed : PASSED
2577bs3-memalloc-1: Access elapsed : 2 849 641 741 ns
2578bs3-memalloc-1: Access elapsed in ticks : 8 535 372 249 ticks
2579bs3-memalloc-1: Page access time : 244 ns/page
2580bs3-memalloc-1: Page access time in ticks : 731 ticks/page
2581bs3-memalloc-1: Access thruput : 4 093 640 pages/s
2582bs3-memalloc-1: Access thruput in MiBs : 15 990 MB/s
2583bs3-memalloc-1: 2nd access : PASSED
2584bs3-memalloc-1: Access elapsed : 2 866 960 770 ns
2585bs3-memalloc-1: Access elapsed in ticks : 8 587 097 799 ticks
2586bs3-memalloc-1: Page access time : 245 ns/page
2587bs3-memalloc-1: Page access time in ticks : 736 ticks/page
2588bs3-memalloc-1: Access thruput : 4 068 910 pages/s
2589bs3-memalloc-1: Access thruput in MiBs : 15 894 MB/s
2590bs3-memalloc-1: 3rd access : PASSED
2591 * @endverbatim
2592 *
2593 * Comparing large pages, there is an allocation speed difference of two orders
2594 * of magnitude. When disabling large pages in VBox the allocation numbers are
2595 * closer, and it is clear from the 2nd and 3rd access tests that VBox doesn't
2596 * spend as much memory on nested page tables as Hyper-V does. The similar 2nd
2597 * and 3rd access numbers of the two large page testruns seem to hint strongly at
2598 * Hyper-V eventually getting the large pages in place too, only that it sucks
2599 * hundredfold in the setting up phase.
2600 *
2601 *
2602 *
2603 * @section sec_nem_win_impl Our implementation.
2604 *
2605 * We set out with the goal of wanting to run as much as possible in ring-0,
2606 * reasoning that this would give us the best performance.
2607 *
2608 * This goal was approached gradually, starting out with a pure WinHvPlatform
2609 * implementation, gradually replacing parts: register access, guest memory
2610 * handling, running virtual processors. Then finally moving it all into
2611 * ring-0, while keeping most of it configurable so that we could make
2612 * comparisons (see NEMInternal.h and nemR3NativeRunGC()).
2613 *
2614 *
2615 * @subsection subsect_nem_win_impl_ioctl VID.SYS I/O control calls
2616 *
2617 * To run things in ring-0 we need to talk directly to VID.SYS thru its I/O
2618 * control interface. Looking at changes between like build 17083 and 17101 (if
2619 * memory serves) a set of the VID I/O control numbers shifted a little, which
2620 * means we need to determine them dynamically. We currently do this by hooking
2621 * the NtDeviceIoControlFile API call from VID.DLL and snooping up the
2622 * parameters when making dummy calls to relevant APIs. (We could also
2623 * disassemble the relevant APIs and try fish out the information from that, but
2624 * this is way simpler.)
2625 *
2626 * Issuing I/O control calls from ring-0 is facing a small challenge with
2627 * respect to direct buffering. When using direct buffering the device will
2628 * typically check that the buffer is actually in the user address space range
2629 * and reject kernel addresses. Fortunately, we've got the cross context VM
2630 * structure that is mapped into both kernel and user space, it's also locked
2631 * and safe to access from kernel space. So, we place the I/O control buffers
2632 * in the per-CPU part of it (NEMCPU::uIoCtlBuf) and give the driver the user
2633 * address if direct access buffering or kernel address if not.
2634 *
2635 * The I/O control calls are 'abstracted' in the support driver, see
2636 * SUPR0IoCtlSetupForHandle(), SUPR0IoCtlPerform() and SUPR0IoCtlCleanup().
2637 *
2638 *
2639 * @subsection subsect_nem_win_impl_cpumctx CPUMCTX
2640 *
2641 * Since the CPU state needs to live in Hyper-V when executing, we probably
2642 * should not transfer more than necessary when handling VMEXITs. To help us
2643 * manage this CPUMCTX got a new field CPUMCTX::fExtrn to indicate which
2644 * part of the state is currently externalized (== in Hyper-V).
2645 *
2646 *
2647 * @subsection sec_nem_win_benchmarks Benchmarks.
2648 *
2649 * @subsubsection subsect_nem_win_benchmarks_bs2t1 17134/2018-06-22: Bootsector2-test1
2650 *
2651 * This is ValidationKit/bootsectors/bootsector2-test1.asm as of 2018-06-22
2652 * (internal r123172) running the release build of VirtualBox from the same
2653 * source, though with exit optimizations disabled. Host is AMD Threadripper 1950X
2654 * running an up to date 64-bit Windows 10 build 17134.
2655 *
2656 * The base line column is using the official WinHv API for everything but physical
2657 * memory mapping. The 2nd column is the default NEM/win configuration where we
2658 * put the main execution loop in ring-0, using hypercalls when we can and VID for
2659 * managing execution. The 3rd column is regular VirtualBox using AMD-V directly,
2660 * hyper-V is disabled, main execution loop in ring-0.
2661 *
2662 * @verbatim
2663TESTING... WinHv API Hypercalls + VID VirtualBox AMD-V
2664 32-bit paged protected mode, CPUID : 108 874 ins/sec 113% / 123 602 1198% / 1 305 113
2665 32-bit pae protected mode, CPUID : 106 722 ins/sec 115% / 122 740 1232% / 1 315 201
2666 64-bit long mode, CPUID : 106 798 ins/sec 114% / 122 111 1198% / 1 280 404
2667 16-bit unpaged protected mode, CPUID : 106 835 ins/sec 114% / 121 994 1216% / 1 299 665
2668 32-bit unpaged protected mode, CPUID : 105 257 ins/sec 115% / 121 772 1235% / 1 300 860
2669 real mode, CPUID : 104 507 ins/sec 116% / 121 800 1228% / 1 283 848
2670CPUID EAX=1 : PASSED
2671 32-bit paged protected mode, RDTSC : 99 581 834 ins/sec 100% / 100 323 307 93% / 93 473 299
2672 32-bit pae protected mode, RDTSC : 99 620 585 ins/sec 100% / 99 960 952 84% / 83 968 839
2673 64-bit long mode, RDTSC : 100 540 009 ins/sec 100% / 100 946 372 93% / 93 652 826
2674 16-bit unpaged protected mode, RDTSC : 99 688 473 ins/sec 100% / 100 097 751 76% / 76 281 287
2675 32-bit unpaged protected mode, RDTSC : 98 385 857 ins/sec 102% / 100 510 404 94% / 93 379 536
2676 real mode, RDTSC : 100 087 967 ins/sec 101% / 101 386 138 93% / 93 234 999
2677RDTSC : PASSED
2678 32-bit paged protected mode, Read CR4 : 2 156 102 ins/sec 98% / 2 121 967 17114% / 369 009 009
2679 32-bit pae protected mode, Read CR4 : 2 163 820 ins/sec 98% / 2 133 804 17469% / 377 999 261
2680 64-bit long mode, Read CR4 : 2 164 822 ins/sec 98% / 2 128 698 18875% / 408 619 313
2681 16-bit unpaged protected mode, Read CR4 : 2 162 367 ins/sec 100% / 2 168 508 17132% / 370 477 568
2682 32-bit unpaged protected mode, Read CR4 : 2 163 189 ins/sec 100% / 2 169 808 16768% / 362 734 679
2683 real mode, Read CR4 : 2 162 436 ins/sec 100% / 2 164 914 15551% / 336 288 998
2684Read CR4 : PASSED
2685 real mode, 32-bit IN : 104 649 ins/sec 118% / 123 513 1028% / 1 075 831
2686 real mode, 32-bit OUT : 107 102 ins/sec 115% / 123 660 982% / 1 052 259
2687 real mode, 32-bit IN-to-ring-3 : 105 697 ins/sec 98% / 104 471 201% / 213 216
2688 real mode, 32-bit OUT-to-ring-3 : 105 830 ins/sec 98% / 104 598 198% / 210 495
2689 16-bit unpaged protected mode, 32-bit IN : 104 855 ins/sec 117% / 123 174 1029% / 1 079 591
2690 16-bit unpaged protected mode, 32-bit OUT : 107 529 ins/sec 115% / 124 250 992% / 1 067 053
2691 16-bit unpaged protected mode, 32-bit IN-to-ring-3 : 106 337 ins/sec 103% / 109 565 196% / 209 367
2692 16-bit unpaged protected mode, 32-bit OUT-to-ring-3 : 107 558 ins/sec 100% / 108 237 191% / 206 387
2693 32-bit unpaged protected mode, 32-bit IN : 106 351 ins/sec 116% / 123 584 1016% / 1 081 325
2694 32-bit unpaged protected mode, 32-bit OUT : 106 424 ins/sec 116% / 124 252 995% / 1 059 408
2695 32-bit unpaged protected mode, 32-bit IN-to-ring-3 : 104 035 ins/sec 101% / 105 305 202% / 210 750
2696 32-bit unpaged protected mode, 32-bit OUT-to-ring-3 : 103 831 ins/sec 102% / 106 919 205% / 213 198
2697 32-bit paged protected mode, 32-bit IN : 103 356 ins/sec 119% / 123 870 1041% / 1 076 463
2698 32-bit paged protected mode, 32-bit OUT : 107 177 ins/sec 115% / 124 302 998% / 1 069 655
2699 32-bit paged protected mode, 32-bit IN-to-ring-3 : 104 491 ins/sec 100% / 104 744 200% / 209 264
2700 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 106 603 ins/sec 97% / 103 849 197% / 210 219
2701 32-bit pae protected mode, 32-bit IN : 105 923 ins/sec 115% / 122 759 1041% / 1 103 261
2702 32-bit pae protected mode, 32-bit OUT : 107 083 ins/sec 117% / 126 057 1024% / 1 096 667
2703 32-bit pae protected mode, 32-bit IN-to-ring-3 : 106 114 ins/sec 97% / 103 496 199% / 211 312
2704 32-bit pae protected mode, 32-bit OUT-to-ring-3 : 105 675 ins/sec 96% / 102 096 198% / 209 890
2705 64-bit long mode, 32-bit IN : 105 800 ins/sec 113% / 120 006 1013% / 1 072 116
2706 64-bit long mode, 32-bit OUT : 105 635 ins/sec 113% / 120 375 997% / 1 053 655
2707 64-bit long mode, 32-bit IN-to-ring-3 : 105 274 ins/sec 95% / 100 763 197% / 208 026
2708 64-bit long mode, 32-bit OUT-to-ring-3 : 106 262 ins/sec 94% / 100 749 196% / 209 288
2709NOP I/O Port Access : PASSED
2710 32-bit paged protected mode, 32-bit read : 57 687 ins/sec 119% / 69 136 1197% / 690 548
2711 32-bit paged protected mode, 32-bit write : 57 957 ins/sec 118% / 68 935 1183% / 685 930
2712 32-bit paged protected mode, 32-bit read-to-ring-3 : 57 958 ins/sec 95% / 55 432 276% / 160 505
2713 32-bit paged protected mode, 32-bit write-to-ring-3 : 57 922 ins/sec 100% / 58 340 304% / 176 464
2714 32-bit pae protected mode, 32-bit read : 57 478 ins/sec 119% / 68 453 1141% / 656 159
2715 32-bit pae protected mode, 32-bit write : 57 226 ins/sec 118% / 68 097 1157% / 662 504
2716 32-bit pae protected mode, 32-bit read-to-ring-3 : 57 582 ins/sec 94% / 54 651 268% / 154 867
2717 32-bit pae protected mode, 32-bit write-to-ring-3 : 57 697 ins/sec 100% / 57 750 299% / 173 030
2718 64-bit long mode, 32-bit read : 57 128 ins/sec 118% / 67 779 1071% / 611 949
2719 64-bit long mode, 32-bit write : 57 127 ins/sec 118% / 67 632 1084% / 619 395
2720 64-bit long mode, 32-bit read-to-ring-3 : 57 181 ins/sec 94% / 54 123 265% / 151 937
2721 64-bit long mode, 32-bit write-to-ring-3 : 57 297 ins/sec 99% / 57 286 294% / 168 694
2722 16-bit unpaged protected mode, 32-bit read : 58 827 ins/sec 118% / 69 545 1185% / 697 602
2723 16-bit unpaged protected mode, 32-bit write : 58 678 ins/sec 118% / 69 442 1183% / 694 387
2724 16-bit unpaged protected mode, 32-bit read-to-ring-3 : 57 841 ins/sec 96% / 55 730 275% / 159 163
2725 16-bit unpaged protected mode, 32-bit write-to-ring-3 : 57 855 ins/sec 101% / 58 834 304% / 176 169
2726 32-bit unpaged protected mode, 32-bit read : 58 063 ins/sec 120% / 69 690 1233% / 716 444
2727 32-bit unpaged protected mode, 32-bit write : 57 936 ins/sec 120% / 69 633 1199% / 694 753
2728 32-bit unpaged protected mode, 32-bit read-to-ring-3 : 58 451 ins/sec 96% / 56 183 273% / 159 972
2729 32-bit unpaged protected mode, 32-bit write-to-ring-3 : 58 962 ins/sec 99% / 58 955 298% / 175 936
2730 real mode, 32-bit read : 58 571 ins/sec 118% / 69 478 1160% / 679 917
2731 real mode, 32-bit write : 58 418 ins/sec 118% / 69 320 1185% / 692 513
2732 real mode, 32-bit read-to-ring-3 : 58 072 ins/sec 96% / 55 751 274% / 159 145
2733 real mode, 32-bit write-to-ring-3 : 57 870 ins/sec 101% / 58 755 307% / 178 042
2734NOP MMIO Access : PASSED
2735SUCCESS
2736 * @endverbatim
2737 *
2738 * What we see here is:
2739 *
2740 * - The WinHv API approach is 10 to 12 times slower for exits we can
2741 * handle directly in ring-0 in the VBox AMD-V code.
2742 *
2743 * - The WinHv API approach is 2 to 3 times slower for exits we have to
2744 * go to ring-3 to handle with the VBox AMD-V code.
2745 *
2746 * - By using hypercalls and VID.SYS from ring-0 we gain between
2747 * 13% and 20% over the WinHv API on exits handled in ring-0.
2748 *
2749 * - For exits requiring ring-3 handling we are between 6% slower and 3% faster
2750 * than the WinHv API.
2751 *
2752 *
2753 * As a side note, it looks like Hyper-V doesn't let the guest read CR4 but
2754 * triggers exits all the time. This isn't all that important these days since
2755 * OSes like Linux cache the CR4 value specifically to avoid these kinds of exits.
2756 *
2757 *
2758 * @subsubsection subsect_nem_win_benchmarks_bs2t1u1 17134/2018-10-02: Bootsector2-test1
2759 *
2760 * Update on 17134. While expectantly testing a couple of newer builds (17758,
2761 * 17763) hoping for some increases in performance, the numbers turned out
2762 * altogether worse than the June test run. So, we went back to the 1803
2763 * (17134) installation, made sure it was fully up to date (as per 2018-10-02)
2764 * and re-tested.
2765 *
2766 * The numbers had somehow turned significantly worse over the last 3-4 months,
2767 * dropping around 70% for the WinHv API test, more for Hypercalls + VID.
2768 *
2769 * @verbatim
2770TESTING... WinHv API Hypercalls + VID VirtualBox AMD-V *
2771 32-bit paged protected mode, CPUID : 33 270 ins/sec 33 154
2772 real mode, CPUID : 33 534 ins/sec 32 711
2773 [snip]
2774 32-bit paged protected mode, RDTSC : 102 216 011 ins/sec 98 225 419
2775 real mode, RDTSC : 102 492 243 ins/sec 98 225 419
2776 [snip]
2777 32-bit paged protected mode, Read CR4 : 2 096 165 ins/sec 2 123 815
2778 real mode, Read CR4 : 2 081 047 ins/sec 2 075 151
2779 [snip]
2780 32-bit paged protected mode, 32-bit IN : 32 739 ins/sec 33 655
2781 32-bit paged protected mode, 32-bit OUT : 32 702 ins/sec 33 777
2782 32-bit paged protected mode, 32-bit IN-to-ring-3 : 32 579 ins/sec 29 985
2783 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 32 750 ins/sec 29 757
2784 [snip]
2785 32-bit paged protected mode, 32-bit read : 20 042 ins/sec 21 489
2786 32-bit paged protected mode, 32-bit write : 20 036 ins/sec 21 493
2787 32-bit paged protected mode, 32-bit read-to-ring-3 : 19 985 ins/sec 19 143
2788 32-bit paged protected mode, 32-bit write-to-ring-3 : 19 972 ins/sec 19 595
2789
2790 * @endverbatim
2791 *
2792 * Suspects are security updates and/or microcode updates installed since then.
2793 * Given that the RDTSC and CR4 numbers are reasonably unchanged, it seems that
2794 * the Hyper-V core loop (in hvax64.exe) isn't affected. Our ring-0 runloop
2795 * is equally affected as the ring-3 based runloop, so it cannot be ring
2796 * switching as such (unless the ring-0 loop is borked and we didn't notice yet).
2797 *
2798 * The issue is probably in the thread / process switching area, could be
2799 * something special for hyper-V interrupt delivery or worker thread switching.
2800 *
2801 * Really wish this thread ping-pong going on in VID.SYS could be eliminated!
2802 *
2803 *
2804 * @subsubsection subsect_nem_win_benchmarks_bs2t1u2 17763: Bootsector2-test1
2805 *
2806 * Some preliminary numbers for build 17763 on the 3.4 GHz AMD 1950X, the second
2807 * column will improve when we get time to have a look at the register page.
2808 *
2809 * There is a 50% performance loss here compared to the June numbers with
2810 * build 17134. The RDTSC numbers hint that it isn't in the Hyper-V core
2811 * (hvax64.exe), but something on the NT side.
2812 *
2813 * Clearing bit 20 in nt!KiSpeculationFeatures speeds things up (i.e. changing
2814 * the dword from 0x00300065 to 0x00200065 in windbg). This is checked by
2815 * nt!KePrepareToDispatchVirtualProcessor, making it a no-op if the flag is
2816 * clear. winhvr!WinHvpVpDispatchLoop calls that function before making
2817 * hypercall 0xc2, which presumably does the heavy VCpu lifting in hvcax64.exe.
2818 *
2819 * @verbatim
2820TESTING... WinHv API Hypercalls + VID clr(bit-20) + WinHv API
2821 32-bit paged protected mode, CPUID : 54 145 ins/sec 51 436 130 076
2822 real mode, CPUID : 54 178 ins/sec 51 713 130 449
2823 [snip]
2824 32-bit paged protected mode, RDTSC : 98 927 639 ins/sec 100 254 552 100 549 882
2825 real mode, RDTSC : 99 601 206 ins/sec 100 886 699 100 470 957
2826 [snip]
2827 32-bit paged protected mode, 32-bit IN : 54 621 ins/sec 51 524 128 294
2828 32-bit paged protected mode, 32-bit OUT : 54 870 ins/sec 51 671 129 397
2829 32-bit paged protected mode, 32-bit IN-to-ring-3 : 54 624 ins/sec 43 964 127 874
2830 32-bit paged protected mode, 32-bit OUT-to-ring-3 : 54 803 ins/sec 44 087 129 443
2831 [snip]
2832 32-bit paged protected mode, 32-bit read : 28 230 ins/sec 34 042 48 113
2833 32-bit paged protected mode, 32-bit write : 27 962 ins/sec 34 050 48 069
2834 32-bit paged protected mode, 32-bit read-to-ring-3 : 27 841 ins/sec 28 397 48 146
2835 32-bit paged protected mode, 32-bit write-to-ring-3 : 27 896 ins/sec 29 455 47 970
2836 * @endverbatim
2837 *
2838 *
2839 * @subsubsection subsect_nem_win_benchmarks_w2k 17134/2018-06-22: Windows 2000 Boot & Shutdown
2840 *
2841 * Timing the startup and automatic shutdown of a Windows 2000 SP4 guest serves
2842 * as a real world benchmark and example of why exit performance is important. When
2843 * Windows 2000 boots up it is doing a lot of VGA redrawing of the boot animation,
2844 * which is very costly. Not having installed guest additions leaves it in a VGA
2845 * mode after the bootup sequence is done, keeping up the screen access expenses,
2846 * though the graphics driver is more economical than the bootvid code.
2847 *
2848 * The VM was configured to automatically logon. A startup script was installed
2849 * to perform the automatic shutting down and powering off the VM (thru
2850 * vts_shutdown.exe -f -p). An offline snapshot of the VM was taken and restored
2851 * before each test run. The test run time is calculated from the monotonic
2852 * VBox.log timestamps, starting with the state change to 'RUNNING' and stopping
2853 * at 'POWERING_OFF'.
2854 *
2855 * The host OS and VirtualBox build is the same as for the bootsector2-test1
2856 * scenario.
2857 *
2858 * Results:
2859 *
2860 * - WinHv API for all but physical page mappings:
2861 * 32 min 12.19 seconds
2862 *
2863 * - The default NEM/win configuration where we put the main execution loop
2864 * in ring-0, using hypercalls when we can and VID for managing execution:
2865 * 3 min 23.18 seconds
2866 *
2867 * - Regular VirtualBox using AMD-V directly, hyper-V is disabled, main
2868 * execution loop in ring-0:
2869 * 58.09 seconds
2870 *
2871 * - WinHv API with exit history based optimizations:
2872 * 58.66 seconds
2873 *
2874 * - Hypercall + VID.SYS with exit history based optimizations:
2875 * 58.94 seconds
2876 *
2877 * With a well above average machine needing over half an hour for booting a
2878 * nearly 20 year old guest kind of says it all. The 13%-20% exit performance
2879 * increase we get by using hypercalls and VID.SYS directly pays off a lot here.
2880 * The 3m23s is almost acceptable in comparison to the half an hour.
2881 *
2882 * The similarity between the last three results strongly hints at Windows 2000
2883 * doing a lot of waiting during boot and shutdown, so it isn't the best testcase
2884 * once a basic performance level is reached.
2885 *
2886 *
2887 * @subsubsection subsection_iem_win_benchmarks_deb9_nat Debian 9 NAT performance
2888 *
2889 * This benchmark is about network performance over NAT from a 64-bit Debian 9
2890 * VM with a single CPU. For network performance measurements, we use our own
2891 * NetPerf tool (ValidationKit/utils/network/NetPerf.cpp) to measure latency
2892 * and throughput.
2893 *
2894 * The setups, builds and configurations are as in the previous benchmarks
2895 * (release r123172 on 1950X running 64-bit W10/17134 (2016-06-xx)). Please note
2896 * that the exit optimizations haven't yet been tuned with NetPerf in mind.
2897 *
2898 * The NAT network setup was selected here since it's the default one and the
2899 * slowest one. There is quite a bit of IPC with worker threads and packet
2900 * processing involved.
2901 *
2902 * Latency test is first up. This is a classic back and forth between the two
2903 * NetPerf instances, where the key measurement is the roundtrip latency. The
2904 * values here are the lowest result over 3-6 runs.
2905 *
2906 * Against host system:
2907 * - 152 258 ns/roundtrip - 100% - regular VirtualBox SVM
2908 * - 271 059 ns/roundtrip - 178% - Hypercalls + VID.SYS in ring-0 with exit optimizations.
2909 * - 280 149 ns/roundtrip - 184% - Hypercalls + VID.SYS in ring-0
2910 * - 317 735 ns/roundtrip - 209% - Win HV API with exit optimizations.
2911 * - 342 440 ns/roundtrip - 225% - Win HV API
2912 *
2913 * Against a remote Windows 10 system over a 10Gbps link:
2914 * - 243 969 ns/roundtrip - 100% - regular VirtualBox SVM
2915 * - 384 427 ns/roundtrip - 158% - Win HV API with exit optimizations.
2916 * - 402 411 ns/roundtrip - 165% - Hypercalls + VID.SYS in ring-0
2917 * - 406 313 ns/roundtrip - 167% - Win HV API
2918 * - 413 160 ns/roundtrip - 169% - Hypercalls + VID.SYS in ring-0 with exit optimizations.
2919 *
2920 * What we see here is:
2921 *
2922 * - Consistent and significant latency increase using Hyper-V compared
2923 * to directly harnessing AMD-V ourselves.
2924 *
2925 * - When talking to the host, it's clear that the hypercalls + VID.SYS
2926 * in ring-0 method pays off.
2927 *
2928 * - When talking to a different host, the numbers are closer and it
2929 * is no longer clear which Hyper-V execution method is better.
2930 *
2931 *
2932 * Throughput benchmarks are performed by one side pushing data full throttle
2933 * for 10 seconds (minus 1 second at each end of the test), then reversing
2934 * the roles and measuring it in the other direction. The tests ran 3-5 times
2935 * and below are the highest and lowest results in each direction.
2936 *
2937 * Receiving from host system:
2938 * - Regular VirtualBox SVM:
2939 * Max: 96 907 549 bytes/s - 100%
2940 * Min: 86 912 095 bytes/s - 100%
2941 * - Hypercalls + VID.SYS in ring-0:
2942 * Max: 84 036 544 bytes/s - 87%
2943 * Min: 64 978 112 bytes/s - 75%
2944 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
2945 * Max: 77 760 699 bytes/s - 80%
2946 * Min: 72 677 171 bytes/s - 84%
2947 * - Win HV API with exit optimizations:
2948 * Max: 64 465 905 bytes/s - 67%
2949 * Min: 62 286 369 bytes/s - 72%
2950 * - Win HV API:
2951 * Max: 62 466 631 bytes/s - 64%
2952 * Min: 61 362 782 bytes/s - 70%
2953 *
2954 * Sending to the host system:
2955 * - Regular VirtualBox SVM:
2956 * Max: 87 728 652 bytes/s - 100%
2957 * Min: 86 923 198 bytes/s - 100%
2958 * - Hypercalls + VID.SYS in ring-0:
2959 * Max: 84 280 749 bytes/s - 96%
2960 * Min: 78 369 842 bytes/s - 90%
2961 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
2962 * Max: 84 119 932 bytes/s - 96%
2963 * Min: 77 396 811 bytes/s - 89%
2964 * - Win HV API:
2965 * Max: 81 714 377 bytes/s - 93%
2966 * Min: 78 697 419 bytes/s - 91%
2967 * - Win HV API with exit optimizations:
2968 * Max: 80 502 488 bytes/s - 91%
2969 * Min: 71 164 978 bytes/s - 82%
2970 *
2971 * Receiving from a remote Windows 10 system over a 10Gbps link:
2972 * - Hypercalls + VID.SYS in ring-0:
2973 * Max: 115 346 922 bytes/s - 136%
2974 * Min: 112 912 035 bytes/s - 137%
2975 * - Regular VirtualBox SVM:
2976 * Max: 84 517 504 bytes/s - 100%
2977 * Min: 82 597 049 bytes/s - 100%
2978 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
2979 * Max: 77 736 251 bytes/s - 92%
2980 * Min: 73 813 784 bytes/s - 89%
2981 * - Win HV API with exit optimizations:
2982 * Max: 63 035 587 bytes/s - 75%
2983 * Min: 57 538 380 bytes/s - 70%
2984 * - Win HV API:
2985 * Max: 62 279 185 bytes/s - 74%
2986 * Min: 56 813 866 bytes/s - 69%
2987 *
2988 * Sending to a remote Windows 10 system over a 10Gbps link:
2989 * - Win HV API with exit optimizations:
2990 * Max: 116 502 357 bytes/s - 103%
2991 * Min: 49 046 550 bytes/s - 59%
2992 * - Regular VirtualBox SVM:
2993 * Max: 113 030 991 bytes/s - 100%
2994 * Min: 83 059 511 bytes/s - 100%
2995 * - Hypercalls + VID.SYS in ring-0:
2996 * Max: 106 435 031 bytes/s - 94%
2997 * Min: 47 253 510 bytes/s - 57%
2998 * - Hypercalls + VID.SYS in ring-0 with exit optimizations:
2999 * Max: 94 842 287 bytes/s - 84%
3000 * Min: 68 362 172 bytes/s - 82%
3001 * - Win HV API:
3002 * Max: 65 165 225 bytes/s - 58%
3003 * Min: 47 246 573 bytes/s - 57%
3004 *
3005 * What we see here is:
3006 *
3007 * - Again consistent numbers when talking to the host. Showing that the
3008 * ring-0 approach is preferable to the ring-3 one.
3009 *
3010 * - Again when talking to a remote host, things get more difficult to
3011 * make sense of. The spread is larger and direct AMD-V gets beaten by
3012 * a different Hyper-V approach in each direction.
3013 *
3014 * - However, if we treat the first entry (remote host) as weird spikes, the
3015 * other entries are consistently worse compared to direct AMD-V. For the
3016 * send case we get really bad results for WinHV.
3017 *
3018 */
3019
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette