VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@80090

Last change on this file since 80090 was 80047, checked in by vboxsync, 6 years ago

Main: Kicking out raw-mode - GVMMR0, HMR0. bugref:9517

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 121.0 KB
1/* $Id: GVMMR0.cpp 80047 2019-07-29 19:12:29Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-CPU
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of it takes the max
42 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmcpuset.h>
59#include <VBox/vmm/vmm.h>
60#ifdef VBOX_WITH_NEM_R0
61# include <VBox/vmm/nem.h>
62#endif
63#include <VBox/param.h>
64#include <VBox/err.h>
65
66#include <iprt/asm.h>
67#include <iprt/asm-amd64-x86.h>
68#include <iprt/critsect.h>
69#include <iprt/mem.h>
70#include <iprt/semaphore.h>
71#include <iprt/time.h>
72#include <VBox/log.h>
73#include <iprt/thread.h>
74#include <iprt/process.h>
75#include <iprt/param.h>
76#include <iprt/string.h>
77#include <iprt/assert.h>
78#include <iprt/mem.h>
79#include <iprt/memobj.h>
80#include <iprt/mp.h>
81#include <iprt/cpuset.h>
82#include <iprt/spinlock.h>
83#include <iprt/timer.h>
84
85#include "dtrace/VBoxVMM.h"
86
87
88/*********************************************************************************************************************************
89* Defined Constants And Macros *
90*********************************************************************************************************************************/
91#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
92/** Define this to enable the periodic preemption timer. */
93# define GVMM_SCHED_WITH_PPT
94#endif
95
96
97/** @def GVMM_CHECK_SMAP_SETUP
98 * SMAP check setup. */
99/** @def GVMM_CHECK_SMAP_CHECK
100 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
101 * it will be logged and @a a_BadExpr is executed. */
102/** @def GVMM_CHECK_SMAP_CHECK2
103 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
104 * be logged, written to the VM's assertion text buffer, and @a a_BadExpr is
105 * executed. */
106#if defined(VBOX_STRICT) || 1
107# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
108# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
109 do { \
110 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
111 { \
112 RTCCUINTREG fEflCheck = ASMGetFlags(); \
113 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
114 { /* likely */ } \
115 else \
116 { \
117 SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
118 a_BadExpr; \
119 } \
120 } \
121 } while (0)
122# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \
123 do { \
124 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
125 { \
126 RTCCUINTREG fEflCheck = ASMGetFlags(); \
127 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
128 { /* likely */ } \
129 else \
130 { \
131 SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
132 a_BadExpr; \
133 } \
134 } \
135 } while (0)
136#else
137# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
138# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
139# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) NOREF(fKernelFeatures)
140#endif
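/* Usage sketch for the macros above (illustrative only, not a verbatim call site):
 *
 *      GVMM_CHECK_SMAP_SETUP();
 *      GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
 *      ... do the ring-0 work ...
 *      GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
 *
 * RT_NOTHING expands to nothing, i.e. log-and-continue when the AC check fails.
 */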
141
142
143
144/*********************************************************************************************************************************
145* Structures and Typedefs *
146*********************************************************************************************************************************/
147
148/**
149 * Global VM handle.
150 */
151typedef struct GVMHANDLE
152{
153 /** The index of the next handle in the list (free or used). (0 is nil.) */
154 uint16_t volatile iNext;
155 /** Our own index / handle value. */
156 uint16_t iSelf;
157 /** The process ID of the handle owner.
158 * This is used for access checks. */
159 RTPROCESS ProcId;
160 /** The pointer to the ring-0 only (aka global) VM structure. */
161 PGVM pGVM;
162 /** The ring-0 mapping of the shared VM instance data. */
163 PVM pVM;
164 /** The virtual machine object. */
165 void *pvObj;
166 /** The session this VM is associated with. */
167 PSUPDRVSESSION pSession;
168 /** The ring-0 handle of the EMT0 thread.
169 * This is used for ownership checks as well as looking up a VM handle by thread
170 * at times like assertions. */
171 RTNATIVETHREAD hEMT0;
172} GVMHANDLE;
173/** Pointer to a global VM handle. */
174typedef GVMHANDLE *PGVMHANDLE;
175
176/** Number of GVM handles (including the NIL handle). */
177#if HC_ARCH_BITS == 64
178# define GVMM_MAX_HANDLES 8192
179#else
180# define GVMM_MAX_HANDLES 128
181#endif
182
183/**
184 * Per host CPU GVMM data.
185 */
186typedef struct GVMMHOSTCPU
187{
188 /** Magic number (GVMMHOSTCPU_MAGIC). */
189 uint32_t volatile u32Magic;
190 /** The CPU ID. */
191 RTCPUID idCpu;
192 /** The CPU set index. */
193 uint32_t idxCpuSet;
194
195#ifdef GVMM_SCHED_WITH_PPT
196 /** Periodic preemption timer data. */
197 struct
198 {
199 /** The handle to the periodic preemption timer. */
200 PRTTIMER pTimer;
201 /** Spinlock protecting the data below. */
202 RTSPINLOCK hSpinlock;
203 /** The smallest Hz that we need to care about. (static) */
204 uint32_t uMinHz;
205 /** The number of ticks between each historization. */
206 uint32_t cTicksHistoriziationInterval;
207 /** The current historization tick (counting up to
208 * cTicksHistoriziationInterval and then resetting). */
209 uint32_t iTickHistorization;
210 /** The current timer interval. This is set to 0 when inactive. */
211 uint32_t cNsInterval;
212 /** The current timer frequency. This is set to 0 when inactive. */
213 uint32_t uTimerHz;
214 /** The current max frequency reported by the EMTs.
215 * This gets historicized and reset by the timer callback. This is
216 * read without holding the spinlock, so needs atomic updating. */
217 uint32_t volatile uDesiredHz;
218 /** Whether the timer was started or not. */
219 bool volatile fStarted;
220 /** Set if we're starting timer. */
221 bool volatile fStarting;
222 /** The index of the next history entry (mod it). */
223 uint32_t iHzHistory;
224 /** Historicized uDesiredHz values. The array wraps around, new entries
225 * are added at iHzHistory. This is updated approximately every
226 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
227 uint32_t aHzHistory[8];
228 /** Statistics counter for recording the number of interval changes. */
229 uint32_t cChanges;
230 /** Statistics counter for recording the number of timer starts. */
231 uint32_t cStarts;
232 } Ppt;
233#endif /* GVMM_SCHED_WITH_PPT */
234
235} GVMMHOSTCPU;
236/** Pointer to the per host CPU GVMM data. */
237typedef GVMMHOSTCPU *PGVMMHOSTCPU;
238/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
239#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
240/** The interval each history entry should cover (approximately), given in
241 * nanoseconds. */
242#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
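/* Illustrative compile-time cross-check of the ~160 ms sampling window
 * mentioned in the file header: 8 history entries of 20 ms each. */
#ifdef GVMM_SCHED_WITH_PPT
AssertCompile(RT_ELEMENTS(((GVMMHOSTCPU *)0)->Ppt.aHzHistory) * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS == UINT32_C(160000000));
#endif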
243
244
245/**
246 * The GVMM instance data.
247 */
248typedef struct GVMM
249{
250 /** Eyecatcher / magic. */
251 uint32_t u32Magic;
252 /** The index of the head of the free handle chain. (0 is nil.) */
253 uint16_t volatile iFreeHead;
254 /** The index of the head of the active handle chain. (0 is nil.) */
255 uint16_t volatile iUsedHead;
256 /** The number of VMs. */
257 uint16_t volatile cVMs;
258 /** Alignment padding. */
259 uint16_t u16Reserved;
260 /** The number of EMTs. */
261 uint32_t volatile cEMTs;
262 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
263 uint32_t volatile cHaltedEMTs;
264 /** Mini lock for restricting early wake-ups to one thread. */
265 bool volatile fDoingEarlyWakeUps;
266 bool afPadding[3]; /**< explicit alignment padding. */
267 /** When the next halted or sleeping EMT will wake up.
268 * This is set to 0 when it needs recalculating and to UINT64_MAX when
269 * there are no halted or sleeping EMTs in the GVMM. */
270 uint64_t uNsNextEmtWakeup;
271 /** The lock used to serialize VM creation, destruction and associated events that
272 * isn't performance critical. Owners may acquire the list lock. */
273 RTCRITSECT CreateDestroyLock;
274 /** The lock used to serialize used list updates and accesses.
275 * This indirectly includes scheduling since the scheduler will have to walk the
276 * used list to examine running VMs. Owners may not acquire any other locks. */
277 RTCRITSECTRW UsedLock;
278 /** The handle array.
279 * The size of this array defines the maximum number of currently running VMs.
280 * The first entry is unused as it represents the NIL handle. */
281 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
282
283 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
284 * The number of EMTs that means we no longer consider ourselves alone on a
285 * CPU/Core.
286 */
287 uint32_t cEMTsMeansCompany;
288 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
289 * The minimum sleep time for when we're alone, in nanoseconds.
290 */
291 uint32_t nsMinSleepAlone;
292 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
293 * The minimum sleep time for when we've got company, in nanoseconds.
294 */
295 uint32_t nsMinSleepCompany;
296 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
297 * The limit for the first round of early wake-ups, given in nanoseconds.
298 */
299 uint32_t nsEarlyWakeUp1;
300 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
301 * The limit for the second round of early wake-ups, given in nanoseconds.
302 */
303 uint32_t nsEarlyWakeUp2;
304
305 /** Set if we're doing early wake-ups.
306 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
307 bool volatile fDoEarlyWakeUps;
308
309 /** The number of entries in the host CPU array (aHostCpus). */
310 uint32_t cHostCpus;
311 /** Per host CPU data (variable length). */
312 GVMMHOSTCPU aHostCpus[1];
313} GVMM;
314AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
315AssertCompileMemberAlignment(GVMM, UsedLock, 8);
316AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
317/** Pointer to the GVMM instance data. */
318typedef GVMM *PGVMM;
319
320/** The GVMM::u32Magic value (Charlie Haden). */
321#define GVMM_MAGIC UINT32_C(0x19370806)
322
323
324
325/*********************************************************************************************************************************
326* Global Variables *
327*********************************************************************************************************************************/
328/** Pointer to the GVMM instance data.
329 * (Just my general dislike for global variables.) */
330static PGVMM g_pGVMM = NULL;
331
332/** Macro for obtaining and validating the g_pGVMM pointer.
333 * On failure it will return from the invoking function with the specified return value.
334 *
335 * @param pGVMM The name of the pGVMM variable.
336 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
337 * status codes.
338 */
339#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
340 do { \
341 (pGVMM) = g_pGVMM;\
342 AssertPtrReturn((pGVMM), (rc)); \
343 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
344 } while (0)
345
346/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
347 * On failure it will return from the invoking function.
348 *
349 * @param pGVMM The name of the pGVMM variable.
350 */
351#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
352 do { \
353 (pGVMM) = g_pGVMM;\
354 AssertPtrReturnVoid((pGVMM)); \
355 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
356 } while (0)
357
358
359/*********************************************************************************************************************************
360* Internal Functions *
361*********************************************************************************************************************************/
362#ifdef VBOX_BUGREF_9217
363static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
364#else
365static void gvmmR0InitPerVMData(PGVM pGVM);
366#endif
367static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
368static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock);
369static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM);
370
371#ifdef GVMM_SCHED_WITH_PPT
372static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
373#endif
374
375
376/**
377 * Initializes the GVMM.
378 *
379 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
380 *
381 * @returns VBox status code.
382 */
383GVMMR0DECL(int) GVMMR0Init(void)
384{
385 LogFlow(("GVMMR0Init:\n"));
386
387 /*
388 * Allocate and initialize the instance data.
389 */
390 uint32_t cHostCpus = RTMpGetArraySize();
391 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
392
393 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
394 if (!pGVMM)
395 return VERR_NO_MEMORY;
396 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
397 "GVMM-CreateDestroyLock");
398 if (RT_SUCCESS(rc))
399 {
400 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
401 if (RT_SUCCESS(rc))
402 {
403 pGVMM->u32Magic = GVMM_MAGIC;
404 pGVMM->iUsedHead = 0;
405 pGVMM->iFreeHead = 1;
406
407 /* the nil handle */
408 pGVMM->aHandles[0].iSelf = 0;
409 pGVMM->aHandles[0].iNext = 0;
410
411 /* the tail */
412 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
413 pGVMM->aHandles[i].iSelf = i;
414 pGVMM->aHandles[i].iNext = 0; /* nil */
415
416 /* the rest */
417 while (i-- > 1)
418 {
419 pGVMM->aHandles[i].iSelf = i;
420 pGVMM->aHandles[i].iNext = i + 1;
421 }
422
423 /* The default configuration values. */
424 uint32_t cNsResolution = RTSemEventMultiGetResolution();
425 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
426 if (cNsResolution >= 5*RT_NS_100US)
427 {
428 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
429 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
430 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
431 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
432 }
433 else if (cNsResolution > RT_NS_100US)
434 {
435 pGVMM->nsMinSleepAlone = cNsResolution / 2;
436 pGVMM->nsMinSleepCompany = cNsResolution / 4;
437 pGVMM->nsEarlyWakeUp1 = 0;
438 pGVMM->nsEarlyWakeUp2 = 0;
439 }
440 else
441 {
442 pGVMM->nsMinSleepAlone = 2000;
443 pGVMM->nsMinSleepCompany = 2000;
444 pGVMM->nsEarlyWakeUp1 = 0;
445 pGVMM->nsEarlyWakeUp2 = 0;
446 }
447 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
448
449 /* The host CPU data. */
450 pGVMM->cHostCpus = cHostCpus;
451 uint32_t iCpu = cHostCpus;
452 RTCPUSET PossibleSet;
453 RTMpGetSet(&PossibleSet);
454 while (iCpu-- > 0)
455 {
456 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
457#ifdef GVMM_SCHED_WITH_PPT
458 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
459 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
460 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
461 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
462 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
463 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
464 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
465 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
466 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
467 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
468 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
469 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
470#endif
471
472 if (RTCpuSetIsMember(&PossibleSet, iCpu))
473 {
474 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
475 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
476
477#ifdef GVMM_SCHED_WITH_PPT
478 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
479 50*1000*1000 /* whatever */,
480 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
481 gvmmR0SchedPeriodicPreemptionTimerCallback,
482 &pGVMM->aHostCpus[iCpu]);
483 if (RT_SUCCESS(rc))
484 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
485 if (RT_FAILURE(rc))
486 {
487 while (iCpu < cHostCpus)
488 {
489 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
490 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
491 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
492 iCpu++;
493 }
494 break;
495 }
496#endif
497 }
498 else
499 {
500 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
501 pGVMM->aHostCpus[iCpu].u32Magic = 0;
502 }
503 }
504 if (RT_SUCCESS(rc))
505 {
506 g_pGVMM = pGVMM;
507 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
508 return VINF_SUCCESS;
509 }
510
511 /* bail out. */
512 RTCritSectRwDelete(&pGVMM->UsedLock);
513 }
514 RTCritSectDelete(&pGVMM->CreateDestroyLock);
515 }
516
517 RTMemFree(pGVMM);
518 return rc;
519}
520
521
522/**
523 * Terminates the GVM.
524 *
525 * This is called while owning the loader semaphore (see supdrvLdrFree()).
526 * And unless something is wrong, there should be absolutely no VMs
527 * registered at this point.
528 */
529GVMMR0DECL(void) GVMMR0Term(void)
530{
531 LogFlow(("GVMMR0Term:\n"));
532
533 PGVMM pGVMM = g_pGVMM;
534 g_pGVMM = NULL;
535 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
536 {
537 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
538 return;
539 }
540
541 /*
542 * First of all, stop all active timers.
543 */
544 uint32_t cActiveTimers = 0;
545 uint32_t iCpu = pGVMM->cHostCpus;
546 while (iCpu-- > 0)
547 {
548 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
549#ifdef GVMM_SCHED_WITH_PPT
550 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
551 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
552 cActiveTimers++;
553#endif
554 }
555 if (cActiveTimers)
556 RTThreadSleep(1); /* fudge */
557
558 /*
559 * Invalidate the instance and free resources.
560 */
561 pGVMM->u32Magic = ~GVMM_MAGIC;
562 RTCritSectRwDelete(&pGVMM->UsedLock);
563 RTCritSectDelete(&pGVMM->CreateDestroyLock);
564
565 pGVMM->iFreeHead = 0;
566 if (pGVMM->iUsedHead)
567 {
568 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
569 pGVMM->iUsedHead = 0;
570 }
571
572#ifdef GVMM_SCHED_WITH_PPT
573 iCpu = pGVMM->cHostCpus;
574 while (iCpu-- > 0)
575 {
576 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
577 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
578 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
579 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
580 }
581#endif
582
583 RTMemFree(pGVMM);
584}
585
586
587/**
588 * A quick hack for setting global config values.
589 *
590 * @returns VBox status code.
591 *
592 * @param pSession The session handle. Used for authentication.
593 * @param pszName The variable name.
594 * @param u64Value The new value.
595 */
596GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
597{
598 /*
599 * Validate input.
600 */
601 PGVMM pGVMM;
602 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
603 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
604 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
605
606 /*
607 * String switch time!
608 */
609 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
610 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
611 int rc = VINF_SUCCESS;
612 pszName += sizeof("/GVMM/") - 1;
613 if (!strcmp(pszName, "cEMTsMeansCompany"))
614 {
615 if (u64Value <= UINT32_MAX)
616 pGVMM->cEMTsMeansCompany = u64Value;
617 else
618 rc = VERR_OUT_OF_RANGE;
619 }
620 else if (!strcmp(pszName, "MinSleepAlone"))
621 {
622 if (u64Value <= RT_NS_100MS)
623 pGVMM->nsMinSleepAlone = u64Value;
624 else
625 rc = VERR_OUT_OF_RANGE;
626 }
627 else if (!strcmp(pszName, "MinSleepCompany"))
628 {
629 if (u64Value <= RT_NS_100MS)
630 pGVMM->nsMinSleepCompany = u64Value;
631 else
632 rc = VERR_OUT_OF_RANGE;
633 }
634 else if (!strcmp(pszName, "EarlyWakeUp1"))
635 {
636 if (u64Value <= RT_NS_100MS)
637 {
638 pGVMM->nsEarlyWakeUp1 = u64Value;
639 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
640 }
641 else
642 rc = VERR_OUT_OF_RANGE;
643 }
644 else if (!strcmp(pszName, "EarlyWakeUp2"))
645 {
646 if (u64Value <= RT_NS_100MS)
647 {
648 pGVMM->nsEarlyWakeUp2 = u64Value;
649 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
650 }
651 else
652 rc = VERR_OUT_OF_RANGE;
653 }
654 else
655 rc = VERR_CFGM_VALUE_NOT_FOUND;
656 return rc;
657}
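/* Usage sketch for GVMMR0SetConfig (illustrative only; the session pointer and
 * the values are hypothetical, and any "/GVMM/" knob handled above works the
 * same way, subject to its range check):
 *
 *      rc = GVMMR0SetConfig(pSession, "/GVMM/EarlyWakeUp1", 30000);     // 30 us
 *      if (RT_SUCCESS(rc))
 *          rc = GVMMR0SetConfig(pSession, "/GVMM/EarlyWakeUp2", 60000); // 60 us
 */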
658
659
660/**
661 * A quick hack for getting global config values.
662 *
663 * @returns VBox status code.
664 *
665 * @param pSession The session handle. Used for authentication.
666 * @param pszName The variable name.
667 * @param pu64Value Where to return the value.
668 */
669GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
670{
671 /*
672 * Validate input.
673 */
674 PGVMM pGVMM;
675 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
676 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
677 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
678 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
679
680 /*
681 * String switch time!
682 */
683 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
684 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
685 int rc = VINF_SUCCESS;
686 pszName += sizeof("/GVMM/") - 1;
687 if (!strcmp(pszName, "cEMTsMeansCompany"))
688 *pu64Value = pGVMM->cEMTsMeansCompany;
689 else if (!strcmp(pszName, "MinSleepAlone"))
690 *pu64Value = pGVMM->nsMinSleepAlone;
691 else if (!strcmp(pszName, "MinSleepCompany"))
692 *pu64Value = pGVMM->nsMinSleepCompany;
693 else if (!strcmp(pszName, "EarlyWakeUp1"))
694 *pu64Value = pGVMM->nsEarlyWakeUp1;
695 else if (!strcmp(pszName, "EarlyWakeUp2"))
696 *pu64Value = pGVMM->nsEarlyWakeUp2;
697 else
698 rc = VERR_CFGM_VALUE_NOT_FOUND;
699 return rc;
700}
701
702
703/**
704 * Acquire the 'used' lock in shared mode.
705 *
706 * This prevents destruction of the VM while we're in ring-0.
707 *
708 * @returns IPRT status code, see RTCritSectRwEnterShared.
709 * @param a_pGVMM The GVMM instance data.
710 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
711 */
712#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
713
714/**
715 * Release the 'used' lock when owning it in shared mode.
716 *
717 * @returns IPRT status code, see RTCritSectRwLeaveShared.
718 * @param a_pGVMM The GVMM instance data.
719 * @sa GVMMR0_USED_SHARED_LOCK
720 */
721#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
722
723/**
724 * Acquire the 'used' lock in exclusive mode.
725 *
726 * Only use this function when making changes to the used list.
727 *
728 * @returns IPRT status code, see RTCritSectRwEnterExcl.
729 * @param a_pGVMM The GVMM instance data.
730 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
731 */
732#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
733
734/**
735 * Release the 'used' lock when owning it in exclusive mode.
736 *
737 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
738 * @param a_pGVMM The GVMM instance data.
739 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
740 */
741#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
742
743
744/**
745 * Try acquire the 'create & destroy' lock.
746 *
747 * @returns IPRT status code, see RTCritSectEnter.
748 * @param pGVMM The GVMM instance data.
749 */
750DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
751{
752 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
753 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
754 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
755 return rc;
756}
757
758
759/**
760 * Release the 'create & destroy' lock.
761 *
762 * @returns IPRT status code, see RTCritSectLeave.
763 * @param pGVMM The GVMM instance data.
764 */
765DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
766{
767 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
768 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
769 AssertRC(rc);
770 return rc;
771}
772
773
774/**
775 * Request wrapper for the GVMMR0CreateVM API.
776 *
777 * @returns VBox status code.
778 * @param pReq The request buffer.
779 * @param pSession The session handle. The VM will be associated with this.
780 */
781GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
782{
783 /*
784 * Validate the request.
785 */
786 if (!VALID_PTR(pReq))
787 return VERR_INVALID_POINTER;
788 if (pReq->Hdr.cbReq != sizeof(*pReq))
789 return VERR_INVALID_PARAMETER;
790 if (pReq->pSession != pSession)
791 return VERR_INVALID_POINTER;
792
793 /*
794 * Execute it.
795 */
796 PVM pVM;
797 pReq->pVMR0 = NULL;
798 pReq->pVMR3 = NIL_RTR3PTR;
799 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pVM);
800 if (RT_SUCCESS(rc))
801 {
802 pReq->pVMR0 = pVM;
803 pReq->pVMR3 = pVM->pVMR3;
804 }
805 return rc;
806}
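/* Ring-3 usage sketch (illustrative only; this roughly mirrors how VMR3Create
 * issues the request through the support driver, the exact dispatch constants
 * being assumptions, while the request fields are the ones validated above):
 *
 *      GVMMCREATEVMREQ Req;
 *      Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *      Req.Hdr.cbReq    = sizeof(Req);
 *      Req.pSession     = pSession;
 *      Req.cCpus        = cCpus;
 *      Req.pVMR0        = NIL_RTR0PTR;
 *      Req.pVMR3        = NIL_RTR3PTR;
 *      rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_GVMM_CREATE_VM, 0, &Req.Hdr);
 *      // On success Req.pVMR3 (and Req.pVMR0) point to the newly created VM.
 */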
807
808
809/**
810 * Allocates the VM structure and registers it with GVM.
811 *
812 * The caller will become the VM owner and thereby the EMT.
813 *
814 * @returns VBox status code.
815 * @param pSession The support driver session.
816 * @param cCpus Number of virtual CPUs for the new VM.
817 * @param ppVM Where to store the pointer to the VM structure.
818 *
819 * @thread EMT.
820 */
821GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
822{
823 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
824 PGVMM pGVMM;
825 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
826
827 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
828 *ppVM = NULL;
829
830 if ( cCpus == 0
831 || cCpus > VMM_MAX_CPU_COUNT)
832 return VERR_INVALID_PARAMETER;
833
834 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
835 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
836 RTPROCESS ProcId = RTProcSelf();
837 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
838
839 /*
840 * The whole allocation process is protected by the lock.
841 */
842 int rc = gvmmR0CreateDestroyLock(pGVMM);
843 AssertRCReturn(rc, rc);
844
845 /*
846 * Only one VM per session.
847 */
848 if (SUPR0GetSessionVM(pSession) != NULL)
849 {
850 gvmmR0CreateDestroyUnlock(pGVMM);
851 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
852 return VERR_ALREADY_EXISTS;
853 }
854
855 /*
856 * Allocate a handle first so we don't waste resources unnecessarily.
857 */
858 uint16_t iHandle = pGVMM->iFreeHead;
859 if (iHandle)
860 {
861 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
862
863 /* consistency checks, a bit paranoid as always. */
864 if ( !pHandle->pVM
865 && !pHandle->pGVM
866 && !pHandle->pvObj
867 && pHandle->iSelf == iHandle)
868 {
869 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
870 if (pHandle->pvObj)
871 {
872 /*
873 * Move the handle from the free to used list and perform permission checks.
874 */
875 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
876 AssertRC(rc);
877
878 pGVMM->iFreeHead = pHandle->iNext;
879 pHandle->iNext = pGVMM->iUsedHead;
880 pGVMM->iUsedHead = iHandle;
881 pGVMM->cVMs++;
882
883 pHandle->pVM = NULL;
884 pHandle->pGVM = NULL;
885 pHandle->pSession = pSession;
886 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
887 pHandle->ProcId = NIL_RTPROCESS;
888
889 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
890
891 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
892 if (RT_SUCCESS(rc))
893 {
894#ifdef VBOX_BUGREF_9217
895 /*
896 * Allocate memory for the VM structure (combined VM + GVM).
897 */
898 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
899 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
900 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
901 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
902 if (RT_SUCCESS(rc))
903 {
904 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
905 AssertPtr(pGVM);
906
907 /*
908 * Initialise the structure.
909 */
910 RT_BZERO(pGVM, cPages << PAGE_SHIFT);
911 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
912 GMMR0InitPerVMData(pGVM);
913 pGVM->gvmm.s.VMMemObj = hVMMemObj;
914
915 /*
916 * Allocate page array.
917 * This currently has to be made available to ring-3, but this should change eventually.
918 */
919 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
920 if (RT_SUCCESS(rc))
921 {
922 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
923 for (uint32_t iPage = 0; iPage < cPages; iPage++)
924 {
925 paPages[iPage].uReserved = 0;
926 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
927 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
928 }
929
930 /*
931 * Map the page array, VM and VMCPU structures into ring-3.
932 */
933 AssertCompileSizeAlignment(VM, PAGE_SIZE);
934 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
935 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
936 0 /*offSub*/, sizeof(VM));
937 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
938 {
939 AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
940 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
941 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
942 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
943 }
944 if (RT_SUCCESS(rc))
945 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
946 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
947 NIL_RTR0PROCESS);
948 if (RT_SUCCESS(rc))
949 {
950 /*
951 * Initialize all the VM pointers.
952 */
953 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
954 AssertPtr((void *)pVMR3);
955
956 for (VMCPUID i = 0; i < cCpus; i++)
957 {
958 pGVM->aCpus[i].pVMR0 = pGVM;
959 pGVM->aCpus[i].pVMR3 = pVMR3;
960 pGVM->apCpus[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
961 AssertPtr((void *)pGVM->apCpus[i]);
962 }
963
964 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
965 AssertPtr((void *)pGVM->paVMPagesR3);
966
967 /*
968 * Complete the handle - take the UsedLock sem just to be careful.
969 */
970 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
971 AssertRC(rc);
972
973 pHandle->pVM = pGVM;
974 pHandle->pGVM = pGVM;
975 pHandle->hEMT0 = hEMT0;
976 pHandle->ProcId = ProcId;
977 pGVM->pVMR3 = pVMR3;
978 pGVM->aCpus[0].hEMT = hEMT0;
979 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
980 pGVMM->cEMTs += cCpus;
981
982 /* Associate it with the session and create the context hook for EMT0. */
983 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
984 if (RT_SUCCESS(rc))
985 {
986 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
987 if (RT_SUCCESS(rc))
988 {
989 /*
990 * Done!
991 */
992 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
993
994 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
995 gvmmR0CreateDestroyUnlock(pGVMM);
996
997 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
998
999 *ppVM = pGVM;
1000 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1001 return VINF_SUCCESS;
1002 }
1003
1004 SUPR0SetSessionVM(pSession, NULL, NULL);
1005 }
1006 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1007 }
1008
1009 /* Cleanup mappings. */
1010 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1011 {
1012 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1013 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1014 }
1015 for (VMCPUID i = 0; i < cCpus; i++)
1016 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1017 {
1018 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1019 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1020 }
1021 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1022 {
1023 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1024 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1025 }
1026 }
1027 }
1028
1029#else
1030 /*
1031 * Allocate the global VM structure (GVM) and initialize it.
1032 */
1033 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]));
1034 if (pGVM)
1035 {
1036 pGVM->u32Magic = GVM_MAGIC;
1037 pGVM->hSelf = iHandle;
1038 pGVM->pVM = NULL;
1039 pGVM->cCpus = cCpus;
1040 pGVM->pSession = pSession;
1041
1042 gvmmR0InitPerVMData(pGVM);
1043 GMMR0InitPerVMData(pGVM);
1044
1045 /*
1046 * Allocate the shared VM structure and associated page array.
1047 */
1048 const uint32_t cbVM = RT_UOFFSETOF_DYN(VM, aCpus[cCpus]);
1049 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
1050 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
1051 if (RT_SUCCESS(rc))
1052 {
1053 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
1054 memset(pVM, 0, cPages << PAGE_SHIFT);
1055 pVM->enmVMState = VMSTATE_CREATING;
1056 pVM->pVMR0 = pVM;
1057 pVM->pSession = pSession;
1058 pVM->hSelf = iHandle;
1059 pVM->cbSelf = cbVM;
1060 pVM->cCpus = cCpus;
1061 pVM->uCpuExecutionCap = 100; /* default is no cap. */
1062 AssertCompileMemberAlignment(VM, cpum, 64);
1063 AssertCompileMemberAlignment(VM, tm, 64);
1064 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
1065
1066 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
1067 if (RT_SUCCESS(rc))
1068 {
1069 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
1070 for (uint32_t iPage = 0; iPage < cPages; iPage++)
1071 {
1072 paPages[iPage].uReserved = 0;
1073 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
1074 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
1075 }
1076
1077 /*
1078 * Map them into ring-3.
1079 */
1080 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
1081 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
1082 if (RT_SUCCESS(rc))
1083 {
1084 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
1085 pVM->pVMR3 = pVMR3;
1086 AssertPtr((void *)pVMR3);
1087
1088 /* Initialize all the VM pointers. */
1089 for (VMCPUID i = 0; i < cCpus; i++)
1090 {
1091 pVM->aCpus[i].idCpu = i;
1092 pVM->aCpus[i].pVMR0 = pVM;
1093 pVM->aCpus[i].pVMR3 = pVMR3;
1094 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1095 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1096 }
1097
1098 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
1099 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
1100 NIL_RTR0PROCESS);
1101 if (RT_SUCCESS(rc))
1102 {
1103 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
1104 AssertPtr((void *)pVM->paVMPagesR3);
1105
1106 /* complete the handle - take the UsedLock sem just to be careful. */
1107 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1108 AssertRC(rc);
1109
1110 pHandle->pVM = pVM;
1111 pHandle->pGVM = pGVM;
1112 pHandle->hEMT0 = hEMT0;
1113 pHandle->ProcId = ProcId;
1114 pGVM->pVM = pVM;
1115 pGVM->pVMR3 = pVMR3;
1116 pGVM->aCpus[0].hEMT = hEMT0;
1117 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
1118 pGVMM->cEMTs += cCpus;
1119
1120 for (VMCPUID i = 0; i < cCpus; i++)
1121 {
1122 pGVM->aCpus[i].pVCpu = &pVM->aCpus[i];
1123 pGVM->aCpus[i].pVM = pVM;
1124 }
1125
1126 /* Associate it with the session and create the context hook for EMT0. */
1127 rc = SUPR0SetSessionVM(pSession, pGVM, pVM);
1128 if (RT_SUCCESS(rc))
1129 {
1130 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
1131 if (RT_SUCCESS(rc))
1132 {
1133 /*
1134 * Done!
1135 */
1136 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
1137
1138 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1139 gvmmR0CreateDestroyUnlock(pGVMM);
1140
1141 CPUMR0RegisterVCpuThread(&pVM->aCpus[0]);
1142
1143 *ppVM = pVM;
1144 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVMR3, pGVM, iHandle));
1145 return VINF_SUCCESS;
1146 }
1147
1148 SUPR0SetSessionVM(pSession, NULL, NULL);
1149 }
1150 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1151 }
1152
1153 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1154 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1155 }
1156 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
1157 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1158 }
1159 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
1160 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1161 }
1162 }
1163#endif
1164 }
1165 /* else: The user wasn't permitted to create this VM. */
1166
1167 /*
1168 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1169 * object reference here. A little extra mess because of non-recursive lock.
1170 */
1171 void *pvObj = pHandle->pvObj;
1172 pHandle->pvObj = NULL;
1173 gvmmR0CreateDestroyUnlock(pGVMM);
1174
1175 SUPR0ObjRelease(pvObj, pSession);
1176
1177 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1178 return rc;
1179 }
1180
1181 rc = VERR_NO_MEMORY;
1182 }
1183 else
1184 rc = VERR_GVMM_IPE_1;
1185 }
1186 else
1187 rc = VERR_GVM_TOO_MANY_VMS;
1188
1189 gvmmR0CreateDestroyUnlock(pGVMM);
1190 return rc;
1191}
1192
1193
1194#ifdef VBOX_BUGREF_9217
1195/**
1196 * Initializes the per VM data belonging to GVMM.
1197 *
1198 * @param pGVM Pointer to the global VM structure.
1199 */
1200static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1201#else
1202/**
1203 * Initializes the per VM data belonging to GVMM.
1204 *
1205 * @param pGVM Pointer to the global VM structure.
1206 */
1207static void gvmmR0InitPerVMData(PGVM pGVM)
1208#endif
1209{
1210 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1211 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1212#ifdef VBOX_BUGREF_9217
1213 AssertCompileMemberAlignment(VM, cpum, 64);
1214 AssertCompileMemberAlignment(VM, tm, 64);
1215
1216 /* GVM: */
1217 pGVM->u32Magic = GVM_MAGIC;
1218 pGVM->hSelfSafe = hSelf;
1219 pGVM->cCpusSafe = cCpus;
1220 pGVM->pSessionSafe = pSession;
1221
1222 /* VM: */
1223 pGVM->enmVMState = VMSTATE_CREATING;
1224 pGVM->pVMR0 = pGVM;
1225 pGVM->pSession = pSession;
1226 pGVM->hSelf = hSelf;
1227 pGVM->cCpus = cCpus;
1228 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1229 pGVM->uStructVersion = 1;
1230 pGVM->cbSelf = sizeof(VM);
1231 pGVM->cbVCpu = sizeof(VMCPU);
1232#endif
1233
1234 /* GVMM: */
1235 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1236 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1237 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1238 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1239 pGVM->gvmm.s.fDoneVMMR0Init = false;
1240 pGVM->gvmm.s.fDoneVMMR0Term = false;
1241
1242 /*
1243 * Per virtual CPU.
1244 */
1245 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1246 {
1247 pGVM->aCpus[i].idCpu = i;
1248#ifdef VBOX_BUGREF_9217
1249 pGVM->aCpus[i].idCpuSafe = i;
1250#endif
1251 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1252#ifdef VBOX_BUGREF_9217
1253 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1254#endif
1255 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1256 pGVM->aCpus[i].pGVM = pGVM;
1257#ifndef VBOX_BUGREF_9217
1258 pGVM->aCpus[i].pVCpu = NULL;
1259 pGVM->aCpus[i].pVM = NULL;
1260#endif
1261#ifdef VBOX_BUGREF_9217
1262 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1263 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1264 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1265 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1266 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1267#endif
1268 }
1269}
1270
1271
1272/**
1273 * Does the VM initialization.
1274 *
1275 * @returns VBox status code.
1276 * @param pGVM The global (ring-0) VM structure.
1277 */
1278GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1279{
1280 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1281
1282 int rc = VERR_INTERNAL_ERROR_3;
1283 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1284 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1285 {
1286 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1287 {
1288 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1289 if (RT_FAILURE(rc))
1290 {
1291 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1292 break;
1293 }
1294 }
1295 }
1296 else
1297 rc = VERR_WRONG_ORDER;
1298
1299 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1300 return rc;
1301}
1302
1303
1304/**
1305 * Indicates that we're done with the ring-0 initialization
1306 * of the VM.
1307 *
1308 * @param pGVM The global (ring-0) VM structure.
1309 * @thread EMT(0)
1310 */
1311GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1312{
1313 /* Set the indicator. */
1314 pGVM->gvmm.s.fDoneVMMR0Init = true;
1315}
1316
1317
1318/**
1319 * Indicates that we're doing the ring-0 termination of the VM.
1320 *
1321 * @returns true if termination hasn't been done already, false if it has.
1322 * @param pGVM Pointer to the global VM structure. Optional.
1323 * @thread EMT(0) or session cleanup thread.
1324 */
1325GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1326{
1327 /* Validate the VM structure, state and handle. */
1328 AssertPtrReturn(pGVM, false);
1329
1330 /* Set the indicator. */
1331 if (pGVM->gvmm.s.fDoneVMMR0Term)
1332 return false;
1333 pGVM->gvmm.s.fDoneVMMR0Term = true;
1334 return true;
1335}
1336
1337
1338/**
1339 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1340 *
1341 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1342 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1343 * would've been nice if the caller were actually the EMT thread or if we somehow
1344 * could've associated the calling thread with the VM up front.
1345 *
1346 * @returns VBox status code.
1347 * @param pGVM The global (ring-0) VM structure.
1348 * @param pVM The cross context VM structure.
1349 *
1350 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1351 */
1352GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM, PVM pVM)
1353{
1354 LogFlow(("GVMMR0DestroyVM: pGVM=%p pVM=%p\n", pGVM, pVM));
1355 PGVMM pGVMM;
1356 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1357
1358 /*
1359 * Validate the VM structure, state and caller.
1360 */
1361 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1362 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1363 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1364#ifdef VBOX_BUGREF_9217
1365 AssertReturn(pGVM == pVM, VERR_INVALID_POINTER);
1366#else
1367 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_POINTER);
1368#endif
1369 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState),
1370 VERR_WRONG_ORDER);
1371
1372 uint32_t hGVM = pGVM->hSelf;
1373 ASMCompilerBarrier();
1374 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1375 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1376
1377 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1378 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1379
1380 RTPROCESS ProcId = RTProcSelf();
1381 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1382 AssertReturn( ( pHandle->hEMT0 == hSelf
1383 && pHandle->ProcId == ProcId)
1384 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1385
1386 /*
1387 * Lookup the handle and destroy the object.
1388 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1389 * object, we take some precautions against racing callers just in case...
1390 */
1391 int rc = gvmmR0CreateDestroyLock(pGVMM);
1392 AssertRC(rc);
1393
1394 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1395 if ( pHandle->pVM == pVM
1396 && ( ( pHandle->hEMT0 == hSelf
1397 && pHandle->ProcId == ProcId)
1398 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1399 && VALID_PTR(pHandle->pvObj)
1400 && VALID_PTR(pHandle->pSession)
1401 && VALID_PTR(pHandle->pGVM)
1402 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1403 {
1404 /* Check that other EMTs have deregistered. */
1405 uint32_t cNotDeregistered = 0;
1406 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1407 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1408 if (cNotDeregistered == 0)
1409 {
1410 /* Grab the object pointer. */
1411 void *pvObj = pHandle->pvObj;
1412 pHandle->pvObj = NULL;
1413 gvmmR0CreateDestroyUnlock(pGVMM);
1414
1415 SUPR0ObjRelease(pvObj, pHandle->pSession);
1416 }
1417 else
1418 {
1419 gvmmR0CreateDestroyUnlock(pGVMM);
1420 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1421 }
1422 }
1423 else
1424 {
1425 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1426 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1427 gvmmR0CreateDestroyUnlock(pGVMM);
1428 rc = VERR_GVMM_IPE_2;
1429 }
1430
1431 return rc;
1432}
1433
1434
1435/**
1436 * Performs VM cleanup task as part of object destruction.
1437 *
1438 * @param pGVM The GVM pointer.
1439 */
1440static void gvmmR0CleanupVM(PGVM pGVM)
1441{
1442 if ( pGVM->gvmm.s.fDoneVMMR0Init
1443 && !pGVM->gvmm.s.fDoneVMMR0Term)
1444 {
1445 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1446#ifdef VBOX_BUGREF_9217
1447 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM
1448#else
1449 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM
1450#endif
1451 )
1452 {
1453 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1454#ifdef VBOX_BUGREF_9217
1455 VMMR0TermVM(pGVM, pGVM, NIL_VMCPUID);
1456#else
1457 VMMR0TermVM(pGVM, pGVM->pVM, NIL_VMCPUID);
1458#endif
1459 }
1460 else
1461#ifdef VBOX_BUGREF_9217
1462 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1463#else
1464 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1465#endif
1466 }
1467
1468 GMMR0CleanupVM(pGVM);
1469#ifdef VBOX_WITH_NEM_R0
1470 NEMR0CleanupVM(pGVM);
1471#endif
1472
1473 AssertCompile(NIL_RTTHREADCTXHOOK == (RTTHREADCTXHOOK)0); /* Depends on zero initialized memory working for NIL at the moment. */
1474#ifdef VBOX_BUGREF_9217
1475 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpusSafe; idCpu++)
1476#else
1477 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1478#endif
1479 {
1480 /** @todo Can we busy wait here for all thread-context hooks to be
1481 * deregistered before releasing (destroying) it? Only until we find a
1482 * solution for not deregistering hooks every time we're leaving HMR0
1483 * context. */
1484#ifdef VBOX_BUGREF_9217
1485 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1486#else
1487 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->pVM->aCpus[idCpu]);
1488#endif
1489 }
1490}
1491
1492
1493/**
1494 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1495 *
1496 * pvUser1 is the GVM instance pointer.
1497 * pvUser2 is the handle pointer.
1498 */
1499static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1500{
1501 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1502
1503 NOREF(pvObj);
1504
1505 /*
1506 * Some quick, paranoid, input validation.
1507 */
1508 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1509 AssertPtr(pHandle);
1510 PGVMM pGVMM = (PGVMM)pvUser1;
1511 Assert(pGVMM == g_pGVMM);
1512 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1513 if ( !iHandle
1514 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1515 || iHandle != pHandle->iSelf)
1516 {
1517 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1518 return;
1519 }
1520
1521 int rc = gvmmR0CreateDestroyLock(pGVMM);
1522 AssertRC(rc);
1523 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1524 AssertRC(rc);
1525
1526 /*
1527 * This is a tad slow but a doubly linked list is too much hassle.
1528 */
1529 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1530 {
1531 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1532 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1533 gvmmR0CreateDestroyUnlock(pGVMM);
1534 return;
1535 }
1536
1537 if (pGVMM->iUsedHead == iHandle)
1538 pGVMM->iUsedHead = pHandle->iNext;
1539 else
1540 {
1541 uint16_t iPrev = pGVMM->iUsedHead;
1542 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1543 while (iPrev)
1544 {
1545 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1546 {
1547 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1548 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1549 gvmmR0CreateDestroyUnlock(pGVMM);
1550 return;
1551 }
1552 if (RT_UNLIKELY(c-- <= 0))
1553 {
1554 iPrev = 0;
1555 break;
1556 }
1557
1558 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1559 break;
1560 iPrev = pGVMM->aHandles[iPrev].iNext;
1561 }
1562 if (!iPrev)
1563 {
1564 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1565 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1566 gvmmR0CreateDestroyUnlock(pGVMM);
1567 return;
1568 }
1569
1570 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1571 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1572 }
1573 pHandle->iNext = 0;
1574 pGVMM->cVMs--;
1575
1576 /*
1577 * Do the global cleanup round.
1578 */
1579 PGVM pGVM = pHandle->pGVM;
1580 if ( VALID_PTR(pGVM)
1581 && pGVM->u32Magic == GVM_MAGIC)
1582 {
1583 pGVMM->cEMTs -= pGVM->cCpus;
1584
1585 if (pGVM->pSession)
1586 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1587
1588 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1589
1590 gvmmR0CleanupVM(pGVM);
1591
1592 /*
1593 * Do the GVMM cleanup - must be done last.
1594 */
1595 /* The VM and VM pages mappings/allocations. */
1596 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1597 {
1598 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1599 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1600 }
1601
1602 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1603 {
1604 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1605 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1606 }
1607
1608 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1609 {
1610 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1611 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1612 }
1613
1614#ifndef VBOX_BUGREF_9217
1615 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1616 {
1617 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1618 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1619 }
1620#endif
1621
1622 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1623 {
1624 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1625 {
1626 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1627 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1628 }
1629#ifdef VBOX_BUGREF_9217
1630 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1631 {
1632 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1633 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1634 }
1635#endif
1636 }
1637
1638 /* the GVM structure itself. */
1639 pGVM->u32Magic |= UINT32_C(0x80000000);
1640#ifdef VBOX_BUGREF_9217
1641 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1642 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1643#else
1644 RTMemFree(pGVM);
1645#endif
1646 pGVM = NULL;
1647
1648 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1649 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1650 AssertRC(rc);
1651 }
1652 /* else: GVMMR0CreateVM cleanup. */
1653
1654 /*
1655 * Free the handle.
1656 */
1657 pHandle->iNext = pGVMM->iFreeHead;
1658 pGVMM->iFreeHead = iHandle;
1659 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1660 ASMAtomicWriteNullPtr(&pHandle->pVM);
1661 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1662 ASMAtomicWriteNullPtr(&pHandle->pSession);
1663 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1664 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1665
1666 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1667 gvmmR0CreateDestroyUnlock(pGVMM);
1668 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1669}
1670
1671
1672/**
1673 * Registers the calling thread as the EMT of a Virtual CPU.
1674 *
1675 * Note that VCPU 0 is automatically registered during VM creation.
1676 *
1677 * @returns VBox status code
1678 * @param pGVM The global (ring-0) VM structure.
1679 * @param pVM The cross context VM structure.
1680 * @param idCpu VCPU id to register the current thread as.
1681 */
1682GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1683{
1684 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1685
1686 /*
1687 * Validate the VM structure, state and handle.
1688 */
1689 PGVMM pGVMM;
1690 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1691 if (RT_SUCCESS(rc))
1692 {
1693 if (idCpu < pGVM->cCpus)
1694 {
1695 /* Check that the EMT isn't already assigned to a thread. */
1696 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1697 {
1698#ifdef VBOX_BUGREF_9217
1699 Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1700#else
1701 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1702#endif
1703
1704 /* A thread may only be one EMT. */
1705 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1706 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1707 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1708 if (RT_SUCCESS(rc))
1709 {
1710 /*
1711 * Do the assignment, then try setup the hook. Undo if that fails.
1712 */
1713#ifdef VBOX_BUGREF_9217
1714 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1715
1716 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
1717 if (RT_SUCCESS(rc))
1718 CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
1719 else
1720 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1721#else
1722 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1723
1724 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
1725 if (RT_SUCCESS(rc))
1726 CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]);
1727 else
1728 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1729#endif
1730 }
1731 }
1732 else
1733 rc = VERR_ACCESS_DENIED;
1734 }
1735 else
1736 rc = VERR_INVALID_CPU_ID;
1737 }
1738 return rc;
1739}
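/* Editor's note: an illustrative, non-compiled sketch of claiming VCPU 1 for the
 * calling thread.  The pGVM/pVM variables and the error handling are assumptions;
 * real callers normally reach this function through the VMMR0 request dispatcher
 * (the VMMR0_DO_GVMM_REGISTER_VMCPU operation). */
#if 0 /* example only */
    int rcExample = GVMMR0RegisterVCpu(pGVM, pVM, 1 /* idCpu */);
    if (RT_FAILURE(rcExample))
        /* VERR_ACCESS_DENIED: the VCPU already has an EMT; VERR_INVALID_PARAMETER:
           this thread is already registered as another EMT. */
        LogRel(("Example: registering EMT for VCPU 1 failed: %Rrc\n", rcExample));
#endif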
1740
1741
1742/**
1743 * Deregisters the calling thread as the EMT of a Virtual CPU.
1744 *
1745 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1746 *
1747 * @returns VBox status code
1748 * @param pGVM The global (ring-0) VM structure.
1749 * @param pVM The cross context VM structure.
1750 * @param idCpu VCPU id to deregister the current thread from.
1751 */
1752GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1753{
1754 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1755
1756 /*
1757 * Validate the VM structure, state and handle.
1758 */
1759 PGVMM pGVMM;
1760 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1761 if (RT_SUCCESS(rc))
1762 {
1763 /*
1764 * Take the destruction lock and recheck the handle state to
1765 * prevent racing GVMMR0DestroyVM.
1766 */
1767 gvmmR0CreateDestroyLock(pGVMM);
1768 uint32_t hSelf = pGVM->hSelf;
1769 ASMCompilerBarrier();
1770 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1771 && pGVMM->aHandles[hSelf].pvObj != NULL
1772 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1773 {
1774 /*
1775 * Do per-EMT cleanups.
1776 */
1777#ifdef VBOX_BUGREF_9217
1778 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1779#else
1780 VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
1781#endif
1782
1783 /*
1784 * Invalidate hEMT. We don't use NIL here as that would allow
1785 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1786 */
1787 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1788 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1789#ifdef VBOX_BUGREF_9217
1790 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1791#else
1792 pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1793#endif
1794 }
1795
1796 gvmmR0CreateDestroyUnlock(pGVMM);
1797 }
1798 return rc;
1799}
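/* Editor's note: an illustrative, non-compiled sketch of an EMT other than VCPU 0
 * unregistering itself before its thread exits; the idCpu variable is an assumption.
 * VCPU 0 must go through GVMMR0DestroyVM instead. */
#if 0 /* example only */
    int rcExample = GVMMR0DeregisterVCpu(pGVM, pVM, idCpu);
    AssertLogRelMsg(RT_SUCCESS(rcExample), ("Deregistering VCPU %u failed: %Rrc\n", idCpu, rcExample));
#endif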
1800
1801
1802/**
1803 * Lookup a GVM structure by its handle.
1804 *
1805 * @returns The GVM pointer on success, NULL on failure.
1806 * @param hGVM The global VM handle. Asserts on bad handle.
1807 */
1808GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1809{
1810 PGVMM pGVMM;
1811 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1812
1813 /*
1814 * Validate.
1815 */
1816 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1817 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1818
1819 /*
1820 * Look it up.
1821 */
1822 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1823 AssertPtrReturn(pHandle->pVM, NULL);
1824 AssertPtrReturn(pHandle->pvObj, NULL);
1825 PGVM pGVM = pHandle->pGVM;
1826 AssertPtrReturn(pGVM, NULL);
1827#ifdef VBOX_BUGREF_9217
1828 AssertReturn(pGVM == pHandle->pVM, NULL);
1829#else
1830 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1831#endif
1832
1833 return pHandle->pGVM;
1834}
1835
1836
1837/**
1838 * Lookup a GVM structure by the shared VM structure.
1839 *
1840 * The calling thread must be in the same process as the VM. All current lookups
1841 * are by threads inside the same process, so this will not be an issue.
1842 *
1843 * @returns VBox status code.
1844 * @param pVM The cross context VM structure.
1845 * @param ppGVM Where to store the GVM pointer.
1846 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1847 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1848 * shared mode when requested.
1849 *
1850 * Be very careful if not taking the lock as it's
1851 * possible that the VM will disappear then!
1852 *
1853 * @remark This will not assert on an invalid pVM but try to return silently.
1854 */
1855static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1856{
1857 RTPROCESS ProcId = RTProcSelf();
1858 PGVMM pGVMM;
1859 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1860
1861 /*
1862 * Validate.
1863 */
1864 if (RT_UNLIKELY( !VALID_PTR(pVM)
1865 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1866 return VERR_INVALID_POINTER;
1867 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1868 || pVM->enmVMState >= VMSTATE_TERMINATED))
1869 return VERR_INVALID_POINTER;
1870
1871 uint16_t hGVM = pVM->hSelf;
1872 ASMCompilerBarrier();
1873 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1874 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1875 return VERR_INVALID_HANDLE;
1876
1877 /*
1878 * Look it up.
1879 */
1880 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1881 PGVM pGVM;
1882 if (fTakeUsedLock)
1883 {
1884 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1885 AssertRCReturn(rc, rc);
1886
1887 pGVM = pHandle->pGVM;
1888#ifdef VBOX_BUGREF_9217
1889 if (RT_UNLIKELY( pHandle->pVM != pVM
1890 || pHandle->ProcId != ProcId
1891 || !VALID_PTR(pHandle->pvObj)
1892 || !VALID_PTR(pGVM)
1893 || pGVM != pVM))
1894#else
1895 if (RT_UNLIKELY( pHandle->pVM != pVM
1896 || pHandle->ProcId != ProcId
1897 || !VALID_PTR(pHandle->pvObj)
1898 || !VALID_PTR(pGVM)
1899 || pGVM->pVM != pVM))
1900#endif
1901 {
1902 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1903 return VERR_INVALID_HANDLE;
1904 }
1905 }
1906 else
1907 {
1908 if (RT_UNLIKELY(pHandle->pVM != pVM))
1909 return VERR_INVALID_HANDLE;
1910 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1911 return VERR_INVALID_HANDLE;
1912 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1913 return VERR_INVALID_HANDLE;
1914
1915 pGVM = pHandle->pGVM;
1916 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1917 return VERR_INVALID_HANDLE;
1918#ifdef VBOX_BUGREF_9217
1919 if (RT_UNLIKELY(pGVM != pVM))
1920#else
1921 if (RT_UNLIKELY(pGVM->pVM != pVM))
1922#endif
1923 return VERR_INVALID_HANDLE;
1924 }
1925
1926 *ppGVM = pGVM;
1927 *ppGVMM = pGVMM;
1928 return VINF_SUCCESS;
1929}
1930
1931
1932/**
1933 * Fast look up a GVM structure by the cross context VM structure.
1934 *
1935 * This is mainly used as a glue function, so performance matters.
1936 *
1937 * @returns GVM on success, NULL on failure.
1938 * @param pVM The cross context VM structure. ASSUMES to be
1939 * reasonably valid, so we can do fewer checks than in
1940 * gvmmR0ByVM.
1941 *
1942 * @note Do not use this on pVM structures from userland!
1943 */
1944GVMMR0DECL(PGVM) GVMMR0FastGetGVMByVM(PVM pVM)
1945{
1946 AssertPtr(pVM);
1947 Assert(!((uintptr_t)pVM & PAGE_OFFSET_MASK));
1948
1949 PGVMM pGVMM;
1950 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1951
1952 /*
1953 * Validate.
1954 */
1955 uint16_t hGVM = pVM->hSelf;
1956 ASMCompilerBarrier();
1957 AssertReturn(hGVM != NIL_GVM_HANDLE && hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1958
1959 /*
1960 * Look it up and check pVM against the value in the handle and GVM structures.
1961 */
1962 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1963 AssertReturn(pHandle->pVM == pVM, NULL);
1964
1965 PGVM pGVM = pHandle->pGVM;
1966 AssertPtrReturn(pGVM, NULL);
1967#ifdef VBOX_BUGREF_9217
1968 AssertReturn(pGVM == pVM, NULL);
1969#else
1970 AssertReturn(pGVM->pVM == pVM, NULL);
1971#endif
1972
1973 return pGVM;
1974}
1975
1976
1977/**
1978 * Check that the given GVM and VM structures match up.
1979 *
1980 * The calling thread must be in the same process as the VM. All current lookups
1981 * are by threads inside the same process, so this will not be an issue.
1982 *
1983 * @returns VBox status code.
1984 * @param pGVM The global (ring-0) VM structure.
1985 * @param pVM The cross context VM structure.
1986 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1987 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1988 * shared mode when requested.
1989 *
1990 * Be very careful if not taking the lock as it's
1991 * possible that the VM will disappear then!
1992 *
1993 * @remark This will not assert on an invalid pVM but try to return silently.
1994 */
1995static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1996{
1997 /*
1998 * Check the pointers.
1999 */
2000 int rc;
2001 if (RT_LIKELY(RT_VALID_PTR(pGVM)))
2002 {
2003 if (RT_LIKELY( RT_VALID_PTR(pVM)
2004 && ((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0))
2005 {
2006#ifdef VBOX_BUGREF_9217
2007 if (RT_LIKELY(pGVM == pVM))
2008#else
2009 if (RT_LIKELY(pGVM->pVM == pVM))
2010#endif
2011 {
2012 /*
2013 * Get the pGVMM instance and check the VM handle.
2014 */
2015 PGVMM pGVMM;
2016 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2017
2018 uint16_t hGVM = pGVM->hSelf;
2019 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
2020 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
2021 {
2022 RTPROCESS const pidSelf = RTProcSelf();
2023 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
2024 if (fTakeUsedLock)
2025 {
2026 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2027 AssertRCReturn(rc, rc);
2028 }
2029
2030 if (RT_LIKELY( pHandle->pGVM == pGVM
2031 && pHandle->pVM == pVM
2032 && pHandle->ProcId == pidSelf
2033 && RT_VALID_PTR(pHandle->pvObj)))
2034 {
2035 /*
2036 * Some more VM data consistency checks.
2037 */
2038 if (RT_LIKELY( pVM->cCpus == pGVM->cCpus
2039 && pVM->hSelf == hGVM
2040 && pVM->enmVMState >= VMSTATE_CREATING
2041 && pVM->enmVMState <= VMSTATE_TERMINATED
2042 && pVM->pVMR0 == pVM))
2043 {
2044 *ppGVMM = pGVMM;
2045 return VINF_SUCCESS;
2046 }
2047 }
2048
2049 if (fTakeUsedLock)
2050 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2051 }
2052 }
2053 rc = VERR_INVALID_VM_HANDLE;
2054 }
2055 else
2056 rc = VERR_INVALID_POINTER;
2057 }
2058 else
2059 rc = VERR_INVALID_POINTER;
2060 return rc;
2061}
2062
2063
2064/**
2065 * Check that the given GVM and VM structures match up.
2066 *
2067 * The calling thread must be in the same process as the VM. All current lookups
2068 * are by threads inside the same process, so this will not be an issue.
2069 *
2070 * @returns VBox status code.
2071 * @param pGVM The global (ring-0) VM structure.
2072 * @param pVM The cross context VM structure.
2073 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
2074 * @param ppGVMM Where to store the pointer to the GVMM instance data.
2075 * @thread EMT
2076 *
2077 * @remarks This will assert in all failure paths.
2078 */
2079static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM)
2080{
2081 /*
2082 * Check the pointers.
2083 */
2084 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
2085
2086 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
2087 AssertReturn(((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
2088#ifdef VBOX_BUGREF_9217
2089 AssertReturn(pGVM == pVM, VERR_INVALID_VM_HANDLE);
2090#else
2091 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_VM_HANDLE);
2092#endif
2093
2094
2095 /*
2096 * Get the pGVMM instance and check the VM handle.
2097 */
2098 PGVMM pGVMM;
2099 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2100
2101 uint16_t hGVM = pGVM->hSelf;
2102 ASMCompilerBarrier();
2103 AssertReturn( hGVM != NIL_GVM_HANDLE
2104 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
2105
2106 RTPROCESS const pidSelf = RTProcSelf();
2107 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
2108 AssertReturn( pHandle->pGVM == pGVM
2109 && pHandle->pVM == pVM
2110 && pHandle->ProcId == pidSelf
2111 && RT_VALID_PTR(pHandle->pvObj),
2112 VERR_INVALID_HANDLE);
2113
2114 /*
2115 * Check the EMT claim.
2116 */
2117 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
2118 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
2119 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
2120
2121 /*
2122 * Some more VM data consistency checks.
2123 */
2124 AssertReturn(pVM->cCpus == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
2125 AssertReturn(pVM->hSelf == hGVM, VERR_INCONSISTENT_VM_HANDLE);
2126 AssertReturn(pVM->pVMR0 == pVM, VERR_INCONSISTENT_VM_HANDLE);
2127 AssertReturn( pVM->enmVMState >= VMSTATE_CREATING
2128 && pVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
2129
2130 *ppGVMM = pGVMM;
2131 return VINF_SUCCESS;
2132}
2133
2134
2135/**
2136 * Validates a GVM/VM pair.
2137 *
2138 * @returns VBox status code.
2139 * @param pGVM The global (ring-0) VM structure.
2140 * @param pVM The cross context VM structure.
2141 */
2142GVMMR0DECL(int) GVMMR0ValidateGVMandVM(PGVM pGVM, PVM pVM)
2143{
2144 PGVMM pGVMM;
2145 return gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /*fTakeUsedLock*/);
2146}
2147
2148
2149
2150/**
2151 * Validates a GVM/VM/EMT combo.
2152 *
2153 * @returns VBox status code.
2154 * @param pGVM The global (ring-0) VM structure.
2155 * @param pVM The cross context VM structure.
2156 * @param idCpu The Virtual CPU ID of the calling EMT.
2157 * @thread EMT(idCpu)
2158 */
2159GVMMR0DECL(int) GVMMR0ValidateGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2160{
2161 PGVMM pGVMM;
2162 return gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2163}
2164
2165
2166/**
2167 * Looks up the VM belonging to the specified EMT thread.
2168 *
2169 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2170 * unnecessary kernel panics when the EMT thread hits an assertion. The
2171 * caller may or may not be an EMT thread.
2172 *
2173 * @returns Pointer to the VM on success, NULL on failure.
2174 * @param hEMT The native thread handle of the EMT.
2175 * NIL_RTNATIVETHREAD means the current thread
2176 */
2177GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
2178{
2179 /*
2180 * No assertions here as we're usually called in an AssertMsgN or
2181 * RTAssert* context.
2182 */
2183 PGVMM pGVMM = g_pGVMM;
2184 if ( !VALID_PTR(pGVMM)
2185 || pGVMM->u32Magic != GVMM_MAGIC)
2186 return NULL;
2187
2188 if (hEMT == NIL_RTNATIVETHREAD)
2189 hEMT = RTThreadNativeSelf();
2190 RTPROCESS ProcId = RTProcSelf();
2191
2192 /*
2193 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2194 */
2195/** @todo introduce some pid hash table here, please. */
2196 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2197 {
2198 if ( pGVMM->aHandles[i].iSelf == i
2199 && pGVMM->aHandles[i].ProcId == ProcId
2200 && VALID_PTR(pGVMM->aHandles[i].pvObj)
2201 && VALID_PTR(pGVMM->aHandles[i].pVM)
2202 && VALID_PTR(pGVMM->aHandles[i].pGVM))
2203 {
2204 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2205 return pGVMM->aHandles[i].pVM;
2206
2207 /* This is fairly safe with the current process-per-VM approach. */
2208 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2209 VMCPUID const cCpus = pGVM->cCpus;
2210 ASMCompilerBarrier();
2211 if ( cCpus < 1
2212 || cCpus > VMM_MAX_CPU_COUNT)
2213 continue;
2214 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2215 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2216 return pGVMM->aHandles[i].pVM;
2217 }
2218 }
2219 return NULL;
2220}
2221
2222
2223/**
2224 * Looks up the GVMCPU belonging to the specified EMT thread.
2225 *
2226 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2227 * unnecessary kernel panics when the EMT thread hits an assertion. The
2228 * caller may or may not be an EMT thread.
2229 *
2230 * @returns Pointer to the virtual CPU structure on success, NULL on failure.
2231 * @param hEMT The native thread handle of the EMT.
2232 * NIL_RTNATIVETHREAD means the current thread
2233 */
2234GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2235{
2236 /*
2237 * No assertions here as we're usually called in an AssertMsgN,
2238 * RTAssert*, Log and LogRel contexts.
2239 */
2240 PGVMM pGVMM = g_pGVMM;
2241 if ( !VALID_PTR(pGVMM)
2242 || pGVMM->u32Magic != GVMM_MAGIC)
2243 return NULL;
2244
2245 if (hEMT == NIL_RTNATIVETHREAD)
2246 hEMT = RTThreadNativeSelf();
2247 RTPROCESS ProcId = RTProcSelf();
2248
2249 /*
2250 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2251 */
2252/** @todo introduce some pid hash table here, please. */
2253 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2254 {
2255 if ( pGVMM->aHandles[i].iSelf == i
2256 && pGVMM->aHandles[i].ProcId == ProcId
2257 && VALID_PTR(pGVMM->aHandles[i].pvObj)
2258 && VALID_PTR(pGVMM->aHandles[i].pVM)
2259 && VALID_PTR(pGVMM->aHandles[i].pGVM))
2260 {
2261 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2262 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2263 return &pGVM->aCpus[0];
2264
2265 /* This is fairly safe with the current process-per-VM approach. */
2266 VMCPUID const cCpus = pGVM->cCpus;
2267 ASMCompilerBarrier();
2268 ASMCompilerBarrier();
2269 if ( cCpus < 1
2270 || cCpus > VMM_MAX_CPU_COUNT)
2271 continue;
2272 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2273 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2274 return &pGVM->aCpus[idCpu];
2275 }
2276 }
2277 return NULL;
2278}
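/* Editor's note: an illustrative, non-compiled sketch of how assertion or logging code
 * might use the two lock-free EMT lookups above; the SUPR0Printf output is an assumption. */
#if 0 /* example only */
    PVM     pVMOfEmt    = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD);     /* current thread */
    PGVMCPU pGVCpuOfEmt = GVMMR0GetGVCpuByEMT(NIL_RTNATIVETHREAD);
    if (pVMOfEmt && pGVCpuOfEmt)
        SUPR0Printf("Assertion hit on EMT of VM %p, VCPU #%u\n", pVMOfEmt, pGVCpuOfEmt->idCpu);
#endif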
2279
2280
2281/**
2282 * This will wake up expired and soon-to-be-expired VMs.
2283 *
2284 * @returns Number of EMTs that have been woken up.
2285 * @param pGVMM Pointer to the GVMM instance data.
2286 * @param u64Now The current time.
2287 */
2288static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2289{
2290 /*
2291 * Skip this if we have been disabled, either because of high resolution wake-ups
2292 * or by the user.
2293 */
2294 if (!pGVMM->fDoEarlyWakeUps)
2295 return 0;
2296
2297/** @todo Rewrite this algorithm. See performance defect XYZ. */
2298
2299 /*
2300 * A cheap optimization to stop wasting so much time here on big setups.
2301 */
2302 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2303 if ( pGVMM->cHaltedEMTs == 0
2304 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2305 return 0;
2306
2307 /*
2308 * Only one thread doing this at a time.
2309 */
2310 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2311 return 0;
2312
2313 /*
2314 * The first pass will wake up VMs which have actually expired
2315 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2316 */
2317 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2318 uint64_t u64Min = UINT64_MAX;
2319 unsigned cWoken = 0;
2320 unsigned cHalted = 0;
2321 unsigned cTodo2nd = 0;
2322 unsigned cTodo3rd = 0;
2323 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2324 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2325 i = pGVMM->aHandles[i].iNext)
2326 {
2327 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2328 if ( VALID_PTR(pCurGVM)
2329 && pCurGVM->u32Magic == GVM_MAGIC)
2330 {
2331 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2332 {
2333 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2334 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2335 if (u64)
2336 {
2337 if (u64 <= u64Now)
2338 {
2339 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2340 {
2341 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2342 AssertRC(rc);
2343 cWoken++;
2344 }
2345 }
2346 else
2347 {
2348 cHalted++;
2349 if (u64 <= uNsEarlyWakeUp1)
2350 cTodo2nd++;
2351 else if (u64 <= uNsEarlyWakeUp2)
2352 cTodo3rd++;
2353 else if (u64 < u64Min)
2354 u64Min = u64;
2355 }
2356 }
2357 }
2358 }
2359 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2360 }
2361
2362 if (cTodo2nd)
2363 {
2364 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2365 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2366 i = pGVMM->aHandles[i].iNext)
2367 {
2368 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2369 if ( VALID_PTR(pCurGVM)
2370 && pCurGVM->u32Magic == GVM_MAGIC)
2371 {
2372 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2373 {
2374 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2375 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2376 if ( u64
2377 && u64 <= uNsEarlyWakeUp1)
2378 {
2379 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2380 {
2381 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2382 AssertRC(rc);
2383 cWoken++;
2384 }
2385 }
2386 }
2387 }
2388 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2389 }
2390 }
2391
2392 if (cTodo3rd)
2393 {
2394 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2395 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2396 i = pGVMM->aHandles[i].iNext)
2397 {
2398 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2399 if ( VALID_PTR(pCurGVM)
2400 && pCurGVM->u32Magic == GVM_MAGIC)
2401 {
2402 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2403 {
2404 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2405 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2406 if ( u64
2407 && u64 <= uNsEarlyWakeUp2)
2408 {
2409 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2410 {
2411 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2412 AssertRC(rc);
2413 cWoken++;
2414 }
2415 }
2416 }
2417 }
2418 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2419 }
2420 }
2421
2422 /*
2423 * Set the minimum value.
2424 */
2425 pGVMM->uNsNextEmtWakeup = u64Min;
2426
2427 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2428 return cWoken;
2429}
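/* Editor's note: a worked example of the three wake-up windows above.  Assuming the
 * defaults configured elsewhere in this file are nsEarlyWakeUp1 = 25 us and
 * nsEarlyWakeUp2 = 50 us (assumption), a call at u64Now wakes, in order: every EMT whose
 * u64HaltExpire is <= u64Now (pass 1), then those expiring before u64Now + 25000 ns
 * (pass 2), then those expiring before u64Now + 50000 ns (pass 3); uNsNextEmtWakeup is
 * meant to end up holding the earliest expiry that was left sleeping. */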
2430
2431
2432/**
2433 * Halt the EMT thread.
2434 *
2435 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2436 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2437 * @param pGVM The global (ring-0) VM structure.
2438 * @param pVM The cross context VM structure.
2439 * @param pGVCpu The global (ring-0) CPU structure of the calling
2440 * EMT.
2441 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2442 * @thread EMT(pGVCpu).
2443 */
2444GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PVM pVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2445{
2446 LogFlow(("GVMMR0SchedHalt: pGVM=%p pVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2447 pGVM, pVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2448 GVMM_CHECK_SMAP_SETUP();
2449 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2450
2451 PGVMM pGVMM;
2452 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2453
2454 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2455 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2456
2457 /*
2458 * If we're doing early wake-ups, we must take the UsedList lock before we
2459 * start querying the current time.
2460 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2461 */
2462 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2463 if (fDoEarlyWakeUps)
2464 {
2465 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2466 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2467 }
2468
2469 pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2470
2471 /* GIP hack: We may be sleeping frequently for short intervals where the
2472 difference between GIP and system time matters on systems with high resolution
2473 system time. So, convert the input from GIP to System time in that case. */
2474 Assert(ASMGetFlags() & X86_EFL_IF);
2475 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2476 const uint64_t u64NowGip = RTTimeNanoTS();
2477 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2478
2479 if (fDoEarlyWakeUps)
2480 {
2481 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2482 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2483 }
2484
2485 /*
2486 * Go to sleep if we must...
2487 * Cap the sleep time to 1 second to be on the safe side.
2488 */
2489 int rc;
2490 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2491 if ( u64NowGip < u64ExpireGipTime
2492 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2493 ? pGVMM->nsMinSleepCompany
2494 : pGVMM->nsMinSleepAlone))
2495 {
2496 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2497 if (cNsInterval > RT_NS_1SEC)
2498 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2499 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2500 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2501 if (fDoEarlyWakeUps)
2502 {
2503 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2504 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2505 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2506 }
2507 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2508
2509 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2510 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2511 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2512 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2513
2514 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2515 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2516
2517 /* Reset the semaphore to try to prevent a few false wake-ups. */
2518 if (rc == VINF_SUCCESS)
2519 {
2520 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2521 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2522 }
2523 else if (rc == VERR_TIMEOUT)
2524 {
2525 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2526 rc = VINF_SUCCESS;
2527 }
2528 }
2529 else
2530 {
2531 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2532 if (fDoEarlyWakeUps)
2533 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2534 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2535 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2536 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2537 rc = VINF_SUCCESS;
2538 }
2539
2540 return rc;
2541}
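/* Editor's note: an illustrative, non-compiled sketch of an EMT blocking for up to one
 * millisecond of GIP time and reacting to an interruption; the surrounding variables
 * and the LogFlow message are assumptions. */
#if 0 /* example only */
    uint64_t const u64Deadline = RTTimeNanoTS() + RT_NS_1MS;
    int rcExample = GVMMR0SchedHalt(pGVM, pVM, &pGVM->aCpus[idCpu], u64Deadline);
    if (rcExample == VERR_INTERRUPTED)
        LogFlow(("Halt interrupted by a pending signal; caller should return to ring-3\n"));
#endif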
2542
2543
2544/**
2545 * Halt the EMT thread.
2546 *
2547 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2548 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2549 * @param pGVM The global (ring-0) VM structure.
2550 * @param pVM The cross context VM structure.
2551 * @param idCpu The Virtual CPU ID of the calling EMT.
2552 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2553 * @thread EMT(idCpu).
2554 */
2555GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2556{
2557 GVMM_CHECK_SMAP_SETUP();
2558 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2559 PGVMM pGVMM;
2560 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2561 if (RT_SUCCESS(rc))
2562 {
2563 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2564 rc = GVMMR0SchedHalt(pGVM, pVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2565 }
2566 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2567 return rc;
2568}
2569
2570
2571
2572/**
2573 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2574 * a sleeping EMT.
2575 *
2576 * @retval VINF_SUCCESS if successfully woken up.
2577 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2578 *
2579 * @param pGVM The global (ring-0) VM structure.
2580 * @param pGVCpu The global (ring-0) VCPU structure.
2581 */
2582DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2583{
2584 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2585
2586 /*
2587 * Signal the semaphore regardless of whether the EMT is currently blocked on it.
2588 *
2589 * The reason for this is that there is absolutely no way we can be 100%
2590 * certain that it isn't *about* to go to sleep on it and just got
2591 * delayed a bit en route. So, we will always signal the semaphore when
2592 * it is flagged as halted in the VMM.
2593 */
2594/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2595 int rc;
2596 if (pGVCpu->gvmm.s.u64HaltExpire)
2597 {
2598 rc = VINF_SUCCESS;
2599 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2600 }
2601 else
2602 {
2603 rc = VINF_GVM_NOT_BLOCKED;
2604 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2605 }
2606
2607 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2608 AssertRC(rc2);
2609
2610 return rc;
2611}
2612
2613
2614/**
2615 * Wakes up the halted EMT thread so it can service a pending request.
2616 *
2617 * @returns VBox status code.
2618 * @retval VINF_SUCCESS if successfully woken up.
2619 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2620 *
2621 * @param pGVM The global (ring-0) VM structure.
2622 * @param pVM The cross context VM structure.
2623 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2624 * @param fTakeUsedLock Take the used lock or not
2625 * @thread Any but EMT(idCpu).
2626 */
2627GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2628{
2629 GVMM_CHECK_SMAP_SETUP();
2630 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2631
2632 /*
2633 * Validate input and take the UsedLock.
2634 */
2635 PGVMM pGVMM;
2636 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2637 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2638 if (RT_SUCCESS(rc))
2639 {
2640 if (idCpu < pGVM->cCpus)
2641 {
2642 /*
2643 * Do the actual job.
2644 */
2645 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2646 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2647
2648 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2649 {
2650 /*
2651 * While we're here, do a round of scheduling.
2652 */
2653 Assert(ASMGetFlags() & X86_EFL_IF);
2654 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2655 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2656 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2657 }
2658 }
2659 else
2660 rc = VERR_INVALID_CPU_ID;
2661
2662 if (fTakeUsedLock)
2663 {
2664 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2665 AssertRC(rc2);
2666 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2667 }
2668 }
2669
2670 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2671 return rc;
2672}
2673
2674
2675/**
2676 * Wakes up the halted EMT thread so it can service a pending request.
2677 *
2678 * @returns VBox status code.
2679 * @retval VINF_SUCCESS if successfully woken up.
2680 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2681 *
2682 * @param pGVM The global (ring-0) VM structure.
2683 * @param pVM The cross context VM structure.
2684 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2685 * @thread Any but EMT(idCpu).
2686 */
2687GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2688{
2689 return GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2690}
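/* Editor's note: an illustrative, non-compiled sketch of a ring-0 caller kicking EMT 0
 * after queueing work for it; the queueing step itself is an assumption. */
#if 0 /* example only */
    /* ... queue the request for EMT 0 first ... */
    int rcExample = GVMMR0SchedWakeUp(pGVM, pVM, 0 /* idCpu */);
    Assert(rcExample == VINF_SUCCESS || rcExample == VINF_GVM_NOT_BLOCKED);
#endif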
2691
2692
2693/**
2694 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2695 * parameter and no used locking.
2696 *
2697 * @returns VBox status code.
2698 * @retval VINF_SUCCESS if successfully woken up.
2699 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2700 *
2701 * @param pVM The cross context VM structure.
2702 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2703 * @thread Any but EMT(idCpu).
2704 * @deprecated Don't use in new code if possible! Use the GVM variant.
2705 */
2706GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2707{
2708 GVMM_CHECK_SMAP_SETUP();
2709 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2710 PGVM pGVM;
2711 PGVMM pGVMM;
2712 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2713 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2714 if (RT_SUCCESS(rc))
2715 rc = GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, false /*fTakeUsedLock*/);
2716 return rc;
2717}
2718
2719
2720/**
2721 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2722 * the Virtual CPU if it's still busy executing guest code.
2723 *
2724 * @returns VBox status code.
2725 * @retval VINF_SUCCESS if poked successfully.
2726 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2727 *
2728 * @param pGVM The global (ring-0) VM structure.
2729 * @param pVCpu The cross context virtual CPU structure.
2730 */
2731DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
2732{
2733 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2734
2735 RTCPUID idHostCpu = pVCpu->idHostCpu;
2736 if ( idHostCpu == NIL_RTCPUID
2737 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2738 {
2739 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2740 return VINF_GVM_NOT_BUSY_IN_GC;
2741 }
2742
2743 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2744 RTMpPokeCpu(idHostCpu);
2745 return VINF_SUCCESS;
2746}
2747
2748
2749/**
2750 * Pokes an EMT if it's still busy running guest code.
2751 *
2752 * @returns VBox status code.
2753 * @retval VINF_SUCCESS if poked successfully.
2754 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2755 *
2756 * @param pGVM The global (ring-0) VM structure.
2757 * @param pVM The cross context VM structure.
2758 * @param idCpu The ID of the virtual CPU to poke.
2759 * @param fTakeUsedLock Take the used lock or not
2760 */
2761GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2762{
2763 /*
2764 * Validate input and take the UsedLock.
2765 */
2766 PGVMM pGVMM;
2767 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2768 if (RT_SUCCESS(rc))
2769 {
2770 if (idCpu < pGVM->cCpus)
2771#ifdef VBOX_BUGREF_9217
2772 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2773#else
2774 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2775#endif
2776 else
2777 rc = VERR_INVALID_CPU_ID;
2778
2779 if (fTakeUsedLock)
2780 {
2781 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2782 AssertRC(rc2);
2783 }
2784 }
2785
2786 LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2787 return rc;
2788}
2789
2790
2791/**
2792 * Pokes an EMT if it's still busy running guest code.
2793 *
2794 * @returns VBox status code.
2795 * @retval VINF_SUCCESS if poked successfully.
2796 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2797 *
2798 * @param pGVM The global (ring-0) VM structure.
2799 * @param pVM The cross context VM structure.
2800 * @param idCpu The ID of the virtual CPU to poke.
2801 */
2802GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2803{
2804 return GVMMR0SchedPokeEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2805}
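/* Editor's note: an illustrative, non-compiled sketch of nudging a VCPU: poke it if it
 * is still executing guest code, otherwise fall back to waking it; idCpu is an assumption. */
#if 0 /* example only */
    int rcExample = GVMMR0SchedPoke(pGVM, pVM, idCpu);
    if (rcExample == VINF_GVM_NOT_BUSY_IN_GC)
        rcExample = GVMMR0SchedWakeUp(pGVM, pVM, idCpu); /* not in guest code, so try a wake-up instead */
#endif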
2806
2807
2808/**
2809 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2810 * used locking.
2811 *
2812 * @returns VBox status code.
2813 * @retval VINF_SUCCESS if poked successfully.
2814 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2815 *
2816 * @param pVM The cross context VM structure.
2817 * @param idCpu The ID of the virtual CPU to poke.
2818 *
2819 * @deprecated Don't use in new code if possible! Use the GVM variant.
2820 */
2821GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2822{
2823 PGVM pGVM;
2824 PGVMM pGVMM;
2825 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2826 if (RT_SUCCESS(rc))
2827 {
2828 if (idCpu < pGVM->cCpus)
2829#ifdef VBOX_BUGREF_9217
2830 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2831#else
2832 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2833#endif
2834 else
2835 rc = VERR_INVALID_CPU_ID;
2836 }
2837 return rc;
2838}
2839
2840
2841/**
2842 * Wakes up a set of halted EMT threads so they can service pending request.
2843 *
2844 * @returns VBox status code, no informational stuff.
2845 *
2846 * @param pGVM The global (ring-0) VM structure.
2847 * @param pVM The cross context VM structure.
2848 * @param pSleepSet The set of sleepers to wake up.
2849 * @param pPokeSet The set of CPUs to poke.
2850 */
2851GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2852{
2853 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2854 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2855 GVMM_CHECK_SMAP_SETUP();
2856 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2857 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2858
2859 /*
2860 * Validate input and take the UsedLock.
2861 */
2862 PGVMM pGVMM;
2863 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /* fTakeUsedLock */);
2864 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2865 if (RT_SUCCESS(rc))
2866 {
2867 rc = VINF_SUCCESS;
2868 VMCPUID idCpu = pGVM->cCpus;
2869 while (idCpu-- > 0)
2870 {
2871 /* Don't try to poke or wake up ourselves. */
2872 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2873 continue;
2874
2875 /* just ignore errors for now. */
2876 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2877 {
2878 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2879 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2880 }
2881 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2882 {
2883#ifdef VBOX_BUGREF_9217
2884 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2885#else
2886 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2887#endif
2888 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2889 }
2890 }
2891
2892 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2893 AssertRC(rc2);
2894 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2895 }
2896
2897 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2898 return rc;
2899}
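/* Editor's note: an illustrative, non-compiled sketch of building the two CPU sets for a
 * combined kick: halted EMTs go into the sleep set, the rest into the poke set.  The
 * VMCPUSET_EMPTY/VMCPUSET_ADD macros come from VBox/vmm/vmcpuset.h (included above); the
 * selection policy shown here is an assumption. */
#if 0 /* example only */
    VMCPUSET SleepSet, PokeSet;
    VMCPUSET_EMPTY(&SleepSet);
    VMCPUSET_EMPTY(&PokeSet);
    for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
        if (pGVM->aCpus[idCpu].gvmm.s.u64HaltExpire)
            VMCPUSET_ADD(&SleepSet, idCpu);
        else
            VMCPUSET_ADD(&PokeSet, idCpu);
    int rcExample = GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &SleepSet, &PokeSet);
#endif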
2900
2901
2902/**
2903 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2904 *
2905 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2906 * @param pGVM The global (ring-0) VM structure.
2907 * @param pVM The cross context VM structure.
2908 * @param pReq Pointer to the request packet.
2909 */
2910GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2911{
2912 /*
2913 * Validate input and pass it on.
2914 */
2915 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2916 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2917
2918 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &pReq->SleepSet, &pReq->PokeSet);
2919}
2920
2921
2922
2923/**
2924 * Poll the scheduler to see if someone else should get a chance to run.
2925 *
2926 * This is a bit hackish and will not work too well if the machine is
2927 * under heavy load from non-VM processes.
2928 *
2929 * @returns VINF_SUCCESS if not yielded.
2930 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2931 * @param pGVM The global (ring-0) VM structure.
2932 * @param pVM The cross context VM structure.
2933 * @param idCpu The Virtual CPU ID of the calling EMT.
2934 * @param fYield Whether to yield or not.
2935 * This is for when we're spinning in the halt loop.
2936 * @thread EMT(idCpu).
2937 */
2938GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fYield)
2939{
2940 /*
2941 * Validate input.
2942 */
2943 PGVMM pGVMM;
2944 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2945 if (RT_SUCCESS(rc))
2946 {
2947 /*
2948 * We currently only implement helping with wake-ups (fYield = false), so don't
2949 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2950 */
2951 if (!fYield && pGVMM->fDoEarlyWakeUps)
2952 {
2953 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2954 pGVM->gvmm.s.StatsSched.cPollCalls++;
2955
2956 Assert(ASMGetFlags() & X86_EFL_IF);
2957 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2958
2959 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2960
2961 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2962 }
2963 /*
2964 * Not quite sure what we could do here...
2965 */
2966 else if (fYield)
2967 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2968 else
2969 rc = VINF_SUCCESS;
2970 }
2971
2972 LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2973 return rc;
2974}
2975
2976
2977#ifdef GVMM_SCHED_WITH_PPT
2978/**
2979 * Timer callback for the periodic preemption timer.
2980 *
2981 * @param pTimer The timer handle.
2982 * @param pvUser Pointer to the per cpu structure.
2983 * @param iTick The current tick.
2984 */
2985static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2986{
2987 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2988 NOREF(pTimer); NOREF(iTick);
2989
2990 /*
2991 * Termination check
2992 */
2993 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2994 return;
2995
2996 /*
2997 * Do the housekeeping.
2998 */
2999 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3000
3001 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
3002 {
3003 /*
3004 * Historicize the max frequency.
3005 */
3006 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
3007 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
3008 pCpu->Ppt.iTickHistorization = 0;
3009 pCpu->Ppt.uDesiredHz = 0;
3010
3011 /*
3012 * Check if the current timer frequency still matches the history max.
3013 */
3014 uint32_t uHistMaxHz = 0;
3015 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
3016 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
3017 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
3018 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
3019 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3020 else if (uHistMaxHz)
3021 {
3022 /*
3023 * Reprogram it.
3024 */
3025 pCpu->Ppt.cChanges++;
3026 pCpu->Ppt.iTickHistorization = 0;
3027 pCpu->Ppt.uTimerHz = uHistMaxHz;
3028 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
3029 pCpu->Ppt.cNsInterval = cNsInterval;
3030 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3031 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3032 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3033 / cNsInterval;
3034 else
3035 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3036 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3037
3038 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
3039 RTTimerChangeInterval(pTimer, cNsInterval);
3040 }
3041 else
3042 {
3043 /*
3044 * Stop it.
3045 */
3046 pCpu->Ppt.fStarted = false;
3047 pCpu->Ppt.uTimerHz = 0;
3048 pCpu->Ppt.cNsInterval = 0;
3049 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3050
3051 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
3052 RTTimerStop(pTimer);
3053 }
3054 }
3055 else
3056 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3057}
3058#endif /* GVMM_SCHED_WITH_PPT */
3059
3060
3061/**
3062 * Updates the periodic preemption timer for the calling CPU.
3063 *
3064 * The caller must have disabled preemption!
3065 * The caller must check that the host can do high resolution timers.
3066 *
3067 * @param pVM The cross context VM structure.
3068 * @param idHostCpu The current host CPU id.
3069 * @param uHz The desired frequency.
3070 */
3071GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
3072{
3073 NOREF(pVM);
3074#ifdef GVMM_SCHED_WITH_PPT
3075 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3076 Assert(RTTimerCanDoHighResolution());
3077
3078 /*
3079 * Resolve the per CPU data.
3080 */
3081 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
3082 PGVMM pGVMM = g_pGVMM;
3083 if ( !VALID_PTR(pGVMM)
3084 || pGVMM->u32Magic != GVMM_MAGIC)
3085 return;
3086 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
3087 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
3088 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
3089 && pCpu->idCpu == idHostCpu,
3090 ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
3091
3092 /*
3093 * Check whether we need to do anything about the timer.
3094 * We have to be a little bit careful since we might be racing the timer
3095 * callback here.
3096 */
3097 if (uHz > 16384)
3098 uHz = 16384; /** @todo add a query method for this! */
3099 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
3100 && uHz >= pCpu->Ppt.uMinHz
3101 && !pCpu->Ppt.fStarting /* solaris paranoia */))
3102 {
3103 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3104
3105 pCpu->Ppt.uDesiredHz = uHz;
3106 uint32_t cNsInterval = 0;
3107 if (!pCpu->Ppt.fStarted)
3108 {
3109 pCpu->Ppt.cStarts++;
3110 pCpu->Ppt.fStarted = true;
3111 pCpu->Ppt.fStarting = true;
3112 pCpu->Ppt.iTickHistorization = 0;
3113 pCpu->Ppt.uTimerHz = uHz;
3114 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
3115 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3116 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3117 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3118 / cNsInterval;
3119 else
3120 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3121 }
3122
3123 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3124
3125 if (cNsInterval)
3126 {
3127 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
3128 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
3129 AssertRC(rc);
3130
3131 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3132 if (RT_FAILURE(rc))
3133 pCpu->Ppt.fStarted = false;
3134 pCpu->Ppt.fStarting = false;
3135 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3136 }
3137 }
3138#else /* !GVMM_SCHED_WITH_PPT */
3139 NOREF(idHostCpu); NOREF(uHz);
3140#endif /* !GVMM_SCHED_WITH_PPT */
3141}
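/* Editor's note: a small worked example of the conversion above.  A hint of uHz = 4000
 * gives cNsInterval = RT_NS_1SEC / 4000 = 250000 ns (250 us), while the cap limits uHz to
 * 16384 (an interval of roughly 61 us).  cTicksHistoriziationInterval is then chosen so
 * the aHzHistory ring advances about once per GVMMHOSTCPU_PPT_HIST_INTERVAL_NS instead of
 * on every timer tick. */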
3142
3143
3144/**
3145 * Retrieves the GVMM statistics visible to the caller.
3146 *
3147 * @returns VBox status code.
3148 *
3149 * @param pStats Where to put the statistics.
3150 * @param pSession The current session.
3151 * @param pGVM The GVM to obtain statistics for. Optional.
3152 * @param pVM The VM structure corresponding to @a pGVM.
3153 */
3154GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
3155{
3156 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
3157
3158 /*
3159 * Validate input.
3160 */
3161 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3162 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3163 pStats->cVMs = 0; /* (crash before taking the sem...) */
3164
3165 /*
3166 * Take the lock and get the VM statistics.
3167 */
3168 PGVMM pGVMM;
3169 if (pGVM)
3170 {
3171 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
3172 if (RT_FAILURE(rc))
3173 return rc;
3174 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3175 }
3176 else
3177 {
3178 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3179 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
3180
3181 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3182 AssertRCReturn(rc, rc);
3183 }
3184
3185 /*
3186 * Enumerate the VMs and add the ones visible to the statistics.
3187 */
3188 pStats->cVMs = 0;
3189 pStats->cEMTs = 0;
3190 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3191
3192 for (unsigned i = pGVMM->iUsedHead;
3193 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3194 i = pGVMM->aHandles[i].iNext)
3195 {
3196 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3197 void *pvObj = pGVMM->aHandles[i].pvObj;
3198 if ( VALID_PTR(pvObj)
3199 && VALID_PTR(pOtherGVM)
3200 && pOtherGVM->u32Magic == GVM_MAGIC
3201 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3202 {
3203 pStats->cVMs++;
3204 pStats->cEMTs += pOtherGVM->cCpus;
3205
3206 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3207 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3208 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3209 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3210 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3211
3212 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3213 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3214 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3215
3216 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3217 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3218
3219 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3220 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3221 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3222 }
3223 }
3224
3225 /*
3226 * Copy out the per host CPU statistics.
3227 */
3228 uint32_t iDstCpu = 0;
3229 uint32_t cSrcCpus = pGVMM->cHostCpus;
3230 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3231 {
3232 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3233 {
3234 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3235 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3236#ifdef GVMM_SCHED_WITH_PPT
3237 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3238 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3239 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3240 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3241#else
3242 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3243 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3244 pStats->aHostCpus[iDstCpu].cChanges = 0;
3245 pStats->aHostCpus[iDstCpu].cStarts = 0;
3246#endif
3247 iDstCpu++;
3248 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3249 break;
3250 }
3251 }
3252 pStats->cHostCpus = iDstCpu;
3253
3254 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3255
3256 return VINF_SUCCESS;
3257}
3258
3259
3260/**
3261 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3262 *
3263 * @returns see GVMMR0QueryStatistics.
3264 * @param pGVM The global (ring-0) VM structure. Optional.
3265 * @param pVM The cross context VM structure. Optional.
3266 * @param pReq Pointer to the request packet.
3267 * @param pSession The current session.
3268 */
3269GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3270{
3271 /*
3272 * Validate input and pass it on.
3273 */
3274 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3275 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3276 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3277
3278 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM, pVM);
3279}
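/* Editor's note: an illustrative, non-compiled sketch of preparing the request packet
 * that the VMMR0 dispatcher hands to GVMMR0QueryStatisticsReq.  The SUPVMMR0REQHDR_MAGIC
 * constant and the VMMR0_DO_GVMM_QUERY_STATISTICS operation name follow the usual VMMR0
 * request conventions and are assumptions here. */
#if 0 /* example only */
    GVMMQUERYSTATISTICSSREQ Req;
    RT_ZERO(Req);
    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
    Req.Hdr.cbReq    = sizeof(Req);
    Req.pSession     = pSession;
    int rcExample = GVMMR0QueryStatisticsReq(pGVM, pVM, &Req, pSession);
    if (RT_SUCCESS(rcExample))
        SUPR0Printf("GVMM: %u VMs, %u EMTs\n", Req.Stats.cVMs, Req.Stats.cEMTs);
#endif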
3280
3281
3282/**
3283 * Resets the specified GVMM statistics.
3284 *
3285 * @returns VBox status code.
3286 *
3287 * @param pStats Which statistics to reset; non-zero fields indicate which to reset.
3288 * @param pSession The current session.
3289 * @param pGVM The GVM to reset statistics for. Optional.
3290 * @param pVM The VM structure corresponding to @a pGVM.
3291 */
3292GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
3293{
3294 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
3295
3296 /*
3297 * Validate input.
3298 */
3299 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3300 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3301
3302 /*
3303 * Take the lock and get the VM statistics.
3304 */
3305 PGVMM pGVMM;
3306 if (pGVM)
3307 {
3308 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
3309 if (RT_FAILURE(rc))
3310 return rc;
3311# define MAYBE_RESET_FIELD(field) \
3312 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3313 MAYBE_RESET_FIELD(cHaltCalls);
3314 MAYBE_RESET_FIELD(cHaltBlocking);
3315 MAYBE_RESET_FIELD(cHaltTimeouts);
3316 MAYBE_RESET_FIELD(cHaltNotBlocking);
3317 MAYBE_RESET_FIELD(cHaltWakeUps);
3318 MAYBE_RESET_FIELD(cWakeUpCalls);
3319 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3320 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3321 MAYBE_RESET_FIELD(cPokeCalls);
3322 MAYBE_RESET_FIELD(cPokeNotBusy);
3323 MAYBE_RESET_FIELD(cPollCalls);
3324 MAYBE_RESET_FIELD(cPollHalts);
3325 MAYBE_RESET_FIELD(cPollWakeUps);
3326# undef MAYBE_RESET_FIELD
3327 }
3328 else
3329 {
3330 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3331
3332 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3333 AssertRCReturn(rc, rc);
3334 }
3335
3336 /*
3337 * Enumerate the VMs and reset the statistics of the ones visible to the caller.
3338 */
3339 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3340 {
3341 for (unsigned i = pGVMM->iUsedHead;
3342 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3343 i = pGVMM->aHandles[i].iNext)
3344 {
3345 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3346 void *pvObj = pGVMM->aHandles[i].pvObj;
3347 if ( VALID_PTR(pvObj)
3348 && VALID_PTR(pOtherGVM)
3349 && pOtherGVM->u32Magic == GVM_MAGIC
3350 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3351 {
3352# define MAYBE_RESET_FIELD(field) \
3353 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3354 MAYBE_RESET_FIELD(cHaltCalls);
3355 MAYBE_RESET_FIELD(cHaltBlocking);
3356 MAYBE_RESET_FIELD(cHaltTimeouts);
3357 MAYBE_RESET_FIELD(cHaltNotBlocking);
3358 MAYBE_RESET_FIELD(cHaltWakeUps);
3359 MAYBE_RESET_FIELD(cWakeUpCalls);
3360 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3361 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3362 MAYBE_RESET_FIELD(cPokeCalls);
3363 MAYBE_RESET_FIELD(cPokeNotBusy);
3364 MAYBE_RESET_FIELD(cPollCalls);
3365 MAYBE_RESET_FIELD(cPollHalts);
3366 MAYBE_RESET_FIELD(cPollWakeUps);
3367# undef MAYBE_RESET_FIELD
3368 }
3369 }
3370 }
3371
3372 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3373
3374 return VINF_SUCCESS;
3375}
3376
3377
3378/**
3379 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3380 *
3381 * @returns see GVMMR0ResetStatistics.
3382 * @param pGVM The global (ring-0) VM structure. Optional.
3383 * @param pVM The cross context VM structure. Optional.
3384 * @param pReq Pointer to the request packet.
3385 * @param pSession The current session.
3386 */
3387GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PVM pVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3388{
3389 /*
3390 * Validate input and pass it on.
3391 */
3392 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3393 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3394 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3395
3396 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM, pVM);
3397}
3398