VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 70606

Last change on this file since 70606 was 69111, checked in by vboxsync, 7 years ago

(C) year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 104.0 KB
1/* $Id: GVMMR0.cpp 69111 2017-10-17 14:26:02Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
 24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
 27 * The GVMM will create a ring-0 object for each VM when it is registered; this
 28 * is both for session cleanup purposes and for having a point where it is
 29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
 34 * On systems that sport a high resolution kernel timer API, we use per-cpu
 35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
 36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
 41 * The TMCalcHostTimerFrequency() part of this takes the max
 42 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
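/* A minimal, illustrative sketch of the frequency selection described above: the
 * periodic preemption timer roughly tracks the maximum of the recent uDesiredHz
 * history and stays stopped while that maximum remains below uMinHz. The field
 * names match GVMMHOSTCPU::Ppt further down; the helper itself is hypothetical,
 * the real logic lives in gvmmR0SchedPeriodicPreemptionTimerCallback() and
 * GVMMR0SchedUpdatePeriodicPreemptionTimer().
 *
 *     static uint32_t gvmmR0SketchPickTimerHz(PGVMMHOSTCPU pCpu)
 *     {
 *         uint32_t uHz = 0;
 *         for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
 *             if (pCpu->Ppt.aHzHistory[i] > uHz)
 *                 uHz = pCpu->Ppt.aHzHistory[i]; // max desired Hz over the last ~160 ms
 *         return uHz > pCpu->Ppt.uMinHz ? uHz : 0; // 0 = leave the timer stopped
 *     }
 */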
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmcpuset.h>
59#include <VBox/vmm/vmm.h>
60#include <VBox/param.h>
61#include <VBox/err.h>
62
63#include <iprt/asm.h>
64#include <iprt/asm-amd64-x86.h>
65#include <iprt/critsect.h>
66#include <iprt/mem.h>
67#include <iprt/semaphore.h>
68#include <iprt/time.h>
69#include <VBox/log.h>
70#include <iprt/thread.h>
71#include <iprt/process.h>
72#include <iprt/param.h>
73#include <iprt/string.h>
74#include <iprt/assert.h>
75#include <iprt/mem.h>
76#include <iprt/memobj.h>
77#include <iprt/mp.h>
78#include <iprt/cpuset.h>
79#include <iprt/spinlock.h>
80#include <iprt/timer.h>
81
82#include "dtrace/VBoxVMM.h"
83
84
85/*********************************************************************************************************************************
86* Defined Constants And Macros *
87*********************************************************************************************************************************/
88#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
89/** Define this to enable the periodic preemption timer. */
90# define GVMM_SCHED_WITH_PPT
91#endif
92
93
94/** @def GVMM_CHECK_SMAP_SETUP
95 * SMAP check setup. */
96/** @def GVMM_CHECK_SMAP_CHECK
97 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
98 * it will be logged and @a a_BadExpr is executed. */
99/** @def GVMM_CHECK_SMAP_CHECK2
100 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
101 * be logged, written to the VMs assertion text buffer, and @a a_BadExpr is
102 * executed. */
103#if defined(VBOX_STRICT) || 1
104# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
105# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
106 do { \
107 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
108 { \
109 RTCCUINTREG fEflCheck = ASMGetFlags(); \
110 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
111 { /* likely */ } \
112 else \
113 { \
114 SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
115 a_BadExpr; \
116 } \
117 } \
118 } while (0)
119# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \
120 do { \
121 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
122 { \
123 RTCCUINTREG fEflCheck = ASMGetFlags(); \
124 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
125 { /* likely */ } \
126 else \
127 { \
128 SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
129 a_BadExpr; \
130 } \
131 } \
132 } while (0)
133#else
134# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
135# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
136# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) NOREF(fKernelFeatures)
137#endif
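/* Illustrative usage of the SMAP check macros above in a ring-0 entry point;
 * the function below is hypothetical and only shows the intended pattern:
 * set up once, then re-check EFLAGS.AC after anything that may have clobbered it.
 *
 *     GVMMR0DECL(int) gvmmR0SketchEntryPoint(PVM pVM)
 *     {
 *         GVMM_CHECK_SMAP_SETUP();
 *         GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);    // on entry
 *         // ... work that may call out of the module ...
 *         GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);    // before returning
 *         return VINF_SUCCESS;
 *     }
 */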
138
139
140
141/*********************************************************************************************************************************
142* Structures and Typedefs *
143*********************************************************************************************************************************/
144
145/**
146 * Global VM handle.
147 */
148typedef struct GVMHANDLE
149{
150 /** The index of the next handle in the list (free or used). (0 is nil.) */
151 uint16_t volatile iNext;
152 /** Our own index / handle value. */
153 uint16_t iSelf;
154 /** The process ID of the handle owner.
155 * This is used for access checks. */
156 RTPROCESS ProcId;
157 /** The pointer to the ring-0 only (aka global) VM structure. */
158 PGVM pGVM;
159 /** The ring-0 mapping of the shared VM instance data. */
160 PVM pVM;
161 /** The virtual machine object. */
162 void *pvObj;
163 /** The session this VM is associated with. */
164 PSUPDRVSESSION pSession;
165 /** The ring-0 handle of the EMT0 thread.
166 * This is used for ownership checks as well as looking up a VM handle by thread
167 * at times like assertions. */
168 RTNATIVETHREAD hEMT0;
169} GVMHANDLE;
170/** Pointer to a global VM handle. */
171typedef GVMHANDLE *PGVMHANDLE;
172
173/** Number of GVM handles (including the NIL handle). */
174#if HC_ARCH_BITS == 64
175# define GVMM_MAX_HANDLES 8192
176#else
177# define GVMM_MAX_HANDLES 128
178#endif
179
180/**
181 * Per host CPU GVMM data.
182 */
183typedef struct GVMMHOSTCPU
184{
185 /** Magic number (GVMMHOSTCPU_MAGIC). */
186 uint32_t volatile u32Magic;
187 /** The CPU ID. */
188 RTCPUID idCpu;
189 /** The CPU set index. */
190 uint32_t idxCpuSet;
191
192#ifdef GVMM_SCHED_WITH_PPT
193 /** Periodic preemption timer data. */
194 struct
195 {
196 /** The handle to the periodic preemption timer. */
197 PRTTIMER pTimer;
198 /** Spinlock protecting the data below. */
199 RTSPINLOCK hSpinlock;
 200 /** The smallest Hz that we need to care about. (static) */
201 uint32_t uMinHz;
202 /** The number of ticks between each historization. */
203 uint32_t cTicksHistoriziationInterval;
204 /** The current historization tick (counting up to
205 * cTicksHistoriziationInterval and then resetting). */
206 uint32_t iTickHistorization;
207 /** The current timer interval. This is set to 0 when inactive. */
208 uint32_t cNsInterval;
209 /** The current timer frequency. This is set to 0 when inactive. */
210 uint32_t uTimerHz;
211 /** The current max frequency reported by the EMTs.
 212 * This gets historicized and reset by the timer callback. This is
213 * read without holding the spinlock, so needs atomic updating. */
214 uint32_t volatile uDesiredHz;
215 /** Whether the timer was started or not. */
216 bool volatile fStarted;
 217 /** Set if we're starting the timer. */
218 bool volatile fStarting;
219 /** The index of the next history entry (mod it). */
220 uint32_t iHzHistory;
221 /** Historicized uDesiredHz values. The array wraps around, new entries
222 * are added at iHzHistory. This is updated approximately every
223 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
224 uint32_t aHzHistory[8];
225 /** Statistics counter for recording the number of interval changes. */
226 uint32_t cChanges;
227 /** Statistics counter for recording the number of timer starts. */
228 uint32_t cStarts;
229 } Ppt;
230#endif /* GVMM_SCHED_WITH_PPT */
231
232} GVMMHOSTCPU;
233/** Pointer to the per host CPU GVMM data. */
234typedef GVMMHOSTCPU *PGVMMHOSTCPU;
235/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
236#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
 237 /** The interval each history entry should cover (approximately), given in
238 * nanoseconds. */
239#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
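/* For reference, the ~160 ms window mentioned in the file header is simply
 *   RT_ELEMENTS(aHzHistory) * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
 *     = 8 * 20,000,000 ns = 160,000,000 ns = 160 ms. */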
240
241
242/**
243 * The GVMM instance data.
244 */
245typedef struct GVMM
246{
247 /** Eyecatcher / magic. */
248 uint32_t u32Magic;
249 /** The index of the head of the free handle chain. (0 is nil.) */
250 uint16_t volatile iFreeHead;
251 /** The index of the head of the active handle chain. (0 is nil.) */
252 uint16_t volatile iUsedHead;
253 /** The number of VMs. */
254 uint16_t volatile cVMs;
255 /** Alignment padding. */
256 uint16_t u16Reserved;
257 /** The number of EMTs. */
258 uint32_t volatile cEMTs;
259 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
260 uint32_t volatile cHaltedEMTs;
261 /** Mini lock for restricting early wake-ups to one thread. */
262 bool volatile fDoingEarlyWakeUps;
263 bool afPadding[3]; /**< explicit alignment padding. */
264 /** When the next halted or sleeping EMT will wake up.
265 * This is set to 0 when it needs recalculating and to UINT64_MAX when
266 * there are no halted or sleeping EMTs in the GVMM. */
267 uint64_t uNsNextEmtWakeup;
268 /** The lock used to serialize VM creation, destruction and associated events that
 269 * aren't performance critical. Owners may acquire the list lock. */
270 RTCRITSECT CreateDestroyLock;
271 /** The lock used to serialize used list updates and accesses.
272 * This indirectly includes scheduling since the scheduler will have to walk the
 273 * used list to examine running VMs. Owners may not acquire any other locks. */
274 RTCRITSECTRW UsedLock;
275 /** The handle array.
276 * The size of this array defines the maximum number of currently running VMs.
277 * The first entry is unused as it represents the NIL handle. */
278 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
279
280 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
281 * The number of EMTs that means we no longer consider ourselves alone on a
282 * CPU/Core.
283 */
284 uint32_t cEMTsMeansCompany;
 285 /** @gcfgm{/GVMM/MinSleepAlone, 32-bit, 0, 100000000, 750000, ns}
 286 * The minimum sleep time for when we're alone, in nanoseconds.
287 */
288 uint32_t nsMinSleepAlone;
 289 /** @gcfgm{/GVMM/MinSleepCompany, 32-bit, 0, 100000000, 15000, ns}
 290 * The minimum sleep time for when we've got company, in nanoseconds.
291 */
292 uint32_t nsMinSleepCompany;
293 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
 294 * The limit for the first round of early wake-ups, given in nanoseconds.
295 */
296 uint32_t nsEarlyWakeUp1;
297 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
 298 * The limit for the second round of early wake-ups, given in nanoseconds.
299 */
300 uint32_t nsEarlyWakeUp2;
301
302 /** Set if we're doing early wake-ups.
303 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
304 bool volatile fDoEarlyWakeUps;
305
306 /** The number of entries in the host CPU array (aHostCpus). */
307 uint32_t cHostCpus;
308 /** Per host CPU data (variable length). */
309 GVMMHOSTCPU aHostCpus[1];
310} GVMM;
311AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
312AssertCompileMemberAlignment(GVMM, UsedLock, 8);
313AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
314/** Pointer to the GVMM instance data. */
315typedef GVMM *PGVMM;
316
317/** The GVMM::u32Magic value (Charlie Haden). */
318#define GVMM_MAGIC UINT32_C(0x19370806)
319
320
321
322/*********************************************************************************************************************************
323* Global Variables *
324*********************************************************************************************************************************/
325/** Pointer to the GVMM instance data.
326 * (Just my general dislike for global variables.) */
327static PGVMM g_pGVMM = NULL;
328
329/** Macro for obtaining and validating the g_pGVMM pointer.
330 * On failure it will return from the invoking function with the specified return value.
331 *
332 * @param pGVMM The name of the pGVMM variable.
333 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
334 * status codes.
335 */
336#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
337 do { \
338 (pGVMM) = g_pGVMM;\
339 AssertPtrReturn((pGVMM), (rc)); \
340 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
341 } while (0)
342
343/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
344 * On failure it will return from the invoking function.
345 *
346 * @param pGVMM The name of the pGVMM variable.
347 */
348#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
349 do { \
350 (pGVMM) = g_pGVMM;\
351 AssertPtrReturnVoid((pGVMM)); \
352 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
353 } while (0)
354
355
356/*********************************************************************************************************************************
357* Internal Functions *
358*********************************************************************************************************************************/
359static void gvmmR0InitPerVMData(PGVM pGVM);
360static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
361static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock);
362static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM);
363
364#ifdef GVMM_SCHED_WITH_PPT
365static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
366#endif
367
368
369/**
370 * Initializes the GVMM.
371 *
372 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
373 *
374 * @returns VBox status code.
375 */
376GVMMR0DECL(int) GVMMR0Init(void)
377{
378 LogFlow(("GVMMR0Init:\n"));
379
380 /*
381 * Allocate and initialize the instance data.
382 */
383 uint32_t cHostCpus = RTMpGetArraySize();
384 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
385
386 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF(GVMM, aHostCpus[cHostCpus]));
387 if (!pGVMM)
388 return VERR_NO_MEMORY;
389 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
390 "GVMM-CreateDestroyLock");
391 if (RT_SUCCESS(rc))
392 {
393 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
394 if (RT_SUCCESS(rc))
395 {
396 pGVMM->u32Magic = GVMM_MAGIC;
397 pGVMM->iUsedHead = 0;
398 pGVMM->iFreeHead = 1;
399
400 /* the nil handle */
401 pGVMM->aHandles[0].iSelf = 0;
402 pGVMM->aHandles[0].iNext = 0;
403
404 /* the tail */
405 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
406 pGVMM->aHandles[i].iSelf = i;
407 pGVMM->aHandles[i].iNext = 0; /* nil */
408
409 /* the rest */
410 while (i-- > 1)
411 {
412 pGVMM->aHandles[i].iSelf = i;
413 pGVMM->aHandles[i].iNext = i + 1;
414 }
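/* After the loop above the handle array forms a simple singly linked free list:
 *   iFreeHead = 1 -> aHandles[1].iNext = 2 -> ... -> aHandles[last].iNext = 0 (nil),
 * with aHandles[0] permanently reserved as the NIL handle. */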
415
416 /* The default configuration values. */
417 uint32_t cNsResolution = RTSemEventMultiGetResolution();
 418 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
419 if (cNsResolution >= 5*RT_NS_100US)
420 {
421 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
422 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
423 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
424 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
425 }
426 else if (cNsResolution > RT_NS_100US)
427 {
428 pGVMM->nsMinSleepAlone = cNsResolution / 2;
429 pGVMM->nsMinSleepCompany = cNsResolution / 4;
430 pGVMM->nsEarlyWakeUp1 = 0;
431 pGVMM->nsEarlyWakeUp2 = 0;
432 }
433 else
434 {
435 pGVMM->nsMinSleepAlone = 2000;
436 pGVMM->nsMinSleepCompany = 2000;
437 pGVMM->nsEarlyWakeUp1 = 0;
438 pGVMM->nsEarlyWakeUp2 = 0;
439 }
440 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
441
442 /* The host CPU data. */
443 pGVMM->cHostCpus = cHostCpus;
444 uint32_t iCpu = cHostCpus;
445 RTCPUSET PossibleSet;
446 RTMpGetSet(&PossibleSet);
447 while (iCpu-- > 0)
448 {
449 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
450#ifdef GVMM_SCHED_WITH_PPT
451 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
452 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
453 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
454 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
455 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
456 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
457 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
458 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
459 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
460 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
461 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
462 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
463#endif
464
465 if (RTCpuSetIsMember(&PossibleSet, iCpu))
466 {
467 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
468 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
469
470#ifdef GVMM_SCHED_WITH_PPT
471 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
472 50*1000*1000 /* whatever */,
473 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
474 gvmmR0SchedPeriodicPreemptionTimerCallback,
475 &pGVMM->aHostCpus[iCpu]);
476 if (RT_SUCCESS(rc))
477 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
478 if (RT_FAILURE(rc))
479 {
480 while (iCpu < cHostCpus)
481 {
482 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
483 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
484 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
485 iCpu++;
486 }
487 break;
488 }
489#endif
490 }
491 else
492 {
493 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
494 pGVMM->aHostCpus[iCpu].u32Magic = 0;
495 }
496 }
497 if (RT_SUCCESS(rc))
498 {
499 g_pGVMM = pGVMM;
500 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
501 return VINF_SUCCESS;
502 }
503
504 /* bail out. */
505 RTCritSectRwDelete(&pGVMM->UsedLock);
506 }
507 RTCritSectDelete(&pGVMM->CreateDestroyLock);
508 }
509
510 RTMemFree(pGVMM);
511 return rc;
512}
513
514
515/**
 516 * Terminates the GVMM.
517 *
518 * This is called while owning the loader semaphore (see supdrvLdrFree()).
519 * And unless something is wrong, there should be absolutely no VMs
520 * registered at this point.
521 */
522GVMMR0DECL(void) GVMMR0Term(void)
523{
524 LogFlow(("GVMMR0Term:\n"));
525
526 PGVMM pGVMM = g_pGVMM;
527 g_pGVMM = NULL;
528 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
529 {
530 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
531 return;
532 }
533
534 /*
535 * First of all, stop all active timers.
536 */
537 uint32_t cActiveTimers = 0;
538 uint32_t iCpu = pGVMM->cHostCpus;
539 while (iCpu-- > 0)
540 {
541 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
542#ifdef GVMM_SCHED_WITH_PPT
543 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
544 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
545 cActiveTimers++;
546#endif
547 }
548 if (cActiveTimers)
549 RTThreadSleep(1); /* fudge */
550
551 /*
 552 * Invalidate the instance data and free resources.
553 */
554 pGVMM->u32Magic = ~GVMM_MAGIC;
555 RTCritSectRwDelete(&pGVMM->UsedLock);
556 RTCritSectDelete(&pGVMM->CreateDestroyLock);
557
558 pGVMM->iFreeHead = 0;
559 if (pGVMM->iUsedHead)
560 {
561 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
562 pGVMM->iUsedHead = 0;
563 }
564
565#ifdef GVMM_SCHED_WITH_PPT
566 iCpu = pGVMM->cHostCpus;
567 while (iCpu-- > 0)
568 {
569 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
570 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
571 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
572 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
573 }
574#endif
575
576 RTMemFree(pGVMM);
577}
578
579
580/**
581 * A quick hack for setting global config values.
582 *
583 * @returns VBox status code.
584 *
585 * @param pSession The session handle. Used for authentication.
586 * @param pszName The variable name.
587 * @param u64Value The new value.
588 */
589GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
590{
591 /*
592 * Validate input.
593 */
594 PGVMM pGVMM;
595 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
596 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
597 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
598
599 /*
600 * String switch time!
601 */
602 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
603 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
604 int rc = VINF_SUCCESS;
605 pszName += sizeof("/GVMM/") - 1;
606 if (!strcmp(pszName, "cEMTsMeansCompany"))
607 {
608 if (u64Value <= UINT32_MAX)
609 pGVMM->cEMTsMeansCompany = u64Value;
610 else
611 rc = VERR_OUT_OF_RANGE;
612 }
613 else if (!strcmp(pszName, "MinSleepAlone"))
614 {
615 if (u64Value <= RT_NS_100MS)
616 pGVMM->nsMinSleepAlone = u64Value;
617 else
618 rc = VERR_OUT_OF_RANGE;
619 }
620 else if (!strcmp(pszName, "MinSleepCompany"))
621 {
622 if (u64Value <= RT_NS_100MS)
623 pGVMM->nsMinSleepCompany = u64Value;
624 else
625 rc = VERR_OUT_OF_RANGE;
626 }
627 else if (!strcmp(pszName, "EarlyWakeUp1"))
628 {
629 if (u64Value <= RT_NS_100MS)
630 {
631 pGVMM->nsEarlyWakeUp1 = u64Value;
632 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
633 }
634 else
635 rc = VERR_OUT_OF_RANGE;
636 }
637 else if (!strcmp(pszName, "EarlyWakeUp2"))
638 {
639 if (u64Value <= RT_NS_100MS)
640 {
641 pGVMM->nsEarlyWakeUp2 = u64Value;
642 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
643 }
644 else
645 rc = VERR_OUT_OF_RANGE;
646 }
647 else
648 rc = VERR_CFGM_VALUE_NOT_FOUND;
649 return rc;
650}
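/* Illustrative only: how a caller holding a valid session might tweak one of the
 * scheduler knobs handled above (value in nanoseconds, range checked against
 * RT_NS_100MS by the code above):
 *
 *     int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000);
 *     AssertRC(rc);
 */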
651
652
653/**
654 * A quick hack for getting global config values.
655 *
656 * @returns VBox status code.
657 *
658 * @param pSession The session handle. Used for authentication.
659 * @param pszName The variable name.
660 * @param pu64Value Where to return the value.
661 */
662GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
663{
664 /*
665 * Validate input.
666 */
667 PGVMM pGVMM;
668 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
669 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
670 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
671 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
672
673 /*
674 * String switch time!
675 */
676 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
677 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
678 int rc = VINF_SUCCESS;
679 pszName += sizeof("/GVMM/") - 1;
680 if (!strcmp(pszName, "cEMTsMeansCompany"))
681 *pu64Value = pGVMM->cEMTsMeansCompany;
682 else if (!strcmp(pszName, "MinSleepAlone"))
683 *pu64Value = pGVMM->nsMinSleepAlone;
684 else if (!strcmp(pszName, "MinSleepCompany"))
685 *pu64Value = pGVMM->nsMinSleepCompany;
686 else if (!strcmp(pszName, "EarlyWakeUp1"))
687 *pu64Value = pGVMM->nsEarlyWakeUp1;
688 else if (!strcmp(pszName, "EarlyWakeUp2"))
689 *pu64Value = pGVMM->nsEarlyWakeUp2;
690 else
691 rc = VERR_CFGM_VALUE_NOT_FOUND;
692 return rc;
693}
694
695
696/**
697 * Acquire the 'used' lock in shared mode.
698 *
699 * This prevents destruction of the VM while we're in ring-0.
700 *
701 * @returns IPRT status code, see RTSemFastMutexRequest.
702 * @param a_pGVMM The GVMM instance data.
703 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
704 */
705#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
706
707/**
 708 * Release the 'used' lock when owning it in shared mode.
709 *
710 * @returns IPRT status code, see RTSemFastMutexRequest.
711 * @param a_pGVMM The GVMM instance data.
712 * @sa GVMMR0_USED_SHARED_LOCK
713 */
714#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
715
716/**
717 * Acquire the 'used' lock in exclusive mode.
718 *
719 * Only use this function when making changes to the used list.
720 *
721 * @returns IPRT status code, see RTSemFastMutexRequest.
722 * @param a_pGVMM The GVMM instance data.
723 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
724 */
725#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
726
727/**
728 * Release the 'used' lock when owning it in exclusive mode.
729 *
730 * @returns IPRT status code, see RTSemFastMutexRelease.
731 * @param a_pGVMM The GVMM instance data.
732 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
733 */
734#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
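/* The macros above pair up as in this illustrative fragment: shared mode for
 * walking the used list, exclusive mode for modifying it.
 *
 *     int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
 *     AssertRCReturn(rc, rc);
 *     // ... walk pGVMM->aHandles starting at iUsedHead ...
 *     GVMMR0_USED_SHARED_UNLOCK(pGVMM);
 */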
735
736
737/**
738 * Try acquire the 'create & destroy' lock.
739 *
740 * @returns IPRT status code, see RTSemFastMutexRequest.
741 * @param pGVMM The GVMM instance data.
742 */
743DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
744{
745 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
746 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
747 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
748 return rc;
749}
750
751
752/**
753 * Release the 'create & destroy' lock.
754 *
755 * @returns IPRT status code, see RTSemFastMutexRequest.
756 * @param pGVMM The GVMM instance data.
757 */
758DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
759{
760 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
761 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
762 AssertRC(rc);
763 return rc;
764}
765
766
767/**
768 * Request wrapper for the GVMMR0CreateVM API.
769 *
770 * @returns VBox status code.
771 * @param pReq The request buffer.
772 * @param pSession The session handle. The VM will be associated with this.
773 */
774GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
775{
776 /*
777 * Validate the request.
778 */
779 if (!VALID_PTR(pReq))
780 return VERR_INVALID_POINTER;
781 if (pReq->Hdr.cbReq != sizeof(*pReq))
782 return VERR_INVALID_PARAMETER;
783 if (pReq->pSession != pSession)
784 return VERR_INVALID_POINTER;
785
786 /*
787 * Execute it.
788 */
789 PVM pVM;
790 pReq->pVMR0 = NULL;
791 pReq->pVMR3 = NIL_RTR3PTR;
792 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pVM);
793 if (RT_SUCCESS(rc))
794 {
795 pReq->pVMR0 = pVM;
796 pReq->pVMR3 = pVM->pVMR3;
797 }
798 return rc;
799}
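/* A sketch of how ring-3 might fill the request before passing it down; the field
 * names follow GVMMCREATEVMREQ as used above, the exact ioctl plumbing is omitted
 * and the values are examples only:
 *
 *     GVMMCREATEVMREQ Req;
 *     Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *     Req.Hdr.cbReq    = sizeof(Req);
 *     Req.pSession     = pSession;
 *     Req.cCpus        = 1;
 *     Req.pVMR3        = NIL_RTR3PTR;
 *     Req.pVMR0        = NULL;
 */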
800
801
802/**
803 * Allocates the VM structure and registers it with GVM.
804 *
 805 * The caller will become the VM owner and thereby the EMT.
806 *
807 * @returns VBox status code.
808 * @param pSession The support driver session.
809 * @param cCpus Number of virtual CPUs for the new VM.
810 * @param ppVM Where to store the pointer to the VM structure.
811 *
812 * @thread EMT.
813 */
814GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
815{
816 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
817 PGVMM pGVMM;
818 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
819
820 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
821 *ppVM = NULL;
822
823 if ( cCpus == 0
824 || cCpus > VMM_MAX_CPU_COUNT)
825 return VERR_INVALID_PARAMETER;
826
827 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
828 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
829 RTPROCESS ProcId = RTProcSelf();
830 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
831
832 /*
833 * The whole allocation process is protected by the lock.
834 */
835 int rc = gvmmR0CreateDestroyLock(pGVMM);
836 AssertRCReturn(rc, rc);
837
838 /*
839 * Only one VM per session.
840 */
841 if (SUPR0GetSessionVM(pSession) != NULL)
842 {
843 gvmmR0CreateDestroyUnlock(pGVMM);
844 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
845 return VERR_ALREADY_EXISTS;
846 }
847
848 /*
849 * Allocate a handle first so we don't waste resources unnecessarily.
850 */
851 uint16_t iHandle = pGVMM->iFreeHead;
852 if (iHandle)
853 {
854 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
855
856 /* consistency checks, a bit paranoid as always. */
857 if ( !pHandle->pVM
858 && !pHandle->pGVM
859 && !pHandle->pvObj
860 && pHandle->iSelf == iHandle)
861 {
862 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
863 if (pHandle->pvObj)
864 {
865 /*
866 * Move the handle from the free to used list and perform permission checks.
867 */
868 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
869 AssertRC(rc);
870
871 pGVMM->iFreeHead = pHandle->iNext;
872 pHandle->iNext = pGVMM->iUsedHead;
873 pGVMM->iUsedHead = iHandle;
874 pGVMM->cVMs++;
875
876 pHandle->pVM = NULL;
877 pHandle->pGVM = NULL;
878 pHandle->pSession = pSession;
879 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
880 pHandle->ProcId = NIL_RTPROCESS;
881
882 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
883
884 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
885 if (RT_SUCCESS(rc))
886 {
887 /*
888 * Allocate the global VM structure (GVM) and initialize it.
889 */
890 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF(GVM, aCpus[cCpus]));
891 if (pGVM)
892 {
893 pGVM->u32Magic = GVM_MAGIC;
894 pGVM->hSelf = iHandle;
895 pGVM->pVM = NULL;
896 pGVM->cCpus = cCpus;
897 pGVM->pSession = pSession;
898
899 gvmmR0InitPerVMData(pGVM);
900 GMMR0InitPerVMData(pGVM);
901
902 /*
903 * Allocate the shared VM structure and associated page array.
904 */
905 const uint32_t cbVM = RT_UOFFSETOF(VM, aCpus[cCpus]);
906 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
907 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
908 if (RT_SUCCESS(rc))
909 {
910 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
911 memset(pVM, 0, cPages << PAGE_SHIFT);
912 pVM->enmVMState = VMSTATE_CREATING;
913 pVM->pVMR0 = pVM;
914 pVM->pSession = pSession;
915 pVM->hSelf = iHandle;
916 pVM->cbSelf = cbVM;
917 pVM->cCpus = cCpus;
918 pVM->uCpuExecutionCap = 100; /* default is no cap. */
919 pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus);
920 AssertCompileMemberAlignment(VM, cpum, 64);
921 AssertCompileMemberAlignment(VM, tm, 64);
922 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
923
924 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
925 if (RT_SUCCESS(rc))
926 {
927 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
928 for (uint32_t iPage = 0; iPage < cPages; iPage++)
929 {
930 paPages[iPage].uReserved = 0;
931 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
932 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
933 }
934
935 /*
936 * Map them into ring-3.
937 */
938 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
939 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
940 if (RT_SUCCESS(rc))
941 {
942 pVM->pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
943 AssertPtr((void *)pVM->pVMR3);
944
945 /* Initialize all the VM pointers. */
946 for (uint32_t i = 0; i < cCpus; i++)
947 {
948 pVM->aCpus[i].pVMR0 = pVM;
949 pVM->aCpus[i].pVMR3 = pVM->pVMR3;
950 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
951 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
952 }
953
954 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
955 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
956 NIL_RTR0PROCESS);
957 if (RT_SUCCESS(rc))
958 {
959 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
960 AssertPtr((void *)pVM->paVMPagesR3);
961
962 /* complete the handle - take the UsedLock sem just to be careful. */
963 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
964 AssertRC(rc);
965
966 pHandle->pVM = pVM;
967 pHandle->pGVM = pGVM;
968 pHandle->hEMT0 = hEMT0;
969 pHandle->ProcId = ProcId;
970 pGVM->pVM = pVM;
971 pGVM->aCpus[0].hEMT = hEMT0;
972 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
973 pGVMM->cEMTs += cCpus;
974
975 /* Associate it with the session and create the context hook for EMT0. */
976 rc = SUPR0SetSessionVM(pSession, pGVM, pVM);
977 if (RT_SUCCESS(rc))
978 {
979 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
980 if (RT_SUCCESS(rc))
981 {
982 /*
983 * Done!
984 */
985 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
986
987 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
988 gvmmR0CreateDestroyUnlock(pGVMM);
989
990 CPUMR0RegisterVCpuThread(&pVM->aCpus[0]);
991
992 *ppVM = pVM;
993 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVM->pVMR3, pGVM, iHandle));
994 return VINF_SUCCESS;
995 }
996
997 SUPR0SetSessionVM(pSession, NULL, NULL);
998 }
999 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1000 }
1001
1002 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1003 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1004 }
1005 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
1006 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1007 }
1008 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
1009 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1010 }
1011 }
1012 }
1013 /* else: The user wasn't permitted to create this VM. */
1014
1015 /*
1016 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
 1017 * object reference here. A little extra mess because of the non-recursive lock.
1018 */
1019 void *pvObj = pHandle->pvObj;
1020 pHandle->pvObj = NULL;
1021 gvmmR0CreateDestroyUnlock(pGVMM);
1022
1023 SUPR0ObjRelease(pvObj, pSession);
1024
1025 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
1026 return rc;
1027 }
1028
1029 rc = VERR_NO_MEMORY;
1030 }
1031 else
1032 rc = VERR_GVMM_IPE_1;
1033 }
1034 else
1035 rc = VERR_GVM_TOO_MANY_VMS;
1036
1037 gvmmR0CreateDestroyUnlock(pGVMM);
1038 return rc;
1039}
1040
1041
1042/**
1043 * Initializes the per VM data belonging to GVMM.
1044 *
1045 * @param pGVM Pointer to the global VM structure.
1046 */
1047static void gvmmR0InitPerVMData(PGVM pGVM)
1048{
1049 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1050 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1051 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1052 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1053 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1054 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1055 pGVM->gvmm.s.fDoneVMMR0Init = false;
1056 pGVM->gvmm.s.fDoneVMMR0Term = false;
1057
1058 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1059 {
1060 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1061 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1062 }
1063}
1064
1065
1066/**
1067 * Does the VM initialization.
1068 *
1069 * @returns VBox status code.
1070 * @param pGVM The global (ring-0) VM structure.
1071 */
1072GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1073{
1074 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1075
1076 int rc = VERR_INTERNAL_ERROR_3;
1077 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1078 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1079 {
1080 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1081 {
1082 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1083 if (RT_FAILURE(rc))
1084 {
1085 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1086 break;
1087 }
1088 }
1089 }
1090 else
1091 rc = VERR_WRONG_ORDER;
1092
1093 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1094 return rc;
1095}
1096
1097
1098/**
1099 * Indicates that we're done with the ring-0 initialization
1100 * of the VM.
1101 *
1102 * @param pGVM The global (ring-0) VM structure.
1103 * @thread EMT(0)
1104 */
1105GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1106{
1107 /* Set the indicator. */
1108 pGVM->gvmm.s.fDoneVMMR0Init = true;
1109}
1110
1111
1112/**
1113 * Indicates that we're doing the ring-0 termination of the VM.
1114 *
1115 * @returns true if termination hasn't been done already, false if it has.
1116 * @param pGVM Pointer to the global VM structure. Optional.
1117 * @thread EMT(0) or session cleanup thread.
1118 */
1119GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1120{
1121 /* Validate the VM structure, state and handle. */
1122 AssertPtrReturn(pGVM, false);
1123
1124 /* Set the indicator. */
1125 if (pGVM->gvmm.s.fDoneVMMR0Term)
1126 return false;
1127 pGVM->gvmm.s.fDoneVMMR0Term = true;
1128 return true;
1129}
1130
1131
1132/**
1133 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1134 *
 1135 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1136 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1137 * would've been nice if the caller was actually the EMT thread or that we somehow
1138 * could've associated the calling thread with the VM up front.
1139 *
1140 * @returns VBox status code.
1141 * @param pGVM The global (ring-0) VM structure.
1142 * @param pVM The cross context VM structure.
1143 *
1144 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1145 */
1146GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM, PVM pVM)
1147{
1148 LogFlow(("GVMMR0DestroyVM: pGVM=%p pVM=%p\n", pGVM, pVM));
1149 PGVMM pGVMM;
1150 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1151
1152 /*
1153 * Validate the VM structure, state and caller.
1154 */
1155 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1156 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1157 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1158 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_POINTER);
1159 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState),
1160 VERR_WRONG_ORDER);
1161
1162 uint32_t hGVM = pGVM->hSelf;
1163 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1164 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1165
1166 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1167 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1168
1169 RTPROCESS ProcId = RTProcSelf();
1170 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1171 AssertReturn( ( pHandle->hEMT0 == hSelf
1172 && pHandle->ProcId == ProcId)
1173 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1174
1175 /*
1176 * Lookup the handle and destroy the object.
1177 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1178 * object, we take some precautions against racing callers just in case...
1179 */
1180 int rc = gvmmR0CreateDestroyLock(pGVMM);
1181 AssertRC(rc);
1182
1183 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1184 if ( pHandle->pVM == pVM
1185 && ( ( pHandle->hEMT0 == hSelf
1186 && pHandle->ProcId == ProcId)
1187 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1188 && VALID_PTR(pHandle->pvObj)
1189 && VALID_PTR(pHandle->pSession)
1190 && VALID_PTR(pHandle->pGVM)
1191 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1192 {
1193 /* Check that other EMTs have deregistered. */
1194 uint32_t cNotDeregistered = 0;
1195 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1196 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1197 if (cNotDeregistered == 0)
1198 {
1199 /* Grab the object pointer. */
1200 void *pvObj = pHandle->pvObj;
1201 pHandle->pvObj = NULL;
1202 gvmmR0CreateDestroyUnlock(pGVMM);
1203
1204 SUPR0ObjRelease(pvObj, pHandle->pSession);
1205 }
1206 else
1207 {
1208 gvmmR0CreateDestroyUnlock(pGVMM);
1209 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1210 }
1211 }
1212 else
1213 {
1214 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1215 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1216 gvmmR0CreateDestroyUnlock(pGVMM);
1217 rc = VERR_GVMM_IPE_2;
1218 }
1219
1220 return rc;
1221}
1222
1223
1224/**
1225 * Performs VM cleanup task as part of object destruction.
1226 *
1227 * @param pGVM The GVM pointer.
1228 */
1229static void gvmmR0CleanupVM(PGVM pGVM)
1230{
1231 if ( pGVM->gvmm.s.fDoneVMMR0Init
1232 && !pGVM->gvmm.s.fDoneVMMR0Term)
1233 {
1234 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1235 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1236 {
1237 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1238 VMMR0TermVM(pGVM, pGVM->pVM, NIL_VMCPUID);
1239 }
1240 else
1241 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1242 }
1243
1244 GMMR0CleanupVM(pGVM);
1245
1246 AssertCompile((uintptr_t)NIL_RTTHREADCTXHOOK == 0); /* Depends on zero initialized memory working for NIL at the moment. */
1247 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1248 {
1249 /** @todo Can we busy wait here for all thread-context hooks to be
1250 * deregistered before releasing (destroying) it? Only until we find a
 1251 * solution for not deregistering hooks every time we're leaving HMR0
1252 * context. */
1253 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->pVM->aCpus[idCpu]);
1254 }
1255}
1256
1257
1258/**
1259 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1260 *
1261 * pvUser1 is the GVM instance pointer.
1262 * pvUser2 is the handle pointer.
1263 */
1264static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1265{
1266 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1267
1268 NOREF(pvObj);
1269
1270 /*
1271 * Some quick, paranoid, input validation.
1272 */
1273 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1274 AssertPtr(pHandle);
1275 PGVMM pGVMM = (PGVMM)pvUser1;
1276 Assert(pGVMM == g_pGVMM);
1277 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1278 if ( !iHandle
1279 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1280 || iHandle != pHandle->iSelf)
1281 {
1282 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1283 return;
1284 }
1285
1286 int rc = gvmmR0CreateDestroyLock(pGVMM);
1287 AssertRC(rc);
1288 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1289 AssertRC(rc);
1290
1291 /*
1292 * This is a tad slow but a doubly linked list is too much hassle.
1293 */
1294 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1295 {
1296 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1297 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1298 gvmmR0CreateDestroyUnlock(pGVMM);
1299 return;
1300 }
1301
1302 if (pGVMM->iUsedHead == iHandle)
1303 pGVMM->iUsedHead = pHandle->iNext;
1304 else
1305 {
1306 uint16_t iPrev = pGVMM->iUsedHead;
1307 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1308 while (iPrev)
1309 {
1310 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1311 {
1312 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1313 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1314 gvmmR0CreateDestroyUnlock(pGVMM);
1315 return;
1316 }
1317 if (RT_UNLIKELY(c-- <= 0))
1318 {
1319 iPrev = 0;
1320 break;
1321 }
1322
1323 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1324 break;
1325 iPrev = pGVMM->aHandles[iPrev].iNext;
1326 }
1327 if (!iPrev)
1328 {
1329 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1330 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1331 gvmmR0CreateDestroyUnlock(pGVMM);
1332 return;
1333 }
1334
1335 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1336 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1337 }
1338 pHandle->iNext = 0;
1339 pGVMM->cVMs--;
1340
1341 /*
1342 * Do the global cleanup round.
1343 */
1344 PGVM pGVM = pHandle->pGVM;
1345 if ( VALID_PTR(pGVM)
1346 && pGVM->u32Magic == GVM_MAGIC)
1347 {
1348 pGVMM->cEMTs -= pGVM->cCpus;
1349
1350 if (pGVM->pSession)
1351 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1352
1353 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1354
1355 gvmmR0CleanupVM(pGVM);
1356
1357 /*
1358 * Do the GVMM cleanup - must be done last.
1359 */
1360 /* The VM and VM pages mappings/allocations. */
1361 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1362 {
1363 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1364 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1365 }
1366
1367 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1368 {
1369 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1370 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1371 }
1372
1373 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1374 {
1375 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1376 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1377 }
1378
1379 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1380 {
1381 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1382 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1383 }
1384
1385 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1386 {
1387 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1388 {
1389 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1390 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1391 }
1392 }
1393
1394 /* the GVM structure itself. */
1395 pGVM->u32Magic |= UINT32_C(0x80000000);
1396 RTMemFree(pGVM);
1397
1398 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1399 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1400 AssertRC(rc);
1401 }
1402 /* else: GVMMR0CreateVM cleanup. */
1403
1404 /*
1405 * Free the handle.
1406 */
1407 pHandle->iNext = pGVMM->iFreeHead;
1408 pGVMM->iFreeHead = iHandle;
1409 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1410 ASMAtomicWriteNullPtr(&pHandle->pVM);
1411 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1412 ASMAtomicWriteNullPtr(&pHandle->pSession);
1413 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1414 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1415
1416 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1417 gvmmR0CreateDestroyUnlock(pGVMM);
1418 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1419}
1420
1421
1422/**
1423 * Registers the calling thread as the EMT of a Virtual CPU.
1424 *
1425 * Note that VCPU 0 is automatically registered during VM creation.
1426 *
1427 * @returns VBox status code
1428 * @param pGVM The global (ring-0) VM structure.
1429 * @param pVM The cross context VM structure.
1430 * @param idCpu VCPU id to register the current thread as.
1431 */
1432GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1433{
1434 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1435
1436 /*
1437 * Validate the VM structure, state and handle.
1438 */
1439 PGVMM pGVMM;
1440 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1441 if (RT_SUCCESS(rc))
1442 {
1443 if (idCpu < pGVM->cCpus)
1444 {
1445 /* Check that the EMT isn't already assigned to a thread. */
1446 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1447 {
1448 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1449
1450 /* A thread may only be one EMT. */
1451 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1452 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1453 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1454 if (RT_SUCCESS(rc))
1455 {
1456 /*
1457 * Do the assignment, then try setup the hook. Undo if that fails.
1458 */
1459 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1460
1461 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
1462 if (RT_SUCCESS(rc))
1463 CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]);
1464 else
1465 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1466 }
1467 }
1468 else
1469 rc = VERR_ACCESS_DENIED;
1470 }
1471 else
1472 rc = VERR_INVALID_CPU_ID;
1473 }
1474 return rc;
1475}
1476
1477
1478/**
1479 * Deregisters the calling thread as the EMT of a Virtual CPU.
1480 *
 1481 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1482 *
1483 * @returns VBox status code
1484 * @param pGVM The global (ring-0) VM structure.
1485 * @param pVM The cross context VM structure.
 1486 * @param idCpu VCPU id to deregister the current thread from.
1487 */
1488GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1489{
1490 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1491
1492 /*
1493 * Validate the VM structure, state and handle.
1494 */
1495 PGVMM pGVMM;
1496 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1497 if (RT_SUCCESS(rc))
1498 {
1499 /*
1500 * Take the destruction lock and recheck the handle state to
1501 * prevent racing GVMMR0DestroyVM.
1502 */
1503 gvmmR0CreateDestroyLock(pGVMM);
1504 uint32_t hSelf = pGVM->hSelf;
1505 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1506 && pGVMM->aHandles[hSelf].pvObj != NULL
1507 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1508 {
1509 /*
1510 * Do per-EMT cleanups.
1511 */
1512 VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
1513
1514 /*
1515 * Invalidate hEMT. We don't use NIL here as that would allow
1516 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1517 */
1518 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1519 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1520 pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1521 }
1522
1523 gvmmR0CreateDestroyUnlock(pGVMM);
1524 }
1525 return rc;
1526}
1527
1528
1529/**
1530 * Lookup a GVM structure by its handle.
1531 *
1532 * @returns The GVM pointer on success, NULL on failure.
1533 * @param hGVM The global VM handle. Asserts on bad handle.
1534 */
1535GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1536{
1537 PGVMM pGVMM;
1538 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1539
1540 /*
1541 * Validate.
1542 */
1543 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1544 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1545
1546 /*
1547 * Look it up.
1548 */
1549 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1550 AssertPtrReturn(pHandle->pVM, NULL);
1551 AssertPtrReturn(pHandle->pvObj, NULL);
1552 PGVM pGVM = pHandle->pGVM;
1553 AssertPtrReturn(pGVM, NULL);
1554 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1555
1556 return pHandle->pGVM;
1557}
1558
1559
1560/**
1561 * Lookup a GVM structure by the shared VM structure.
1562 *
1563 * The calling thread must be in the same process as the VM. All current lookups
1564 * are by threads inside the same process, so this will not be an issue.
1565 *
1566 * @returns VBox status code.
1567 * @param pVM The cross context VM structure.
1568 * @param ppGVM Where to store the GVM pointer.
1569 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1570 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1571 * shared mode when requested.
1572 *
1573 * Be very careful if not taking the lock as it's
1574 * possible that the VM will disappear then!
1575 *
 1576 * @remark This will not assert on an invalid pVM but try to return silently.
1577 */
1578static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1579{
1580 RTPROCESS ProcId = RTProcSelf();
1581 PGVMM pGVMM;
1582 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1583
1584 /*
1585 * Validate.
1586 */
1587 if (RT_UNLIKELY( !VALID_PTR(pVM)
1588 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1589 return VERR_INVALID_POINTER;
1590 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1591 || pVM->enmVMState >= VMSTATE_TERMINATED))
1592 return VERR_INVALID_POINTER;
1593
1594 uint16_t hGVM = pVM->hSelf;
1595 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1596 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1597 return VERR_INVALID_HANDLE;
1598
1599 /*
1600 * Look it up.
1601 */
1602 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1603 PGVM pGVM;
1604 if (fTakeUsedLock)
1605 {
1606 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1607 AssertRCReturn(rc, rc);
1608
1609 pGVM = pHandle->pGVM;
1610 if (RT_UNLIKELY( pHandle->pVM != pVM
1611 || pHandle->ProcId != ProcId
1612 || !VALID_PTR(pHandle->pvObj)
1613 || !VALID_PTR(pGVM)
1614 || pGVM->pVM != pVM))
1615 {
1616 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1617 return VERR_INVALID_HANDLE;
1618 }
1619 }
1620 else
1621 {
1622 if (RT_UNLIKELY(pHandle->pVM != pVM))
1623 return VERR_INVALID_HANDLE;
1624 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1625 return VERR_INVALID_HANDLE;
1626 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1627 return VERR_INVALID_HANDLE;
1628
1629 pGVM = pHandle->pGVM;
1630 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1631 return VERR_INVALID_HANDLE;
1632 if (RT_UNLIKELY(pGVM->pVM != pVM))
1633 return VERR_INVALID_HANDLE;
1634 }
1635
1636 *ppGVM = pGVM;
1637 *ppGVMM = pGVMM;
1638 return VINF_SUCCESS;
1639}
1640
1641
1642/**
1643 * Check that the given GVM and VM structures match up.
1644 *
1645 * The calling thread must be in the same process as the VM. All current lookups
1646 * are by threads inside the same process, so this will not be an issue.
1647 *
1648 * @returns VBox status code.
1649 * @param pGVM The global (ring-0) VM structure.
1650 * @param pVM The cross context VM structure.
1651 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1652 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1653 * shared mode when requested.
1654 *
1655 * Be very careful if not taking the lock as it's
1656 * possible that the VM will disappear then!
1657 *
 1658 * @remark This will not assert on an invalid pVM but try to return silently.
1659 */
1660static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1661{
1662 /*
1663 * Check the pointers.
1664 */
1665 int rc;
1666 if (RT_LIKELY(RT_VALID_PTR(pGVM)))
1667 {
1668 if (RT_LIKELY( RT_VALID_PTR(pVM)
1669 && ((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0))
1670 {
1671 if (RT_LIKELY(pGVM->pVM == pVM))
1672 {
1673 /*
1674 * Get the pGVMM instance and check the VM handle.
1675 */
1676 PGVMM pGVMM;
1677 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1678
1679 uint16_t hGVM = pGVM->hSelf;
1680 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1681 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1682 {
1683 RTPROCESS const pidSelf = RTProcSelf();
1684 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1685 if (fTakeUsedLock)
1686 {
1687 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1688 AssertRCReturn(rc, rc);
1689 }
1690
1691 if (RT_LIKELY( pHandle->pGVM == pGVM
1692 && pHandle->pVM == pVM
1693 && pHandle->ProcId == pidSelf
1694 && RT_VALID_PTR(pHandle->pvObj)))
1695 {
1696 /*
1697 * Some more VM data consistency checks.
1698 */
1699 if (RT_LIKELY( pVM->cCpus == pGVM->cCpus
1700 && pVM->hSelf == hGVM
1701 && pVM->enmVMState >= VMSTATE_CREATING
1702 && pVM->enmVMState <= VMSTATE_TERMINATED
1703 && pVM->pVMR0 == pVM))
1704 {
1705 *ppGVMM = pGVMM;
1706 return VINF_SUCCESS;
1707 }
1708 }
1709
1710 if (fTakeUsedLock)
1711 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1712 }
1713 }
1714 rc = VERR_INVALID_VM_HANDLE;
1715 }
1716 else
1717 rc = VERR_INVALID_POINTER;
1718 }
1719 else
1720 rc = VERR_INVALID_POINTER;
1721 return rc;
1722}
1723
1724
1725/**
1726 * Check that the given GVM and VM structures match up.
1727 *
1728 * The calling thread must be in the same process as the VM. All current lookups
1729 * are by threads inside the same process, so this will not be an issue.
1730 *
1731 * @returns VBox status code.
1732 * @param pGVM The global (ring-0) VM structure.
1733 * @param pVM The cross context VM structure.
1734 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1735 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1736 * @thread EMT
1737 *
1738 * @remarks This will assert in all failure paths.
1739 */
1740static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM)
1741{
1742 /*
1743 * Check the pointers.
1744 */
1745 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1746
1747 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1748 AssertReturn(((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1749 AssertReturn(pGVM->pVM == pVM, VERR_INVALID_VM_HANDLE);
1750
1751
1752 /*
1753 * Get the pGVMM instance and check the VM handle.
1754 */
1755 PGVMM pGVMM;
1756 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1757
1758 uint16_t hGVM = pGVM->hSelf;
1759 AssertReturn( hGVM != NIL_GVM_HANDLE
1760 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1761
1762 RTPROCESS const pidSelf = RTProcSelf();
1763 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1764 AssertReturn( pHandle->pGVM == pGVM
1765 && pHandle->pVM == pVM
1766 && pHandle->ProcId == pidSelf
1767 && RT_VALID_PTR(pHandle->pvObj),
1768 VERR_INVALID_HANDLE);
1769
1770 /*
1771 * Check the EMT claim.
1772 */
1773 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1774 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1775 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1776
1777 /*
1778 * Some more VM data consistency checks.
1779 */
1780 AssertReturn(pVM->cCpus == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1781 AssertReturn(pVM->hSelf == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1782 AssertReturn(pVM->pVMR0 == pVM, VERR_INCONSISTENT_VM_HANDLE);
1783 AssertReturn( pVM->enmVMState >= VMSTATE_CREATING
1784 && pVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1785
1786 *ppGVMM = pGVMM;
1787 return VINF_SUCCESS;
1788}
1789
1790
1791/**
1792 * Validates a GVM/VM pair.
1793 *
1794 * @returns VBox status code.
1795 * @param pGVM The global (ring-0) VM structure.
1796 * @param pVM The cross context VM structure.
1797 */
1798GVMMR0DECL(int) GVMMR0ValidateGVMandVM(PGVM pGVM, PVM pVM)
1799{
1800 PGVMM pGVMM;
1801 return gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /*fTakeUsedLock*/);
1802}
1803
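/*
 * Usage sketch (illustrative only; gvmmR0OperateOnVm is a hypothetical worker,
 * not part of the GVMM API): a typical ring-0 entry point validates the
 * GVM/VM pair before touching any VM state.
 *
 * @code
 *      int rc = GVMMR0ValidateGVMandVM(pGVM, pVM);
 *      if (RT_SUCCESS(rc))
 *          rc = gvmmR0OperateOnVm(pGVM, pVM);  // hypothetical worker
 *      return rc;
 * @endcode
 */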
1804
1805
1806/**
1807 * Validates a GVM/VM/EMT combo.
1808 *
1809 * @returns VBox status code.
1810 * @param pGVM The global (ring-0) VM structure.
1811 * @param pVM The cross context VM structure.
1812 * @param idCpu The Virtual CPU ID of the calling EMT.
1813 * @thread EMT(idCpu)
1814 */
1815GVMMR0DECL(int) GVMMR0ValidateGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu)
1816{
1817 PGVMM pGVMM;
1818 return gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
1819}
1820
1821
1822/**
1823 * Looks up the VM belonging to the specified EMT thread.
1824 *
1825 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1826 * unnecessary kernel panics when the EMT thread hits an assertion. The
1827 * caller may or may not be an EMT thread.
1828 *
1829 * @returns Pointer to the VM on success, NULL on failure.
1830 * @param hEMT The native thread handle of the EMT.
1831 * NIL_RTNATIVETHREAD means the current thread.
1832 */
1833GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1834{
1835 /*
1836 * No assertions here as we're usually called in an AssertMsgN or
1837 * RTAssert* context.
1838 */
1839 PGVMM pGVMM = g_pGVMM;
1840 if ( !VALID_PTR(pGVMM)
1841 || pGVMM->u32Magic != GVMM_MAGIC)
1842 return NULL;
1843
1844 if (hEMT == NIL_RTNATIVETHREAD)
1845 hEMT = RTThreadNativeSelf();
1846 RTPROCESS ProcId = RTProcSelf();
1847
1848 /*
1849 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1850 */
1851 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1852 {
1853 if ( pGVMM->aHandles[i].iSelf == i
1854 && pGVMM->aHandles[i].ProcId == ProcId
1855 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1856 && VALID_PTR(pGVMM->aHandles[i].pVM)
1857 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1858 {
1859 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1860 return pGVMM->aHandles[i].pVM;
1861
1862 /* This is fairly safe with the current process-per-VM approach. */
1863 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1864 VMCPUID const cCpus = pGVM->cCpus;
1865 if ( cCpus < 1
1866 || cCpus > VMM_MAX_CPU_COUNT)
1867 continue;
1868 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1869 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1870 return pGVMM->aHandles[i].pVM;
1871 }
1872 }
1873 return NULL;
1874}
1875
1876
1877/**
1878 * This will wake up expired and soon-to-be expired VMs.
1879 *
1880 * @returns Number of VMs that have been woken up.
1881 * @param pGVMM Pointer to the GVMM instance data.
1882 * @param u64Now The current time.
1883 */
1884static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1885{
1886 /*
1887 * Skip this if early wake-ups have been disabled, either because of high
1888 * resolution wakeups or by the user.
1889 */
1890 if (!pGVMM->fDoEarlyWakeUps)
1891 return 0;
1892
1893/** @todo Rewrite this algorithm. See performance defect XYZ. */
1894
1895 /*
1896 * A cheap optimization to stop wasting so much time here on big setups.
1897 */
1898 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1899 if ( pGVMM->cHaltedEMTs == 0
1900 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1901 return 0;
1902
1903 /*
1904 * Only one thread doing this at a time.
1905 */
1906 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
1907 return 0;
1908
1909 /*
1910 * The first pass will wake up VMs which have actually expired
1911 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1912 */
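    /* To make the thresholds concrete: with the current time T, expiries at or
       before T are signalled right away in this pass; expiries within
       nsEarlyWakeUp1 of T are deferred to the 2nd pass, those within
       nsEarlyWakeUp2 to the 3rd pass, and the earliest remaining expiry is
       remembered in u64Min so the next wake-up round can be scheduled. */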
1913 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1914 uint64_t u64Min = UINT64_MAX;
1915 unsigned cWoken = 0;
1916 unsigned cHalted = 0;
1917 unsigned cTodo2nd = 0;
1918 unsigned cTodo3rd = 0;
1919 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1920 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1921 i = pGVMM->aHandles[i].iNext)
1922 {
1923 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1924 if ( VALID_PTR(pCurGVM)
1925 && pCurGVM->u32Magic == GVM_MAGIC)
1926 {
1927 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1928 {
1929 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1930 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1931 if (u64)
1932 {
1933 if (u64 <= u64Now)
1934 {
1935 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1936 {
1937 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1938 AssertRC(rc);
1939 cWoken++;
1940 }
1941 }
1942 else
1943 {
1944 cHalted++;
1945 if (u64 <= uNsEarlyWakeUp1)
1946 cTodo2nd++;
1947 else if (u64 <= uNsEarlyWakeUp2)
1948 cTodo3rd++;
1949 else if (u64 < u64Min)
1950 u64Min = u64;
1951 }
1952 }
1953 }
1954 }
1955 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1956 }
1957
1958 if (cTodo2nd)
1959 {
1960 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1961 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1962 i = pGVMM->aHandles[i].iNext)
1963 {
1964 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1965 if ( VALID_PTR(pCurGVM)
1966 && pCurGVM->u32Magic == GVM_MAGIC)
1967 {
1968 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1969 {
1970 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1971 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1972 if ( u64
1973 && u64 <= uNsEarlyWakeUp1)
1974 {
1975 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1976 {
1977 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1978 AssertRC(rc);
1979 cWoken++;
1980 }
1981 }
1982 }
1983 }
1984 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1985 }
1986 }
1987
1988 if (cTodo3rd)
1989 {
1990 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1991 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1992 i = pGVMM->aHandles[i].iNext)
1993 {
1994 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1995 if ( VALID_PTR(pCurGVM)
1996 && pCurGVM->u32Magic == GVM_MAGIC)
1997 {
1998 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1999 {
2000 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2001 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2002 if ( u64
2003 && u64 <= uNsEarlyWakeUp2)
2004 {
2005 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2006 {
2007 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2008 AssertRC(rc);
2009 cWoken++;
2010 }
2011 }
2012 }
2013 }
2014 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2015 }
2016 }
2017
2018 /*
2019 * Set the minimum value.
2020 */
2021 pGVMM->uNsNextEmtWakeup = u64Min;
2022
2023 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2024 return cWoken;
2025}
2026
2027
2028/**
2029 * Halt the EMT thread.
2030 *
2031 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2032 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2033 * @param pGVM The global (ring-0) VM structure.
2034 * @param pVM The cross context VM structure.
2035 * @param idCpu The Virtual CPU ID of the calling EMT.
2036 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2037 * @thread EMT(idCpu).
2038 */
2039GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2040{
2041 LogFlow(("GVMMR0SchedHalt: pGVM=%p pVM=%p idCpu=%#x u64ExpireGipTime=%#RX64\n", pGVM, pVM, idCpu, u64ExpireGipTime));
2042 GVMM_CHECK_SMAP_SETUP();
2043 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2044
2045 /*
2046 * Validate the VM structure, state and handle.
2047 */
2048 PGVMM pGVMM;
2049 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2050 if (RT_FAILURE(rc))
2051 return rc;
2052 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2053 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2054
2055 PGVMCPU pCurGVCpu = &pGVM->aCpus[idCpu];
2056 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
2057
2058 /*
2059 * If we're doing early wake-ups, we must take the UsedList lock before we
2060 * start querying the current time.
2061 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2062 */
2063 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2064 if (fDoEarlyWakeUps)
2065 {
2066 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2067 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2068 }
2069
2070 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2071
2072 /* GIP hack: We may be sleeping frequently for short intervals where the
2073 difference between GIP and system time matters on systems with high resolution
2074 system time. So, convert the input from GIP to System time in that case. */
2075 Assert(ASMGetFlags() & X86_EFL_IF);
2076 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2077 const uint64_t u64NowGip = RTTimeNanoTS();
2078 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2079
2080 if (fDoEarlyWakeUps)
2081 {
2082 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2083 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2084 }
2085
2086 /*
2087 * Go to sleep if we must...
2088 * Cap the sleep time to 1 second to be on the safe side.
2089 */
2090 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2091 if ( u64NowGip < u64ExpireGipTime
2092 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2093 ? pGVMM->nsMinSleepCompany
2094 : pGVMM->nsMinSleepAlone))
2095 {
2096 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2097 if (cNsInterval > RT_NS_1SEC)
2098 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2099 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2100 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2101 if (fDoEarlyWakeUps)
2102 {
2103 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2104 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2105 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2106 }
2107 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2108
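        /* Deadline selection: if the GIP clock is ahead of the system clock,
           pass the GIP expiry through unchanged; otherwise re-express the
           remaining interval on the system clock (see the GIP hack note
           above). */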
2109 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
2110 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2111 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2112 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2113
2114 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
2115 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2116
2117 /* Reset the semaphore to try to prevent a few false wake-ups. */
2118 if (rc == VINF_SUCCESS)
2119 {
2120 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
2121 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2122 }
2123 else if (rc == VERR_TIMEOUT)
2124 {
2125 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2126 rc = VINF_SUCCESS;
2127 }
2128 }
2129 else
2130 {
2131 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2132 if (fDoEarlyWakeUps)
2133 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2134 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2135 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
2136 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2137 }
2138
2139 return rc;
2140}
2141
2142
2143/**
2144 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2145 * a sleeping EMT.
2146 *
2147 * @retval VINF_SUCCESS if successfully woken up.
2148 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2149 *
2150 * @param pGVM The global (ring-0) VM structure.
2151 * @param pGVCpu The global (ring-0) VCPU structure.
2152 */
2153DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2154{
2155 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2156
2157 /*
2158 * Signal the semaphore regardless of whether it's currently blocked on it.
2159 *
2160 * The reason for this is that there is absolutely no way we can be 100%
2161 * certain that it isn't *about* to go to sleep on it and just got
2162 * delayed a bit en route. So, we will always signal the semaphore when
2163 * it is flagged as halted in the VMM.
2164 */
2165/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2166 int rc;
2167 if (pGVCpu->gvmm.s.u64HaltExpire)
2168 {
2169 rc = VINF_SUCCESS;
2170 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2171 }
2172 else
2173 {
2174 rc = VINF_GVM_NOT_BLOCKED;
2175 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2176 }
2177
2178 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2179 AssertRC(rc2);
2180
2181 return rc;
2182}
2183
2184
2185/**
2186 * Wakes up the halted EMT thread so it can service a pending request.
2187 *
2188 * @returns VBox status code.
2189 * @retval VINF_SUCCESS if successfully woken up.
2190 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2191 *
2192 * @param pGVM The global (ring-0) VM structure.
2193 * @param pVM The cross context VM structure.
2194 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2195 * @param fTakeUsedLock Take the used lock or not
2196 * @thread Any but EMT(idCpu).
2197 */
2198GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2199{
2200 GVMM_CHECK_SMAP_SETUP();
2201 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2202
2203 /*
2204 * Validate input and take the UsedLock.
2205 */
2206 PGVMM pGVMM;
2207 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2208 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2209 if (RT_SUCCESS(rc))
2210 {
2211 if (idCpu < pGVM->cCpus)
2212 {
2213 /*
2214 * Do the actual job.
2215 */
2216 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2217 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2218
2219 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2220 {
2221 /*
2222 * While we're here, do a round of scheduling.
2223 */
2224 Assert(ASMGetFlags() & X86_EFL_IF);
2225 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2226 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2227 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2228 }
2229 }
2230 else
2231 rc = VERR_INVALID_CPU_ID;
2232
2233 if (fTakeUsedLock)
2234 {
2235 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2236 AssertRC(rc2);
2237 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2238 }
2239 }
2240
2241 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2242 return rc;
2243}
2244
2245
2246/**
2247 * Wakes up the halted EMT thread so it can service a pending request.
2248 *
2249 * @returns VBox status code.
2250 * @retval VINF_SUCCESS if successfully woken up.
2251 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2252 *
2253 * @param pGVM The global (ring-0) VM structure.
2254 * @param pVM The cross context VM structure.
2255 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2256 * @thread Any but EMT(idCpu).
2257 */
2258GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2259{
2260 return GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2261}
2262
2263
2264/**
2265 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2266 * parameter and no used locking.
2267 *
2268 * @returns VBox status code.
2269 * @retval VINF_SUCCESS if successfully woken up.
2270 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2271 *
2272 * @param pVM The cross context VM structure.
2273 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2274 * @thread Any but EMT(idCpu).
2275 * @deprecated Don't use in new code if possible! Use the GVM variant.
2276 */
2277GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2278{
2279 GVMM_CHECK_SMAP_SETUP();
2280 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2281 PGVM pGVM;
2282 PGVMM pGVMM;
2283 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2284 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2285 if (RT_SUCCESS(rc))
2286 rc = GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, false /*fTakeUsedLock*/);
2287 return rc;
2288}
2289
2290
2291/**
2292 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2293 * the Virtual CPU if it's still busy executing guest code.
2294 *
2295 * @returns VBox status code.
2296 * @retval VINF_SUCCESS if poked successfully.
2297 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2298 *
2299 * @param pGVM The global (ring-0) VM structure.
2300 * @param pVCpu The cross context virtual CPU structure.
2301 */
2302DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
2303{
2304 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2305
2306 RTCPUID idHostCpu = pVCpu->idHostCpu;
2307 if ( idHostCpu == NIL_RTCPUID
2308 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2309 {
2310 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2311 return VINF_GVM_NOT_BUSY_IN_GC;
2312 }
2313
2314 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2315 RTMpPokeCpu(idHostCpu);
2316 return VINF_SUCCESS;
2317}
2318
2319
2320/**
2321 * Pokes an EMT if it's still busy running guest code.
2322 *
2323 * @returns VBox status code.
2324 * @retval VINF_SUCCESS if poked successfully.
2325 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2326 *
2327 * @param pGVM The global (ring-0) VM structure.
2328 * @param pVM The cross context VM structure.
2329 * @param idCpu The ID of the virtual CPU to poke.
2330 * @param fTakeUsedLock Take the used lock or not
2331 */
2332GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2333{
2334 /*
2335 * Validate input and take the UsedLock.
2336 */
2337 PGVMM pGVMM;
2338 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
2339 if (RT_SUCCESS(rc))
2340 {
2341 if (idCpu < pGVM->cCpus)
2342 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2343 else
2344 rc = VERR_INVALID_CPU_ID;
2345
2346 if (fTakeUsedLock)
2347 {
2348 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2349 AssertRC(rc2);
2350 }
2351 }
2352
2353 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2354 return rc;
2355}
2356
2357
2358/**
2359 * Pokes an EMT if it's still busy running guest code.
2360 *
2361 * @returns VBox status code.
2362 * @retval VINF_SUCCESS if poked successfully.
2363 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2364 *
2365 * @param pGVM The global (ring-0) VM structure.
2366 * @param pVM The cross context VM structure.
2367 * @param idCpu The ID of the virtual CPU to poke.
2368 */
2369GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVM pVM, VMCPUID idCpu)
2370{
2371 return GVMMR0SchedPokeEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
2372}
2373
2374
2375/**
2376 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2377 * used locking.
2378 *
2379 * @returns VBox status code.
2380 * @retval VINF_SUCCESS if poked successfully.
2381 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2382 *
2383 * @param pVM The cross context VM structure.
2384 * @param idCpu The ID of the virtual CPU to poke.
2385 *
2386 * @deprecated Don't use in new code if possible! Use the GVM variant.
2387 */
2388GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PVM pVM, VMCPUID idCpu)
2389{
2390 PGVM pGVM;
2391 PGVMM pGVMM;
2392 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
2393 if (RT_SUCCESS(rc))
2394 {
2395 if (idCpu < pGVM->cCpus)
2396 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2397 else
2398 rc = VERR_INVALID_CPU_ID;
2399 }
2400 return rc;
2401}
2402
2403
2404/**
2405 * Wakes up a set of halted EMT threads so they can service pending requests.
2406 *
2407 * @returns VBox status code, no informational stuff.
2408 *
2409 * @param pGVM The global (ring-0) VM structure.
2410 * @param pVM The cross context VM structure.
2411 * @param pSleepSet The set of sleepers to wake up.
2412 * @param pPokeSet The set of CPUs to poke.
2413 */
2414GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2415{
2416 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2417 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2418 GVMM_CHECK_SMAP_SETUP();
2419 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2420 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2421
2422 /*
2423 * Validate input and take the UsedLock.
2424 */
2425 PGVMM pGVMM;
2426 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /* fTakeUsedLock */);
2427 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2428 if (RT_SUCCESS(rc))
2429 {
2430 rc = VINF_SUCCESS;
2431 VMCPUID idCpu = pGVM->cCpus;
2432 while (idCpu-- > 0)
2433 {
2434 /* Don't try poke or wake up ourselves. */
2435 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2436 continue;
2437
2438 /* just ignore errors for now. */
2439 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2440 {
2441 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2442 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2443 }
2444 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2445 {
2446 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2447 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2448 }
2449 }
2450
2451 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2452 AssertRC(rc2);
2453 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2454 }
2455
2456 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2457 return rc;
2458}
2459
2460
2461/**
2462 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2463 *
2464 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2465 * @param pGVM The global (ring-0) VM structure.
2466 * @param pVM The cross context VM structure.
2467 * @param pReq Pointer to the request packet.
2468 */
2469GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2470{
2471 /*
2472 * Validate input and pass it on.
2473 */
2474 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2475 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2476
2477 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &pReq->SleepSet, &pReq->PokeSet);
2478}
2479
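/*
 * Request packet sketch (illustrative; assumes the standard SUPVMMR0REQHDR
 * fields and the VMCPUSET_EMPTY/VMCPUSET_ADD helpers from
 * <VBox/vmm/vmcpuset.h>):
 *
 * @code
 *      GVMMSCHEDWAKEUPANDPOKECPUSREQ Req;
 *      Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *      Req.Hdr.cbReq    = sizeof(Req);
 *      VMCPUSET_EMPTY(&Req.SleepSet);
 *      VMCPUSET_EMPTY(&Req.PokeSet);
 *      VMCPUSET_ADD(&Req.SleepSet, 1);   // wake EMT(1) if it is halted
 *      VMCPUSET_ADD(&Req.PokeSet,  2);   // poke EMT(2) if it is in guest code
 *      int rc = GVMMR0SchedWakeUpAndPokeCpusReq(pGVM, pVM, &Req);
 * @endcode
 */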
2480
2481
2482/**
2483 * Poll the scheduler to see if someone else should get a chance to run.
2484 *
2485 * This is a bit hackish and will not work too well if the machine is
2486 * under heavy load from non-VM processes.
2487 *
2488 * @returns VINF_SUCCESS if not yielded.
2489 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2490 * @param pGVM The global (ring-0) VM structure.
2491 * @param pVM The cross context VM structure.
2492 * @param idCpu The Virtual CPU ID of the calling EMT.
2493 * @param fYield Whether to yield or not.
2494 * This is for when we're spinning in the halt loop.
2495 * @thread EMT(idCpu).
2496 */
2497GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fYield)
2498{
2499 /*
2500 * Validate input.
2501 */
2502 PGVMM pGVMM;
2503 int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
2504 if (RT_SUCCESS(rc))
2505 {
2506 /*
2507 * We currently only implement helping doing wakeups (fYield = false), so don't
2508 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2509 */
2510 if (!fYield && pGVMM->fDoEarlyWakeUps)
2511 {
2512 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2513 pGVM->gvmm.s.StatsSched.cPollCalls++;
2514
2515 Assert(ASMGetFlags() & X86_EFL_IF);
2516 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2517
2518 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2519
2520 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2521 }
2522 /*
2523 * Not quite sure what we could do here...
2524 */
2525 else if (fYield)
2526 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2527 else
2528 rc = VINF_SUCCESS;
2529 }
2530
2531 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2532 return rc;
2533}
2534
2535
2536#ifdef GVMM_SCHED_WITH_PPT
2537/**
2538 * Timer callback for the periodic preemption timer.
2539 *
2540 * @param pTimer The timer handle.
2541 * @param pvUser Pointer to the per cpu structure.
2542 * @param iTick The current tick.
2543 */
2544static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2545{
2546 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2547 NOREF(pTimer); NOREF(iTick);
2548
2549 /*
2550 * Termination check
2551 */
2552 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2553 return;
2554
2555 /*
2556 * Do the house keeping.
2557 */
2558 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2559
2560 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2561 {
2562 /*
2563 * Historicize the max frequency.
2564 */
2565 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2566 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2567 pCpu->Ppt.iTickHistorization = 0;
2568 pCpu->Ppt.uDesiredHz = 0;
2569
2570 /*
2571 * Check whether the current timer frequency needs adjusting.
2572 */
2573 uint32_t uHistMaxHz = 0;
2574 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2575 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2576 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2577 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2578 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2579 else if (uHistMaxHz)
2580 {
2581 /*
2582 * Reprogram it.
2583 */
2584 pCpu->Ppt.cChanges++;
2585 pCpu->Ppt.iTickHistorization = 0;
2586 pCpu->Ppt.uTimerHz = uHistMaxHz;
2587 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2588 pCpu->Ppt.cNsInterval = cNsInterval;
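            /* Derive how many timer ticks make up one historization slot from
               the new timer period; long periods are clamped to one tick per
               slot. */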
2589 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2590 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2591 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2592 / cNsInterval;
2593 else
2594 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2595 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2596
2597 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2598 RTTimerChangeInterval(pTimer, cNsInterval);
2599 }
2600 else
2601 {
2602 /*
2603 * Stop it.
2604 */
2605 pCpu->Ppt.fStarted = false;
2606 pCpu->Ppt.uTimerHz = 0;
2607 pCpu->Ppt.cNsInterval = 0;
2608 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2609
2610 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2611 RTTimerStop(pTimer);
2612 }
2613 }
2614 else
2615 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2616}
2617#endif /* GVMM_SCHED_WITH_PPT */
2618
2619
2620/**
2621 * Updates the periodic preemption timer for the calling CPU.
2622 *
2623 * The caller must have disabled preemption!
2624 * The caller must check that the host can do high resolution timers.
2625 *
2626 * @param pVM The cross context VM structure.
2627 * @param idHostCpu The current host CPU id.
2628 * @param uHz The desired frequency.
2629 */
2630GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2631{
2632 NOREF(pVM);
2633#ifdef GVMM_SCHED_WITH_PPT
2634 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2635 Assert(RTTimerCanDoHighResolution());
2636
2637 /*
2638 * Resolve the per CPU data.
2639 */
2640 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2641 PGVMM pGVMM = g_pGVMM;
2642 if ( !VALID_PTR(pGVMM)
2643 || pGVMM->u32Magic != GVMM_MAGIC)
2644 return;
2645 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2646 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2647 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2648 && pCpu->idCpu == idHostCpu,
2649 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2650
2651 /*
2652 * Check whether we need to do anything about the timer.
2653 * We have to be a little bit careful since we might be racing the timer
2654 * callback here.
2655 */
2656 if (uHz > 16384)
2657 uHz = 16384; /** @todo add a query method for this! */
2658 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2659 && uHz >= pCpu->Ppt.uMinHz
2660 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2661 {
2662 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2663
2664 pCpu->Ppt.uDesiredHz = uHz;
2665 uint32_t cNsInterval = 0;
2666 if (!pCpu->Ppt.fStarted)
2667 {
2668 pCpu->Ppt.cStarts++;
2669 pCpu->Ppt.fStarted = true;
2670 pCpu->Ppt.fStarting = true;
2671 pCpu->Ppt.iTickHistorization = 0;
2672 pCpu->Ppt.uTimerHz = uHz;
2673 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2674 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2675 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2676 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2677 / cNsInterval;
2678 else
2679 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2680 }
2681
2682 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2683
2684 if (cNsInterval)
2685 {
2686 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2687 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2688 AssertRC(rc);
2689
2690 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2691 if (RT_FAILURE(rc))
2692 pCpu->Ppt.fStarted = false;
2693 pCpu->Ppt.fStarting = false;
2694 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2695 }
2696 }
2697#else /* !GVMM_SCHED_WITH_PPT */
2698 NOREF(idHostCpu); NOREF(uHz);
2699#endif /* !GVMM_SCHED_WITH_PPT */
2700}
2701
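/*
 * Caller-side sketch (illustrative only; uHz is the desired timer frequency
 * supplied by the caller): the function expects preemption to be disabled and
 * the host to support high resolution timers.
 *
 * @code
 *      if (RTTimerCanDoHighResolution())
 *      {
 *          RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
 *          RTThreadPreemptDisable(&PreemptState);
 *          GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, RTMpCpuId(), uHz);
 *          RTThreadPreemptRestore(&PreemptState);
 *      }
 * @endcode
 */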
2702
2703/**
2704 * Retrieves the GVMM statistics visible to the caller.
2705 *
2706 * @returns VBox status code.
2707 *
2708 * @param pStats Where to put the statistics.
2709 * @param pSession The current session.
2710 * @param pGVM The GVM to obtain statistics for. Optional.
2711 * @param pVM The VM structure corresponding to @a pGVM.
2712 */
2713GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
2714{
2715 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
2716
2717 /*
2718 * Validate input.
2719 */
2720 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2721 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2722 pStats->cVMs = 0; /* (crash before taking the sem...) */
2723
2724 /*
2725 * Take the lock and get the VM statistics.
2726 */
2727 PGVMM pGVMM;
2728 if (pGVM)
2729 {
2730 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
2731 if (RT_FAILURE(rc))
2732 return rc;
2733 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2734 }
2735 else
2736 {
2737 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2738 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2739
2740 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2741 AssertRCReturn(rc, rc);
2742 }
2743
2744 /*
2745 * Enumerate the VMs and add the ones visible to the statistics.
2746 */
2747 pStats->cVMs = 0;
2748 pStats->cEMTs = 0;
2749 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2750
2751 for (unsigned i = pGVMM->iUsedHead;
2752 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2753 i = pGVMM->aHandles[i].iNext)
2754 {
2755 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2756 void *pvObj = pGVMM->aHandles[i].pvObj;
2757 if ( VALID_PTR(pvObj)
2758 && VALID_PTR(pOtherGVM)
2759 && pOtherGVM->u32Magic == GVM_MAGIC
2760 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2761 {
2762 pStats->cVMs++;
2763 pStats->cEMTs += pOtherGVM->cCpus;
2764
2765 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
2766 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
2767 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
2768 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2769 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
2770
2771 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
2772 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2773 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2774
2775 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
2776 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
2777
2778 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
2779 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
2780 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
2781 }
2782 }
2783
2784 /*
2785 * Copy out the per host CPU statistics.
2786 */
2787 uint32_t iDstCpu = 0;
2788 uint32_t cSrcCpus = pGVMM->cHostCpus;
2789 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2790 {
2791 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2792 {
2793 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2794 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2795#ifdef GVMM_SCHED_WITH_PPT
2796 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2797 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2798 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2799 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2800#else
2801 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2802 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2803 pStats->aHostCpus[iDstCpu].cChanges = 0;
2804 pStats->aHostCpus[iDstCpu].cStarts = 0;
2805#endif
2806 iDstCpu++;
2807 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2808 break;
2809 }
2810 }
2811 pStats->cHostCpus = iDstCpu;
2812
2813 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2814
2815 return VINF_SUCCESS;
2816}
2817
2818
2819/**
2820 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2821 *
2822 * @returns see GVMMR0QueryStatistics.
2823 * @param pGVM The global (ring-0) VM structure. Optional.
2824 * @param pVM The cross context VM structure. Optional.
2825 * @param pReq Pointer to the request packet.
2826 * @param pSession The current session.
2827 */
2828GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
2829{
2830 /*
2831 * Validate input and pass it on.
2832 */
2833 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2834 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2835 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
2836
2837 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM, pVM);
2838}
2839
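/*
 * Request packet sketch (illustrative; assumes the standard SUPVMMR0REQHDR
 * fields):
 *
 * @code
 *      GVMMQUERYSTATISTICSSREQ Req;
 *      Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *      Req.Hdr.cbReq    = sizeof(Req);
 *      Req.pSession     = pSession;
 *      int rc = GVMMR0QueryStatisticsReq(pGVM, pVM, &Req, pSession);
 *      if (RT_SUCCESS(rc))
 *          LogRel(("GVMM: %u VMs, %u EMTs\n", Req.Stats.cVMs, Req.Stats.cEMTs));
 * @endcode
 */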
2840
2841/**
2842 * Resets the specified GVMM statistics.
2843 *
2844 * @returns VBox status code.
2845 *
2846 * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
2847 * @param pSession The current session.
2848 * @param pGVM The GVM to reset statistics for. Optional.
2849 * @param pVM The VM structure corresponding to @a pGVM.
2850 */
2851GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
2852{
2853 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
2854
2855 /*
2856 * Validate input.
2857 */
2858 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2859 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2860
2861 /*
2862 * Take the lock and get the VM statistics.
2863 */
2864 PGVMM pGVMM;
2865 if (pGVM)
2866 {
2867 int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
2868 if (RT_FAILURE(rc))
2869 return rc;
2870# define MAYBE_RESET_FIELD(field) \
2871 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2872 MAYBE_RESET_FIELD(cHaltCalls);
2873 MAYBE_RESET_FIELD(cHaltBlocking);
2874 MAYBE_RESET_FIELD(cHaltTimeouts);
2875 MAYBE_RESET_FIELD(cHaltNotBlocking);
2876 MAYBE_RESET_FIELD(cHaltWakeUps);
2877 MAYBE_RESET_FIELD(cWakeUpCalls);
2878 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2879 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2880 MAYBE_RESET_FIELD(cPokeCalls);
2881 MAYBE_RESET_FIELD(cPokeNotBusy);
2882 MAYBE_RESET_FIELD(cPollCalls);
2883 MAYBE_RESET_FIELD(cPollHalts);
2884 MAYBE_RESET_FIELD(cPollWakeUps);
2885# undef MAYBE_RESET_FIELD
2886 }
2887 else
2888 {
2889 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2890
2891 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2892 AssertRCReturn(rc, rc);
2893 }
2894
2895 /*
2896 * Enumerate the VMs and reset the requested statistics for the ones visible to the caller.
2897 */
2898 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
2899 {
2900 for (unsigned i = pGVMM->iUsedHead;
2901 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2902 i = pGVMM->aHandles[i].iNext)
2903 {
2904 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2905 void *pvObj = pGVMM->aHandles[i].pvObj;
2906 if ( VALID_PTR(pvObj)
2907 && VALID_PTR(pOtherGVM)
2908 && pOtherGVM->u32Magic == GVM_MAGIC
2909 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2910 {
2911# define MAYBE_RESET_FIELD(field) \
2912 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2913 MAYBE_RESET_FIELD(cHaltCalls);
2914 MAYBE_RESET_FIELD(cHaltBlocking);
2915 MAYBE_RESET_FIELD(cHaltTimeouts);
2916 MAYBE_RESET_FIELD(cHaltNotBlocking);
2917 MAYBE_RESET_FIELD(cHaltWakeUps);
2918 MAYBE_RESET_FIELD(cWakeUpCalls);
2919 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2920 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2921 MAYBE_RESET_FIELD(cPokeCalls);
2922 MAYBE_RESET_FIELD(cPokeNotBusy);
2923 MAYBE_RESET_FIELD(cPollCalls);
2924 MAYBE_RESET_FIELD(cPollHalts);
2925 MAYBE_RESET_FIELD(cPollWakeUps);
2926# undef MAYBE_RESET_FIELD
2927 }
2928 }
2929 }
2930
2931 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2932
2933 return VINF_SUCCESS;
2934}
2935
2936
2937/**
2938 * VMMR0 request wrapper for GVMMR0ResetStatistics.
2939 *
2940 * @returns see GVMMR0ResetStatistics.
2941 * @param pGVM The global (ring-0) VM structure. Optional.
2942 * @param pVM The cross context VM structure. Optional.
2943 * @param pReq Pointer to the request packet.
2944 * @param pSession The current session.
2945 */
2946GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PVM pVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
2947{
2948 /*
2949 * Validate input and pass it on.
2950 */
2951 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2952 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2953 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
2954
2955 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM, pVM);
2956}
2957