VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 84071

Last change on this file since 84071 was 82989, checked in by vboxsync, 5 years ago

VMM/GMMR0: Added a per-VM chunk TLB to avoid having everyone hammer the global spinlock. bugref:9627

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 106.3 KB
Line 
1/* $Id: GVMMR0.cpp 82989 2020-02-05 11:16:44Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered. This
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * TMCalcHostTimerFrequency() takes the max
42 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/iom.h>
57#include <VBox/vmm/pdm.h>
58#include <VBox/vmm/pgm.h>
59#include <VBox/vmm/vmm.h>
60#ifdef VBOX_WITH_NEM_R0
61# include <VBox/vmm/nem.h>
62#endif
63#include <VBox/vmm/vmcpuset.h>
64#include <VBox/vmm/vmcc.h>
65#include <VBox/param.h>
66#include <VBox/err.h>
67
68#include <iprt/asm.h>
69#include <iprt/asm-amd64-x86.h>
70#include <iprt/critsect.h>
71#include <iprt/mem.h>
72#include <iprt/semaphore.h>
73#include <iprt/time.h>
74#include <VBox/log.h>
75#include <iprt/thread.h>
76#include <iprt/process.h>
77#include <iprt/param.h>
78#include <iprt/string.h>
79#include <iprt/assert.h>
80#include <iprt/mem.h>
81#include <iprt/memobj.h>
82#include <iprt/mp.h>
83#include <iprt/cpuset.h>
84#include <iprt/spinlock.h>
85#include <iprt/timer.h>
86
87#include "dtrace/VBoxVMM.h"
88
89
90/*********************************************************************************************************************************
91* Defined Constants And Macros *
92*********************************************************************************************************************************/
93#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
94/** Define this to enable the periodic preemption timer. */
95# define GVMM_SCHED_WITH_PPT
96#endif
97
98
99/** @def GVMM_CHECK_SMAP_SETUP
100 * SMAP check setup: declares the local constant fKernelFeatures that the
 * check macros test. Because of the '|| 1' in the condition below, the real
 * checks are compiled in whenever VBOX_WITH_RAM_IN_KERNEL is not defined;
 * otherwise fKernelFeatures is 0 and the check macros reduce to NOREF(). */
101/** @def GVMM_CHECK_SMAP_CHECK
102 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
103 * it is reported via SUPR0Printf and @a a_BadExpr is executed. */
104/** @def GVMM_CHECK_SMAP_CHECK2
105 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
106 * be reported via SUPR0BadContext (using the session of @a a_pGVM, or NULL
107 * when @a a_pGVM is NULL), and @a a_BadExpr is executed. */
108#if (defined(VBOX_STRICT) || 1) && !defined(VBOX_WITH_RAM_IN_KERNEL)
109# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
110# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
111 do { \
112 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
113 { \
114 RTCCUINTREG fEflCheck = ASMGetFlags(); \
115 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
116 { /* likely */ } \
117 else \
118 { \
119 SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
120 a_BadExpr; \
121 } \
122 } \
123 } while (0)
124# define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) \
125 do { \
126 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
127 { \
128 RTCCUINTREG fEflCheck = ASMGetFlags(); \
129 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
130 { /* likely */ } \
131 else \
132 { \
133 SUPR0BadContext((a_pGVM) ? (a_pGVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
134 a_BadExpr; \
135 } \
136 } \
137 } while (0)
138#else
139# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
140# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
141# define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) NOREF(fKernelFeatures)
142#endif
143
144
145
146/*********************************************************************************************************************************
147* Structures and Typedefs *
148*********************************************************************************************************************************/
149
150/**
151 * Global VM handle.
 *
 * One entry of the GVMM::aHandles array. Each entry is chained via iNext
 * into either the free list (GVMM::iFreeHead) or the used list
 * (GVMM::iUsedHead).
152 */
153typedef struct GVMHANDLE
154{
155 /** The index of the next handle in the list (free or used). (0 is nil.) */
156 uint16_t volatile iNext;
157 /** Our own index / handle value. */
158 uint16_t iSelf;
159 /** The process ID of the handle owner.
160 * This is used for access checks. It is NIL_RTPROCESS from the moment the
 * handle is moved to the used list until GVMMR0CreateVM completes it. */
161 RTPROCESS ProcId;
162 /** The pointer to the ring-0 only (aka global) VM structure.
 * NULL while the handle is free or still being set up. */
163 PGVM pGVM;
164 /** The virtual machine object (as returned by SUPR0ObjRegister). */
165 void *pvObj;
166 /** The session this VM is associated with. */
167 PSUPDRVSESSION pSession;
168 /** The ring-0 handle of the EMT0 thread.
169 * This is used for ownership checks as well as looking up a VM handle by thread
170 * at times like assertions. It is NIL_RTNATIVETHREAD until GVMMR0CreateVM
 * completes the handle. */
171 RTNATIVETHREAD hEMT0;
172} GVMHANDLE;
173/** Pointer to a global VM handle. */
174typedef GVMHANDLE *PGVMHANDLE;
175
176/** Number of GVM handles (including the NIL handle at index 0). */
177#if HC_ARCH_BITS == 64
178# define GVMM_MAX_HANDLES 8192
179#else
180# define GVMM_MAX_HANDLES 128
181#endif
182
183/**
184 * Per host CPU GVMM data.
 *
 * One entry per host CPU set index lives in GVMM::aHostCpus. Entries for
 * CPUs that are not in the set returned by RTMpGetSet get idCpu set to
 * NIL_RTCPUID and u32Magic set to 0 by GVMMR0Init.
185 */
186typedef struct GVMMHOSTCPU
187{
188 /** Magic number (GVMMHOSTCPU_MAGIC). */
189 uint32_t volatile u32Magic;
190 /** The CPU ID. */
191 RTCPUID idCpu;
192 /** The CPU set index. */
193 uint32_t idxCpuSet;
194
195#ifdef GVMM_SCHED_WITH_PPT
196 /** Periodic preemption timer data. */
197 struct
198 {
199 /** The handle to the periodic preemption timer. */
200 PRTTIMER pTimer;
201 /** Spinlock protecting the data below. */
202 RTSPINLOCK hSpinlock;
203 /** The smallest Hz that we need to care about. (static) */
204 uint32_t uMinHz;
205 /** The number of ticks between each historization. */
206 uint32_t cTicksHistoriziationInterval;
207 /** The current historization tick (counting up to
208 * cTicksHistoriziationInterval and then resetting). */
209 uint32_t iTickHistorization;
210 /** The current timer interval. This is set to 0 when inactive. */
211 uint32_t cNsInterval;
212 /** The current timer frequency. This is set to 0 when inactive. */
213 uint32_t uTimerHz;
214 /** The current max frequency reported by the EMTs.
215 * This gets historicized and reset by the timer callback. This is
216 * read without holding the spinlock, so it needs atomic updating. */
217 uint32_t volatile uDesiredHz;
218 /** Whether the timer was started or not. */
219 bool volatile fStarted;
220 /** Set if we're starting the timer. */
221 bool volatile fStarting;
222 /** The index of the next history entry (mod it). */
223 uint32_t iHzHistory;
224 /** Historicized uDesiredHz values. The array wraps around, new entries
225 * are added at iHzHistory. This is updated approximately every
226 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
227 uint32_t aHzHistory[8];
228 /** Statistics counter for recording the number of interval changes. */
229 uint32_t cChanges;
230 /** Statistics counter for recording the number of timer starts. */
231 uint32_t cStarts;
232 } Ppt;
233#endif /* GVMM_SCHED_WITH_PPT */
234
235} GVMMHOSTCPU;
236/** Pointer to the per host CPU GVMM data. */
237typedef GVMMHOSTCPU *PGVMMHOSTCPU;
238/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
239#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
240/** The interval one history entry should cover (approximately), given in
241 * nanoseconds. */
242#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
243
244
245/**
246 * The GVMM instance data.
 *
 * A single instance is allocated by GVMMR0Init with room for one GVMMHOSTCPU
 * entry per host CPU (see aHostCpus) and published through g_pGVMM.
247 */
248typedef struct GVMM
249{
250 /** Eyecatcher / magic (GVMM_MAGIC). */
251 uint32_t u32Magic;
252 /** The index of the head of the free handle chain. (0 is nil.) */
253 uint16_t volatile iFreeHead;
254 /** The index of the head of the active handle chain. (0 is nil.) */
255 uint16_t volatile iUsedHead;
256 /** The number of VMs. */
257 uint16_t volatile cVMs;
258 /** Alignment padding. */
259 uint16_t u16Reserved;
260 /** The number of EMTs. */
261 uint32_t volatile cEMTs;
262 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
263 uint32_t volatile cHaltedEMTs;
264 /** Mini lock for restricting early wake-ups to one thread. */
265 bool volatile fDoingEarlyWakeUps;
266 bool afPadding[3]; /**< explicit alignment padding. */
267 /** When the next halted or sleeping EMT will wake up.
268 * This is set to 0 when it needs recalculating and to UINT64_MAX when
269 * there are no halted or sleeping EMTs in the GVMM. */
270 uint64_t uNsNextEmtWakeup;
271 /** The lock used to serialize VM creation, destruction and associated events that
272 * aren't performance critical. Owners may acquire the list lock. */
273 RTCRITSECT CreateDestroyLock;
274 /** The lock used to serialize used list updates and accesses.
275 * This indirectly includes scheduling since the scheduler will have to walk the
276 * used list to examine running VMs. Owners may not acquire any other locks. */
277 RTCRITSECTRW UsedLock;
278 /** The handle array.
279 * The size of this array defines the maximum number of currently running VMs.
280 * The first entry is unused as it represents the NIL handle. */
281 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
282
283 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
284 * The number of EMTs that means we no longer consider ourselves alone on a
285 * CPU/Core.
286 */
287 uint32_t cEMTsMeansCompany;
288 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
289 * The minimum sleep time for when we're alone, in nano seconds.
290 */
291 uint32_t nsMinSleepAlone;
292 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
293 * The minimum sleep time for when we've got company, in nano seconds.
294 */
295 uint32_t nsMinSleepCompany;
296 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
297 * The limit for the first round of early wake-ups, given in nano seconds.
298 */
299 uint32_t nsEarlyWakeUp1;
300 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
301 * The limit for the second round of early wake-ups, given in nano seconds.
302 */
303 uint32_t nsEarlyWakeUp2;
304
305 /** Set if we're doing early wake-ups.
306 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2: it is true only when
 * both are non-zero. */
307 bool volatile fDoEarlyWakeUps;
308
309 /** The number of entries in the host CPU array (aHostCpus). */
310 uint32_t cHostCpus;
311 /** Per host CPU data (variable length; declared with one entry, the
 * allocation in GVMMR0Init is sized for cHostCpus entries). */
312 GVMMHOSTCPU aHostCpus[1];
313} GVMM;
314AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
315AssertCompileMemberAlignment(GVMM, UsedLock, 8);
316AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
317/** Pointer to the GVMM instance data. */
318typedef GVMM *PGVMM;
319
320/** The GVMM::u32Magic value (Charlie Haden). */
321#define GVMM_MAGIC UINT32_C(0x19370806)
322
323
324
325/*********************************************************************************************************************************
326* Global Variables *
327*********************************************************************************************************************************/
328/** Pointer to the GVMM instance data.
 * Set by GVMMR0Init once initialization has succeeded and reset to NULL by
 * GVMMR0Term before the instance is torn down.
329 * (Just my general dislike for global variables.) */
330static PGVMM g_pGVMM = NULL;
331
332/** Macro for obtaining and validating the g_pGVMM pointer.
333 * On failure (invalid pointer or wrong u32Magic) it will return from the
 * invoking function with the specified return value.
334 *
335 * @param pGVMM The name of the pGVMM variable.
336 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
337 * status codes.
338 */
339#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
340 do { \
341 (pGVMM) = g_pGVMM;\
342 AssertPtrReturn((pGVMM), (rc)); \
343 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
344 } while (0)
345
346/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
347 * On failure (invalid pointer or wrong u32Magic) it will return from the
 * invoking function.
348 *
349 * @param pGVMM The name of the pGVMM variable.
350 */
351#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
352 do { \
353 (pGVMM) = g_pGVMM;\
354 AssertPtrReturnVoid((pGVMM)); \
355 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
356 } while (0)
357
358
359/*********************************************************************************************************************************
360* Internal Functions *
361*********************************************************************************************************************************/
362static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
363static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
364static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
365static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
366
367#ifdef GVMM_SCHED_WITH_PPT
368static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
369#endif
370
371
372/**
373 * Initializes the GVMM.
374 *
375 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
376 *
377 * @returns VBox status code.
378 */
379GVMMR0DECL(int) GVMMR0Init(void)
380{
381 LogFlow(("GVMMR0Init:\n"));
382
383 /*
384 * Allocate and initialize the instance data.
385 */
386 uint32_t cHostCpus = RTMpGetArraySize();
387 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
388
389 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
390 if (!pGVMM)
391 return VERR_NO_MEMORY;
392 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
393 "GVMM-CreateDestroyLock");
394 if (RT_SUCCESS(rc))
395 {
396 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
397 if (RT_SUCCESS(rc))
398 {
399 pGVMM->u32Magic = GVMM_MAGIC;
400 pGVMM->iUsedHead = 0;
401 pGVMM->iFreeHead = 1;
402
403 /* the nil handle */
404 pGVMM->aHandles[0].iSelf = 0;
405 pGVMM->aHandles[0].iNext = 0;
406
407 /* the tail */
408 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
409 pGVMM->aHandles[i].iSelf = i;
410 pGVMM->aHandles[i].iNext = 0; /* nil */
411
412 /* the rest */
413 while (i-- > 1)
414 {
415 pGVMM->aHandles[i].iSelf = i;
416 pGVMM->aHandles[i].iNext = i + 1;
417 }
418
419 /* The default configuration values. */
420 uint32_t cNsResolution = RTSemEventMultiGetResolution();
421 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */
422 if (cNsResolution >= 5*RT_NS_100US)
423 {
424 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
425 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
426 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
427 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
428 }
429 else if (cNsResolution > RT_NS_100US)
430 {
431 pGVMM->nsMinSleepAlone = cNsResolution / 2;
432 pGVMM->nsMinSleepCompany = cNsResolution / 4;
433 pGVMM->nsEarlyWakeUp1 = 0;
434 pGVMM->nsEarlyWakeUp2 = 0;
435 }
436 else
437 {
438 pGVMM->nsMinSleepAlone = 2000;
439 pGVMM->nsMinSleepCompany = 2000;
440 pGVMM->nsEarlyWakeUp1 = 0;
441 pGVMM->nsEarlyWakeUp2 = 0;
442 }
443 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
444
445 /* The host CPU data. */
446 pGVMM->cHostCpus = cHostCpus;
447 uint32_t iCpu = cHostCpus;
448 RTCPUSET PossibleSet;
449 RTMpGetSet(&PossibleSet);
450 while (iCpu-- > 0)
451 {
452 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
453#ifdef GVMM_SCHED_WITH_PPT
454 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
455 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
456 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
457 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
458 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
459 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
460 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
461 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
462 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
463 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
464 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
465 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
466#endif
467
468 if (RTCpuSetIsMember(&PossibleSet, iCpu))
469 {
470 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
471 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
472
473#ifdef GVMM_SCHED_WITH_PPT
474 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
475 50*1000*1000 /* whatever */,
476 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
477 gvmmR0SchedPeriodicPreemptionTimerCallback,
478 &pGVMM->aHostCpus[iCpu]);
479 if (RT_SUCCESS(rc))
480 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
481 if (RT_FAILURE(rc))
482 {
483 while (iCpu < cHostCpus)
484 {
485 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
486 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
487 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
488 iCpu++;
489 }
490 break;
491 }
492#endif
493 }
494 else
495 {
496 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
497 pGVMM->aHostCpus[iCpu].u32Magic = 0;
498 }
499 }
500 if (RT_SUCCESS(rc))
501 {
502 g_pGVMM = pGVMM;
503 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
504 return VINF_SUCCESS;
505 }
506
507 /* bail out. */
508 RTCritSectRwDelete(&pGVMM->UsedLock);
509 }
510 RTCritSectDelete(&pGVMM->CreateDestroyLock);
511 }
512
513 RTMemFree(pGVMM);
514 return rc;
515}
516
517
518/**
519 * Terminates the GVM.
520 *
521 * This is called while owning the loader semaphore (see supdrvLdrFree()).
522 * And unless something is wrong, there should be absolutely no VMs
523 * registered at this point.
524 */
525GVMMR0DECL(void) GVMMR0Term(void)
526{
527 LogFlow(("GVMMR0Term:\n"));
528
529 PGVMM pGVMM = g_pGVMM;
530 g_pGVMM = NULL;
531 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
532 {
533 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
534 return;
535 }
536
537 /*
538 * First of all, stop all active timers.
539 */
540 uint32_t cActiveTimers = 0;
541 uint32_t iCpu = pGVMM->cHostCpus;
542 while (iCpu-- > 0)
543 {
544 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
545#ifdef GVMM_SCHED_WITH_PPT
546 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
547 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
548 cActiveTimers++;
549#endif
550 }
551 if (cActiveTimers)
552 RTThreadSleep(1); /* fudge */
553
554 /*
555 * Invalidate the and free resources.
556 */
557 pGVMM->u32Magic = ~GVMM_MAGIC;
558 RTCritSectRwDelete(&pGVMM->UsedLock);
559 RTCritSectDelete(&pGVMM->CreateDestroyLock);
560
561 pGVMM->iFreeHead = 0;
562 if (pGVMM->iUsedHead)
563 {
564 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
565 pGVMM->iUsedHead = 0;
566 }
567
568#ifdef GVMM_SCHED_WITH_PPT
569 iCpu = pGVMM->cHostCpus;
570 while (iCpu-- > 0)
571 {
572 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
573 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
574 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
575 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
576 }
577#endif
578
579 RTMemFree(pGVMM);
580}
581
582
583/**
584 * A quick hack for setting global config values.
585 *
586 * @returns VBox status code.
587 *
588 * @param pSession The session handle. Used for authentication.
589 * @param pszName The variable name.
590 * @param u64Value The new value.
591 */
592GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
593{
594 /*
595 * Validate input.
596 */
597 PGVMM pGVMM;
598 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
599 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
600 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
601
602 /*
603 * String switch time!
604 */
605 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
606 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
607 int rc = VINF_SUCCESS;
608 pszName += sizeof("/GVMM/") - 1;
609 if (!strcmp(pszName, "cEMTsMeansCompany"))
610 {
611 if (u64Value <= UINT32_MAX)
612 pGVMM->cEMTsMeansCompany = u64Value;
613 else
614 rc = VERR_OUT_OF_RANGE;
615 }
616 else if (!strcmp(pszName, "MinSleepAlone"))
617 {
618 if (u64Value <= RT_NS_100MS)
619 pGVMM->nsMinSleepAlone = u64Value;
620 else
621 rc = VERR_OUT_OF_RANGE;
622 }
623 else if (!strcmp(pszName, "MinSleepCompany"))
624 {
625 if (u64Value <= RT_NS_100MS)
626 pGVMM->nsMinSleepCompany = u64Value;
627 else
628 rc = VERR_OUT_OF_RANGE;
629 }
630 else if (!strcmp(pszName, "EarlyWakeUp1"))
631 {
632 if (u64Value <= RT_NS_100MS)
633 {
634 pGVMM->nsEarlyWakeUp1 = u64Value;
635 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
636 }
637 else
638 rc = VERR_OUT_OF_RANGE;
639 }
640 else if (!strcmp(pszName, "EarlyWakeUp2"))
641 {
642 if (u64Value <= RT_NS_100MS)
643 {
644 pGVMM->nsEarlyWakeUp2 = u64Value;
645 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
646 }
647 else
648 rc = VERR_OUT_OF_RANGE;
649 }
650 else
651 rc = VERR_CFGM_VALUE_NOT_FOUND;
652 return rc;
653}
654
655
656/**
657 * A quick hack for getting global config values.
658 *
659 * @returns VBox status code.
660 *
661 * @param pSession The session handle. Used for authentication.
662 * @param pszName The variable name.
663 * @param pu64Value Where to return the value.
664 */
665GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
666{
667 /*
668 * Validate input.
669 */
670 PGVMM pGVMM;
671 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
672 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
673 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
674 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
675
676 /*
677 * String switch time!
678 */
679 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
680 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
681 int rc = VINF_SUCCESS;
682 pszName += sizeof("/GVMM/") - 1;
683 if (!strcmp(pszName, "cEMTsMeansCompany"))
684 *pu64Value = pGVMM->cEMTsMeansCompany;
685 else if (!strcmp(pszName, "MinSleepAlone"))
686 *pu64Value = pGVMM->nsMinSleepAlone;
687 else if (!strcmp(pszName, "MinSleepCompany"))
688 *pu64Value = pGVMM->nsMinSleepCompany;
689 else if (!strcmp(pszName, "EarlyWakeUp1"))
690 *pu64Value = pGVMM->nsEarlyWakeUp1;
691 else if (!strcmp(pszName, "EarlyWakeUp2"))
692 *pu64Value = pGVMM->nsEarlyWakeUp2;
693 else
694 rc = VERR_CFGM_VALUE_NOT_FOUND;
695 return rc;
696}
697
698
699/**
700 * Acquire the 'used' lock in shared mode.
701 *
702 * This prevents destruction of the VM while we're in ring-0.
703 *
704 * @returns IPRT status code, see RTCritSectRwEnterShared.
705 * @param a_pGVMM The GVMM instance data.
706 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
707 */
708#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
709
710/**
711 * Release the 'used' lock when owning it in shared mode.
712 *
713 * @returns IPRT status code, see RTCritSectRwLeaveShared.
714 * @param a_pGVMM The GVMM instance data.
715 * @sa GVMMR0_USED_SHARED_LOCK
716 */
717#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
718
719/**
720 * Acquire the 'used' lock in exclusive mode.
721 *
722 * Only use this macro when making changes to the used list.
723 *
724 * @returns IPRT status code, see RTCritSectRwEnterExcl.
725 * @param a_pGVMM The GVMM instance data.
726 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
727 */
728#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
729
730/**
731 * Release the 'used' lock when owning it in exclusive mode.
732 *
733 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
734 * @param a_pGVMM The GVMM instance data.
735 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
736 */
737#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
738
739
740/**
741 * Try acquire the 'create & destroy' lock.
742 *
743 * @returns IPRT status code, see RTSemFastMutexRequest.
744 * @param pGVMM The GVMM instance data.
745 */
746DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
747{
748 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
749 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
750 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
751 return rc;
752}
753
754
755/**
756 * Release the 'create & destroy' lock.
757 *
758 * @returns IPRT status code, see RTSemFastMutexRequest.
759 * @param pGVMM The GVMM instance data.
760 */
761DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
762{
763 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
764 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
765 AssertRC(rc);
766 return rc;
767}
768
769
770/**
771 * Request wrapper for the GVMMR0CreateVM API.
772 *
773 * @returns VBox status code.
774 * @param pReq The request buffer.
775 * @param pSession The session handle. The VM will be associated with this.
776 */
777GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
778{
779 /*
780 * Validate the request.
781 */
782 if (!RT_VALID_PTR(pReq))
783 return VERR_INVALID_POINTER;
784 if (pReq->Hdr.cbReq != sizeof(*pReq))
785 return VERR_INVALID_PARAMETER;
786 if (pReq->pSession != pSession)
787 return VERR_INVALID_POINTER;
788
789 /*
790 * Execute it.
791 */
792 PGVM pGVM;
793 pReq->pVMR0 = NULL;
794 pReq->pVMR3 = NIL_RTR3PTR;
795 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
796 if (RT_SUCCESS(rc))
797 {
798 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
799 pReq->pVMR3 = pGVM->pVMR3;
800 }
801 return rc;
802}
803
804
805/**
806 * Allocates the VM structure and registers it with GVM.
807 *
808 * The caller will become the VM owner and there by the EMT.
809 *
810 * @returns VBox status code.
811 * @param pSession The support driver session.
812 * @param cCpus Number of virtual CPUs for the new VM.
813 * @param ppGVM Where to store the pointer to the VM structure.
814 *
815 * @thread EMT.
816 */
817GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
818{
819 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
820 PGVMM pGVMM;
821 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
822
823 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
824 *ppGVM = NULL;
825
826 if ( cCpus == 0
827 || cCpus > VMM_MAX_CPU_COUNT)
828 return VERR_INVALID_PARAMETER;
829
830 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
831 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
832 RTPROCESS ProcId = RTProcSelf();
833 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
834
835 /*
836 * The whole allocation process is protected by the lock.
837 */
838 int rc = gvmmR0CreateDestroyLock(pGVMM);
839 AssertRCReturn(rc, rc);
840
841 /*
842 * Only one VM per session.
843 */
844 if (SUPR0GetSessionVM(pSession) != NULL)
845 {
846 gvmmR0CreateDestroyUnlock(pGVMM);
847 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
848 return VERR_ALREADY_EXISTS;
849 }
850
851 /*
852 * Allocate a handle first so we don't waste resources unnecessarily.
853 */
854 uint16_t iHandle = pGVMM->iFreeHead;
855 if (iHandle)
856 {
857 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
858
859 /* consistency checks, a bit paranoid as always. */
860 if ( !pHandle->pGVM
861 && !pHandle->pvObj
862 && pHandle->iSelf == iHandle)
863 {
864 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
865 if (pHandle->pvObj)
866 {
867 /*
868 * Move the handle from the free to used list and perform permission checks.
869 */
870 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
871 AssertRC(rc);
872
873 pGVMM->iFreeHead = pHandle->iNext;
874 pHandle->iNext = pGVMM->iUsedHead;
875 pGVMM->iUsedHead = iHandle;
876 pGVMM->cVMs++;
877
878 pHandle->pGVM = NULL;
879 pHandle->pSession = pSession;
880 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
881 pHandle->ProcId = NIL_RTPROCESS;
882
883 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
884
885 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
886 if (RT_SUCCESS(rc))
887 {
888 /*
889 * Allocate memory for the VM structure (combined VM + GVM).
890 */
891 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
892 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
893 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
894 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
895 if (RT_SUCCESS(rc))
896 {
897 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
898 AssertPtr(pGVM);
899
900 /*
901 * Initialise the structure.
902 */
903 RT_BZERO(pGVM, cPages << PAGE_SHIFT);
904 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
905 pGVM->gvmm.s.VMMemObj = hVMMemObj;
906 rc = GMMR0InitPerVMData(pGVM);
907 int rc2 = PGMR0InitPerVMData(pGVM);
908 PDMR0InitPerVMData(pGVM);
909 IOMR0InitPerVMData(pGVM);
910 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2))
911 {
912 /*
913 * Allocate page array.
914 * This currently have to be made available to ring-3, but this is should change eventually.
915 */
916 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
917 if (RT_SUCCESS(rc))
918 {
919 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
920 for (uint32_t iPage = 0; iPage < cPages; iPage++)
921 {
922 paPages[iPage].uReserved = 0;
923 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
924 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
925 }
926
927 /*
928 * Map the page array, VM and VMCPU structures into ring-3.
929 */
930 AssertCompileSizeAlignment(VM, PAGE_SIZE);
931 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
932 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
933 0 /*offSub*/, sizeof(VM));
934 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
935 {
936 AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
937 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
938 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
939 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
940 }
941 if (RT_SUCCESS(rc))
942 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
943 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
944 NIL_RTR0PROCESS);
945 if (RT_SUCCESS(rc))
946 {
947 /*
948 * Initialize all the VM pointers.
949 */
950 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
951 AssertPtr((void *)pVMR3);
952
953 for (VMCPUID i = 0; i < cCpus; i++)
954 {
955 pGVM->aCpus[i].pVMR0 = pGVM;
956 pGVM->aCpus[i].pVMR3 = pVMR3;
957 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
958 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
959 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
960 AssertPtr((void *)pGVM->apCpusR3[i]);
961 }
962
963 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
964 AssertPtr((void *)pGVM->paVMPagesR3);
965
966 /*
967 * Complete the handle - take the UsedLock sem just to be careful.
968 */
969 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
970 AssertRC(rc);
971
972 pHandle->pGVM = pGVM;
973 pHandle->hEMT0 = hEMT0;
974 pHandle->ProcId = ProcId;
975 pGVM->pVMR3 = pVMR3;
976 pGVM->pVMR3Unsafe = pVMR3;
977 pGVM->aCpus[0].hEMT = hEMT0;
978 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
979 pGVMM->cEMTs += cCpus;
980
981 /* Associate it with the session and create the context hook for EMT0. */
982 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
983 if (RT_SUCCESS(rc))
984 {
985 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
986 if (RT_SUCCESS(rc))
987 {
988 /*
989 * Done!
990 */
991 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
992
993 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
994 gvmmR0CreateDestroyUnlock(pGVMM);
995
996 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
997
998 *ppGVM = pGVM;
999 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1000 return VINF_SUCCESS;
1001 }
1002
1003 SUPR0SetSessionVM(pSession, NULL, NULL);
1004 }
1005 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1006 }
1007
1008 /* Cleanup mappings. */
1009 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1010 {
1011 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1012 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1013 }
1014 for (VMCPUID i = 0; i < cCpus; i++)
1015 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1016 {
1017 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1018 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1019 }
1020 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1021 {
1022 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1023 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1024 }
1025 }
1026 }
1027 else if (RT_SUCCESS(rc))
1028 rc = rc2;
1029 }
1030 }
1031 /* else: The user wasn't permitted to create this VM. */
1032
1033 /*
1034 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1035 * object reference here. A little extra mess because of non-recursive lock.
1036 */
1037 void *pvObj = pHandle->pvObj;
1038 pHandle->pvObj = NULL;
1039 gvmmR0CreateDestroyUnlock(pGVMM);
1040
1041 SUPR0ObjRelease(pvObj, pSession);
1042
1043 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1044 return rc;
1045 }
1046
1047 rc = VERR_NO_MEMORY;
1048 }
1049 else
1050 rc = VERR_GVMM_IPE_1;
1051 }
1052 else
1053 rc = VERR_GVM_TOO_MANY_VMS;
1054
1055 gvmmR0CreateDestroyUnlock(pGVMM);
1056 return rc;
1057}
1058
1059
1060/**
1061 * Initializes the per VM data belonging to GVMM.
1062 *
1063 * @param pGVM Pointer to the global VM structure.
1064 * @param hSelf The handle.
1065 * @param cCpus The CPU count.
1066 * @param pSession The session this VM is associated with.
1067 */
1068static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1069{
1070 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1071 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1072 AssertCompileMemberAlignment(VM, cpum, 64);
1073 AssertCompileMemberAlignment(VM, tm, 64);
1074
1075 /* GVM: */
1076 pGVM->u32Magic = GVM_MAGIC;
1077 pGVM->hSelf = hSelf;
1078 pGVM->cCpus = cCpus;
1079 pGVM->pSession = pSession;
1080 pGVM->pSelf = pGVM;
1081
1082 /* VM: */
1083 pGVM->enmVMState = VMSTATE_CREATING;
1084 pGVM->hSelfUnsafe = hSelf;
1085 pGVM->pSessionUnsafe = pSession;
1086 pGVM->pVMR0ForCall = pGVM;
1087 pGVM->cCpusUnsafe = cCpus;
1088 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1089 pGVM->uStructVersion = 1;
1090 pGVM->cbSelf = sizeof(VM);
1091 pGVM->cbVCpu = sizeof(VMCPU);
1092
1093 /* GVMM: */
1094 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1095 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1096 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1097 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1098 pGVM->gvmm.s.fDoneVMMR0Init = false;
1099 pGVM->gvmm.s.fDoneVMMR0Term = false;
1100
1101 /*
1102 * Per virtual CPU.
1103 */
1104 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1105 {
1106 pGVM->aCpus[i].idCpu = i;
1107 pGVM->aCpus[i].idCpuUnsafe = i;
1108 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1109 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1110 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1111 pGVM->aCpus[i].pGVM = pGVM;
1112 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1113 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1114 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1115 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1116 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1117 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1118 }
1119}
1120
1121
1122/**
1123 * Does the VM initialization.
1124 *
1125 * @returns VBox status code.
1126 * @param pGVM The global (ring-0) VM structure.
1127 */
1128GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1129{
1130 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1131
1132 int rc = VERR_INTERNAL_ERROR_3;
1133 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1134 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1135 {
1136 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1137 {
1138 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1139 if (RT_FAILURE(rc))
1140 {
1141 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1142 break;
1143 }
1144 }
1145 }
1146 else
1147 rc = VERR_WRONG_ORDER;
1148
1149 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1150 return rc;
1151}
1152
1153
1154/**
1155 * Indicates that we're done with the ring-0 initialization
1156 * of the VM.
1157 *
1158 * @param pGVM The global (ring-0) VM structure.
1159 * @thread EMT(0)
1160 */
1161GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1162{
1163 /* Set the indicator. */
1164 pGVM->gvmm.s.fDoneVMMR0Init = true;
1165}
1166
1167
1168/**
1169 * Indicates that we're doing the ring-0 termination of the VM.
1170 *
1171 * @returns true if termination hasn't been done already, false if it has.
1172 * @param pGVM Pointer to the global VM structure. Optional.
1173 * @thread EMT(0) or session cleanup thread.
1174 */
1175GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1176{
1177 /* Validate the VM structure, state and handle. */
1178 AssertPtrReturn(pGVM, false);
1179
1180 /* Set the indicator. */
1181 if (pGVM->gvmm.s.fDoneVMMR0Term)
1182 return false;
1183 pGVM->gvmm.s.fDoneVMMR0Term = true;
1184 return true;
1185}
1186
1187
1188/**
1189 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1190 *
1191 * This is call from the vmR3DestroyFinalBit and from a error path in VMR3Create,
1192 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1193 * would've been nice if the caller was actually the EMT thread or that we somehow
1194 * could've associated the calling thread with the VM up front.
1195 *
1196 * @returns VBox status code.
1197 * @param pGVM The global (ring-0) VM structure.
1198 *
1199 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1200 */
1201GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1202{
1203 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1204 PGVMM pGVMM;
1205 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1206
1207 /*
1208 * Validate the VM structure, state and caller.
1209 */
1210 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1211 AssertReturn(!((uintptr_t)pGVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1212 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1213 VERR_WRONG_ORDER);
1214
1215 uint32_t hGVM = pGVM->hSelf;
1216 ASMCompilerBarrier();
1217 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1218 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1219
1220 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1221 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1222
1223 RTPROCESS ProcId = RTProcSelf();
1224 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1225 AssertReturn( ( pHandle->hEMT0 == hSelf
1226 && pHandle->ProcId == ProcId)
1227 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1228
1229 /*
1230 * Lookup the handle and destroy the object.
1231 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1232 * object, we take some precautions against racing callers just in case...
1233 */
1234 int rc = gvmmR0CreateDestroyLock(pGVMM);
1235 AssertRC(rc);
1236
1237 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1238 if ( pHandle->pGVM == pGVM
1239 && ( ( pHandle->hEMT0 == hSelf
1240 && pHandle->ProcId == ProcId)
1241 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1242 && RT_VALID_PTR(pHandle->pvObj)
1243 && RT_VALID_PTR(pHandle->pSession)
1244 && RT_VALID_PTR(pHandle->pGVM)
1245 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1246 {
1247 /* Check that other EMTs have deregistered. */
1248 uint32_t cNotDeregistered = 0;
1249 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1250 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1251 if (cNotDeregistered == 0)
1252 {
1253 /* Grab the object pointer. */
1254 void *pvObj = pHandle->pvObj;
1255 pHandle->pvObj = NULL;
1256 gvmmR0CreateDestroyUnlock(pGVMM);
1257
1258 SUPR0ObjRelease(pvObj, pHandle->pSession);
1259 }
1260 else
1261 {
1262 gvmmR0CreateDestroyUnlock(pGVMM);
1263 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1264 }
1265 }
1266 else
1267 {
1268 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1269 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1270 gvmmR0CreateDestroyUnlock(pGVMM);
1271 rc = VERR_GVMM_IPE_2;
1272 }
1273
1274 return rc;
1275}
1276
1277
1278/**
1279 * Performs VM cleanup task as part of object destruction.
1280 *
1281 * @param pGVM The GVM pointer.
1282 */
1283static void gvmmR0CleanupVM(PGVM pGVM)
1284{
1285 if ( pGVM->gvmm.s.fDoneVMMR0Init
1286 && !pGVM->gvmm.s.fDoneVMMR0Term)
1287 {
1288 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1289 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1290 {
1291 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1292 VMMR0TermVM(pGVM, NIL_VMCPUID);
1293 }
1294 else
1295 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1296 }
1297
1298 GMMR0CleanupVM(pGVM);
1299#ifdef VBOX_WITH_NEM_R0
1300 NEMR0CleanupVM(pGVM);
1301#endif
1302 PDMR0CleanupVM(pGVM);
1303 IOMR0CleanupVM(pGVM);
1304 PGMR0CleanupVM(pGVM);
1305
1306 AssertCompile(NIL_RTTHREADCTXHOOK == (RTTHREADCTXHOOK)0); /* Depends on zero initialized memory working for NIL at the moment. */
1307 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1308 {
1309 /** @todo Can we busy wait here for all thread-context hooks to be
1310 * deregistered before releasing (destroying) it? Only until we find a
1311 * solution for not deregistering hooks everytime we're leaving HMR0
1312 * context. */
1313 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1314 }
1315}
1316
1317
1318/**
1319 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1320 *
1321 * pvUser1 is the GVM instance pointer.
1322 * pvUser2 is the handle pointer.
1323 */
1324static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1325{
1326 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1327
1328 NOREF(pvObj);
1329
1330 /*
1331 * Some quick, paranoid, input validation.
1332 */
1333 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1334 AssertPtr(pHandle);
1335 PGVMM pGVMM = (PGVMM)pvUser1;
1336 Assert(pGVMM == g_pGVMM);
1337 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1338 if ( !iHandle
1339 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1340 || iHandle != pHandle->iSelf)
1341 {
1342 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1343 return;
1344 }
1345
1346 int rc = gvmmR0CreateDestroyLock(pGVMM);
1347 AssertRC(rc);
1348 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1349 AssertRC(rc);
1350
1351 /*
1352 * This is a tad slow but a doubly linked list is too much hassle.
1353 */
1354 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1355 {
1356 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1357 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1358 gvmmR0CreateDestroyUnlock(pGVMM);
1359 return;
1360 }
1361
1362 if (pGVMM->iUsedHead == iHandle)
1363 pGVMM->iUsedHead = pHandle->iNext;
1364 else
1365 {
1366 uint16_t iPrev = pGVMM->iUsedHead;
1367 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1368 while (iPrev)
1369 {
1370 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1371 {
1372 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1373 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1374 gvmmR0CreateDestroyUnlock(pGVMM);
1375 return;
1376 }
1377 if (RT_UNLIKELY(c-- <= 0))
1378 {
1379 iPrev = 0;
1380 break;
1381 }
1382
1383 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1384 break;
1385 iPrev = pGVMM->aHandles[iPrev].iNext;
1386 }
1387 if (!iPrev)
1388 {
1389 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1390 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1391 gvmmR0CreateDestroyUnlock(pGVMM);
1392 return;
1393 }
1394
1395 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1396 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1397 }
1398 pHandle->iNext = 0;
1399 pGVMM->cVMs--;
1400
1401 /*
1402 * Do the global cleanup round.
1403 */
1404 PGVM pGVM = pHandle->pGVM;
1405 if ( RT_VALID_PTR(pGVM)
1406 && pGVM->u32Magic == GVM_MAGIC)
1407 {
1408 pGVMM->cEMTs -= pGVM->cCpus;
1409
1410 if (pGVM->pSession)
1411 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1412
1413 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1414
1415 gvmmR0CleanupVM(pGVM);
1416
1417 /*
1418 * Do the GVMM cleanup - must be done last.
1419 */
1420 /* The VM and VM pages mappings/allocations. */
1421 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1422 {
1423 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1424 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1425 }
1426
1427 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1428 {
1429 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1430 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1431 }
1432
1433 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1434 {
1435 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1436 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1437 }
1438
1439 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1440 {
1441 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1442 {
1443 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1444 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1445 }
1446 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1447 {
1448 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1449 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1450 }
1451 }
1452
1453 /* the GVM structure itself. */
1454 pGVM->u32Magic |= UINT32_C(0x80000000);
1455 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1456 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1457 pGVM = NULL;
1458
1459 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1460 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1461 AssertRC(rc);
1462 }
1463 /* else: GVMMR0CreateVM cleanup. */
1464
1465 /*
1466 * Free the handle.
1467 */
1468 pHandle->iNext = pGVMM->iFreeHead;
1469 pGVMM->iFreeHead = iHandle;
1470 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1471 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1472 ASMAtomicWriteNullPtr(&pHandle->pSession);
1473 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1474 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1475
1476 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1477 gvmmR0CreateDestroyUnlock(pGVMM);
1478 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1479}
1480
1481
1482/**
1483 * Registers the calling thread as the EMT of a Virtual CPU.
1484 *
1485 * Note that VCPU 0 is automatically registered during VM creation.
1486 *
1487 * @returns VBox status code
1488 * @param pGVM The global (ring-0) VM structure.
1489 * @param idCpu VCPU id to register the current thread as.
1490 */
1491GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1492{
1493 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1494
1495 /*
1496 * Validate the VM structure, state and handle.
1497 */
1498 PGVMM pGVMM;
1499 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1500 if (RT_SUCCESS(rc))
1501 {
1502 if (idCpu < pGVM->cCpus)
1503 {
1504 /* Check that the EMT isn't already assigned to a thread. */
1505 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1506 {
1507 Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1508
1509 /* A thread may only be one EMT. */
1510 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1511 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1512 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1513 if (RT_SUCCESS(rc))
1514 {
1515 /*
1516 * Do the assignment, then try setup the hook. Undo if that fails.
1517 */
1518 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1519
1520 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
1521 if (RT_SUCCESS(rc))
1522 CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
1523 else
1524 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1525 }
1526 }
1527 else
1528 rc = VERR_ACCESS_DENIED;
1529 }
1530 else
1531 rc = VERR_INVALID_CPU_ID;
1532 }
1533 return rc;
1534}
1535
1536
1537/**
1538 * Deregisters the calling thread as the EMT of a Virtual CPU.
1539 *
1540 * Note that VCPU 0 shall call GVMMR0DestroyVM intead of this API.
1541 *
1542 * @returns VBox status code
1543 * @param pGVM The global (ring-0) VM structure.
1544 * @param idCpu VCPU id to register the current thread as.
1545 */
1546GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1547{
1548 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1549
1550 /*
1551 * Validate the VM structure, state and handle.
1552 */
1553 PGVMM pGVMM;
1554 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1555 if (RT_SUCCESS(rc))
1556 {
1557 /*
1558 * Take the destruction lock and recheck the handle state to
1559 * prevent racing GVMMR0DestroyVM.
1560 */
1561 gvmmR0CreateDestroyLock(pGVMM);
1562 uint32_t hSelf = pGVM->hSelf;
1563 ASMCompilerBarrier();
1564 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1565 && pGVMM->aHandles[hSelf].pvObj != NULL
1566 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1567 {
1568 /*
1569 * Do per-EMT cleanups.
1570 */
1571 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1572
1573 /*
1574 * Invalidate hEMT. We don't use NIL here as that would allow
1575 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1576 */
1577 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1578 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1579 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1580 }
1581
1582 gvmmR0CreateDestroyUnlock(pGVMM);
1583 }
1584 return rc;
1585}
1586
1587
1588/**
1589 * Lookup a GVM structure by its handle.
1590 *
1591 * @returns The GVM pointer on success, NULL on failure.
1592 * @param hGVM The global VM handle. Asserts on bad handle.
1593 */
1594GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1595{
1596 PGVMM pGVMM;
1597 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1598
1599 /*
1600 * Validate.
1601 */
1602 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1603 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1604
1605 /*
1606 * Look it up.
1607 */
1608 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1609 AssertPtrReturn(pHandle->pvObj, NULL);
1610 PGVM pGVM = pHandle->pGVM;
1611 AssertPtrReturn(pGVM, NULL);
1612
1613 return pGVM;
1614}
1615
1616
1617/**
1618 * Check that the given GVM and VM structures match up.
1619 *
1620 * The calling thread must be in the same process as the VM. All current lookups
1621 * are by threads inside the same process, so this will not be an issue.
1622 *
1623 * @returns VBox status code.
1624 * @param pGVM The global (ring-0) VM structure.
1625 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1626 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1627 * shared mode when requested.
1628 *
1629 * Be very careful if not taking the lock as it's
1630 * possible that the VM will disappear then!
1631 *
1632 * @remark This will not assert on an invalid pGVM but try return silently.
1633 */
1634static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1635{
1636 /*
1637 * Check the pointers.
1638 */
1639 int rc;
1640 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1641 && ((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0 ))
1642 {
1643 /*
1644 * Get the pGVMM instance and check the VM handle.
1645 */
1646 PGVMM pGVMM;
1647 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1648
1649 uint16_t hGVM = pGVM->hSelf;
1650 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1651 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1652 {
1653 RTPROCESS const pidSelf = RTProcSelf();
1654 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1655 if (fTakeUsedLock)
1656 {
1657 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1658 AssertRCReturn(rc, rc);
1659 }
1660
1661 if (RT_LIKELY( pHandle->pGVM == pGVM
1662 && pHandle->ProcId == pidSelf
1663 && RT_VALID_PTR(pHandle->pvObj)))
1664 {
1665 /*
1666 * Some more VM data consistency checks.
1667 */
1668 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1669 && pGVM->hSelfUnsafe == hGVM
1670 && pGVM->pSelf == pGVM))
1671 {
1672 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1673 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1674 {
1675 *ppGVMM = pGVMM;
1676 return VINF_SUCCESS;
1677 }
1678 rc = VERR_INCONSISTENT_VM_HANDLE;
1679 }
1680 else
1681 rc = VERR_INCONSISTENT_VM_HANDLE;
1682 }
1683 else
1684 rc = VERR_INVALID_VM_HANDLE;
1685
1686 if (fTakeUsedLock)
1687 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1688 }
1689 else
1690 rc = VERR_INVALID_VM_HANDLE;
1691 }
1692 else
1693 rc = VERR_INVALID_POINTER;
1694 return rc;
1695}
1696
1697
1698/**
1699 * Validates a GVM/VM pair.
1700 *
1701 * @returns VBox status code.
1702 * @param pGVM The global (ring-0) VM structure.
1703 */
1704GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1705{
1706 PGVMM pGVMM;
1707 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1708}
1709
1710
1711/**
1712 * Check that the given GVM and VM structures match up.
1713 *
1714 * The calling thread must be in the same process as the VM. All current lookups
1715 * are by threads inside the same process, so this will not be an issue.
1716 *
1717 * @returns VBox status code.
1718 * @param pGVM The global (ring-0) VM structure.
1719 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1720 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1721 * @thread EMT
1722 *
1723 * @remarks This will assert in all failure paths.
1724 */
1725static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
1726{
1727 /*
1728 * Check the pointers.
1729 */
1730 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1731 AssertReturn(((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1732
1733 /*
1734 * Get the pGVMM instance and check the VM handle.
1735 */
1736 PGVMM pGVMM;
1737 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1738
1739 uint16_t hGVM = pGVM->hSelf;
1740 ASMCompilerBarrier();
1741 AssertReturn( hGVM != NIL_GVM_HANDLE
1742 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1743
1744 RTPROCESS const pidSelf = RTProcSelf();
1745 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1746 AssertReturn( pHandle->pGVM == pGVM
1747 && pHandle->ProcId == pidSelf
1748 && RT_VALID_PTR(pHandle->pvObj),
1749 VERR_INVALID_HANDLE);
1750
1751 /*
1752 * Check the EMT claim.
1753 */
1754 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1755 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1756 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1757
1758 /*
1759 * Some more VM data consistency checks.
1760 */
1761 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1762 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1763 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
1764 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1765
1766 *ppGVMM = pGVMM;
1767 return VINF_SUCCESS;
1768}
1769
1770
1771/**
1772 * Validates a GVM/EMT pair.
1773 *
1774 * @returns VBox status code.
1775 * @param pGVM The global (ring-0) VM structure.
1776 * @param idCpu The Virtual CPU ID of the calling EMT.
1777 * @thread EMT(idCpu)
1778 */
1779GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
1780{
1781 PGVMM pGVMM;
1782 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1783}
1784
1785
1786/**
1787 * Looks up the VM belonging to the specified EMT thread.
1788 *
1789 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1790 * unnecessary kernel panics when the EMT thread hits an assertion. The
1791 * call may or not be an EMT thread.
1792 *
1793 * @returns Pointer to the VM on success, NULL on failure.
1794 * @param hEMT The native thread handle of the EMT.
1795 * NIL_RTNATIVETHREAD means the current thread
1796 */
1797GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1798{
1799 /*
1800 * No Assertions here as we're usually called in a AssertMsgN or
1801 * RTAssert* context.
1802 */
1803 PGVMM pGVMM = g_pGVMM;
1804 if ( !RT_VALID_PTR(pGVMM)
1805 || pGVMM->u32Magic != GVMM_MAGIC)
1806 return NULL;
1807
1808 if (hEMT == NIL_RTNATIVETHREAD)
1809 hEMT = RTThreadNativeSelf();
1810 RTPROCESS ProcId = RTProcSelf();
1811
1812 /*
1813 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1814 */
1815/** @todo introduce some pid hash table here, please. */
1816 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1817 {
1818 if ( pGVMM->aHandles[i].iSelf == i
1819 && pGVMM->aHandles[i].ProcId == ProcId
1820 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1821 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1822 {
1823 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1824 return pGVMM->aHandles[i].pGVM;
1825
1826 /* This is fearly safe with the current process per VM approach. */
1827 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1828 VMCPUID const cCpus = pGVM->cCpus;
1829 ASMCompilerBarrier();
1830 if ( cCpus < 1
1831 || cCpus > VMM_MAX_CPU_COUNT)
1832 continue;
1833 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1834 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1835 return pGVMM->aHandles[i].pGVM;
1836 }
1837 }
1838 return NULL;
1839}
1840
1841
1842/**
1843 * Looks up the GVMCPU belonging to the specified EMT thread.
1844 *
1845 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1846 * unnecessary kernel panics when the EMT thread hits an assertion. The
1847 * call may or not be an EMT thread.
1848 *
1849 * @returns Pointer to the VM on success, NULL on failure.
1850 * @param hEMT The native thread handle of the EMT.
1851 * NIL_RTNATIVETHREAD means the current thread
1852 */
1853GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
1854{
1855 /*
1856 * No Assertions here as we're usually called in a AssertMsgN,
1857 * RTAssert*, Log and LogRel contexts.
1858 */
1859 PGVMM pGVMM = g_pGVMM;
1860 if ( !RT_VALID_PTR(pGVMM)
1861 || pGVMM->u32Magic != GVMM_MAGIC)
1862 return NULL;
1863
1864 if (hEMT == NIL_RTNATIVETHREAD)
1865 hEMT = RTThreadNativeSelf();
1866 RTPROCESS ProcId = RTProcSelf();
1867
1868 /*
1869 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1870 */
1871/** @todo introduce some pid hash table here, please. */
1872 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1873 {
1874 if ( pGVMM->aHandles[i].iSelf == i
1875 && pGVMM->aHandles[i].ProcId == ProcId
1876 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1877 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1878 {
1879 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1880 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1881 return &pGVM->aCpus[0];
1882
1883 /* This is fearly safe with the current process per VM approach. */
1884 VMCPUID const cCpus = pGVM->cCpus;
1885 ASMCompilerBarrier();
1886 ASMCompilerBarrier();
1887 if ( cCpus < 1
1888 || cCpus > VMM_MAX_CPU_COUNT)
1889 continue;
1890 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1891 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1892 return &pGVM->aCpus[idCpu];
1893 }
1894 }
1895 return NULL;
1896}
1897
1898
1899/**
1900 * This will wake up expired and soon-to-be expired VMs.
1901 *
1902 * @returns Number of VMs that has been woken up.
1903 * @param pGVMM Pointer to the GVMM instance data.
1904 * @param u64Now The current time.
1905 */
1906static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1907{
1908 /*
1909 * Skip this if we've got disabled because of high resolution wakeups or by
1910 * the user.
1911 */
1912 if (!pGVMM->fDoEarlyWakeUps)
1913 return 0;
1914
1915/** @todo Rewrite this algorithm. See performance defect XYZ. */
1916
1917 /*
1918 * A cheap optimization to stop wasting so much time here on big setups.
1919 */
1920 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1921 if ( pGVMM->cHaltedEMTs == 0
1922 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1923 return 0;
1924
1925 /*
1926 * Only one thread doing this at a time.
1927 */
1928 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
1929 return 0;
1930
1931 /*
1932 * The first pass will wake up VMs which have actually expired
1933 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1934 */
1935 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1936 uint64_t u64Min = UINT64_MAX;
1937 unsigned cWoken = 0;
1938 unsigned cHalted = 0;
1939 unsigned cTodo2nd = 0;
1940 unsigned cTodo3rd = 0;
1941 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1942 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1943 i = pGVMM->aHandles[i].iNext)
1944 {
1945 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1946 if ( RT_VALID_PTR(pCurGVM)
1947 && pCurGVM->u32Magic == GVM_MAGIC)
1948 {
1949 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1950 {
1951 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1952 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1953 if (u64)
1954 {
1955 if (u64 <= u64Now)
1956 {
1957 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1958 {
1959 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1960 AssertRC(rc);
1961 cWoken++;
1962 }
1963 }
1964 else
1965 {
1966 cHalted++;
1967 if (u64 <= uNsEarlyWakeUp1)
1968 cTodo2nd++;
1969 else if (u64 <= uNsEarlyWakeUp2)
1970 cTodo3rd++;
1971 else if (u64 < u64Min)
1972 u64 = u64Min;
1973 }
1974 }
1975 }
1976 }
1977 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1978 }
1979
1980 if (cTodo2nd)
1981 {
1982 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1983 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1984 i = pGVMM->aHandles[i].iNext)
1985 {
1986 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1987 if ( RT_VALID_PTR(pCurGVM)
1988 && pCurGVM->u32Magic == GVM_MAGIC)
1989 {
1990 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1991 {
1992 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1993 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1994 if ( u64
1995 && u64 <= uNsEarlyWakeUp1)
1996 {
1997 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1998 {
1999 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2000 AssertRC(rc);
2001 cWoken++;
2002 }
2003 }
2004 }
2005 }
2006 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2007 }
2008 }
2009
2010 if (cTodo3rd)
2011 {
2012 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2013 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2014 i = pGVMM->aHandles[i].iNext)
2015 {
2016 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2017 if ( RT_VALID_PTR(pCurGVM)
2018 && pCurGVM->u32Magic == GVM_MAGIC)
2019 {
2020 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2021 {
2022 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2023 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2024 if ( u64
2025 && u64 <= uNsEarlyWakeUp2)
2026 {
2027 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2028 {
2029 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2030 AssertRC(rc);
2031 cWoken++;
2032 }
2033 }
2034 }
2035 }
2036 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2037 }
2038 }
2039
2040 /*
2041 * Set the minimum value.
2042 */
2043 pGVMM->uNsNextEmtWakeup = u64Min;
2044
2045 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2046 return cWoken;
2047}
2048
2049
2050/**
2051 * Halt the EMT thread.
2052 *
2053 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2054 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2055 * @param pGVM The global (ring-0) VM structure.
2056 * @param pGVCpu The global (ring-0) CPU structure of the calling
2057 * EMT.
2058 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2059 * @thread EMT(pGVCpu).
2060 */
2061GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2062{
2063 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2064 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2065 GVMM_CHECK_SMAP_SETUP();
2066 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2067
2068 PGVMM pGVMM;
2069 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2070
2071 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2072 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2073
2074 /*
2075 * If we're doing early wake-ups, we must take the UsedList lock before we
2076 * start querying the current time.
2077 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2078 */
2079 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2080 if (fDoEarlyWakeUps)
2081 {
2082 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2083 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2084 }
2085
2086 pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2087
2088 /* GIP hack: We might are frequently sleeping for short intervals where the
2089 difference between GIP and system time matters on systems with high resolution
2090 system time. So, convert the input from GIP to System time in that case. */
2091 Assert(ASMGetFlags() & X86_EFL_IF);
2092 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2093 const uint64_t u64NowGip = RTTimeNanoTS();
2094 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2095
2096 if (fDoEarlyWakeUps)
2097 {
2098 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2099 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2100 }
2101
2102 /*
2103 * Go to sleep if we must...
2104 * Cap the sleep time to 1 second to be on the safe side.
2105 */
2106 int rc;
2107 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2108 if ( u64NowGip < u64ExpireGipTime
2109 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2110 ? pGVMM->nsMinSleepCompany
2111 : pGVMM->nsMinSleepAlone))
2112 {
2113 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2114 if (cNsInterval > RT_NS_1SEC)
2115 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2116 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2117 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2118 if (fDoEarlyWakeUps)
2119 {
2120 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2121 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2122 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2123 }
2124 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2125
2126 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2127 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2128 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2129 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2130
2131 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2132 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2133
2134 /* Reset the semaphore to try prevent a few false wake-ups. */
2135 if (rc == VINF_SUCCESS)
2136 {
2137 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2138 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2139 }
2140 else if (rc == VERR_TIMEOUT)
2141 {
2142 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2143 rc = VINF_SUCCESS;
2144 }
2145 }
2146 else
2147 {
2148 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2149 if (fDoEarlyWakeUps)
2150 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2151 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2152 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2153 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2154 rc = VINF_SUCCESS;
2155 }
2156
2157 return rc;
2158}
2159
2160
2161/**
2162 * Halt the EMT thread.
2163 *
2164 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2165 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2166 * @param pGVM The global (ring-0) VM structure.
2167 * @param idCpu The Virtual CPU ID of the calling EMT.
2168 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2169 * @thread EMT(idCpu).
2170 */
2171GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2172{
2173 GVMM_CHECK_SMAP_SETUP();
2174 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2175 PGVMM pGVMM;
2176 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2177 if (RT_SUCCESS(rc))
2178 {
2179 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2180 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2181 }
2182 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2183 return rc;
2184}
2185
2186
2187
2188/**
2189 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2190 * the a sleeping EMT.
2191 *
2192 * @retval VINF_SUCCESS if successfully woken up.
2193 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2194 *
2195 * @param pGVM The global (ring-0) VM structure.
2196 * @param pGVCpu The global (ring-0) VCPU structure.
2197 */
2198DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2199{
2200 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2201
2202 /*
2203 * Signal the semaphore regardless of whether it's current blocked on it.
2204 *
2205 * The reason for this is that there is absolutely no way we can be 100%
2206 * certain that it isn't *about* go to go to sleep on it and just got
2207 * delayed a bit en route. So, we will always signal the semaphore when
2208 * the it is flagged as halted in the VMM.
2209 */
2210/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2211 int rc;
2212 if (pGVCpu->gvmm.s.u64HaltExpire)
2213 {
2214 rc = VINF_SUCCESS;
2215 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2216 }
2217 else
2218 {
2219 rc = VINF_GVM_NOT_BLOCKED;
2220 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2221 }
2222
2223 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2224 AssertRC(rc2);
2225
2226 return rc;
2227}
2228
2229
2230/**
2231 * Wakes up the halted EMT thread so it can service a pending request.
2232 *
2233 * @returns VBox status code.
2234 * @retval VINF_SUCCESS if successfully woken up.
2235 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2236 *
2237 * @param pGVM The global (ring-0) VM structure.
2238 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2239 * @param fTakeUsedLock Take the used lock or not
2240 * @thread Any but EMT(idCpu).
2241 */
2242GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2243{
2244 GVMM_CHECK_SMAP_SETUP();
2245 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2246
2247 /*
2248 * Validate input and take the UsedLock.
2249 */
2250 PGVMM pGVMM;
2251 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2252 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2253 if (RT_SUCCESS(rc))
2254 {
2255 if (idCpu < pGVM->cCpus)
2256 {
2257 /*
2258 * Do the actual job.
2259 */
2260 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2261 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2262
2263 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2264 {
2265 /*
2266 * While we're here, do a round of scheduling.
2267 */
2268 Assert(ASMGetFlags() & X86_EFL_IF);
2269 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2270 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2271 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2272 }
2273 }
2274 else
2275 rc = VERR_INVALID_CPU_ID;
2276
2277 if (fTakeUsedLock)
2278 {
2279 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2280 AssertRC(rc2);
2281 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2282 }
2283 }
2284
2285 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2286 return rc;
2287}
2288
2289
2290/**
2291 * Wakes up the halted EMT thread so it can service a pending request.
2292 *
2293 * @returns VBox status code.
2294 * @retval VINF_SUCCESS if successfully woken up.
2295 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2296 *
2297 * @param pGVM The global (ring-0) VM structure.
2298 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2299 * @thread Any but EMT(idCpu).
2300 */
2301GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2302{
2303 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2304}
2305
2306
2307/**
2308 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2309 * parameter and no used locking.
2310 *
2311 * @returns VBox status code.
2312 * @retval VINF_SUCCESS if successfully woken up.
2313 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2314 *
2315 * @param pGVM The global (ring-0) VM structure.
2316 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2317 * @thread Any but EMT(idCpu).
2318 * @deprecated Don't use in new code if possible! Use the GVM variant.
2319 */
2320GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2321{
2322 GVMM_CHECK_SMAP_SETUP();
2323 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2324 PGVMM pGVMM;
2325 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2326 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2327 if (RT_SUCCESS(rc))
2328 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2329 return rc;
2330}
2331
2332
2333/**
2334 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2335 * the Virtual CPU if it's still busy executing guest code.
2336 *
2337 * @returns VBox status code.
2338 * @retval VINF_SUCCESS if poked successfully.
2339 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2340 *
2341 * @param pGVM The global (ring-0) VM structure.
2342 * @param pVCpu The cross context virtual CPU structure.
2343 */
2344DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2345{
2346 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2347
2348 RTCPUID idHostCpu = pVCpu->idHostCpu;
2349 if ( idHostCpu == NIL_RTCPUID
2350 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2351 {
2352 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2353 return VINF_GVM_NOT_BUSY_IN_GC;
2354 }
2355
2356 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2357 RTMpPokeCpu(idHostCpu);
2358 return VINF_SUCCESS;
2359}
2360
2361
2362/**
2363 * Pokes an EMT if it's still busy running guest code.
2364 *
2365 * @returns VBox status code.
2366 * @retval VINF_SUCCESS if poked successfully.
2367 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2368 *
2369 * @param pGVM The global (ring-0) VM structure.
2370 * @param idCpu The ID of the virtual CPU to poke.
2371 * @param fTakeUsedLock Take the used lock or not
2372 */
2373GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2374{
2375 /*
2376 * Validate input and take the UsedLock.
2377 */
2378 PGVMM pGVMM;
2379 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2380 if (RT_SUCCESS(rc))
2381 {
2382 if (idCpu < pGVM->cCpus)
2383 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2384 else
2385 rc = VERR_INVALID_CPU_ID;
2386
2387 if (fTakeUsedLock)
2388 {
2389 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2390 AssertRC(rc2);
2391 }
2392 }
2393
2394 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2395 return rc;
2396}
2397
2398
2399/**
2400 * Pokes an EMT if it's still busy running guest code.
2401 *
2402 * @returns VBox status code.
2403 * @retval VINF_SUCCESS if poked successfully.
2404 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2405 *
2406 * @param pGVM The global (ring-0) VM structure.
2407 * @param idCpu The ID of the virtual CPU to poke.
2408 */
2409GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2410{
2411 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2412}
2413
2414
2415/**
2416 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2417 * used locking.
2418 *
2419 * @returns VBox status code.
2420 * @retval VINF_SUCCESS if poked successfully.
2421 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2422 *
2423 * @param pGVM The global (ring-0) VM structure.
2424 * @param idCpu The ID of the virtual CPU to poke.
2425 *
2426 * @deprecated Don't use in new code if possible! Use the GVM variant.
2427 */
2428GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2429{
2430 PGVMM pGVMM;
2431 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2432 if (RT_SUCCESS(rc))
2433 {
2434 if (idCpu < pGVM->cCpus)
2435 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2436 else
2437 rc = VERR_INVALID_CPU_ID;
2438 }
2439 return rc;
2440}
2441
2442
2443/**
2444 * Wakes up a set of halted EMT threads so they can service pending request.
2445 *
2446 * @returns VBox status code, no informational stuff.
2447 *
2448 * @param pGVM The global (ring-0) VM structure.
2449 * @param pSleepSet The set of sleepers to wake up.
2450 * @param pPokeSet The set of CPUs to poke.
2451 */
2452GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2453{
2454 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2455 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2456 GVMM_CHECK_SMAP_SETUP();
2457 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2458 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2459
2460 /*
2461 * Validate input and take the UsedLock.
2462 */
2463 PGVMM pGVMM;
2464 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2465 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2466 if (RT_SUCCESS(rc))
2467 {
2468 rc = VINF_SUCCESS;
2469 VMCPUID idCpu = pGVM->cCpus;
2470 while (idCpu-- > 0)
2471 {
2472 /* Don't try poke or wake up ourselves. */
2473 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2474 continue;
2475
2476 /* just ignore errors for now. */
2477 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2478 {
2479 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2480 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2481 }
2482 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2483 {
2484 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2485 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2486 }
2487 }
2488
2489 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2490 AssertRC(rc2);
2491 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2492 }
2493
2494 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2495 return rc;
2496}
2497
2498
2499/**
2500 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2501 *
2502 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2503 * @param pGVM The global (ring-0) VM structure.
2504 * @param pReq Pointer to the request packet.
2505 */
2506GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2507{
2508 /*
2509 * Validate input and pass it on.
2510 */
2511 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2512 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2513
2514 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2515}
2516
2517
2518
2519/**
2520 * Poll the schedule to see if someone else should get a chance to run.
2521 *
2522 * This is a bit hackish and will not work too well if the machine is
2523 * under heavy load from non-VM processes.
2524 *
2525 * @returns VINF_SUCCESS if not yielded.
2526 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2527 * @param pGVM The global (ring-0) VM structure.
2528 * @param idCpu The Virtual CPU ID of the calling EMT.
2529 * @param fYield Whether to yield or not.
2530 * This is for when we're spinning in the halt loop.
2531 * @thread EMT(idCpu).
2532 */
2533GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2534{
2535 /*
2536 * Validate input.
2537 */
2538 PGVMM pGVMM;
2539 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2540 if (RT_SUCCESS(rc))
2541 {
2542 /*
2543 * We currently only implement helping doing wakeups (fYield = false), so don't
2544 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2545 */
2546 if (!fYield && pGVMM->fDoEarlyWakeUps)
2547 {
2548 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2549 pGVM->gvmm.s.StatsSched.cPollCalls++;
2550
2551 Assert(ASMGetFlags() & X86_EFL_IF);
2552 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2553
2554 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2555
2556 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2557 }
2558 /*
2559 * Not quite sure what we could do here...
2560 */
2561 else if (fYield)
2562 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2563 else
2564 rc = VINF_SUCCESS;
2565 }
2566
2567 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2568 return rc;
2569}
2570
2571
#ifdef GVMM_SCHED_WITH_PPT
/**
 * Timer callback for the periodic preemption timer.
 *
 * Every cTicksHistoriziationInterval ticks the desired frequency collected
 * since the last round is pushed into the history ring.  The timer is then
 * left alone, reprogrammed or stopped depending on how the maximum frequency
 * in the history compares to the current timer frequency.
 *
 * @param   pTimer  The timer handle.
 * @param   pvUser  Pointer to the per cpu structure.
 * @param   iTick   The current tick.
 */
static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PGVMMHOSTCPU pHostCpu = (PGVMMHOSTCPU)pvUser;
    NOREF(pTimer); NOREF(iTick);

    /*
     * Termination check
     */
    if (pHostCpu->u32Magic != GVMMHOSTCPU_MAGIC)
        return;

    /*
     * Do the house keeping.
     */
    RTSpinlockAcquire(pHostCpu->Ppt.hSpinlock);

    if (++pHostCpu->Ppt.iTickHistorization < pHostCpu->Ppt.cTicksHistoriziationInterval)
    {
        /* Not yet time to historicize, nothing more to do for this tick. */
        RTSpinlockRelease(pHostCpu->Ppt.hSpinlock);
        return;
    }

    /*
     * Historicize the max frequency.
     */
    uint32_t idxHistory = ++pHostCpu->Ppt.iHzHistory % RT_ELEMENTS(pHostCpu->Ppt.aHzHistory);
    pHostCpu->Ppt.aHzHistory[idxHistory] = pHostCpu->Ppt.uDesiredHz;
    pHostCpu->Ppt.iTickHistorization     = 0;
    pHostCpu->Ppt.uDesiredHz             = 0;

    /*
     * Find the highest frequency in the history and compare it with the
     * current timer frequency.
     */
    uint32_t uMaxHz = 0;
    for (uint32_t idx = 0; idx < RT_ELEMENTS(pHostCpu->Ppt.aHzHistory); idx++)
        if (pHostCpu->Ppt.aHzHistory[idx] > uMaxHz)
            uMaxHz = pHostCpu->Ppt.aHzHistory[idx];

    if (uMaxHz == pHostCpu->Ppt.uTimerHz)
    {
        /* The timer is already running at the right frequency. */
        RTSpinlockRelease(pHostCpu->Ppt.hSpinlock);
    }
    else if (uMaxHz)
    {
        /*
         * Reprogram it.
         */
        pHostCpu->Ppt.cChanges++;
        pHostCpu->Ppt.iTickHistorization = 0;
        pHostCpu->Ppt.uTimerHz           = uMaxHz;
        uint32_t const cNsNewInterval = RT_NS_1SEC / uMaxHz;
        pHostCpu->Ppt.cNsInterval        = cNsNewInterval;
        if (cNsNewInterval >= GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
            pHostCpu->Ppt.cTicksHistoriziationInterval = 1;
        else
            pHostCpu->Ppt.cTicksHistoriziationInterval = (  GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
                                                          + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
                                                       / cNsNewInterval;
        /* The lock is dropped before touching the timer itself. */
        RTSpinlockRelease(pHostCpu->Ppt.hSpinlock);

        /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
        RTTimerChangeInterval(pTimer, cNsNewInterval);
    }
    else
    {
        /*
         * Stop it.
         */
        pHostCpu->Ppt.fStarted    = false;
        pHostCpu->Ppt.uTimerHz    = 0;
        pHostCpu->Ppt.cNsInterval = 0;
        RTSpinlockRelease(pHostCpu->Ppt.hSpinlock);

        /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
        RTTimerStop(pTimer);
    }
}
#endif /* GVMM_SCHED_WITH_PPT */
2654
2655
2656/**
2657 * Updates the periodic preemption timer for the calling CPU.
2658 *
2659 * The caller must have disabled preemption!
2660 * The caller must check that the host can do high resolution timers.
2661 *
2662 * @param pGVM The global (ring-0) VM structure.
2663 * @param idHostCpu The current host CPU id.
2664 * @param uHz The desired frequency.
2665 */
2666GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
2667{
2668 NOREF(pGVM);
2669#ifdef GVMM_SCHED_WITH_PPT
2670 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2671 Assert(RTTimerCanDoHighResolution());
2672
2673 /*
2674 * Resolve the per CPU data.
2675 */
2676 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2677 PGVMM pGVMM = g_pGVMM;
2678 if ( !RT_VALID_PTR(pGVMM)
2679 || pGVMM->u32Magic != GVMM_MAGIC)
2680 return;
2681 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2682 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2683 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2684 && pCpu->idCpu == idHostCpu,
2685 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2686
2687 /*
2688 * Check whether we need to do anything about the timer.
2689 * We have to be a little bit careful since we might be race the timer
2690 * callback here.
2691 */
2692 if (uHz > 16384)
2693 uHz = 16384; /** @todo add a query method for this! */
2694 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2695 && uHz >= pCpu->Ppt.uMinHz
2696 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2697 {
2698 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2699
2700 pCpu->Ppt.uDesiredHz = uHz;
2701 uint32_t cNsInterval = 0;
2702 if (!pCpu->Ppt.fStarted)
2703 {
2704 pCpu->Ppt.cStarts++;
2705 pCpu->Ppt.fStarted = true;
2706 pCpu->Ppt.fStarting = true;
2707 pCpu->Ppt.iTickHistorization = 0;
2708 pCpu->Ppt.uTimerHz = uHz;
2709 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2710 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2711 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2712 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2713 / cNsInterval;
2714 else
2715 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2716 }
2717
2718 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2719
2720 if (cNsInterval)
2721 {
2722 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2723 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2724 AssertRC(rc);
2725
2726 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2727 if (RT_FAILURE(rc))
2728 pCpu->Ppt.fStarted = false;
2729 pCpu->Ppt.fStarting = false;
2730 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2731 }
2732 }
2733#else /* !GVMM_SCHED_WITH_PPT */
2734 NOREF(idHostCpu); NOREF(uHz);
2735#endif /* !GVMM_SCHED_WITH_PPT */
2736}
2737
2738
2739/**
2740 * Calls @a pfnCallback for each VM in the system.
2741 *
2742 * This will enumerate the VMs while holding the global VM used list lock in
2743 * shared mode. So, only suitable for simple work. If more expensive work
2744 * needs doing, a different approach must be taken as using this API would
2745 * otherwise block VM creation and destruction.
2746 *
2747 * @returns VBox status code.
2748 * @param pfnCallback The callback function.
2749 * @param pvUser User argument to the callback.
2750 */
2751GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
2752{
2753 PGVMM pGVMM;
2754 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2755
2756 int rc = VINF_SUCCESS;
2757 GVMMR0_USED_SHARED_LOCK(pGVMM);
2758 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
2759 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2760 i = pGVMM->aHandles[i].iNext, cLoops++)
2761 {
2762 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2763 if ( RT_VALID_PTR(pGVM)
2764 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2765 && pGVM->u32Magic == GVM_MAGIC)
2766 {
2767 rc = pfnCallback(pGVM, pvUser);
2768 if (rc != VINF_SUCCESS)
2769 break;
2770 }
2771
2772 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
2773 }
2774 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2775 return rc;
2776}
2777
2778
2779/**
2780 * Retrieves the GVMM statistics visible to the caller.
2781 *
2782 * @returns VBox status code.
2783 *
2784 * @param pStats Where to put the statistics.
2785 * @param pSession The current session.
2786 * @param pGVM The GVM to obtain statistics for. Optional.
2787 */
2788GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
2789{
2790 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
2791
2792 /*
2793 * Validate input.
2794 */
2795 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2796 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2797 pStats->cVMs = 0; /* (crash before taking the sem...) */
2798
2799 /*
2800 * Take the lock and get the VM statistics.
2801 */
2802 PGVMM pGVMM;
2803 if (pGVM)
2804 {
2805 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
2806 if (RT_FAILURE(rc))
2807 return rc;
2808 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2809 }
2810 else
2811 {
2812 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2813 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2814
2815 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2816 AssertRCReturn(rc, rc);
2817 }
2818
2819 /*
2820 * Enumerate the VMs and add the ones visible to the statistics.
2821 */
2822 pStats->cVMs = 0;
2823 pStats->cEMTs = 0;
2824 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2825
2826 for (unsigned i = pGVMM->iUsedHead;
2827 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2828 i = pGVMM->aHandles[i].iNext)
2829 {
2830 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2831 void *pvObj = pGVMM->aHandles[i].pvObj;
2832 if ( RT_VALID_PTR(pvObj)
2833 && RT_VALID_PTR(pOtherGVM)
2834 && pOtherGVM->u32Magic == GVM_MAGIC
2835 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2836 {
2837 pStats->cVMs++;
2838 pStats->cEMTs += pOtherGVM->cCpus;
2839
2840 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
2841 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
2842 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
2843 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2844 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
2845
2846 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
2847 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2848 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2849
2850 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
2851 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
2852
2853 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
2854 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
2855 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
2856 }
2857 }
2858
2859 /*
2860 * Copy out the per host CPU statistics.
2861 */
2862 uint32_t iDstCpu = 0;
2863 uint32_t cSrcCpus = pGVMM->cHostCpus;
2864 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2865 {
2866 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2867 {
2868 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2869 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2870#ifdef GVMM_SCHED_WITH_PPT
2871 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2872 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2873 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2874 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2875#else
2876 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2877 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2878 pStats->aHostCpus[iDstCpu].cChanges = 0;
2879 pStats->aHostCpus[iDstCpu].cStarts = 0;
2880#endif
2881 iDstCpu++;
2882 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2883 break;
2884 }
2885 }
2886 pStats->cHostCpus = iDstCpu;
2887
2888 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2889
2890 return VINF_SUCCESS;
2891}
2892
2893
2894/**
2895 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2896 *
2897 * @returns see GVMMR0QueryStatistics.
2898 * @param pGVM The global (ring-0) VM structure. Optional.
2899 * @param pReq Pointer to the request packet.
2900 * @param pSession The current session.
2901 */
2902GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
2903{
2904 /*
2905 * Validate input and pass it on.
2906 */
2907 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2908 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2909 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
2910
2911 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
2912}
2913
2914
2915/**
2916 * Resets the specified GVMM statistics.
2917 *
2918 * @returns VBox status code.
2919 *
2920 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
2921 * @param pSession The current session.
2922 * @param pGVM The GVM to reset statistics for. Optional.
2923 */
2924GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
2925{
2926 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
2927
2928 /*
2929 * Validate input.
2930 */
2931 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2932 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2933
2934 /*
2935 * Take the lock and get the VM statistics.
2936 */
2937 PGVMM pGVMM;
2938 if (pGVM)
2939 {
2940 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
2941 if (RT_FAILURE(rc))
2942 return rc;
2943# define MAYBE_RESET_FIELD(field) \
2944 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2945 MAYBE_RESET_FIELD(cHaltCalls);
2946 MAYBE_RESET_FIELD(cHaltBlocking);
2947 MAYBE_RESET_FIELD(cHaltTimeouts);
2948 MAYBE_RESET_FIELD(cHaltNotBlocking);
2949 MAYBE_RESET_FIELD(cHaltWakeUps);
2950 MAYBE_RESET_FIELD(cWakeUpCalls);
2951 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2952 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2953 MAYBE_RESET_FIELD(cPokeCalls);
2954 MAYBE_RESET_FIELD(cPokeNotBusy);
2955 MAYBE_RESET_FIELD(cPollCalls);
2956 MAYBE_RESET_FIELD(cPollHalts);
2957 MAYBE_RESET_FIELD(cPollWakeUps);
2958# undef MAYBE_RESET_FIELD
2959 }
2960 else
2961 {
2962 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2963
2964 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2965 AssertRCReturn(rc, rc);
2966 }
2967
2968 /*
2969 * Enumerate the VMs and add the ones visible to the statistics.
2970 */
2971 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
2972 {
2973 for (unsigned i = pGVMM->iUsedHead;
2974 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2975 i = pGVMM->aHandles[i].iNext)
2976 {
2977 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2978 void *pvObj = pGVMM->aHandles[i].pvObj;
2979 if ( RT_VALID_PTR(pvObj)
2980 && RT_VALID_PTR(pOtherGVM)
2981 && pOtherGVM->u32Magic == GVM_MAGIC
2982 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2983 {
2984# define MAYBE_RESET_FIELD(field) \
2985 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2986 MAYBE_RESET_FIELD(cHaltCalls);
2987 MAYBE_RESET_FIELD(cHaltBlocking);
2988 MAYBE_RESET_FIELD(cHaltTimeouts);
2989 MAYBE_RESET_FIELD(cHaltNotBlocking);
2990 MAYBE_RESET_FIELD(cHaltWakeUps);
2991 MAYBE_RESET_FIELD(cWakeUpCalls);
2992 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2993 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2994 MAYBE_RESET_FIELD(cPokeCalls);
2995 MAYBE_RESET_FIELD(cPokeNotBusy);
2996 MAYBE_RESET_FIELD(cPollCalls);
2997 MAYBE_RESET_FIELD(cPollHalts);
2998 MAYBE_RESET_FIELD(cPollWakeUps);
2999# undef MAYBE_RESET_FIELD
3000 }
3001 }
3002 }
3003
3004 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3005
3006 return VINF_SUCCESS;
3007}
3008
3009
3010/**
3011 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3012 *
3013 * @returns see GVMMR0ResetStatistics.
3014 * @param pGVM The global (ring-0) VM structure. Optional.
3015 * @param pReq Pointer to the request packet.
3016 * @param pSession The current session.
3017 */
3018GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3019{
3020 /*
3021 * Validate input and pass it on.
3022 */
3023 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3024 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3025 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3026
3027 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3028}
3029
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette