VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 106129

Last change on this file since 106129 was 106061, checked in by vboxsync, 3 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 122.5 KB
1/* $Id: GVMMR0.cpp 106061 2024-09-16 14:03:52Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/** @page pg_gvmm GVMM - The Global VM Manager
30 *
31 * The Global VM Manager lives in ring-0. Its main function at the moment is
32 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
33 * each of them, and assign them unique identifiers (so GMM can track page
34 * owners). The GVMM also manages some of the host CPU resources, like the
35 * periodic preemption timer.
36 *
37 * The GVMM will create a ring-0 object for each VM when it is registered; this
38 * is both for session cleanup purposes and for having a point where it is
39 * possible to implement usage policies later (in SUPR0ObjRegister).
40 *
41 *
42 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
43 *
44 * On systems that sport a high resolution kernel timer API, we use per-CPU
45 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
46 * execution. The timer frequency is calculated by taking the max
47 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
48 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
49 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
50 *
51 * The TMCalcHostTimerFrequency() part of things takes the max
52 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
53 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
54 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
55 * AMD-V and raw-mode execution environments.
56 */
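As an illustration only (a simplified sketch using the GVMMHOSTCPU fields defined further down, not the actual GVMMR0 logic), the per-CPU frequency selection roughly amounts to taking the maximum of the latest uDesiredHz and the historized samples, and only keeping a timer armed while that exceeds uMinHz:

    /* Hypothetical helper sketching the frequency selection described above. */
    static uint32_t gvmmExampleCalcPptHz(PGVMMHOSTCPU pCpu)
    {
        uint32_t uHz = ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz);
        for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
            uHz = RT_MAX(uHz, pCpu->Ppt.aHzHistory[i]);
        return uHz > pCpu->Ppt.uMinHz ? uHz : 0; /* 0 = leave the timer stopped */
    }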
57
58
59/*********************************************************************************************************************************
60* Header Files *
61*********************************************************************************************************************************/
62#define LOG_GROUP LOG_GROUP_GVMM
63#include <VBox/vmm/gvmm.h>
64#include <VBox/vmm/gmm.h>
65#include "GVMMR0Internal.h"
66#include <VBox/vmm/dbgf.h>
67#include <VBox/vmm/iom.h>
68#include <VBox/vmm/pdm.h>
69#include <VBox/vmm/pgm.h>
70#include <VBox/vmm/vmm.h>
71#ifdef VBOX_WITH_NEM_R0
72# include <VBox/vmm/nem.h>
73#endif
74#include <VBox/vmm/vmcpuset.h>
75#include <VBox/vmm/vmcc.h>
76#include <VBox/param.h>
77#include <VBox/err.h>
78
79#include <iprt/asm.h>
80#include <iprt/asm-amd64-x86.h>
81#include <iprt/critsect.h>
82#include <iprt/mem.h>
83#include <iprt/semaphore.h>
84#include <iprt/time.h>
85#include <VBox/log.h>
86#include <iprt/thread.h>
87#include <iprt/process.h>
88#include <iprt/param.h>
89#include <iprt/string.h>
90#include <iprt/assert.h>
91#include <iprt/mem.h>
92#include <iprt/memobj.h>
93#include <iprt/mp.h>
94#include <iprt/cpuset.h>
95#include <iprt/spinlock.h>
96#include <iprt/timer.h>
97
98#include "dtrace/VBoxVMM.h"
99
100
101/*********************************************************************************************************************************
102* Defined Constants And Macros *
103*********************************************************************************************************************************/
104#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_WINDOWS) || defined(DOXYGEN_RUNNING)
105/** Define this to enable the periodic preemption timer. */
106# define GVMM_SCHED_WITH_PPT
107#endif
108
109#if /*defined(RT_OS_WINDOWS) ||*/ defined(DOXYGEN_RUNNING)
110/** Define this to enable the per-EMT high resolution wakeup timers. */
111# define GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
112#endif
113
114
115/** Special value that GVMMR0DeregisterVCpu sets. */
116#define GVMM_RTNATIVETHREAD_DESTROYED (~(RTNATIVETHREAD)1)
117AssertCompile(GVMM_RTNATIVETHREAD_DESTROYED != NIL_RTNATIVETHREAD);
118
119
120/*********************************************************************************************************************************
121* Structures and Typedefs *
122*********************************************************************************************************************************/
123
124/**
125 * Global VM handle.
126 */
127typedef struct GVMHANDLE
128{
129 /** The index of the next handle in the list (free or used). (0 is nil.) */
130 uint16_t volatile iNext;
131 /** Our own index / handle value. */
132 uint16_t iSelf;
133 /** The process ID of the handle owner.
134 * This is used for access checks. */
135 RTPROCESS ProcId;
136 /** The pointer to the ring-0 only (aka global) VM structure. */
137 PGVM pGVM;
138 /** The virtual machine object. */
139 void *pvObj;
140 /** The session this VM is associated with. */
141 PSUPDRVSESSION pSession;
142 /** The ring-0 handle of the EMT0 thread.
143 * This is used for ownership checks as well as looking up a VM handle by thread
144 * at times like assertions. */
145 RTNATIVETHREAD hEMT0;
146} GVMHANDLE;
147/** Pointer to a global VM handle. */
148typedef GVMHANDLE *PGVMHANDLE;
149
150/** Number of GVM handles (including the NIL handle). */
151#if HC_ARCH_BITS == 64
152# define GVMM_MAX_HANDLES 8192
153#else
154# define GVMM_MAX_HANDLES 128
155#endif
156
157/**
158 * Per host CPU GVMM data.
159 */
160typedef struct GVMMHOSTCPU
161{
162 /** Magic number (GVMMHOSTCPU_MAGIC). */
163 uint32_t volatile u32Magic;
164 /** The CPU ID. */
165 RTCPUID idCpu;
166 /** The CPU set index. */
167 uint32_t idxCpuSet;
168
169#ifdef GVMM_SCHED_WITH_PPT
170 /** Periodic preemption timer data. */
171 struct
172 {
173 /** The handle to the periodic preemption timer. */
174 PRTTIMER pTimer;
175 /** Spinlock protecting the data below. */
176 RTSPINLOCK hSpinlock;
177 /** The smallest Hz that we need to care about. (static) */
178 uint32_t uMinHz;
179 /** The number of ticks between each historization. */
180 uint32_t cTicksHistoriziationInterval;
181 /** The current historization tick (counting up to
182 * cTicksHistoriziationInterval and then resetting). */
183 uint32_t iTickHistorization;
184 /** The current timer interval. This is set to 0 when inactive. */
185 uint32_t cNsInterval;
186 /** The current timer frequency. This is set to 0 when inactive. */
187 uint32_t uTimerHz;
188 /** The current max frequency reported by the EMTs.
189 * This gets historicized and reset by the timer callback. This is
190 * read without holding the spinlock, so needs atomic updating. */
191 uint32_t volatile uDesiredHz;
192 /** Whether the timer was started or not. */
193 bool volatile fStarted;
194 /** Set if we're starting the timer. */
195 bool volatile fStarting;
196 /** The index of the next history entry (mod it). */
197 uint32_t iHzHistory;
198 /** Historicized uDesiredHz values. The array wraps around, new entries
199 * are added at iHzHistory. This is updated approximately every
200 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
201 uint32_t aHzHistory[8];
202 /** Statistics counter for recording the number of interval changes. */
203 uint32_t cChanges;
204 /** Statistics counter for recording the number of timer starts. */
205 uint32_t cStarts;
206 } Ppt;
207#endif /* GVMM_SCHED_WITH_PPT */
208
209} GVMMHOSTCPU;
210/** Pointer to the per host CPU GVMM data. */
211typedef GVMMHOSTCPU *PGVMMHOSTCPU;
212/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
213#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
214/** The interval one history entry should cover (approximately), given in
215 * nanoseconds. */
216#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
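For concreteness: with the eight aHzHistory entries above and GVMMHOSTCPU_PPT_HIST_INTERVAL_NS at 20 ms per entry, the history spans roughly 8 * 20 ms = 160 ms, which is the ~160 ms window mentioned in the overview at the top of the file.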
217
218
219/**
220 * The GVMM instance data.
221 */
222typedef struct GVMM
223{
224 /** Eyecatcher / magic. */
225 uint32_t u32Magic;
226 /** The index of the head of the free handle chain. (0 is nil.) */
227 uint16_t volatile iFreeHead;
228 /** The index of the head of the active handle chain. (0 is nil.) */
229 uint16_t volatile iUsedHead;
230 /** The number of VMs. */
231 uint16_t volatile cVMs;
232 /** Alignment padding. */
233 uint16_t u16Reserved;
234 /** The number of EMTs. */
235 uint32_t volatile cEMTs;
236 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
237 uint32_t volatile cHaltedEMTs;
238 /** Mini lock for restricting early wake-ups to one thread. */
239 bool volatile fDoingEarlyWakeUps;
240 bool afPadding[3]; /**< explicit alignment padding. */
241 /** When the next halted or sleeping EMT will wake up.
242 * This is set to 0 when it needs recalculating and to UINT64_MAX when
243 * there are no halted or sleeping EMTs in the GVMM. */
244 uint64_t uNsNextEmtWakeup;
245 /** The lock used to serialize VM creation, destruction and associated events that
246 * aren't performance critical. Owners may acquire the list lock. */
247 RTCRITSECT CreateDestroyLock;
248 /** The lock used to serialize used list updates and accesses.
249 * This indirectly includes scheduling since the scheduler will have to walk the
250 * used list to examine running VMs. Owners may not acquire any other locks. */
251 RTCRITSECTRW UsedLock;
252 /** The handle array.
253 * The size of this array defines the maximum number of currently running VMs.
254 * The first entry is unused as it represents the NIL handle. */
255 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
256
257 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
258 * The number of EMTs that means we no longer consider ourselves alone on a
259 * CPU/Core.
260 */
261 uint32_t cEMTsMeansCompany;
262 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
263 * The minimum sleep time for when we're alone, in nanoseconds.
264 */
265 uint32_t nsMinSleepAlone;
266 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
267 * The minimum sleep time for when we've got company, in nanoseconds.
268 */
269 uint32_t nsMinSleepCompany;
270#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
271 /** @gcfgm{/GVMM/MinSleepWithHrWakeUp,32-bit,0, 100000000, 5000, ns}
272 * The minimum sleep time for when we've got a high-resolution wake-up timer, in
273 * nanoseconds.
274 */
275 uint32_t nsMinSleepWithHrTimer;
276#endif
277 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
278 * The limit for the first round of early wake-ups, given in nanoseconds.
279 */
280 uint32_t nsEarlyWakeUp1;
281 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
282 * The limit for the second round of early wake-ups, given in nanoseconds.
283 */
284 uint32_t nsEarlyWakeUp2;
285
286 /** Set if we're doing early wake-ups.
287 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
288 bool volatile fDoEarlyWakeUps;
289
290 /** The number of entries in the host CPU array (aHostCpus). */
291 uint32_t cHostCpus;
292 /** Per host CPU data (variable length). */
293 GVMMHOSTCPU aHostCpus[1];
294} GVMM;
295AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
296AssertCompileMemberAlignment(GVMM, UsedLock, 8);
297AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
298/** Pointer to the GVMM instance data. */
299typedef GVMM *PGVMM;
300
301/** The GVMM::u32Magic value (Charlie Haden). */
302#define GVMM_MAGIC UINT32_C(0x19370806)
303
304
305
306/*********************************************************************************************************************************
307* Global Variables *
308*********************************************************************************************************************************/
309/** Pointer to the GVMM instance data.
310 * (Just my general dislike for global variables.) */
311static PGVMM g_pGVMM = NULL;
312
313/** Macro for obtaining and validating the g_pGVMM pointer.
314 * On failure it will return from the invoking function with the specified return value.
315 *
316 * @param pGVMM The name of the pGVMM variable.
317 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
318 * status codes.
319 */
320#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
321 do { \
322 (pGVMM) = g_pGVMM;\
323 AssertPtrReturn((pGVMM), (rc)); \
324 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
325 } while (0)
326
327/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
328 * On failure it will return from the invoking function.
329 *
330 * @param pGVMM The name of the pGVMM variable.
331 */
332#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
333 do { \
334 (pGVMM) = g_pGVMM;\
335 AssertPtrReturnVoid((pGVMM)); \
336 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
337 } while (0)
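For illustration, a hypothetical void helper (not part of this file) would use the macro like this, silently bailing out if the GVMM instance is missing or corrupt:

    static void gvmmExampleLogCounts(void)
    {
        PGVMM pGVMM;
        GVMM_GET_VALID_INSTANCE_VOID(pGVMM); /* returns if g_pGVMM is bad */
        SUPR0Printf("GVMM: cVMs=%u cEMTs=%u\n", pGVMM->cVMs, pGVMM->cEMTs);
    }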
338
339
340/*********************************************************************************************************************************
341* Internal Functions *
342*********************************************************************************************************************************/
343static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
344static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
345static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
346static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
347
348#ifdef GVMM_SCHED_WITH_PPT
349static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
350#endif
351#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
352static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
353#endif
354
355
356/**
357 * Initializes the GVMM.
358 *
359 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
360 *
361 * @returns VBox status code.
362 */
363GVMMR0DECL(int) GVMMR0Init(void)
364{
365 LogFlow(("GVMMR0Init:\n"));
366
367 /*
368 * Allocate and initialize the instance data.
369 */
370 uint32_t cHostCpus = RTMpGetArraySize();
371 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
372
373 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
374 if (!pGVMM)
375 return VERR_NO_MEMORY;
376 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
377 "GVMM-CreateDestroyLock");
378 if (RT_SUCCESS(rc))
379 {
380 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
381 if (RT_SUCCESS(rc))
382 {
383 pGVMM->u32Magic = GVMM_MAGIC;
384 pGVMM->iUsedHead = 0;
385 pGVMM->iFreeHead = 1;
386
387 /* the nil handle */
388 pGVMM->aHandles[0].iSelf = 0;
389 pGVMM->aHandles[0].iNext = 0;
390
391 /* the tail */
392 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
393 pGVMM->aHandles[i].iSelf = i;
394 pGVMM->aHandles[i].iNext = 0; /* nil */
395
396 /* the rest */
397 while (i-- > 1)
398 {
399 pGVMM->aHandles[i].iSelf = i;
400 pGVMM->aHandles[i].iNext = i + 1;
401 }
402
403 /* The default configuration values. */
404 uint32_t cNsResolution = RTSemEventMultiGetResolution();
405 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
406 if (cNsResolution >= 5*RT_NS_100US)
407 {
408 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
409 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
410 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
411 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
412 }
413 else if (cNsResolution > RT_NS_100US)
414 {
415 pGVMM->nsMinSleepAlone = cNsResolution / 2;
416 pGVMM->nsMinSleepCompany = cNsResolution / 4;
417 pGVMM->nsEarlyWakeUp1 = 0;
418 pGVMM->nsEarlyWakeUp2 = 0;
419 }
420 else
421 {
422 pGVMM->nsMinSleepAlone = 2000;
423 pGVMM->nsMinSleepCompany = 2000;
424 pGVMM->nsEarlyWakeUp1 = 0;
425 pGVMM->nsEarlyWakeUp2 = 0;
426 }
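 /* Example: a host reporting a 1 000 000 ns (1 ms) semaphore resolution takes
 * the first branch above (>= 500 000 ns) and keeps the 750/15/25/50
 * microsecond defaults; a 250 000 ns resolution lands in the middle branch,
 * giving nsMinSleepAlone = 125 000 ns and nsMinSleepCompany = 62 500 ns. */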
427#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
428 pGVMM->nsMinSleepWithHrTimer = 5000 /* ns (0.005 ms) */;
429#endif
430 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
431
432 /* The host CPU data. */
433 pGVMM->cHostCpus = cHostCpus;
434 uint32_t iCpu = cHostCpus;
435 RTCPUSET PossibleSet;
436 RTMpGetSet(&PossibleSet);
437 while (iCpu-- > 0)
438 {
439 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
440#ifdef GVMM_SCHED_WITH_PPT
441 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
442 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
443 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
444 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
445 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
446 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
447 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
448 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
449 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
450 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
451 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
452 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
453#endif
454
455 if (RTCpuSetIsMember(&PossibleSet, iCpu))
456 {
457 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
458 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
459
460#ifdef GVMM_SCHED_WITH_PPT
461 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
462 50*1000*1000 /* whatever */,
463 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
464 gvmmR0SchedPeriodicPreemptionTimerCallback,
465 &pGVMM->aHostCpus[iCpu]);
466 if (RT_SUCCESS(rc))
467 {
468 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
469 if (RT_FAILURE(rc))
470 LogRel(("GVMMR0Init: RTSpinlockCreate failed for #%u (%d)\n", iCpu, rc));
471 }
472 else
473 LogRel(("GVMMR0Init: RTTimerCreateEx failed for #%u (%d)\n", iCpu, rc));
474 if (RT_FAILURE(rc))
475 {
476 while (iCpu < cHostCpus)
477 {
478 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
479 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
480 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
481 iCpu++;
482 }
483 break;
484 }
485#endif
486 }
487 else
488 {
489 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
490 pGVMM->aHostCpus[iCpu].u32Magic = 0;
491 }
492 }
493 if (RT_SUCCESS(rc))
494 {
495 g_pGVMM = pGVMM;
496 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
497 return VINF_SUCCESS;
498 }
499
500 /* bail out. */
501 RTCritSectRwDelete(&pGVMM->UsedLock);
502 }
503 else
504 LogRel(("GVMMR0Init: RTCritSectRwInitEx failed (%d)\n", rc));
505 RTCritSectDelete(&pGVMM->CreateDestroyLock);
506 }
507 else
508 LogRel(("GVMMR0Init: RTCritSectInitEx failed (%d)\n", rc));
509
510 RTMemFree(pGVMM);
511 return rc;
512}
513
514
515/**
516 * Terminates the GVMM.
517 *
518 * This is called while owning the loader semaphore (see supdrvLdrFree()).
519 * And unless something is wrong, there should be absolutely no VMs
520 * registered at this point.
521 */
522GVMMR0DECL(void) GVMMR0Term(void)
523{
524 LogFlow(("GVMMR0Term:\n"));
525
526 PGVMM pGVMM = g_pGVMM;
527 g_pGVMM = NULL;
528 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
529 {
530 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
531 return;
532 }
533
534 /*
535 * First of all, stop all active timers.
536 */
537 uint32_t cActiveTimers = 0;
538 uint32_t iCpu = pGVMM->cHostCpus;
539 while (iCpu-- > 0)
540 {
541 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
542#ifdef GVMM_SCHED_WITH_PPT
543 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
544 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
545 cActiveTimers++;
546#endif
547 }
548 if (cActiveTimers)
549 RTThreadSleep(1); /* fudge */
550
551 /*
552 * Invalidate the instance and free resources.
553 */
554 pGVMM->u32Magic = ~GVMM_MAGIC;
555 RTCritSectRwDelete(&pGVMM->UsedLock);
556 RTCritSectDelete(&pGVMM->CreateDestroyLock);
557
558 pGVMM->iFreeHead = 0;
559 if (pGVMM->iUsedHead)
560 {
561 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
562 pGVMM->iUsedHead = 0;
563 }
564
565#ifdef GVMM_SCHED_WITH_PPT
566 iCpu = pGVMM->cHostCpus;
567 while (iCpu-- > 0)
568 {
569 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
570 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
571 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
572 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
573 }
574#endif
575
576 RTMemFree(pGVMM);
577}
578
579
580/**
581 * A quick hack for setting global config values.
582 *
583 * @returns VBox status code.
584 *
585 * @param pSession The session handle. Used for authentication.
586 * @param pszName The variable name.
587 * @param u64Value The new value.
588 */
589GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
590{
591 /*
592 * Validate input.
593 */
594 PGVMM pGVMM;
595 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
596 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
597 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
598
599 /*
600 * String switch time!
601 */
602 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
603 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
604 int rc = VINF_SUCCESS;
605 pszName += sizeof("/GVMM/") - 1;
606 if (!strcmp(pszName, "cEMTsMeansCompany"))
607 {
608 if (u64Value <= UINT32_MAX)
609 pGVMM->cEMTsMeansCompany = u64Value;
610 else
611 rc = VERR_OUT_OF_RANGE;
612 }
613 else if (!strcmp(pszName, "MinSleepAlone"))
614 {
615 if (u64Value <= RT_NS_100MS)
616 pGVMM->nsMinSleepAlone = u64Value;
617 else
618 rc = VERR_OUT_OF_RANGE;
619 }
620 else if (!strcmp(pszName, "MinSleepCompany"))
621 {
622 if (u64Value <= RT_NS_100MS)
623 pGVMM->nsMinSleepCompany = u64Value;
624 else
625 rc = VERR_OUT_OF_RANGE;
626 }
627#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
628 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
629 {
630 if (u64Value <= RT_NS_100MS)
631 pGVMM->nsMinSleepWithHrTimer = u64Value;
632 else
633 rc = VERR_OUT_OF_RANGE;
634 }
635#endif
636 else if (!strcmp(pszName, "EarlyWakeUp1"))
637 {
638 if (u64Value <= RT_NS_100MS)
639 {
640 pGVMM->nsEarlyWakeUp1 = u64Value;
641 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
642 }
643 else
644 rc = VERR_OUT_OF_RANGE;
645 }
646 else if (!strcmp(pszName, "EarlyWakeUp2"))
647 {
648 if (u64Value <= RT_NS_100MS)
649 {
650 pGVMM->nsEarlyWakeUp2 = u64Value;
651 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
652 }
653 else
654 rc = VERR_OUT_OF_RANGE;
655 }
656 else
657 rc = VERR_CFGM_VALUE_NOT_FOUND;
658 return rc;
659}
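A minimal usage sketch (assuming a valid support driver session pointer named pSession) for bumping one of these values from ring-0:

    int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000 /* ns (0.5 ms) */);
    AssertRC(rc);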
660
661
662/**
663 * A quick hack for getting global config values.
664 *
665 * @returns VBox status code.
666 *
667 * @param pSession The session handle. Used for authentication.
668 * @param pszName The variable name.
669 * @param pu64Value Where to return the value.
670 */
671GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
672{
673 /*
674 * Validate input.
675 */
676 PGVMM pGVMM;
677 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
678 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
679 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
680 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
681
682 /*
683 * String switch time!
684 */
685 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
686 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
687 int rc = VINF_SUCCESS;
688 pszName += sizeof("/GVMM/") - 1;
689 if (!strcmp(pszName, "cEMTsMeansCompany"))
690 *pu64Value = pGVMM->cEMTsMeansCompany;
691 else if (!strcmp(pszName, "MinSleepAlone"))
692 *pu64Value = pGVMM->nsMinSleepAlone;
693 else if (!strcmp(pszName, "MinSleepCompany"))
694 *pu64Value = pGVMM->nsMinSleepCompany;
695#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
696 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
697 *pu64Value = pGVMM->nsMinSleepWithHrTimer;
698#endif
699 else if (!strcmp(pszName, "EarlyWakeUp1"))
700 *pu64Value = pGVMM->nsEarlyWakeUp1;
701 else if (!strcmp(pszName, "EarlyWakeUp2"))
702 *pu64Value = pGVMM->nsEarlyWakeUp2;
703 else
704 rc = VERR_CFGM_VALUE_NOT_FOUND;
705 return rc;
706}
707
708
709/**
710 * Acquire the 'used' lock in shared mode.
711 *
712 * This prevents destruction of the VM while we're in ring-0.
713 *
714 * @returns IPRT status code, see RTCritSectRwEnterShared.
715 * @param a_pGVMM The GVMM instance data.
716 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
717 */
718#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
719
720/**
721 * Release the 'used' lock when owning it in shared mode.
722 *
723 * @returns IPRT status code, see RTCritSectRwLeaveShared.
724 * @param a_pGVMM The GVMM instance data.
725 * @sa GVMMR0_USED_SHARED_LOCK
726 */
727#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
728
729/**
730 * Acquire the 'used' lock in exclusive mode.
731 *
732 * Only use this function when making changes to the used list.
733 *
734 * @returns IPRT status code, see RTCritSectRwEnterExcl.
735 * @param a_pGVMM The GVMM instance data.
736 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
737 */
738#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
739
740/**
741 * Release the 'used' lock when owning it in exclusive mode.
742 *
743 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
744 * @param a_pGVMM The GVMM instance data.
745 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
746 */
747#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
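Putting the shared variants together, a typical ring-0 reader (sketched here with the lock status codes ignored for brevity) walks the used list like so:

    PGVMM pGVMM;
    GVMM_GET_VALID_INSTANCE_VOID(pGVMM);
    GVMMR0_USED_SHARED_LOCK(pGVMM);
    for (uint16_t i = pGVMM->iUsedHead; i != 0; i = pGVMM->aHandles[i].iNext)
    {
        /* examine pGVMM->aHandles[i].pGVM ... */
    }
    GVMMR0_USED_SHARED_UNLOCK(pGVMM);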
748
749
750/**
751 * Try to acquire the 'create & destroy' lock.
752 *
753 * @returns IPRT status code, see RTCritSectEnter.
754 * @param pGVMM The GVMM instance data.
755 */
756DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
757{
758 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
759 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
760 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
761 return rc;
762}
763
764
765/**
766 * Release the 'create & destroy' lock.
767 *
768 * @returns IPRT status code, see RTCritSectLeave.
769 * @param pGVMM The GVMM instance data.
770 */
771DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
772{
773 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
774 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
775 AssertRC(rc);
776 return rc;
777}
778
779
780/**
781 * Request wrapper for the GVMMR0CreateVM API.
782 *
783 * @returns VBox status code.
784 * @param pReq The request buffer.
785 * @param pSession The session handle. The VM will be associated with this.
786 */
787GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
788{
789 /*
790 * Validate the request.
791 */
792 if (!RT_VALID_PTR(pReq))
793 return VERR_INVALID_POINTER;
794 if (pReq->Hdr.cbReq != sizeof(*pReq))
795 return VERR_INVALID_PARAMETER;
796 if (pReq->pSession != pSession)
797 return VERR_INVALID_POINTER;
798
799 /*
800 * Execute it.
801 */
802 PGVM pGVM;
803 pReq->pVMR0 = NULL;
804 pReq->pVMR3 = NIL_RTR3PTR;
805 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
806 if (RT_SUCCESS(rc))
807 {
808 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
809 pReq->pVMR3 = pGVM->pVMR3;
810 }
811 return rc;
812}
813
814
815/**
816 * Allocates the VM structure and registers it with GVMM.
817 *
818 * The caller will become the VM owner and thereby the EMT.
819 *
820 * @returns VBox status code.
821 * @param pSession The support driver session.
822 * @param cCpus Number of virtual CPUs for the new VM.
823 * @param ppGVM Where to store the pointer to the VM structure.
824 *
825 * @thread EMT.
826 */
827GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
828{
829 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
830 PGVMM pGVMM;
831 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
832
833 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
834 *ppGVM = NULL;
835
836 if ( cCpus == 0
837 || cCpus > VMM_MAX_CPU_COUNT)
838 return VERR_INVALID_PARAMETER;
839
840 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
841 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
842 RTPROCESS ProcId = RTProcSelf();
843 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
844
845 /*
846 * The whole allocation process is protected by the lock.
847 */
848 int rc = gvmmR0CreateDestroyLock(pGVMM);
849 AssertRCReturn(rc, rc);
850
851 /*
852 * Only one VM per session.
853 */
854 if (SUPR0GetSessionVM(pSession) != NULL)
855 {
856 gvmmR0CreateDestroyUnlock(pGVMM);
857 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
858 return VERR_ALREADY_EXISTS;
859 }
860
861 /*
862 * Allocate a handle first so we don't waste resources unnecessarily.
863 */
864 uint16_t iHandle = pGVMM->iFreeHead;
865 if (iHandle)
866 {
867 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
868
869 /* consistency checks, a bit paranoid as always. */
870 if ( !pHandle->pGVM
871 && !pHandle->pvObj
872 && pHandle->iSelf == iHandle)
873 {
874 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
875 if (pHandle->pvObj)
876 {
877 /*
878 * Move the handle from the free to used list and perform permission checks.
879 */
880 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
881 AssertRC(rc);
882
883 pGVMM->iFreeHead = pHandle->iNext;
884 pHandle->iNext = pGVMM->iUsedHead;
885 pGVMM->iUsedHead = iHandle;
886 pGVMM->cVMs++;
887
888 pHandle->pGVM = NULL;
889 pHandle->pSession = pSession;
890 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
891 pHandle->ProcId = NIL_RTPROCESS;
892
893 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
894
895 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
896 if (RT_SUCCESS(rc))
897 {
898 /*
899 * Allocate memory for the VM structure (combined VM + GVM).
900 */
901 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
902 const uint32_t cPages = RT_ALIGN_32(cbVM, HOST_PAGE_SIZE) >> HOST_PAGE_SHIFT;
903 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
904 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << HOST_PAGE_SHIFT, false /* fExecutable */);
905 if (RT_SUCCESS(rc))
906 {
907 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
908 AssertPtr(pGVM);
909
910 /*
911 * Initialise the structure.
912 */
913 RT_BZERO(pGVM, cPages << HOST_PAGE_SHIFT);
914 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
915 pGVM->gvmm.s.VMMemObj = hVMMemObj;
916 rc = GMMR0InitPerVMData(pGVM);
917 int rc2 = PGMR0InitPerVMData(pGVM, hVMMemObj);
918 int rc3 = VMMR0InitPerVMData(pGVM);
919 CPUMR0InitPerVMData(pGVM);
920 DBGFR0InitPerVMData(pGVM);
921 PDMR0InitPerVMData(pGVM);
922 IOMR0InitPerVMData(pGVM);
923 TMR0InitPerVMData(pGVM);
924 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3))
925 {
926 /*
927 * Allocate page array.
928 * This currently has to be made available to ring-3, but this should change eventually.
929 */
930 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
931 if (RT_SUCCESS(rc))
932 {
933 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
934 for (uint32_t iPage = 0; iPage < cPages; iPage++)
935 {
936 paPages[iPage].uReserved = 0;
937 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
938 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
939 }
940
941 /*
942 * Map the page array, VM and VMCPU structures into ring-3.
943 */
944 AssertCompileSizeAlignment(VM, HOST_PAGE_SIZE);
945 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
946 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
947 0 /*offSub*/, sizeof(VM));
948 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
949 {
950 AssertCompileSizeAlignment(VMCPU, HOST_PAGE_SIZE);
951 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
952 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
953 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
954 }
955 if (RT_SUCCESS(rc))
956 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
957 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
958 NIL_RTR0PROCESS);
959 if (RT_SUCCESS(rc))
960 {
961 /*
962 * Initialize all the VM pointers.
963 */
964 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
965 AssertMsg(RTR0MemUserIsValidAddr(pVMR3) && pVMR3 != NIL_RTR3PTR, ("%p\n", pVMR3));
966
967 for (VMCPUID i = 0; i < cCpus; i++)
968 {
969 pGVM->aCpus[i].pVMR0 = pGVM;
970 pGVM->aCpus[i].pVMR3 = pVMR3;
971 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
972 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
973 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
974 AssertMsg(RTR0MemUserIsValidAddr(pGVM->apCpusR3[i]) && pGVM->apCpusR3[i] != NIL_RTR3PTR,
975 ("apCpusR3[%u]=%p\n", i, pGVM->apCpusR3[i]));
976 }
977
978 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
979 AssertMsg(RTR0MemUserIsValidAddr(pGVM->paVMPagesR3) && pGVM->paVMPagesR3 != NIL_RTR3PTR,
980 ("%p\n", pGVM->paVMPagesR3));
981
982#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
983 /*
984 * Create the high resolution wake-up timer for EMT 0, ignore failures.
985 */
986 if (RTTimerCanDoHighResolution())
987 {
988 int rc4 = RTTimerCreateEx(&pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer,
989 0 /*one-shot, no interval*/,
990 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback,
991 &pGVM->aCpus[0]);
992 if (RT_FAILURE(rc4))
993 pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer = NULL;
994 }
995#endif
996
997 /*
998 * Complete the handle - take the UsedLock just to be careful.
999 */
1000 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1001 AssertRC(rc);
1002
1003 pHandle->pGVM = pGVM;
1004 pHandle->hEMT0 = hEMT0;
1005 pHandle->ProcId = ProcId;
1006 pGVM->pVMR3 = pVMR3;
1007 pGVM->pVMR3Unsafe = pVMR3;
1008 pGVM->aCpus[0].hEMT = hEMT0;
1009 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
1010 pGVM->aCpus[0].cEmtHashCollisions = 0;
1011 uint32_t const idxHash = GVMM_EMT_HASH_1(hEMT0);
1012 pGVM->aCpus[0].gvmm.s.idxEmtHash = (uint16_t)idxHash;
1013 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hEMT0;
1014 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = 0;
1015 pGVMM->cEMTs += cCpus;
1016
1017 /* Associate it with the session and create the context hook for EMT0. */
1018 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
1019 if (RT_SUCCESS(rc))
1020 {
1021 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
1022 if (RT_SUCCESS(rc))
1023 {
1024 /*
1025 * Done!
1026 */
1027 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
1028
1029 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1030 gvmmR0CreateDestroyUnlock(pGVMM);
1031
1032 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
1033
1034 *ppGVM = pGVM;
1035 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1036 return VINF_SUCCESS;
1037 }
1038
1039 SUPR0SetSessionVM(pSession, NULL, NULL);
1040 }
1041 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1042 }
1043
1044 /* Cleanup mappings. */
1045 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1046 {
1047 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1048 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1049 }
1050 for (VMCPUID i = 0; i < cCpus; i++)
1051 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1052 {
1053 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1054 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1055 }
1056 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1057 {
1058 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1059 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1060 }
1061 }
1062 }
1063 else
1064 {
1065 if (RT_SUCCESS_NP(rc))
1066 rc = rc2;
1067 if (RT_SUCCESS_NP(rc))
1068 rc = rc3;
1069 AssertStmt(RT_FAILURE_NP(rc), rc = VERR_IPE_UNEXPECTED_STATUS);
1070 }
1071 }
1072 }
1073 /* else: The user wasn't permitted to create this VM. */
1074
1075 /*
1076 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1077 * object reference here. A little extra mess because of the non-recursive lock.
1078 */
1079 void *pvObj = pHandle->pvObj;
1080 pHandle->pvObj = NULL;
1081 gvmmR0CreateDestroyUnlock(pGVMM);
1082
1083 SUPR0ObjRelease(pvObj, pSession);
1084
1085 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1086 return rc;
1087 }
1088
1089 rc = VERR_NO_MEMORY;
1090 }
1091 else
1092 rc = VERR_GVMM_IPE_1;
1093 }
1094 else
1095 rc = VERR_GVM_TOO_MANY_VMS;
1096
1097 gvmmR0CreateDestroyUnlock(pGVMM);
1098 return rc;
1099}
1100
1101
1102/**
1103 * Initializes the per VM data belonging to GVMM.
1104 *
1105 * @param pGVM Pointer to the global VM structure.
1106 * @param hSelf The handle.
1107 * @param cCpus The CPU count.
1108 * @param pSession The session this VM is associated with.
1109 */
1110static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1111{
1112 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1113 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1114 AssertCompileMemberAlignment(VM, cpum, 64);
1115 AssertCompileMemberAlignment(VM, tm, 64);
1116
1117 /* GVM: */
1118 pGVM->u32Magic = GVM_MAGIC;
1119 pGVM->hSelf = hSelf;
1120 pGVM->cCpus = cCpus;
1121 pGVM->pSession = pSession;
1122 pGVM->pSelf = pGVM;
1123
1124 /* VM: */
1125 pGVM->enmVMState = VMSTATE_CREATING;
1126 pGVM->hSelfUnsafe = hSelf;
1127 pGVM->pSessionUnsafe = pSession;
1128 pGVM->pVMR0ForCall = pGVM;
1129 pGVM->cCpusUnsafe = cCpus;
1130 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1131 pGVM->uStructVersion = 1;
1132 pGVM->cbSelf = sizeof(VM);
1133 pGVM->cbVCpu = sizeof(VMCPU);
1134
1135 /* GVMM: */
1136 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1137 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1138 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1139 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1140 pGVM->gvmm.s.fDoneVMMR0Init = false;
1141 pGVM->gvmm.s.fDoneVMMR0Term = false;
1142
1143 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); i++)
1144 {
1145 pGVM->gvmm.s.aWorkerThreads[i].hNativeThread = NIL_RTNATIVETHREAD;
1146 pGVM->gvmm.s.aWorkerThreads[i].hNativeThreadR3 = NIL_RTNATIVETHREAD;
1147 }
1148 pGVM->gvmm.s.aWorkerThreads[0].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED; /* invalid entry */
1149
1150 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash); i++)
1151 {
1152 pGVM->gvmm.s.aEmtHash[i].hNativeEmt = NIL_RTNATIVETHREAD;
1153 pGVM->gvmm.s.aEmtHash[i].idVCpu = NIL_VMCPUID;
1154 }
1155
1156 /*
1157 * Per virtual CPU.
1158 */
1159 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1160 {
1161 pGVM->aCpus[i].idCpu = i;
1162 pGVM->aCpus[i].idCpuUnsafe = i;
1163 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1164 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1165 pGVM->aCpus[i].gvmm.s.idxEmtHash = UINT16_MAX;
1166 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1167 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1168 pGVM->aCpus[i].pGVM = pGVM;
1169 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1170 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1171 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1172 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1173 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1174 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1175 }
1176}
1177
1178
1179/**
1180 * Does the VM initialization.
1181 *
1182 * @returns VBox status code.
1183 * @param pGVM The global (ring-0) VM structure.
1184 */
1185GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1186{
1187 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1188
1189 int rc = VERR_INTERNAL_ERROR_3;
1190 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1191 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1192 {
1193 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1194 {
1195 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1196 if (RT_FAILURE(rc))
1197 {
1198 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1199 break;
1200 }
1201 }
1202 }
1203 else
1204 rc = VERR_WRONG_ORDER;
1205
1206 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1207 return rc;
1208}
1209
1210
1211/**
1212 * Indicates that we're done with the ring-0 initialization
1213 * of the VM.
1214 *
1215 * @param pGVM The global (ring-0) VM structure.
1216 * @thread EMT(0)
1217 */
1218GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1219{
1220 /* Set the indicator. */
1221 pGVM->gvmm.s.fDoneVMMR0Init = true;
1222}
1223
1224
1225/**
1226 * Indicates that we're doing the ring-0 termination of the VM.
1227 *
1228 * @returns true if termination hasn't been done already, false if it has.
1229 * @param pGVM Pointer to the global VM structure. Optional.
1230 * @thread EMT(0) or session cleanup thread.
1231 */
1232GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1233{
1234 /* Validate the VM structure, state and handle. */
1235 AssertPtrReturn(pGVM, false);
1236
1237 /* Set the indicator. */
1238 if (pGVM->gvmm.s.fDoneVMMR0Term)
1239 return false;
1240 pGVM->gvmm.s.fDoneVMMR0Term = true;
1241 return true;
1242}
1243
1244
1245/**
1246 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1247 *
1248 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1249 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1250 * would've been nice if the caller were actually the EMT thread or if we somehow
1251 * could've associated the calling thread with the VM up front.
1252 *
1253 * @returns VBox status code.
1254 * @param pGVM The global (ring-0) VM structure.
1255 *
1256 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1257 */
1258GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1259{
1260 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1261 PGVMM pGVMM;
1262 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1263
1264 /*
1265 * Validate the VM structure, state and caller.
1266 */
1267 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1268 AssertReturn(!((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1269 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1270 VERR_WRONG_ORDER);
1271
1272 uint32_t hGVM = pGVM->hSelf;
1273 ASMCompilerBarrier();
1274 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1275 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1276
1277 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1278 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1279
1280 RTPROCESS ProcId = RTProcSelf();
1281 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1282 AssertReturn( ( pHandle->hEMT0 == hSelf
1283 && pHandle->ProcId == ProcId)
1284 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1285
1286 /*
1287 * Lookup the handle and destroy the object.
1288 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1289 * object, we take some precautions against racing callers just in case...
1290 */
1291 int rc = gvmmR0CreateDestroyLock(pGVMM);
1292 AssertRC(rc);
1293
1294 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1295 if ( pHandle->pGVM == pGVM
1296 && ( ( pHandle->hEMT0 == hSelf
1297 && pHandle->ProcId == ProcId)
1298 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1299 && RT_VALID_PTR(pHandle->pvObj)
1300 && RT_VALID_PTR(pHandle->pSession)
1301 && RT_VALID_PTR(pHandle->pGVM)
1302 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1303 {
1304 /* Check that other EMTs have deregistered. */
1305 uint32_t cNotDeregistered = 0;
1306 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1307 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != GVMM_RTNATIVETHREAD_DESTROYED;
1308 if (cNotDeregistered == 0)
1309 {
1310 /* Grab the object pointer. */
1311 void *pvObj = pHandle->pvObj;
1312 pHandle->pvObj = NULL;
1313 gvmmR0CreateDestroyUnlock(pGVMM);
1314
1315 SUPR0ObjRelease(pvObj, pHandle->pSession);
1316 }
1317 else
1318 {
1319 gvmmR0CreateDestroyUnlock(pGVMM);
1320 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1321 }
1322 }
1323 else
1324 {
1325 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1326 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1327 gvmmR0CreateDestroyUnlock(pGVMM);
1328 rc = VERR_GVMM_IPE_2;
1329 }
1330
1331 return rc;
1332}
1333
1334
1335/**
1336 * Performs VM cleanup tasks as part of object destruction.
1337 *
1338 * @param pGVM The GVM pointer.
1339 */
1340static void gvmmR0CleanupVM(PGVM pGVM)
1341{
1342 if ( pGVM->gvmm.s.fDoneVMMR0Init
1343 && !pGVM->gvmm.s.fDoneVMMR0Term)
1344 {
1345 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1346 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1347 {
1348 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1349 VMMR0TermVM(pGVM, NIL_VMCPUID);
1350 }
1351 else
1352 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1353 }
1354
1355 GMMR0CleanupVM(pGVM);
1356#ifdef VBOX_WITH_NEM_R0
1357 NEMR0CleanupVM(pGVM);
1358#endif
1359 PDMR0CleanupVM(pGVM);
1360 IOMR0CleanupVM(pGVM);
1361 DBGFR0CleanupVM(pGVM);
1362 PGMR0CleanupVM(pGVM);
1363 TMR0CleanupVM(pGVM);
1364 VMMR0CleanupVM(pGVM);
1365}
1366
1367
1368/**
1369 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1370 *
1371 * pvUser1 is the GVM instance pointer.
1372 * pvUser2 is the handle pointer.
1373 */
1374static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1375{
1376 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1377
1378 NOREF(pvObj);
1379
1380 /*
1381 * Some quick, paranoid, input validation.
1382 */
1383 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1384 AssertPtr(pHandle);
1385 PGVMM pGVMM = (PGVMM)pvUser1;
1386 Assert(pGVMM == g_pGVMM);
1387 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1388 if ( !iHandle
1389 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1390 || iHandle != pHandle->iSelf)
1391 {
1392 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1393 return;
1394 }
1395
1396 int rc = gvmmR0CreateDestroyLock(pGVMM);
1397 AssertRC(rc);
1398 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1399 AssertRC(rc);
1400
1401 /*
1402 * This is a tad slow but a doubly linked list is too much hassle.
1403 */
1404 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1405 {
1406 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1407 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1408 gvmmR0CreateDestroyUnlock(pGVMM);
1409 return;
1410 }
1411
1412 if (pGVMM->iUsedHead == iHandle)
1413 pGVMM->iUsedHead = pHandle->iNext;
1414 else
1415 {
1416 uint16_t iPrev = pGVMM->iUsedHead;
1417 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1418 while (iPrev)
1419 {
1420 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1421 {
1422 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1423 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1424 gvmmR0CreateDestroyUnlock(pGVMM);
1425 return;
1426 }
1427 if (RT_UNLIKELY(c-- <= 0))
1428 {
1429 iPrev = 0;
1430 break;
1431 }
1432
1433 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1434 break;
1435 iPrev = pGVMM->aHandles[iPrev].iNext;
1436 }
1437 if (!iPrev)
1438 {
1439 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1440 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1441 gvmmR0CreateDestroyUnlock(pGVMM);
1442 return;
1443 }
1444
1445 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1446 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1447 }
1448 pHandle->iNext = 0;
1449 pGVMM->cVMs--;
1450
1451 /*
1452 * Do the global cleanup round.
1453 */
1454 PGVM pGVM = pHandle->pGVM;
1455 if ( RT_VALID_PTR(pGVM)
1456 && pGVM->u32Magic == GVM_MAGIC)
1457 {
1458 pGVMM->cEMTs -= pGVM->cCpus;
1459
1460 if (pGVM->pSession)
1461 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1462
1463 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1464
1465 gvmmR0CleanupVM(pGVM);
1466
1467 /*
1468 * Do the GVMM cleanup - must be done last.
1469 */
1470 /* The VM and VM pages mappings/allocations. */
1471 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1472 {
1473 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1474 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1475 }
1476
1477 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1478 {
1479 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1480 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1481 }
1482
1483 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1484 {
1485 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1486 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1487 }
1488
1489 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1490 {
1491 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1492 {
1493 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1494 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1495 }
1496 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1497 {
1498 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1499 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1500 }
1501#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1502 if (pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer != NULL)
1503 {
1504 RTTimerDestroy(pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer);
1505 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1506 }
1507#endif
1508 }
1509
1510 /* the GVM structure itself. */
1511 pGVM->u32Magic |= UINT32_C(0x80000000);
1512 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1513 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1514 pGVM = NULL;
1515
1516 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1517 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1518 AssertRC(rc);
1519 }
1520 /* else: GVMMR0CreateVM cleanup. */
1521
1522 /*
1523 * Free the handle.
1524 */
1525 pHandle->iNext = pGVMM->iFreeHead;
1526 pGVMM->iFreeHead = iHandle;
1527 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1528 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1529 ASMAtomicWriteNullPtr(&pHandle->pSession);
1530 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1531 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1532
1533 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1534 gvmmR0CreateDestroyUnlock(pGVMM);
1535 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1536}
1537
1538
1539/**
1540 * Registers the calling thread as the EMT of a Virtual CPU.
1541 *
1542 * Note that VCPU 0 is automatically registered during VM creation.
1543 *
1544 * @returns VBox status code
1545 * @param pGVM The global (ring-0) VM structure.
1546 * @param idCpu VCPU id to register the current thread as.
1547 */
1548GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1549{
1550 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1551
1552 /*
1553 * Validate the VM structure, state and handle.
1554 */
1555 PGVMM pGVMM;
1556 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */);
1557 if (RT_SUCCESS(rc))
1558 {
1559 if (idCpu < pGVM->cCpus)
1560 {
1561 PGVMCPU const pGVCpu = &pGVM->aCpus[idCpu];
1562 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1563
1564 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1565
1566 /* Check that the EMT isn't already assigned to a thread. */
1567 if (pGVCpu->hEMT == NIL_RTNATIVETHREAD)
1568 {
1569 Assert(pGVCpu->hNativeThreadR0 == NIL_RTNATIVETHREAD);
1570
1571 /* A thread may only be one EMT (this makes sure hNativeSelf isn't NIL). */
1572 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1573 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1574 if (RT_SUCCESS(rc))
1575 {
1576 /*
1577 * Do the assignment, then try to set up the hook. Undo if that fails.
1578 */
1579 unsigned cCollisions = 0;
1580 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
1581 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD)
1582 {
1583 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
1584 do
1585 {
1586 cCollisions++;
1587 Assert(cCollisions < GVMM_EMT_HASH_SIZE);
1588 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1589 } while (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD);
1590 }
1591 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hNativeSelf;
1592 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = idCpu;
1593
1594 pGVCpu->hNativeThreadR0 = hNativeSelf;
1595 pGVCpu->hEMT = hNativeSelf;
1596 pGVCpu->cEmtHashCollisions = (uint8_t)cCollisions;
1597 pGVCpu->gvmm.s.idxEmtHash = (uint16_t)idxHash;
1598
1599 rc = VMMR0ThreadCtxHookCreateForEmt(pGVCpu);
1600 if (RT_SUCCESS(rc))
1601 {
1602 CPUMR0RegisterVCpuThread(pGVCpu);
1603
1604#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1605 /*
1606 * Create the high resolution wake-up timer, ignore failures.
1607 */
1608 if (RTTimerCanDoHighResolution())
1609 {
1610 int rc2 = RTTimerCreateEx(&pGVCpu->gvmm.s.hHrWakeUpTimer, 0 /*one-shot, no interval*/,
1611 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback, pGVCpu);
1612 if (RT_FAILURE(rc2))
1613 pGVCpu->gvmm.s.hHrWakeUpTimer = NULL;
1614 }
1615#endif
1616 }
1617 else
1618 {
1619 pGVCpu->hNativeThreadR0 = NIL_RTNATIVETHREAD;
1620 pGVCpu->hEMT = NIL_RTNATIVETHREAD;
1621 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = NIL_RTNATIVETHREAD;
1622 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = NIL_VMCPUID;
1623 pGVCpu->gvmm.s.idxEmtHash = UINT16_MAX;
1624 }
1625 }
1626 }
1627 else
1628 rc = VERR_ACCESS_DENIED;
1629
1630 gvmmR0CreateDestroyUnlock(pGVMM);
1631 }
1632 else
1633 rc = VERR_INVALID_CPU_ID;
1634 }
1635 return rc;
1636}
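For reference, looking an EMT up again follows the same double-hash probing as the registration above; a simplified sketch (hypothetical helper, not the actual lookup code used by GVMMR0):

    static VMCPUID gvmmExampleLookupEmt(PGVM pGVM, RTNATIVETHREAD hEmt)
    {
        uint32_t const idxHash2 = GVMM_EMT_HASH_2(hEmt);
        uint32_t idxHash = GVMM_EMT_HASH_1(hEmt);
        for (uint32_t cProbes = 0; cProbes < GVMM_EMT_HASH_SIZE; cProbes++)
        {
            if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEmt)
                return pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
            if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
                break; /* free slot - the thread was never registered */
            idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
        }
        return NIL_VMCPUID;
    }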
1637
1638
1639/**
1640 * Deregisters the calling thread as the EMT of a Virtual CPU.
1641 *
1642 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1643 *
1644 * @returns VBox status code
1645 * @param pGVM The global (ring-0) VM structure.
1646 * @param idCpu VCPU id the current thread is registered as.
1647 */
1648GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1649{
1650 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1651
1652 /*
1653 * Validate the VM structure, state and handle.
1654 */
1655 PGVMM pGVMM;
1656 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1657 if (RT_SUCCESS(rc))
1658 {
1659 /*
1660 * Take the destruction lock and recheck the handle state to
1661 * prevent racing GVMMR0DestroyVM.
1662 */
1663 gvmmR0CreateDestroyLock(pGVMM);
1664
1665 uint32_t hSelf = pGVM->hSelf;
1666 ASMCompilerBarrier();
1667 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1668 && pGVMM->aHandles[hSelf].pvObj != NULL
1669 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1670 {
1671 /*
1672 * Do per-EMT cleanups.
1673 */
1674 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1675
1676 /*
1677 * Invalidate hEMT. We don't use NIL here as that would allow
1678 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1679 */
1680 pGVM->aCpus[idCpu].hEMT = GVMM_RTNATIVETHREAD_DESTROYED;
1681 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1682
1683 uint32_t const idxHash = pGVM->aCpus[idCpu].gvmm.s.idxEmtHash;
1684 if (idxHash < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash))
1685 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = GVMM_RTNATIVETHREAD_DESTROYED;
1686 }
1687
1688 gvmmR0CreateDestroyUnlock(pGVMM);
1689 }
1690 return rc;
1691}
1692
1693
1694/**
1695 * Registers the caller as a given worker thread.
1696 *
1697 * This enables the thread to use critical sections in ring-0.
1698 *
1699 * @returns VBox status code.
1700 * @param pGVM The global (ring-0) VM structure.
1701 * @param enmWorker The worker thread this is supposed to be.
1702 * @param hNativeSelfR3 The ring-3 native self of the caller.
1703 */
1704GVMMR0DECL(int) GVMMR0RegisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker, RTNATIVETHREAD hNativeSelfR3)
1705{
1706 /*
1707 * Validate input.
1708 */
1709 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1710 AssertReturn(hNativeSelfR3 != NIL_RTNATIVETHREAD, VERR_INVALID_HANDLE);
1711 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1712 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1713 PGVMM pGVMM;
1714 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1715 AssertRCReturn(rc, rc);
1716 AssertReturn(pGVM->enmVMState < VMSTATE_DESTROYING, VERR_VM_INVALID_VM_STATE);
1717
1718 /*
1719 * Grab the big lock and check the VM state again.
1720 */
1721 uint32_t const hSelf = pGVM->hSelf;
1722 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1723 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1724 && pGVMM->aHandles[hSelf].pvObj != NULL
1725 && pGVMM->aHandles[hSelf].pGVM == pGVM
1726 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1727 {
1728 if (pGVM->enmVMState < VMSTATE_DESTROYING)
1729 {
1730 /*
1731 * Check that the thread isn't an EMT or serving in some other worker capacity.
1732 */
1733 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1734 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1735 for (size_t idx = 0; idx < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); idx++)
1736                AssertBreakStmt(idx == (size_t)enmWorker || pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread != hNativeSelf,
1737 rc = VERR_INVALID_PARAMETER);
1738 if (RT_SUCCESS(rc))
1739 {
1740 /*
1741 * Do the registration.
1742 */
1743 if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == NIL_RTNATIVETHREAD
1744 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == NIL_RTNATIVETHREAD)
1745 {
1746 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = hNativeSelf;
1747 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = hNativeSelfR3;
1748 rc = VINF_SUCCESS;
1749 }
1750 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeSelf
1751 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == hNativeSelfR3)
1752 rc = VERR_ALREADY_EXISTS;
1753 else
1754 rc = VERR_RESOURCE_BUSY;
1755 }
1756 }
1757 else
1758 rc = VERR_VM_INVALID_VM_STATE;
1759 }
1760 else
1761 rc = VERR_INVALID_VM_HANDLE;
1762 gvmmR0CreateDestroyUnlock(pGVMM);
1763 return rc;
1764}
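
/*
 * Editor's sketch (not part of the original source): how a ring-0 caller might
 * register itself for a worker slot and treat a repeated registration by the
 * same thread as harmless.  GVMMWORKERTHREAD_EXAMPLE is a made-up placeholder;
 * the real worker ids are the GVMMWORKERTHREAD values declared in VBox/vmm/gvmm.h.
 */
#if 0 /* illustrative only */
static int exampleRegisterSelfAsWorker(PGVM pGVM, RTNATIVETHREAD hNativeSelfR3)
{
    int rc = GVMMR0RegisterWorkerThread(pGVM, GVMMWORKERTHREAD_EXAMPLE /* placeholder */, hNativeSelfR3);
    if (rc == VERR_ALREADY_EXISTS) /* same thread and same ring-3 handle: already registered */
        rc = VINF_SUCCESS;
    return rc;
}
#endif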
1765
1766
1767/**
1768 * Deregisters a worker thread (the caller).
1769 *
1770 * The worker thread cannot be re-created and re-registered; instead, the given
1771 * @a enmWorker slot becomes invalid.
1772 *
1773 * @returns VBox status code.
1774 * @param pGVM The global (ring-0) VM structure.
1775 * @param enmWorker The worker thread this is supposed to be.
1776 */
1777GVMMR0DECL(int) GVMMR0DeregisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker)
1778{
1779 /*
1780 * Validate input.
1781 */
1782 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1783 RTNATIVETHREAD const hNativeThread = RTThreadNativeSelf();
1784 AssertReturn(hNativeThread != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1785 PGVMM pGVMM;
1786 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1787 AssertRCReturn(rc, rc);
1788
1789 /*
1790 * Grab the big lock and check the VM state again.
1791 */
1792 uint32_t const hSelf = pGVM->hSelf;
1793 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1794 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1795 && pGVMM->aHandles[hSelf].pvObj != NULL
1796 && pGVMM->aHandles[hSelf].pGVM == pGVM
1797 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1798 {
1799 /*
1800 * Do the deregistration.
1801     * This will prevent any other thread from registering as this worker later.
1802 */
1803 if (pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeThread)
1804 {
1805 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED;
1806 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = GVMM_RTNATIVETHREAD_DESTROYED;
1807 rc = VINF_SUCCESS;
1808 }
1809 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == GVMM_RTNATIVETHREAD_DESTROYED
1810 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == GVMM_RTNATIVETHREAD_DESTROYED)
1811 rc = VINF_SUCCESS;
1812 else
1813 rc = VERR_NOT_OWNER;
1814 }
1815 else
1816 rc = VERR_INVALID_VM_HANDLE;
1817 gvmmR0CreateDestroyUnlock(pGVMM);
1818 return rc;
1819}
1820
1821
1822/**
1823 * Lookup a GVM structure by its handle.
1824 *
1825 * @returns The GVM pointer on success, NULL on failure.
1826 * @param hGVM The global VM handle. Asserts on bad handle.
1827 */
1828GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1829{
1830 PGVMM pGVMM;
1831 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1832
1833 /*
1834 * Validate.
1835 */
1836 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1837 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1838
1839 /*
1840 * Look it up.
1841 */
1842 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1843 AssertPtrReturn(pHandle->pvObj, NULL);
1844 PGVM pGVM = pHandle->pGVM;
1845 AssertPtrReturn(pGVM, NULL);
1846
1847 return pGVM;
1848}
1849
1850
1851/**
1852 * Check that the given GVM and VM structures match up.
1853 *
1854 * The calling thread must be in the same process as the VM. All current lookups
1855 * are by threads inside the same process, so this will not be an issue.
1856 *
1857 * @returns VBox status code.
1858 * @param pGVM The global (ring-0) VM structure.
1859 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1860 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1861 * shared mode when requested.
1862 *
1863 * Be very careful if not taking the lock as it's
1864 * possible that the VM will disappear then!
1865 *
1866 * @remark This will not assert on an invalid pGVM but will try to return an error silently.
1867 */
1868static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1869{
1870 /*
1871 * Check the pointers.
1872 */
1873 int rc;
1874 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1875 && ((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0 ))
1876 {
1877 /*
1878 * Get the pGVMM instance and check the VM handle.
1879 */
1880 PGVMM pGVMM;
1881 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1882
1883 uint16_t hGVM = pGVM->hSelf;
1884 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1885 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1886 {
1887 RTPROCESS const pidSelf = RTProcSelf();
1888 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1889 if (fTakeUsedLock)
1890 {
1891 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1892 AssertRCReturn(rc, rc);
1893 }
1894
1895 if (RT_LIKELY( pHandle->pGVM == pGVM
1896 && pHandle->ProcId == pidSelf
1897 && RT_VALID_PTR(pHandle->pvObj)))
1898 {
1899 /*
1900 * Some more VM data consistency checks.
1901 */
1902 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1903 && pGVM->hSelfUnsafe == hGVM
1904 && pGVM->pSelf == pGVM))
1905 {
1906 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1907 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1908 {
1909 *ppGVMM = pGVMM;
1910 return VINF_SUCCESS;
1911 }
1912 rc = VERR_INCONSISTENT_VM_HANDLE;
1913 }
1914 else
1915 rc = VERR_INCONSISTENT_VM_HANDLE;
1916 }
1917 else
1918 rc = VERR_INVALID_VM_HANDLE;
1919
1920 if (fTakeUsedLock)
1921 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1922 }
1923 else
1924 rc = VERR_INVALID_VM_HANDLE;
1925 }
1926 else
1927 rc = VERR_INVALID_POINTER;
1928 return rc;
1929}
1930
1931
1932/**
1933 * Validates a GVM/VM pair.
1934 *
1935 * @returns VBox status code.
1936 * @param pGVM The global (ring-0) VM structure.
1937 */
1938GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1939{
1940 PGVMM pGVMM;
1941 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1942}
1943
1944
1945/**
1946 * Check that the given GVM and VM structures match up.
1947 *
1948 * The calling thread must be in the same process as the VM. All current lookups
1949 * are by threads inside the same process, so this will not be an issue.
1950 *
1951 * @returns VBox status code.
1952 * @param pGVM The global (ring-0) VM structure.
1953 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1954 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1955 * @thread EMT
1956 *
1957 * @remarks This will assert in all failure paths.
1958 */
1959static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
1960{
1961 /*
1962 * Check the pointers.
1963 */
1964 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1965 AssertReturn(((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1966
1967 /*
1968 * Get the pGVMM instance and check the VM handle.
1969 */
1970 PGVMM pGVMM;
1971 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1972
1973 uint16_t hGVM = pGVM->hSelf;
1974 ASMCompilerBarrier();
1975 AssertReturn( hGVM != NIL_GVM_HANDLE
1976 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1977
1978 RTPROCESS const pidSelf = RTProcSelf();
1979 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1980 AssertReturn( pHandle->pGVM == pGVM
1981 && pHandle->ProcId == pidSelf
1982 && RT_VALID_PTR(pHandle->pvObj),
1983 VERR_INVALID_HANDLE);
1984
1985 /*
1986 * Check the EMT claim.
1987 */
1988 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1989 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1990 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1991
1992 /*
1993 * Some more VM data consistency checks.
1994 */
1995 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1996 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1997 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
1998 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1999
2000 *ppGVMM = pGVMM;
2001 return VINF_SUCCESS;
2002}
2003
2004
2005/**
2006 * Validates a GVM/EMT pair.
2007 *
2008 * @returns VBox status code.
2009 * @param pGVM The global (ring-0) VM structure.
2010 * @param idCpu The Virtual CPU ID of the calling EMT.
2011 * @thread EMT(idCpu)
2012 */
2013GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
2014{
2015 PGVMM pGVMM;
2016 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2017}
2018
2019
2020/**
2021 * Looks up the VM belonging to the specified EMT thread.
2022 *
2023 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2024 * unnecessary kernel panics when the EMT thread hits an assertion. The
2025 * caller may or may not be an EMT thread.
2026 *
2027 * @returns Pointer to the VM on success, NULL on failure.
2028 * @param hEMT The native thread handle of the EMT.
2029 * NIL_RTNATIVETHREAD means the current thread
2030 */
2031GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
2032{
2033 /*
2034     * No assertions here as we're usually called in an AssertMsgN or
2035 * RTAssert* context.
2036 */
2037 PGVMM pGVMM = g_pGVMM;
2038 if ( !RT_VALID_PTR(pGVMM)
2039 || pGVMM->u32Magic != GVMM_MAGIC)
2040 return NULL;
2041
2042 if (hEMT == NIL_RTNATIVETHREAD)
2043 hEMT = RTThreadNativeSelf();
2044 RTPROCESS ProcId = RTProcSelf();
2045
2046 /*
2047 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2048 */
2049/** @todo introduce some pid hash table here, please. */
2050 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2051 {
2052 if ( pGVMM->aHandles[i].iSelf == i
2053 && pGVMM->aHandles[i].ProcId == ProcId
2054 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2055 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2056 {
2057 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2058 return pGVMM->aHandles[i].pGVM;
2059
2060            /* This is fairly safe with the current process-per-VM approach. */
2061 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2062 VMCPUID const cCpus = pGVM->cCpus;
2063 ASMCompilerBarrier();
2064 if ( cCpus < 1
2065 || cCpus > VMM_MAX_CPU_COUNT)
2066 continue;
2067 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2068 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2069 return pGVMM->aHandles[i].pGVM;
2070 }
2071 }
2072 return NULL;
2073}
2074
2075
2076/**
2077 * Looks up the GVMCPU belonging to the specified EMT thread.
2078 *
2079 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2080 * unnecessary kernel panics when the EMT thread hits an assertion. The
2081 * call may or not be an EMT thread.
2082 * caller may or may not be an EMT thread.
2083 * @returns Pointer to the GVMCPU on success, NULL on failure.
2084 * @param hEMT The native thread handle of the EMT.
2085 * NIL_RTNATIVETHREAD means the current thread
2086 */
2087GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2088{
2089 /*
2090     * No assertions here as we're usually called in an AssertMsgN,
2091 * RTAssert*, Log and LogRel contexts.
2092 */
2093 PGVMM pGVMM = g_pGVMM;
2094 if ( !RT_VALID_PTR(pGVMM)
2095 || pGVMM->u32Magic != GVMM_MAGIC)
2096 return NULL;
2097
2098 if (hEMT == NIL_RTNATIVETHREAD)
2099 hEMT = RTThreadNativeSelf();
2100 RTPROCESS ProcId = RTProcSelf();
2101
2102 /*
2103 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2104 */
2105/** @todo introduce some pid hash table here, please. */
2106 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2107 {
2108 if ( pGVMM->aHandles[i].iSelf == i
2109 && pGVMM->aHandles[i].ProcId == ProcId
2110 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2111 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2112 {
2113 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2114 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2115 return &pGVM->aCpus[0];
2116
2117            /* This is fairly safe with the current process-per-VM approach. */
2118 VMCPUID const cCpus = pGVM->cCpus;
2119            ASMCompilerBarrier();
2121 if ( cCpus < 1
2122 || cCpus > VMM_MAX_CPU_COUNT)
2123 continue;
2124 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2125 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2126 return &pGVM->aCpus[idCpu];
2127 }
2128 }
2129 return NULL;
2130}
2131
2132
2133/**
2134 * Get the GVMCPU structure for the given EMT.
2135 *
2136 * @returns The VCpu structure for @a hEMT, NULL if not an EMT.
2137 * @param pGVM The global (ring-0) VM structure.
2138 * @param hEMT The native thread handle of the EMT.
2139 * NIL_RTNATIVETHREAD means the current thread
2140 */
2141GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByGVMandEMT(PGVM pGVM, RTNATIVETHREAD hEMT)
2142{
2143 /*
2144 * Validate & adjust input.
2145 */
2146 AssertPtr(pGVM);
2147 Assert(pGVM->u32Magic == GVM_MAGIC);
2148 if (hEMT == NIL_RTNATIVETHREAD /* likely */)
2149 {
2150 hEMT = RTThreadNativeSelf();
2151 AssertReturn(hEMT != NIL_RTNATIVETHREAD, NULL);
2152 }
2153
2154 /*
2155 * Find the matching hash table entry.
2156 * See similar code in GVMMR0GetRing3ThreadForSelf.
2157 */
2158 uint32_t idxHash = GVMM_EMT_HASH_1(hEMT);
2159 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2160 { /* likely */ }
2161 else
2162 {
2163#ifdef VBOX_STRICT
2164 unsigned cCollisions = 0;
2165#endif
2166 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hEMT);
2167 for (;;)
2168 {
2169 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2170 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2171 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2172 break;
2173 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2174 {
2175#ifdef VBOX_STRICT
2176 uint32_t idxCpu = pGVM->cCpus;
2177 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2178 while (idxCpu-- > 0)
2179 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hEMT);
2180#endif
2181 return NULL;
2182 }
2183 }
2184 }
2185
2186 /*
2187 * Validate the VCpu number and translate it into a pointer.
2188 */
2189 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2190 AssertReturn(idCpu < pGVM->cCpus, NULL);
2191 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2192 Assert(pGVCpu->hNativeThreadR0 == hEMT);
2193 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2194 return pGVCpu;
2195}
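
/*
 * Editor's note (not part of the original source): the lookup above is open
 * addressing with double hashing.  With made-up numbers, if GVMM_EMT_HASH_SIZE
 * were 64, GVMM_EMT_HASH_1(hEMT) gave 13 and GVMM_EMT_HASH_2(hEMT) gave 5, a
 * collision at slot 13 would probe 18, 23, 28, ... (mod 64) until either the
 * matching hNativeEmt or a NIL_RTNATIVETHREAD slot ("not registered") is hit.
 */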
2196
2197
2198/**
2199 * Get the native ring-3 thread handle for the caller.
2200 *
2201 * This works for EMTs and registered workers.
2202 *
2203 * @returns ring-3 native thread handle or NIL_RTNATIVETHREAD.
2204 * @param pGVM The global (ring-0) VM structure.
2205 */
2206GVMMR0DECL(RTNATIVETHREAD) GVMMR0GetRing3ThreadForSelf(PGVM pGVM)
2207{
2208 /*
2209 * Validate input.
2210 */
2211 AssertPtr(pGVM);
2212 AssertReturn(pGVM->u32Magic == GVM_MAGIC, NIL_RTNATIVETHREAD);
2213 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
2214 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, NIL_RTNATIVETHREAD);
2215
2216 /*
2217 * Find the matching hash table entry.
2218 * See similar code in GVMMR0GetGVCpuByGVMandEMT.
2219 */
2220 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
2221 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2222 { /* likely */ }
2223 else
2224 {
2225#ifdef VBOX_STRICT
2226 unsigned cCollisions = 0;
2227#endif
2228 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
2229 for (;;)
2230 {
2231 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2232 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2233 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2234 break;
2235 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2236 {
2237#ifdef VBOX_STRICT
2238 uint32_t idxCpu = pGVM->cCpus;
2239 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2240 while (idxCpu-- > 0)
2241 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hNativeSelf);
2242#endif
2243
2244 /*
2245 * Not an EMT, so see if it's a worker thread.
2246 */
2247 size_t idx = RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads);
2248 while (--idx > GVMMWORKERTHREAD_INVALID)
2249 if (pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread == hNativeSelf)
2250 return pGVM->gvmm.s.aWorkerThreads[idx].hNativeThreadR3;
2251
2252 return NIL_RTNATIVETHREAD;
2253 }
2254 }
2255 }
2256
2257 /*
2258 * Validate the VCpu number and translate it into a pointer.
2259 */
2260 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2261 AssertReturn(idCpu < pGVM->cCpus, NIL_RTNATIVETHREAD);
2262 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2263 Assert(pGVCpu->hNativeThreadR0 == hNativeSelf);
2264 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2265 return pGVCpu->hNativeThread;
2266}
2267
2268
2269/**
2270 * Converts a pointer within the GVM structure to a host physical address.
2271 *
2272 * @returns Host physical address.
2273 * @param pGVM The global (ring-0) VM structure.
2274 * @param pv The address to convert.
2275 * @thread EMT
2276 */
2277GVMMR0DECL(RTHCPHYS) GVMMR0ConvertGVMPtr2HCPhys(PGVM pGVM, void *pv)
2278{
2279 AssertPtr(pGVM);
2280 Assert(pGVM->u32Magic == GVM_MAGIC);
2281 uintptr_t const off = (uintptr_t)pv - (uintptr_t)pGVM;
2282 Assert(off < RT_UOFFSETOF_DYN(GVM, aCpus[pGVM->cCpus]));
2283 return RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, off >> HOST_PAGE_SHIFT) | ((uintptr_t)pv & HOST_PAGE_OFFSET_MASK);
2284}
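
/*
 * Editor's note (not part of the original source): a worked example of the
 * conversion above, assuming a 4 KiB host page size.  A pointer 0x2345 bytes
 * into the GVM allocation splits into page index 0x2345 >> 12 = 2, which is
 * resolved through VMMemObj, and page offset 0x2345 & 0xfff = 0x345, which is
 * OR'ed back onto the returned host physical address.
 */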
2285
2286
2287/**
2288 * This will wake up expired and soon-to-be-expired VMs.
2289 *
2290 * @returns Number of EMTs that have been woken up.
2291 * @param pGVMM Pointer to the GVMM instance data.
2292 * @param u64Now The current time.
2293 */
2294static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2295{
2296 /*
2297     * Skip this if it has been disabled because of high resolution wake-ups or by
2298 * the user.
2299 */
2300 if (!pGVMM->fDoEarlyWakeUps)
2301 return 0;
2302
2303/** @todo Rewrite this algorithm. See performance defect XYZ. */
2304
2305 /*
2306 * A cheap optimization to stop wasting so much time here on big setups.
2307 */
2308 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2309 if ( pGVMM->cHaltedEMTs == 0
2310 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2311 return 0;
2312
2313 /*
2314 * Only one thread doing this at a time.
2315 */
2316 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2317 return 0;
2318
2319 /*
2320 * The first pass will wake up VMs which have actually expired
2321 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2322 */
2323 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2324 uint64_t u64Min = UINT64_MAX;
2325 unsigned cWoken = 0;
2326 unsigned cHalted = 0;
2327 unsigned cTodo2nd = 0;
2328 unsigned cTodo3rd = 0;
2329 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2330 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2331 i = pGVMM->aHandles[i].iNext)
2332 {
2333 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2334 if ( RT_VALID_PTR(pCurGVM)
2335 && pCurGVM->u32Magic == GVM_MAGIC)
2336 {
2337 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2338 {
2339 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2340 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2341 if (u64)
2342 {
2343 if (u64 <= u64Now)
2344 {
2345 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2346 {
2347 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2348 AssertRC(rc);
2349 cWoken++;
2350 }
2351 }
2352 else
2353 {
2354 cHalted++;
2355 if (u64 <= uNsEarlyWakeUp1)
2356 cTodo2nd++;
2357 else if (u64 <= uNsEarlyWakeUp2)
2358 cTodo3rd++;
2359 else if (u64 < u64Min)
2360                            u64Min = u64; /* track the earliest pending expiry for the uNsNextEmtWakeup update below */
2361 }
2362 }
2363 }
2364 }
2365 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2366 }
2367
2368 if (cTodo2nd)
2369 {
2370 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2371 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2372 i = pGVMM->aHandles[i].iNext)
2373 {
2374 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2375 if ( RT_VALID_PTR(pCurGVM)
2376 && pCurGVM->u32Magic == GVM_MAGIC)
2377 {
2378 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2379 {
2380 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2381 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2382 if ( u64
2383 && u64 <= uNsEarlyWakeUp1)
2384 {
2385 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2386 {
2387 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2388 AssertRC(rc);
2389 cWoken++;
2390 }
2391 }
2392 }
2393 }
2394 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2395 }
2396 }
2397
2398 if (cTodo3rd)
2399 {
2400 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2401 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2402 i = pGVMM->aHandles[i].iNext)
2403 {
2404 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2405 if ( RT_VALID_PTR(pCurGVM)
2406 && pCurGVM->u32Magic == GVM_MAGIC)
2407 {
2408 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2409 {
2410 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2411 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2412 if ( u64
2413 && u64 <= uNsEarlyWakeUp2)
2414 {
2415 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2416 {
2417 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2418 AssertRC(rc);
2419 cWoken++;
2420 }
2421 }
2422 }
2423 }
2424 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2425 }
2426 }
2427
2428 /*
2429 * Set the minimum value.
2430 */
2431 pGVMM->uNsNextEmtWakeup = u64Min;
2432
2433 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2434 return cWoken;
2435}
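
/*
 * Editor's note (not part of the original source): with made-up configuration
 * values nsEarlyWakeUp1 = 25 us and nsEarlyWakeUp2 = 50 us, the scan above wakes
 * an EMT whose u64HaltExpire is <= u64Now in the first pass, one expiring within
 * the next 25 us in the second pass, and one expiring within 50 us in the third;
 * later expiries only feed the uNsNextEmtWakeup estimate.
 */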
2436
2437
2438#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2439/**
2440 * Timer callback for the EMT high-resolution wake-up timer.
2441 *
2442 * @param pTimer The timer handle.
2443 * @param pvUser The global (ring-0) CPU structure for the EMT to wake up.
2444 * @param iTick The current tick.
2445 */
2446static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2447{
2448 PGVMCPU pGVCpu = (PGVMCPU)pvUser;
2449 NOREF(pTimer); NOREF(iTick);
2450
2451 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2452 if (pGVCpu->gvmm.s.u64HaltExpire != 0)
2453 {
2454 RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2455 pGVCpu->gvmm.s.Stats.cWakeUpTimerHits += 1;
2456 }
2457 else
2458 pGVCpu->gvmm.s.Stats.cWakeUpTimerMisses += 1;
2459
2460 if (RTMpCpuId() == pGVCpu->gvmm.s.idHaltedOnCpu)
2461 pGVCpu->gvmm.s.Stats.cWakeUpTimerSameCpu += 1;
2462}
2463#endif /* GVMM_SCHED_WITH_HR_WAKE_UP_TIMER */
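
/*
 * Editor's note (not part of the original source): the counters above cover the
 * three outcomes of the one-shot timer armed in GVMMR0SchedHalt below -- the
 * callback fires while an expiry is still pending (hit), it fires after the halt
 * has already ended (miss), and the halt path stops a still-armed timer on
 * wake-up (canceled); cWakeUpTimerSameCpu additionally tracks whether the
 * callback ran on the same host CPU the EMT halted on.
 */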
2464
2465
2466/**
2467 * Halt the EMT thread.
2468 *
2469 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2470 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2471 * @param pGVM The global (ring-0) VM structure.
2472 * @param pGVCpu The global (ring-0) CPU structure of the calling
2473 * EMT.
2474 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2475 * @thread EMT(pGVCpu).
2476 */
2477GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2478{
2479 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2480 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2481 PGVMM pGVMM;
2482 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2483
2484 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2485 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2486
2487 /*
2488 * If we're doing early wake-ups, we must take the UsedList lock before we
2489 * start querying the current time.
2490 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2491 */
2492 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2493 if (fDoEarlyWakeUps)
2494 {
2495 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2496 }
2497
2498    /* GIP hack: We might be frequently sleeping for short intervals where the
2499 difference between GIP and system time matters on systems with high resolution
2500 system time. So, convert the input from GIP to System time in that case. */
2501 Assert(ASMGetFlags() & X86_EFL_IF);
2502 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2503 const uint64_t u64NowGip = RTTimeNanoTS();
2504
2505 if (fDoEarlyWakeUps)
2506 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2507
2508 /*
2509 * Go to sleep if we must...
2510 * Cap the sleep time to 1 second to be on the safe side.
2511 */
2512 int rc;
2513 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2514 if ( u64NowGip < u64ExpireGipTime
2515 && ( cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2516 ? pGVMM->nsMinSleepCompany
2517 : pGVMM->nsMinSleepAlone)
2518#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2519 || (pGVCpu->gvmm.s.hHrWakeUpTimer != NULL && cNsInterval >= pGVMM->nsMinSleepWithHrTimer)
2520#endif
2521 )
2522 )
2523 {
2524 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2525 if (cNsInterval > RT_NS_1SEC)
2526 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2527 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2528 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2529 if (fDoEarlyWakeUps)
2530 {
2531 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2532 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2533 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2534 }
2535
2536#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2537 if ( pGVCpu->gvmm.s.hHrWakeUpTimer != NULL
2538 && cNsInterval >= RT_MIN(RT_NS_1US, pGVMM->nsMinSleepWithHrTimer))
2539 {
2540 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Start, a);
2541 RTTimerStart(pGVCpu->gvmm.s.hHrWakeUpTimer, cNsInterval);
2542 pGVCpu->gvmm.s.fHrWakeUptimerArmed = true;
2543 pGVCpu->gvmm.s.idHaltedOnCpu = RTMpCpuId();
2544 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Start, a);
2545 }
2546#endif
2547
2548 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2549 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2550 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2551
2552 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2553 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2554
2555#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2556 if (!pGVCpu->gvmm.s.fHrWakeUptimerArmed)
2557 { /* likely */ }
2558 else
2559 {
2560 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Stop, a);
2561 RTTimerStop(pGVCpu->gvmm.s.hHrWakeUpTimer);
2562 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2563 pGVCpu->gvmm.s.Stats.cWakeUpTimerCanceled += 1;
2564 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Stop, a);
2565 }
2566#endif
2567
2568        /* Reset the semaphore to try to prevent a few false wake-ups. */
2569 if (rc == VINF_SUCCESS)
2570 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2571 else if (rc == VERR_TIMEOUT)
2572 {
2573 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2574 rc = VINF_SUCCESS;
2575 }
2576 }
2577 else
2578 {
2579 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2580 if (fDoEarlyWakeUps)
2581 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2582 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2583 rc = VINF_SUCCESS;
2584 }
2585
2586 return rc;
2587}
2588
2589
2590/**
2591 * Halt the EMT thread.
2592 *
2593 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2594 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2595 * @param pGVM The global (ring-0) VM structure.
2596 * @param idCpu The Virtual CPU ID of the calling EMT.
2597 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2598 * @thread EMT(idCpu).
2599 */
2600GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2601{
2602 PGVMM pGVMM;
2603 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2604 if (RT_SUCCESS(rc))
2605 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2606 return rc;
2607}
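
/*
 * Editor's sketch (not part of the original source): how an EMT might block for
 * roughly 250 microseconds via the request variant above.  The expiry is given
 * in GIP time, approximated here with RTTimeNanoTS() just as the halt code does.
 */
#if 0 /* illustrative only */
static int exampleHaltBriefly(PGVM pGVM, VMCPUID idCpu)
{
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + 250000 /* ns */;
    int rc = GVMMR0SchedHaltReq(pGVM, idCpu, u64ExpireGipTime);
    /* VINF_SUCCESS covers both a timeout and being woken by another thread;
       VERR_INTERRUPTED means a signal is pending for the thread. */
    return rc;
}
#endif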
2608
2609
2610
2611/**
2612 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2613 * a sleeping EMT.
2614 *
2615 * @retval VINF_SUCCESS if successfully woken up.
2616 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2617 *
2618 * @param pGVM The global (ring-0) VM structure.
2619 * @param pGVCpu The global (ring-0) VCPU structure.
2620 */
2621DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2622{
2623 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2624
2625 /*
2626     * Signal the semaphore regardless of whether the EMT is currently blocked on it.
2627 *
2628 * The reason for this is that there is absolutely no way we can be 100%
2629     * certain that it isn't *about* to go to sleep on it and just got
2630     * delayed a bit en route. So, we will always signal the semaphore when
2631     * it is flagged as halted in the VMM.
2632 */
2633/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2634 int rc;
2635 if (pGVCpu->gvmm.s.u64HaltExpire)
2636 {
2637 rc = VINF_SUCCESS;
2638 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2639 }
2640 else
2641 {
2642 rc = VINF_GVM_NOT_BLOCKED;
2643 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2644 }
2645
2646 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2647 AssertRC(rc2);
2648
2649 return rc;
2650}
2651
2652
2653/**
2654 * Wakes up the halted EMT thread so it can service a pending request.
2655 *
2656 * @returns VBox status code.
2657 * @retval VINF_SUCCESS if successfully woken up.
2658 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2659 *
2660 * @param pGVM The global (ring-0) VM structure.
2661 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2662 * @param fTakeUsedLock Take the used lock or not
2663 * @thread Any but EMT(idCpu).
2664 */
2665GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2666{
2667 /*
2668 * Validate input and take the UsedLock.
2669 */
2670 PGVMM pGVMM;
2671 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2672 if (RT_SUCCESS(rc))
2673 {
2674 if (idCpu < pGVM->cCpus)
2675 {
2676 /*
2677 * Do the actual job.
2678 */
2679 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2680
2681 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2682 {
2683 /*
2684 * While we're here, do a round of scheduling.
2685 */
2686 Assert(ASMGetFlags() & X86_EFL_IF);
2687 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2688 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2689 }
2690 }
2691 else
2692 rc = VERR_INVALID_CPU_ID;
2693
2694 if (fTakeUsedLock)
2695 {
2696 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2697 AssertRC(rc2);
2698 }
2699 }
2700
2701 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2702 return rc;
2703}
2704
2705
2706/**
2707 * Wakes up the halted EMT thread so it can service a pending request.
2708 *
2709 * @returns VBox status code.
2710 * @retval VINF_SUCCESS if successfully woken up.
2711 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2712 *
2713 * @param pGVM The global (ring-0) VM structure.
2714 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2715 * @thread Any but EMT(idCpu).
2716 */
2717GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2718{
2719 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2720}
2721
2722
2723/**
2724 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2725 * parameter and no used locking.
2726 *
2727 * @returns VBox status code.
2728 * @retval VINF_SUCCESS if successfully woken up.
2729 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2730 *
2731 * @param pGVM The global (ring-0) VM structure.
2732 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2733 * @thread Any but EMT(idCpu).
2734 * @deprecated Don't use in new code if possible! Use the GVM variant.
2735 */
2736GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2737{
2738 PGVMM pGVMM;
2739 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2740 if (RT_SUCCESS(rc))
2741 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2742 return rc;
2743}
2744
2745
2746/**
2747 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2748 * the Virtual CPU if it's still busy executing guest code.
2749 *
2750 * @returns VBox status code.
2751 * @retval VINF_SUCCESS if poked successfully.
2752 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2753 *
2754 * @param pGVM The global (ring-0) VM structure.
2755 * @param pVCpu The cross context virtual CPU structure.
2756 */
2757DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2758{
2759 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2760
2761 RTCPUID idHostCpu = pVCpu->idHostCpu;
2762 if ( idHostCpu == NIL_RTCPUID
2763 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2764 {
2765 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2766 return VINF_GVM_NOT_BUSY_IN_GC;
2767 }
2768
2769 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2770 RTMpPokeCpu(idHostCpu);
2771 return VINF_SUCCESS;
2772}
2773
2774
2775/**
2776 * Pokes an EMT if it's still busy running guest code.
2777 *
2778 * @returns VBox status code.
2779 * @retval VINF_SUCCESS if poked successfully.
2780 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2781 *
2782 * @param pGVM The global (ring-0) VM structure.
2783 * @param idCpu The ID of the virtual CPU to poke.
2784 * @param fTakeUsedLock Take the used lock or not
2785 */
2786GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2787{
2788 /*
2789 * Validate input and take the UsedLock.
2790 */
2791 PGVMM pGVMM;
2792 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2793 if (RT_SUCCESS(rc))
2794 {
2795 if (idCpu < pGVM->cCpus)
2796 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2797 else
2798 rc = VERR_INVALID_CPU_ID;
2799
2800 if (fTakeUsedLock)
2801 {
2802 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2803 AssertRC(rc2);
2804 }
2805 }
2806
2807    LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2808 return rc;
2809}
2810
2811
2812/**
2813 * Pokes an EMT if it's still busy running guest code.
2814 *
2815 * @returns VBox status code.
2816 * @retval VINF_SUCCESS if poked successfully.
2817 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2818 *
2819 * @param pGVM The global (ring-0) VM structure.
2820 * @param idCpu The ID of the virtual CPU to poke.
2821 */
2822GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2823{
2824 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2825}
2826
2827
2828/**
2829 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2830 * used locking.
2831 *
2832 * @returns VBox status code.
2833 * @retval VINF_SUCCESS if poked successfully.
2834 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2835 *
2836 * @param pGVM The global (ring-0) VM structure.
2837 * @param idCpu The ID of the virtual CPU to poke.
2838 *
2839 * @deprecated Don't use in new code if possible! Use the GVM variant.
2840 */
2841GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2842{
2843 PGVMM pGVMM;
2844 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2845 if (RT_SUCCESS(rc))
2846 {
2847 if (idCpu < pGVM->cCpus)
2848 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2849 else
2850 rc = VERR_INVALID_CPU_ID;
2851 }
2852 return rc;
2853}
2854
2855
2856/**
2857 * Wakes up a set of halted EMT threads so they can service pending requests.
2858 *
2859 * @returns VBox status code, no informational stuff.
2860 *
2861 * @param pGVM The global (ring-0) VM structure.
2862 * @param pSleepSet The set of sleepers to wake up.
2863 * @param pPokeSet The set of CPUs to poke.
2864 */
2865GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2866{
2867 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2868 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2869 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2870
2871 /*
2872 * Validate input and take the UsedLock.
2873 */
2874 PGVMM pGVMM;
2875 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2876 if (RT_SUCCESS(rc))
2877 {
2878 rc = VINF_SUCCESS;
2879 VMCPUID idCpu = pGVM->cCpus;
2880 while (idCpu-- > 0)
2881 {
2882            /* Don't try to poke or wake up ourselves. */
2883 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2884 continue;
2885
2886 /* just ignore errors for now. */
2887 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2888 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2889 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2890 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2891 }
2892
2893 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2894 AssertRC(rc2);
2895 }
2896
2897 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2898 return rc;
2899}
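
/*
 * Editor's sketch (not part of the original source): building the two CPU sets
 * for a combined wake-up/poke of all other EMTs.  This assumes the VMCPUSET_EMPTY
 * and VMCPUSET_ADD helpers from VBox/vmm/vmcpuset.h; check that header for the
 * exact names.
 */
#if 0 /* illustrative only */
static int exampleKickAllOtherCpus(PGVM pGVM, VMCPUID idSelf)
{
    VMCPUSET SleepSet, PokeSet;
    VMCPUSET_EMPTY(&SleepSet);                  /* halted EMTs to wake up */
    VMCPUSET_EMPTY(&PokeSet);                   /* EMTs busy in guest code to poke */
    for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
        if (idCpu != idSelf)
        {
            VMCPUSET_ADD(&SleepSet, idCpu);
            VMCPUSET_ADD(&PokeSet, idCpu);
        }
    return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &SleepSet, &PokeSet);
}
#endif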
2900
2901
2902/**
2903 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2904 *
2905 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2906 * @param pGVM The global (ring-0) VM structure.
2907 * @param pReq Pointer to the request packet.
2908 */
2909GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2910{
2911 /*
2912 * Validate input and pass it on.
2913 */
2914 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2915 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2916
2917 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2918}
2919
2920
2921
2922/**
2923 * Poll the schedule to see if someone else should get a chance to run.
2924 *
2925 * This is a bit hackish and will not work too well if the machine is
2926 * under heavy load from non-VM processes.
2927 *
2928 * @returns VINF_SUCCESS if not yielded.
2929 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2930 * @param pGVM The global (ring-0) VM structure.
2931 * @param idCpu The Virtual CPU ID of the calling EMT.
2932 * @param fYield Whether to yield or not.
2933 * This is for when we're spinning in the halt loop.
2934 * @thread EMT(idCpu).
2935 */
2936GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2937{
2938 /*
2939 * Validate input.
2940 */
2941 PGVMM pGVMM;
2942 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2943 if (RT_SUCCESS(rc))
2944 {
2945 /*
2946 * We currently only implement helping doing wakeups (fYield = false), so don't
2947 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2948 */
2949 if (!fYield && pGVMM->fDoEarlyWakeUps)
2950 {
2951 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2952 pGVM->gvmm.s.StatsSched.cPollCalls++;
2953
2954 Assert(ASMGetFlags() & X86_EFL_IF);
2955 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2956
2957 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2958
2959 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2960 }
2961 /*
2962 * Not quite sure what we could do here...
2963 */
2964 else if (fYield)
2965 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2966 else
2967 rc = VINF_SUCCESS;
2968 }
2969
2970    LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2971 return rc;
2972}
2973
2974
2975#ifdef GVMM_SCHED_WITH_PPT
2976/**
2977 * Timer callback for the periodic preemption timer.
2978 *
2979 * @param pTimer The timer handle.
2980 * @param pvUser Pointer to the per cpu structure.
2981 * @param iTick The current tick.
2982 */
2983static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2984{
2985 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2986 NOREF(pTimer); NOREF(iTick);
2987
2988 /*
2989 * Termination check
2990 */
2991 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2992 return;
2993
2994 /*
2995 * Do the house keeping.
2996 */
2997 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2998
2999 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
3000 {
3001 /*
3002 * Historicize the max frequency.
3003 */
3004 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
3005 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
3006 pCpu->Ppt.iTickHistorization = 0;
3007 pCpu->Ppt.uDesiredHz = 0;
3008
3009 /*
3010         * Check whether the timer frequency needs changing.
3011 */
3012 uint32_t uHistMaxHz = 0;
3013 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
3014 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
3015 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
3016 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
3017 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3018 else if (uHistMaxHz)
3019 {
3020 /*
3021 * Reprogram it.
3022 */
3023 pCpu->Ppt.cChanges++;
3024 pCpu->Ppt.iTickHistorization = 0;
3025 pCpu->Ppt.uTimerHz = uHistMaxHz;
3026 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
3027 pCpu->Ppt.cNsInterval = cNsInterval;
3028 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3029 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3030 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3031 / cNsInterval;
3032 else
3033 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3034 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3035
3036 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
3037 RTTimerChangeInterval(pTimer, cNsInterval);
3038 }
3039 else
3040 {
3041 /*
3042 * Stop it.
3043 */
3044 pCpu->Ppt.fStarted = false;
3045 pCpu->Ppt.uTimerHz = 0;
3046 pCpu->Ppt.cNsInterval = 0;
3047 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3048
3049 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
3050 RTTimerStop(pTimer);
3051 }
3052 }
3053 else
3054 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3055}
3056#endif /* GVMM_SCHED_WITH_PPT */
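
/*
 * Editor's note (not part of the original source): a worked example of the
 * reprogramming arithmetic above.  A history maximum of 2000 Hz gives
 * cNsInterval = RT_NS_1SEC / 2000 = 500 000 ns; if GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
 * were 20 000 000 ns (a made-up value, see its definition earlier in this file),
 * cTicksHistoriziationInterval = (20 000 000 + 10 000 000 - 1) / 500 000 = 59,
 * i.e. the frequency history advances every 59 timer ticks.
 */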
3057
3058
3059/**
3060 * Updates the periodic preemption timer for the calling CPU.
3061 *
3062 * The caller must have disabled preemption!
3063 * The caller must check that the host can do high resolution timers.
3064 *
3065 * @param pGVM The global (ring-0) VM structure.
3066 * @param idHostCpu The current host CPU id.
3067 * @param uHz The desired frequency.
3068 */
3069GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
3070{
3071 NOREF(pGVM);
3072#ifdef GVMM_SCHED_WITH_PPT
3073 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3074 Assert(RTTimerCanDoHighResolution());
3075
3076 /*
3077 * Resolve the per CPU data.
3078 */
3079 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
3080 PGVMM pGVMM = g_pGVMM;
3081 if ( !RT_VALID_PTR(pGVMM)
3082 || pGVMM->u32Magic != GVMM_MAGIC)
3083 return;
3084 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
3085 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
3086 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
3087 && pCpu->idCpu == idHostCpu,
3088                         ("u32Magic=%#x idCpu=%u idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
3089
3090 /*
3091 * Check whether we need to do anything about the timer.
3092     * We have to be a little bit careful since we might race the timer
3093 * callback here.
3094 */
3095 if (uHz > 16384)
3096 uHz = 16384; /** @todo add a query method for this! */
3097 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
3098 && uHz >= pCpu->Ppt.uMinHz
3099 && !pCpu->Ppt.fStarting /* solaris paranoia */))
3100 {
3101 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3102
3103 pCpu->Ppt.uDesiredHz = uHz;
3104 uint32_t cNsInterval = 0;
3105 if (!pCpu->Ppt.fStarted)
3106 {
3107 pCpu->Ppt.cStarts++;
3108 pCpu->Ppt.fStarted = true;
3109 pCpu->Ppt.fStarting = true;
3110 pCpu->Ppt.iTickHistorization = 0;
3111 pCpu->Ppt.uTimerHz = uHz;
3112 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
3113 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3114 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3115 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3116 / cNsInterval;
3117 else
3118 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3119 }
3120
3121 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3122
3123 if (cNsInterval)
3124 {
3125 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
3126 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
3127 AssertRC(rc);
3128
3129 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3130 if (RT_FAILURE(rc))
3131 pCpu->Ppt.fStarted = false;
3132 pCpu->Ppt.fStarting = false;
3133 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3134 }
3135 }
3136#else /* !GVMM_SCHED_WITH_PPT */
3137 NOREF(idHostCpu); NOREF(uHz);
3138#endif /* !GVMM_SCHED_WITH_PPT */
3139}
3140
3141
3142/**
3143 * Calls @a pfnCallback for each VM in the system.
3144 *
3145 * This will enumerate the VMs while holding the global VM used list lock in
3146 * shared mode. So, only suitable for simple work. If more expensive work
3147 * needs doing, a different approach must be taken as using this API would
3148 * otherwise block VM creation and destruction.
3149 *
3150 * @returns VBox status code.
3151 * @param pfnCallback The callback function.
3152 * @param pvUser User argument to the callback.
3153 */
3154GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
3155{
3156 PGVMM pGVMM;
3157 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3158
3159 int rc = VINF_SUCCESS;
3160 GVMMR0_USED_SHARED_LOCK(pGVMM);
3161 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
3162 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3163 i = pGVMM->aHandles[i].iNext, cLoops++)
3164 {
3165 PGVM pGVM = pGVMM->aHandles[i].pGVM;
3166 if ( RT_VALID_PTR(pGVM)
3167 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
3168 && pGVM->u32Magic == GVM_MAGIC)
3169 {
3170 rc = pfnCallback(pGVM, pvUser);
3171 if (rc != VINF_SUCCESS)
3172 break;
3173 }
3174
3175 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
3176 }
3177 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3178 return rc;
3179}
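
/*
 * Editor's sketch (not part of the original source): a minimal enumeration
 * callback that just counts VMs and EMTs.  The callback shape is inferred from
 * the pfnCallback(pGVM, pvUser) invocation above; the authoritative typedef,
 * PFNGVMMR0ENUMCALLBACK, lives in VBox/vmm/gvmm.h.
 */
#if 0 /* illustrative only */
typedef struct EXAMPLEENUMSTATE { uint32_t cVMs, cEMTs; } EXAMPLEENUMSTATE;

static DECLCALLBACK(int) exampleEnumCallback(PGVM pGVM, void *pvUser)
{
    EXAMPLEENUMSTATE *pState = (EXAMPLEENUMSTATE *)pvUser;
    pState->cVMs  += 1;
    pState->cEMTs += pGVM->cCpus;
    return VINF_SUCCESS;                /* any other status stops the enumeration */
}

static void exampleCountVMs(void)
{
    EXAMPLEENUMSTATE State = { 0, 0 };
    int rc = GVMMR0EnumVMs(exampleEnumCallback, &State);
    NOREF(rc);                          /* on success State.cVMs/cEMTs hold the totals */
}
#endif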
3180
3181
3182/**
3183 * Retrieves the GVMM statistics visible to the caller.
3184 *
3185 * @returns VBox status code.
3186 *
3187 * @param pStats Where to put the statistics.
3188 * @param pSession The current session.
3189 * @param pGVM The GVM to obtain statistics for. Optional.
3190 */
3191GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3192{
3193 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3194
3195 /*
3196 * Validate input.
3197 */
3198 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3199 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3200 pStats->cVMs = 0; /* (crash before taking the sem...) */
3201
3202 /*
3203 * Take the lock and get the VM statistics.
3204 */
3205 PGVMM pGVMM;
3206 if (pGVM)
3207 {
3208 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3209 if (RT_FAILURE(rc))
3210 return rc;
3211 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3212
3213 uint32_t iCpu = RT_MIN(pGVM->cCpus, RT_ELEMENTS(pStats->aVCpus));
3214 if (iCpu < RT_ELEMENTS(pStats->aVCpus))
3215 RT_BZERO(&pStats->aVCpus[iCpu], (RT_ELEMENTS(pStats->aVCpus) - iCpu) * sizeof(pStats->aVCpus[0]));
3216 while (iCpu-- > 0)
3217 pStats->aVCpus[iCpu] = pGVM->aCpus[iCpu].gvmm.s.Stats;
3218 }
3219 else
3220 {
3221 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3222 RT_ZERO(pStats->SchedVM);
3223 RT_ZERO(pStats->aVCpus);
3224
3225 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3226 AssertRCReturn(rc, rc);
3227 }
3228
3229 /*
3230 * Enumerate the VMs and add the ones visible to the statistics.
3231 */
3232 pStats->cVMs = 0;
3233 pStats->cEMTs = 0;
3234 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3235
3236 for (unsigned i = pGVMM->iUsedHead;
3237 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3238 i = pGVMM->aHandles[i].iNext)
3239 {
3240 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3241 void *pvObj = pGVMM->aHandles[i].pvObj;
3242 if ( RT_VALID_PTR(pvObj)
3243 && RT_VALID_PTR(pOtherGVM)
3244 && pOtherGVM->u32Magic == GVM_MAGIC
3245 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3246 {
3247 pStats->cVMs++;
3248 pStats->cEMTs += pOtherGVM->cCpus;
3249
3250 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3251 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3252 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3253 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3254 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3255
3256 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3257 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3258 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3259
3260 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3261 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3262
3263 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3264 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3265 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3266 }
3267 }
3268
3269 /*
3270 * Copy out the per host CPU statistics.
3271 */
3272 uint32_t iDstCpu = 0;
3273 uint32_t cSrcCpus = pGVMM->cHostCpus;
3274 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3275 {
3276 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3277 {
3278 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3279 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3280#ifdef GVMM_SCHED_WITH_PPT
3281 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3282 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3283 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3284 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3285#else
3286 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3287 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3288 pStats->aHostCpus[iDstCpu].cChanges = 0;
3289 pStats->aHostCpus[iDstCpu].cStarts = 0;
3290#endif
3291 iDstCpu++;
3292 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3293 break;
3294 }
3295 }
3296 pStats->cHostCpus = iDstCpu;
3297
3298 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3299
3300 return VINF_SUCCESS;
3301}
3302
3303
3304/**
3305 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3306 *
3307 * @returns see GVMMR0QueryStatistics.
3308 * @param pGVM The global (ring-0) VM structure. Optional.
3309 * @param pReq Pointer to the request packet.
3310 * @param pSession The current session.
3311 */
3312GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3313{
3314 /*
3315 * Validate input and pass it on.
3316 */
3317 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3318 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3319 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3320
3321 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
3322}
3323
3324
3325/**
3326 * Resets the specified GVMM statistics.
3327 *
3328 * @returns VBox status code.
3329 *
3330 * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
3331 * @param pSession The current session.
3332 * @param pGVM The GVM to reset statistics for. Optional.
3333 */
3334GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3335{
3336 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3337
3338 /*
3339 * Validate input.
3340 */
3341 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3342 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3343
3344 /*
3345 * Take the lock and get the VM statistics.
3346 */
3347 PGVMM pGVMM;
3348 if (pGVM)
3349 {
3350 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3351 if (RT_FAILURE(rc))
3352 return rc;
3353# define MAYBE_RESET_FIELD(field) \
3354 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3355 MAYBE_RESET_FIELD(cHaltCalls);
3356 MAYBE_RESET_FIELD(cHaltBlocking);
3357 MAYBE_RESET_FIELD(cHaltTimeouts);
3358 MAYBE_RESET_FIELD(cHaltNotBlocking);
3359 MAYBE_RESET_FIELD(cHaltWakeUps);
3360 MAYBE_RESET_FIELD(cWakeUpCalls);
3361 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3362 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3363 MAYBE_RESET_FIELD(cPokeCalls);
3364 MAYBE_RESET_FIELD(cPokeNotBusy);
3365 MAYBE_RESET_FIELD(cPollCalls);
3366 MAYBE_RESET_FIELD(cPollHalts);
3367 MAYBE_RESET_FIELD(cPollWakeUps);
3368# undef MAYBE_RESET_FIELD
3369 }
3370 else
3371 {
3372 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3373
3374 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3375 AssertRCReturn(rc, rc);
3376 }
3377
3378 /*
3379 * Enumerate the VMs and add the ones visible to the statistics.
3380 */
3381 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3382 {
3383 for (unsigned i = pGVMM->iUsedHead;
3384 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3385 i = pGVMM->aHandles[i].iNext)
3386 {
3387 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3388 void *pvObj = pGVMM->aHandles[i].pvObj;
3389 if ( RT_VALID_PTR(pvObj)
3390 && RT_VALID_PTR(pOtherGVM)
3391 && pOtherGVM->u32Magic == GVM_MAGIC
3392 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3393 {
3394# define MAYBE_RESET_FIELD(field) \
3395 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3396 MAYBE_RESET_FIELD(cHaltCalls);
3397 MAYBE_RESET_FIELD(cHaltBlocking);
3398 MAYBE_RESET_FIELD(cHaltTimeouts);
3399 MAYBE_RESET_FIELD(cHaltNotBlocking);
3400 MAYBE_RESET_FIELD(cHaltWakeUps);
3401 MAYBE_RESET_FIELD(cWakeUpCalls);
3402 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3403 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3404 MAYBE_RESET_FIELD(cPokeCalls);
3405 MAYBE_RESET_FIELD(cPokeNotBusy);
3406 MAYBE_RESET_FIELD(cPollCalls);
3407 MAYBE_RESET_FIELD(cPollHalts);
3408 MAYBE_RESET_FIELD(cPollWakeUps);
3409# undef MAYBE_RESET_FIELD
3410 }
3411 }
3412 }
3413
3414 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3415
3416 return VINF_SUCCESS;
3417}
3418
3419
3420/**
3421 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3422 *
3423 * @returns see GVMMR0ResetStatistics.
3424 * @param pGVM The global (ring-0) VM structure. Optional.
3425 * @param pReq Pointer to the request packet.
3426 * @param pSession The current session.
3427 */
3428GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3429{
3430 /*
3431 * Validate input and pass it on.
3432 */
3433 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3434 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3435 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3436
3437 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3438}
3439