VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrv.c@10265

Last change on this file since 10265 was 10265, checked in by vboxsync, 16 years ago

Some more IDC code.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 153.4 KB
/* $Revision: 10265 $ */
/** @file
 * VirtualBox Support Driver - Shared code.
 */

/*
 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 *
 * The contents of this file may alternatively be used under the terms
 * of the Common Development and Distribution License Version 1.0
 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
 * VirtualBox OSE distribution, in which case the provisions of the
 * CDDL are applicable instead of those of the GPL.
 *
 * You may elect to license modified versions of this file under the
 * terms and conditions of either the GPL or the CDDL or both.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
 * Clara, CA 95054 USA or visit http://www.sun.com if you need
 * additional information or have any questions.
 */


/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#include "SUPDrvInternal.h"
#ifndef PAGE_SHIFT
# include <iprt/param.h>
#endif
#include <iprt/alloc.h>
#include <iprt/semaphore.h>
#include <iprt/spinlock.h>
#include <iprt/thread.h>
#include <iprt/process.h>
#include <iprt/mp.h>
#include <iprt/cpuset.h>
#include <iprt/log.h>
/* VBox/x86.h not compatible with the Linux kernel sources */
#ifdef RT_OS_LINUX
# define X86_CPUID_VENDOR_AMD_EBX   0x68747541
# define X86_CPUID_VENDOR_AMD_ECX   0x444d4163
# define X86_CPUID_VENDOR_AMD_EDX   0x69746e65
#else
# include <VBox/x86.h>
#endif

/*
 * Logging assignments:
 *      Log     - useful stuff, like failures.
 *      LogFlow - program flow, except the really noisy bits.
 *      Log2    - Cleanup and IDTE
 *      Log3    - Loader flow noise.
 *      Log4    - Call VMMR0 flow noise.
 *      Log5    - Native yet-to-be-defined noise.
 *      Log6    - Native ioctl flow noise.
 *
 * Logging requires BUILD_TYPE=debug and possibly changes to the logger
 * instantiation in log-vbox.c(pp).
 */


/*******************************************************************************
*   Defined Constants And Macros                                               *
*******************************************************************************/
/* from x86.h - clashes with linux thus this duplication */
#undef X86_CR0_PG
#define X86_CR0_PG                          RT_BIT(31)
#undef X86_CR0_PE
#define X86_CR0_PE                          RT_BIT(0)
#undef X86_CPUID_AMD_FEATURE_EDX_NX
#define X86_CPUID_AMD_FEATURE_EDX_NX        RT_BIT(20)
#undef MSR_K6_EFER
#define MSR_K6_EFER                         0xc0000080
#undef MSR_K6_EFER_NXE
#define MSR_K6_EFER_NXE                     RT_BIT(11)
#undef MSR_K6_EFER_LMA
#define MSR_K6_EFER_LMA                     RT_BIT(10)
#undef X86_CR4_PGE
#define X86_CR4_PGE                         RT_BIT(7)
#undef X86_CR4_PAE
#define X86_CR4_PAE                         RT_BIT(5)
#undef X86_CPUID_AMD_FEATURE_EDX_LONG_MODE
#define X86_CPUID_AMD_FEATURE_EDX_LONG_MODE RT_BIT(29)


/** The frequency by which we recalculate the u32UpdateHz and
 * u32UpdateIntervalNS GIP members. The value must be a power of 2. */
#define GIP_UPDATEHZ_RECALC_FREQ            0x800

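/*
 * Illustrative sketch (hypothetical helper, not part of the driver): because
 * GIP_UPDATEHZ_RECALC_FREQ is a power of 2, "time to recalculate" can be
 * tested with a cheap mask instead of a modulo. The counter name below is
 * made up for the example.
 */
#if 0
static bool supdrvExampleTimeToRecalc(uint64_t cGipUpdates)
{
    /* Equivalent to (cGipUpdates % GIP_UPDATEHZ_RECALC_FREQ) == 0, but cheaper. */
    return (cGipUpdates & (GIP_UPDATEHZ_RECALC_FREQ - 1)) == 0;
}
#endif
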
/**
 * Validates a session pointer.
 *
 * @returns true/false accordingly.
 * @param   pSession    The session.
 */
#define SUP_IS_SESSION_VALID(pSession) \
    (   VALID_PTR(pSession) \
     && pSession->u32Cookie == BIRD_INV)


/*******************************************************************************
*   Global Variables                                                           *
*******************************************************************************/
/**
 * Array of the R0 SUP API.
 */
static SUPFUNC g_aFunctions[] =
{
    /* name                                 function */
    { "SUPR0ObjRegister",                   (void *)SUPR0ObjRegister },
    { "SUPR0ObjAddRef",                     (void *)SUPR0ObjAddRef },
    { "SUPR0ObjRelease",                    (void *)SUPR0ObjRelease },
    { "SUPR0ObjVerifyAccess",               (void *)SUPR0ObjVerifyAccess },
    { "SUPR0LockMem",                       (void *)SUPR0LockMem },
    { "SUPR0UnlockMem",                     (void *)SUPR0UnlockMem },
    { "SUPR0ContAlloc",                     (void *)SUPR0ContAlloc },
    { "SUPR0ContFree",                      (void *)SUPR0ContFree },
    { "SUPR0LowAlloc",                      (void *)SUPR0LowAlloc },
    { "SUPR0LowFree",                       (void *)SUPR0LowFree },
    { "SUPR0MemAlloc",                      (void *)SUPR0MemAlloc },
    { "SUPR0MemGetPhys",                    (void *)SUPR0MemGetPhys },
    { "SUPR0MemFree",                       (void *)SUPR0MemFree },
    { "SUPR0PageAlloc",                     (void *)SUPR0PageAlloc },
    { "SUPR0PageFree",                      (void *)SUPR0PageFree },
    { "SUPR0Printf",                        (void *)SUPR0Printf },
    { "RTMemAlloc",                         (void *)RTMemAlloc },
    { "RTMemAllocZ",                        (void *)RTMemAllocZ },
    { "RTMemFree",                          (void *)RTMemFree },
    /*{ "RTMemDup",                         (void *)RTMemDup },*/
    { "RTMemRealloc",                       (void *)RTMemRealloc },
    { "RTR0MemObjAllocLow",                 (void *)RTR0MemObjAllocLow },
    { "RTR0MemObjAllocPage",                (void *)RTR0MemObjAllocPage },
    { "RTR0MemObjAllocPhys",                (void *)RTR0MemObjAllocPhys },
    { "RTR0MemObjAllocPhysNC",              (void *)RTR0MemObjAllocPhysNC },
    { "RTR0MemObjAllocCont",                (void *)RTR0MemObjAllocCont },
    { "RTR0MemObjLockUser",                 (void *)RTR0MemObjLockUser },
    { "RTR0MemObjMapKernel",                (void *)RTR0MemObjMapKernel },
    { "RTR0MemObjMapUser",                  (void *)RTR0MemObjMapUser },
    { "RTR0MemObjAddress",                  (void *)RTR0MemObjAddress },
    { "RTR0MemObjAddressR3",                (void *)RTR0MemObjAddressR3 },
    { "RTR0MemObjSize",                     (void *)RTR0MemObjSize },
    { "RTR0MemObjIsMapping",                (void *)RTR0MemObjIsMapping },
    { "RTR0MemObjGetPagePhysAddr",          (void *)RTR0MemObjGetPagePhysAddr },
    { "RTR0MemObjFree",                     (void *)RTR0MemObjFree },
/* These don't work yet on linux - use fast mutexes!
    { "RTSemMutexCreate",                   (void *)RTSemMutexCreate },
    { "RTSemMutexRequest",                  (void *)RTSemMutexRequest },
    { "RTSemMutexRelease",                  (void *)RTSemMutexRelease },
    { "RTSemMutexDestroy",                  (void *)RTSemMutexDestroy },
*/
    { "RTProcSelf",                         (void *)RTProcSelf },
    { "RTR0ProcHandleSelf",                 (void *)RTR0ProcHandleSelf },
    { "RTSemFastMutexCreate",               (void *)RTSemFastMutexCreate },
    { "RTSemFastMutexDestroy",              (void *)RTSemFastMutexDestroy },
    { "RTSemFastMutexRequest",              (void *)RTSemFastMutexRequest },
    { "RTSemFastMutexRelease",              (void *)RTSemFastMutexRelease },
    { "RTSemEventCreate",                   (void *)RTSemEventCreate },
    { "RTSemEventSignal",                   (void *)RTSemEventSignal },
    { "RTSemEventWait",                     (void *)RTSemEventWait },
    { "RTSemEventWaitNoResume",             (void *)RTSemEventWaitNoResume },
    { "RTSemEventDestroy",                  (void *)RTSemEventDestroy },
    { "RTSemEventMultiCreate",              (void *)RTSemEventMultiCreate },
    { "RTSemEventMultiSignal",              (void *)RTSemEventMultiSignal },
    { "RTSemEventMultiReset",               (void *)RTSemEventMultiReset },
    { "RTSemEventMultiWait",                (void *)RTSemEventMultiWait },
    { "RTSemEventMultiWaitNoResume",        (void *)RTSemEventMultiWaitNoResume },
    { "RTSemEventMultiDestroy",             (void *)RTSemEventMultiDestroy },
    { "RTSpinlockCreate",                   (void *)RTSpinlockCreate },
    { "RTSpinlockDestroy",                  (void *)RTSpinlockDestroy },
    { "RTSpinlockAcquire",                  (void *)RTSpinlockAcquire },
    { "RTSpinlockRelease",                  (void *)RTSpinlockRelease },
    { "RTSpinlockAcquireNoInts",            (void *)RTSpinlockAcquireNoInts },
    { "RTSpinlockReleaseNoInts",            (void *)RTSpinlockReleaseNoInts },
    { "RTThreadNativeSelf",                 (void *)RTThreadNativeSelf },
    { "RTThreadSleep",                      (void *)RTThreadSleep },
    { "RTThreadYield",                      (void *)RTThreadYield },
#if 0 /* Thread APIs, Part 2. */
    { "RTThreadSelf",                       (void *)RTThreadSelf },
    { "RTThreadCreate",                     (void *)RTThreadCreate },
    { "RTThreadGetNative",                  (void *)RTThreadGetNative },
    { "RTThreadWait",                       (void *)RTThreadWait },
    { "RTThreadWaitNoResume",               (void *)RTThreadWaitNoResume },
    { "RTThreadGetName",                    (void *)RTThreadGetName },
    { "RTThreadSelfName",                   (void *)RTThreadSelfName },
    { "RTThreadGetType",                    (void *)RTThreadGetType },
    { "RTThreadUserSignal",                 (void *)RTThreadUserSignal },
    { "RTThreadUserReset",                  (void *)RTThreadUserReset },
    { "RTThreadUserWait",                   (void *)RTThreadUserWait },
    { "RTThreadUserWaitNoResume",           (void *)RTThreadUserWaitNoResume },
#endif
    { "RTLogDefaultInstance",               (void *)RTLogDefaultInstance },
    { "RTMpCpuId",                          (void *)RTMpCpuId },
    { "RTMpCpuIdFromSetIndex",              (void *)RTMpCpuIdFromSetIndex },
    { "RTMpCpuIdToSetIndex",                (void *)RTMpCpuIdToSetIndex },
    { "RTMpIsCpuPossible",                  (void *)RTMpIsCpuPossible },
    { "RTMpGetCount",                       (void *)RTMpGetCount },
    { "RTMpGetMaxCpuId",                    (void *)RTMpGetMaxCpuId },
    { "RTMpGetOnlineCount",                 (void *)RTMpGetOnlineCount },
    { "RTMpGetOnlineSet",                   (void *)RTMpGetOnlineSet },
    { "RTMpGetSet",                         (void *)RTMpGetSet },
    { "RTMpIsCpuOnline",                    (void *)RTMpIsCpuOnline },
    { "RTMpOnAll",                          (void *)RTMpOnAll },
    { "RTMpOnOthers",                       (void *)RTMpOnOthers },
    { "RTMpOnSpecific",                     (void *)RTMpOnSpecific },
    { "RTLogRelDefaultInstance",            (void *)RTLogRelDefaultInstance },
    { "RTLogSetDefaultInstanceThread",      (void *)RTLogSetDefaultInstanceThread },
    { "RTLogLogger",                        (void *)RTLogLogger },
    { "RTLogLoggerEx",                      (void *)RTLogLoggerEx },
    { "RTLogLoggerExV",                     (void *)RTLogLoggerExV },
    { "RTLogPrintf",                        (void *)RTLogPrintf },
    { "RTLogPrintfV",                       (void *)RTLogPrintfV },
    { "AssertMsg1",                         (void *)AssertMsg1 },
    { "AssertMsg2",                         (void *)AssertMsg2 },
};
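
/*
 * Illustrative sketch (hypothetical helper, not part of the driver): the
 * loader resolves import names against g_aFunctions with a simple linear
 * scan, conceptually like this. The szName/pfn member names of SUPFUNC are
 * assumed here.
 */
#if 0
static void *supdrvExampleResolveSymbol(const char *pszName)
{
    size_t i;
    for (i = 0; i < sizeof(g_aFunctions) / sizeof(g_aFunctions[0]); i++)
        if (!strcmp(g_aFunctions[i].szName, pszName))
            return g_aFunctions[i].pfn;
    return NULL; /* unknown export */
}
#endif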


/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
static int      supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession);
static int      supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType);
#ifdef VBOX_WITH_IDT_PATCHING
static int      supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq);
static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
static int      supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession);
static void     supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch);
static void     supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry);
#endif /* VBOX_WITH_IDT_PATCHING */
static int      supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq);
static int      supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq);
static int      supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq);
static int      supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq);
static int      supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx);
static void     supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt);
static void     supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage);
static void     supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage);
static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void);
static SUPGIPMODE supdrvGipDeterminTscMode(PSUPDRVDEVEXT pDevExt);
#ifdef RT_OS_WINDOWS
static int      supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages);
static bool     supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3);
#endif /* RT_OS_WINDOWS */
static int      supdrvGipCreate(PSUPDRVDEVEXT pDevExt);
static void     supdrvGipDestroy(PSUPDRVDEVEXT pDevExt);
static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);


/**
 * Initializes the device extension structure.
 *
 * @returns IPRT status code.
 * @param   pDevExt     The device extension to initialize.
 */
int VBOXCALL supdrvInitDevExt(PSUPDRVDEVEXT pDevExt)
{
    /*
     * Initialize it.
     */
    int rc;
    memset(pDevExt, 0, sizeof(*pDevExt));
    rc = RTSpinlockCreate(&pDevExt->Spinlock);
    if (!rc)
    {
        rc = RTSemFastMutexCreate(&pDevExt->mtxLdr);
        if (!rc)
        {
            rc = RTSemFastMutexCreate(&pDevExt->mtxGip);
            if (!rc)
            {
                rc = supdrvGipCreate(pDevExt);
                if (RT_SUCCESS(rc))
                {
                    pDevExt->u32Cookie = BIRD;  /** @todo make this random? */
                    return VINF_SUCCESS;
                }

                RTSemFastMutexDestroy(pDevExt->mtxGip);
                pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
            }
            RTSemFastMutexDestroy(pDevExt->mtxLdr);
            pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
        }
        RTSpinlockDestroy(pDevExt->Spinlock);
        pDevExt->Spinlock = NIL_RTSPINLOCK;
    }
    return rc;
}


/**
 * Delete the device extension (e.g. cleanup members).
 *
 * @param   pDevExt     The device extension to delete.
 */
void VBOXCALL supdrvDeleteDevExt(PSUPDRVDEVEXT pDevExt)
{
#ifdef VBOX_WITH_IDT_PATCHING
    PSUPDRVPATCH    pPatch;
#endif
    PSUPDRVOBJ      pObj;
    PSUPDRVUSAGE    pUsage;

    /*
     * Kill mutexes and spinlocks.
     */
    RTSemFastMutexDestroy(pDevExt->mtxGip);
    pDevExt->mtxGip = NIL_RTSEMFASTMUTEX;
    RTSemFastMutexDestroy(pDevExt->mtxLdr);
    pDevExt->mtxLdr = NIL_RTSEMFASTMUTEX;
    RTSpinlockDestroy(pDevExt->Spinlock);
    pDevExt->Spinlock = NIL_RTSPINLOCK;

    /*
     * Free lists.
     */
#ifdef VBOX_WITH_IDT_PATCHING
    /* patches */
    /** @todo make sure we don't uninstall patches which have been patched by someone else. */
    pPatch = pDevExt->pIdtPatchesFree;
    pDevExt->pIdtPatchesFree = NULL;
    while (pPatch)
    {
        void *pvFree = pPatch;
        pPatch = pPatch->pNext;
        RTMemExecFree(pvFree);
    }
#endif /* VBOX_WITH_IDT_PATCHING */

    /* objects. */
    pObj = pDevExt->pObjs;
#if !defined(DEBUG_bird) || !defined(RT_OS_LINUX) /* breaks unloading, temporary, remove me! */
    Assert(!pObj);                      /* (can trigger on forced unloads) */
#endif
    pDevExt->pObjs = NULL;
    while (pObj)
    {
        void *pvFree = pObj;
        pObj = pObj->pNext;
        RTMemFree(pvFree);
    }

    /* usage records. */
    pUsage = pDevExt->pUsageFree;
    pDevExt->pUsageFree = NULL;
    while (pUsage)
    {
        void *pvFree = pUsage;
        pUsage = pUsage->pNext;
        RTMemFree(pvFree);
    }

    /* kill the GIP */
    supdrvGipDestroy(pDevExt);
}


/**
 * Create session.
 *
 * @returns IPRT status code.
 * @param   pDevExt     Device extension.
 * @param   ppSession   Where to store the pointer to the session data.
 */
int VBOXCALL supdrvCreateSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION *ppSession)
{
    /*
     * Allocate memory for the session data.
     */
    int rc = VERR_NO_MEMORY;
    PSUPDRVSESSION pSession = *ppSession = (PSUPDRVSESSION)RTMemAllocZ(sizeof(*pSession));
    if (pSession)
    {
        /* Initialize session data. */
        rc = RTSpinlockCreate(&pSession->Spinlock);
        if (!rc)
        {
            Assert(pSession->Spinlock != NIL_RTSPINLOCK);
            pSession->pDevExt   = pDevExt;
            pSession->u32Cookie = BIRD_INV;
            /*pSession->pLdrUsage      = NULL;
            pSession->pPatchUsage    = NULL;
            pSession->pUsage         = NULL;
            pSession->pGip           = NULL;
            pSession->fGipReferenced = false;
            pSession->Bundle.cUsed   = 0 */

            LogFlow(("Created session %p initial cookie=%#x\n", pSession, pSession->u32Cookie));
            return VINF_SUCCESS;
        }

        RTMemFree(pSession);
        *ppSession = NULL;
        Log(("Failed to create spinlock, rc=%d!\n", rc));
    }

    return rc;
}
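
/*
 * Illustrative sketch (hypothetical OS glue, not part of this file): a
 * typical device-open handler pairs supdrvCreateSession() with
 * supdrvCloseSession() on the release path.
 */
#if 0
static int supdrvExampleDeviceOpen(PSUPDRVDEVEXT pDevExt, void **ppvPrivate)
{
    PSUPDRVSESSION pSession;
    int rc = supdrvCreateSession(pDevExt, &pSession);
    if (RT_SUCCESS(rc))
        *ppvPrivate = pSession;     /* handed back to supdrvCloseSession() on close */
    return rc;
}
#endif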


/**
 * Shared code for cleaning up a session.
 *
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 *                      This data will be freed by this routine.
 */
void VBOXCALL supdrvCloseSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    /*
     * Cleanup the session first.
     */
    supdrvCleanupSession(pDevExt, pSession);

    /*
     * Free the rest of the session stuff.
     */
    RTSpinlockDestroy(pSession->Spinlock);
    pSession->Spinlock = NIL_RTSPINLOCK;
    pSession->pDevExt = NULL;
    RTMemFree(pSession);
    LogFlow(("supdrvCloseSession: returns\n"));
}


/**
 * Shared code for cleaning up a session (but not quite freeing it).
 *
 * This is primarily intended for MAC OS X where we have to clean up the memory
 * stuff before the file handle is closed.
 *
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 *                      This data will be freed by this routine.
 */
void VBOXCALL supdrvCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    PSUPDRVBUNDLE pBundle;
    LogFlow(("supdrvCleanupSession: pSession=%p\n", pSession));

    /*
     * Remove logger instances related to this session.
     */
    RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pSession);

#ifdef VBOX_WITH_IDT_PATCHING
    /*
     * Uninstall any IDT patches installed for this session.
     */
    supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
#endif

    /*
     * Release object references made in this session.
     * In theory there should be no one racing us in this session.
     */
    Log2(("release objects - start\n"));
    if (pSession->pUsage)
    {
        RTSPINLOCKTMP   SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
        PSUPDRVUSAGE    pUsage;
        RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);

        while ((pUsage = pSession->pUsage) != NULL)
        {
            PSUPDRVOBJ pObj = pUsage->pObj;
            pSession->pUsage = pUsage->pNext;

            AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
            if (pUsage->cUsage < pObj->cUsage)
            {
                pObj->cUsage -= pUsage->cUsage;
                RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
            }
            else
            {
                /* Destroy the object and free the record. */
                if (pDevExt->pObjs == pObj)
                    pDevExt->pObjs = pObj->pNext;
                else
                {
                    PSUPDRVOBJ pObjPrev;
                    for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
                        if (pObjPrev->pNext == pObj)
                        {
                            pObjPrev->pNext = pObj->pNext;
                            break;
                        }
                    Assert(pObjPrev);
                }
                RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);

                Log(("supdrvCleanupSession: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
                     pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
                if (pObj->pfnDestructor)
                    pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
                RTMemFree(pObj);
            }

            /* free it and continue. */
            RTMemFree(pUsage);

            RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
        }

        RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
        AssertMsg(!pSession->pUsage, ("Some buster reregistered an object during destruction!\n"));
    }
    Log2(("release objects - done\n"));

    /*
     * Release memory allocated in the session.
     *
     * We do not serialize this as we assume that the application will
     * not allocate memory while closing the file handle object.
     */
    Log2(("freeing memory:\n"));
    pBundle = &pSession->Bundle;
    while (pBundle)
    {
        PSUPDRVBUNDLE pToFree;
        unsigned i;

        /*
         * Check and unlock all entries in the bundle.
         */
        for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
        {
            if (pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ)
            {
                int rc;
                Log2(("eType=%d pvR0=%p pvR3=%p cb=%ld\n", pBundle->aMem[i].eType, RTR0MemObjAddress(pBundle->aMem[i].MemObj),
                      (void *)RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3), (long)RTR0MemObjSize(pBundle->aMem[i].MemObj)));
                if (pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ)
                {
                    rc = RTR0MemObjFree(pBundle->aMem[i].MapObjR3, false);
                    AssertRC(rc); /** @todo figure out how to handle this. */
                    pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
                }
                rc = RTR0MemObjFree(pBundle->aMem[i].MemObj, false);
                AssertRC(rc); /** @todo figure out how to handle this. */
                pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
                pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
            }
        }

        /*
         * Advance and free previous bundle.
         */
        pToFree = pBundle;
        pBundle = pBundle->pNext;

        pToFree->pNext = NULL;
        pToFree->cUsed = 0;
        if (pToFree != &pSession->Bundle)
            RTMemFree(pToFree);
    }
    Log2(("freeing memory - done\n"));

    /*
     * Loaded images need to be dereferenced and possibly freed up.
     */
    RTSemFastMutexRequest(pDevExt->mtxLdr);
    Log2(("freeing images:\n"));
    if (pSession->pLdrUsage)
    {
        PSUPDRVLDRUSAGE pUsage = pSession->pLdrUsage;
        pSession->pLdrUsage = NULL;
        while (pUsage)
        {
            void           *pvFree = pUsage;
            PSUPDRVLDRIMAGE pImage = pUsage->pImage;
            if (pImage->cUsage > pUsage->cUsage)
                pImage->cUsage -= pUsage->cUsage;
            else
                supdrvLdrFree(pDevExt, pImage);
            pUsage->pImage = NULL;
            pUsage = pUsage->pNext;
            RTMemFree(pvFree);
        }
    }
    RTSemFastMutexRelease(pDevExt->mtxLdr);
    Log2(("freeing images - done\n"));

    /*
     * Unmap the GIP.
     */
    Log2(("unmapping GIP:\n"));
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        SUPR0GipUnmap(pSession);
        pSession->fGipReferenced = 0;
    }
    Log2(("unmapping GIP - done\n"));
}


/**
 * Fast path I/O Control worker.
 *
 * @returns VBox status code that should be passed down to ring-3 unchanged.
 * @param   uIOCtl      Function number.
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 */
int VBOXCALL supdrvIOCtlFast(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    int rc;

    /*
     * We check the two prereqs after doing this only to allow the compiler to optimize things better.
     */
    if (RT_LIKELY(pSession->pVM && pDevExt->pfnVMMR0EntryFast))
    {
        switch (uIOCtl)
        {
            case SUP_IOCTL_FAST_DO_RAW_RUN:
                rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_RAW_RUN);
                break;
            case SUP_IOCTL_FAST_DO_HWACC_RUN:
                rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_HWACC_RUN);
                break;
            case SUP_IOCTL_FAST_DO_NOP:
                rc = pDevExt->pfnVMMR0EntryFast(pSession->pVM, SUP_VMMR0_DO_NOP);
                break;
            default:
                rc = VERR_INTERNAL_ERROR;
                break;
        }
    }
    else
        rc = VERR_INTERNAL_ERROR;

    return rc;
}
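
/*
 * Illustrative sketch (hypothetical OS glue, not part of this file): the fast
 * path codes carry no request buffer, so an OS ioctl entry point can forward
 * them straight to supdrvIOCtlFast() without any copying.
 */
#if 0
static int supdrvExampleIoctlEntry(unsigned uCmd, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    if (    uCmd == SUP_IOCTL_FAST_DO_RAW_RUN
        ||  uCmd == SUP_IOCTL_FAST_DO_HWACC_RUN
        ||  uCmd == SUP_IOCTL_FAST_DO_NOP)
        return supdrvIOCtlFast(uCmd, pDevExt, pSession);
    return VERR_NOT_SUPPORTED; /* hand over to the buffered supdrvIOCtl() path instead */
}
#endif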


/**
 * Helper for supdrvIOCtl. Check if pszStr contains any character of pszChars.
 * We would use strpbrk here if it were on the RedHat kABI whitelist, see
 * http://www.kerneldrivers.org/RHEL5.
 *
 * @return  1 if pszStr does contain any character of pszChars, 0 otherwise.
 * @param   pszStr      String to check
 * @param   pszChars    Character set
 */
static int supdrvCheckInvalidChar(const char *pszStr, const char *pszChars)
{
    int chCur;
    while ((chCur = *pszStr++) != '\0')
    {
        int ch;
        const char *psz = pszChars;
        while ((ch = *psz++) != '\0')
            if (ch == chCur)
                return 1;

    }
    return 0;
}
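
/*
 * Illustrative example: with the loader's invalid-character set used below,
 * supdrvCheckInvalidChar("vmm/r0", ";:()[]{}/\\|&*%#@!~`\"'") returns 1
 * (the name contains '/'), while "VMMR0.r0" passes with 0.
 */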


/**
 * I/O Control worker.
 *
 * @returns 0 on success.
 * @returns VERR_INVALID_PARAMETER if the request is invalid.
 *
 * @param   uIOCtl      Function number.
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 * @param   pReqHdr     The request header.
 */
int VBOXCALL supdrvIOCtl(uintptr_t uIOCtl, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPREQHDR pReqHdr)
{
    /*
     * Validate the request.
     */
    /* this first check could probably be omitted as it's also done by the OS specific code... */
    if (RT_UNLIKELY(    (pReqHdr->fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC
                    ||  pReqHdr->cbIn < sizeof(*pReqHdr)
                    ||  pReqHdr->cbOut < sizeof(*pReqHdr)))
    {
        OSDBGPRINT(("vboxdrv: Bad ioctl request header; cbIn=%#lx cbOut=%#lx fFlags=%#lx\n",
                    (long)pReqHdr->cbIn, (long)pReqHdr->cbOut, (long)pReqHdr->fFlags));
        return VERR_INVALID_PARAMETER;
    }
    if (RT_UNLIKELY(uIOCtl == SUP_IOCTL_COOKIE))
    {
        if (pReqHdr->u32Cookie != SUPCOOKIE_INITIAL_COOKIE)
        {
            OSDBGPRINT(("SUP_IOCTL_COOKIE: bad cookie %#lx\n", (long)pReqHdr->u32Cookie));
            return VERR_INVALID_PARAMETER;
        }
    }
    else if (RT_UNLIKELY(    pReqHdr->u32Cookie != pDevExt->u32Cookie
                         ||  pReqHdr->u32SessionCookie != pSession->u32Cookie))
    {
        OSDBGPRINT(("vboxdrv: bad cookie %#lx / %#lx.\n", (long)pReqHdr->u32Cookie, (long)pReqHdr->u32SessionCookie));
        return VERR_INVALID_PARAMETER;
    }

/*
 * Validation macros
 */
#define REQ_CHECK_SIZES_EX(Name, cbInExpect, cbOutExpect) \
    do { \
        if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect) || pReqHdr->cbOut != (cbOutExpect))) \
        { \
            OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld. cbOut=%ld expected %ld.\n", \
                        (long)pReq->Hdr.cbIn, (long)(cbInExpect), (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
            return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
        } \
    } while (0)

#define REQ_CHECK_SIZES(Name) REQ_CHECK_SIZES_EX(Name, Name ## _SIZE_IN, Name ## _SIZE_OUT)

#define REQ_CHECK_SIZE_IN(Name, cbInExpect) \
    do { \
        if (RT_UNLIKELY(pReqHdr->cbIn != (cbInExpect))) \
        { \
            OSDBGPRINT(( #Name ": Invalid input/output sizes. cbIn=%ld expected %ld.\n", \
                        (long)pReq->Hdr.cbIn, (long)(cbInExpect))); \
            return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
        } \
    } while (0)

#define REQ_CHECK_SIZE_OUT(Name, cbOutExpect) \
    do { \
        if (RT_UNLIKELY(pReqHdr->cbOut != (cbOutExpect))) \
        { \
            OSDBGPRINT(( #Name ": Invalid input/output sizes. cbOut=%ld expected %ld.\n", \
                        (long)pReq->Hdr.cbOut, (long)(cbOutExpect))); \
            return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
        } \
    } while (0)

#define REQ_CHECK_EXPR(Name, expr) \
    do { \
        if (RT_UNLIKELY(!(expr))) \
        { \
            OSDBGPRINT(( #Name ": %s\n", #expr)); \
            return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
        } \
    } while (0)

#define REQ_CHECK_EXPR_FMT(expr, fmt) \
    do { \
        if (RT_UNLIKELY(!(expr))) \
        { \
            OSDBGPRINT( fmt ); \
            return pReq->Hdr.rc = VERR_INVALID_PARAMETER; \
        } \
    } while (0)


    /*
     * The switch.
     */
    switch (SUP_CTL_CODE_NO_SIZE(uIOCtl))
    {
        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_COOKIE):
        {
            PSUPCOOKIE pReq = (PSUPCOOKIE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_COOKIE);
            if (strncmp(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic)))
            {
                OSDBGPRINT(("SUP_IOCTL_COOKIE: invalid magic %.16s\n", pReq->u.In.szMagic));
                pReq->Hdr.rc = VERR_INVALID_MAGIC;
                return 0;
            }

#if 0
            /*
             * Call out to the OS specific code and let it do permission checks on the
             * client process.
             */
            if (!supdrvOSValidateClientProcess(pDevExt, pSession))
            {
                pReq->u.Out.u32Cookie         = 0xffffffff;
                pReq->u.Out.u32SessionCookie  = 0xffffffff;
                pReq->u.Out.u32SessionVersion = 0xffffffff;
                pReq->u.Out.u32DriverVersion  = SUPDRVIOC_VERSION;
                pReq->u.Out.pSession          = NULL;
                pReq->u.Out.cFunctions        = 0;
                pReq->Hdr.rc = VERR_PERMISSION_DENIED;
                return 0;
            }
#endif

            /*
             * Match the version.
             * The current logic is very simple, match the major interface version.
             */
            if (    pReq->u.In.u32MinVersion > SUPDRVIOC_VERSION
                ||  (pReq->u.In.u32MinVersion & 0xffff0000) != (SUPDRVIOC_VERSION & 0xffff0000))
            {
                OSDBGPRINT(("SUP_IOCTL_COOKIE: Version mismatch. Requested: %#x  Min: %#x  Current: %#x\n",
                            pReq->u.In.u32ReqVersion, pReq->u.In.u32MinVersion, SUPDRVIOC_VERSION));
                pReq->u.Out.u32Cookie         = 0xffffffff;
                pReq->u.Out.u32SessionCookie  = 0xffffffff;
                pReq->u.Out.u32SessionVersion = 0xffffffff;
                pReq->u.Out.u32DriverVersion  = SUPDRVIOC_VERSION;
                pReq->u.Out.pSession          = NULL;
                pReq->u.Out.cFunctions        = 0;
                pReq->Hdr.rc = VERR_VERSION_MISMATCH;
                return 0;
            }

            /*
             * Fill in return data and be gone.
             * N.B. The first one to change SUPDRVIOC_VERSION shall make sure that
             *      u32SessionVersion <= u32ReqVersion!
             */
            /** @todo Somehow validate the client and negotiate a secure cookie... */
            pReq->u.Out.u32Cookie         = pDevExt->u32Cookie;
            pReq->u.Out.u32SessionCookie  = pSession->u32Cookie;
            pReq->u.Out.u32SessionVersion = SUPDRVIOC_VERSION;
            pReq->u.Out.u32DriverVersion  = SUPDRVIOC_VERSION;
            pReq->u.Out.pSession          = pSession;
            pReq->u.Out.cFunctions        = sizeof(g_aFunctions) / sizeof(g_aFunctions[0]);
            pReq->Hdr.rc = VINF_SUCCESS;
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_QUERY_FUNCS(0)):
        {
            /* validate */
            PSUPQUERYFUNCS pReq = (PSUPQUERYFUNCS)pReqHdr;
            REQ_CHECK_SIZES_EX(SUP_IOCTL_QUERY_FUNCS, SUP_IOCTL_QUERY_FUNCS_SIZE_IN, SUP_IOCTL_QUERY_FUNCS_SIZE_OUT(RT_ELEMENTS(g_aFunctions)));

            /* execute */
            pReq->u.Out.cFunctions = RT_ELEMENTS(g_aFunctions);
            memcpy(&pReq->u.Out.aFunctions[0], g_aFunctions, sizeof(g_aFunctions));
            pReq->Hdr.rc = VINF_SUCCESS;
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_INSTALL):
        {
            /* validate */
            PSUPIDTINSTALL pReq = (PSUPIDTINSTALL)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_IDT_INSTALL);

            /* execute */
#ifdef VBOX_WITH_IDT_PATCHING
            pReq->Hdr.rc = supdrvIOCtl_IdtInstall(pDevExt, pSession, pReq);
#else
            pReq->u.Out.u8Idt = 3;
            pReq->Hdr.rc = VERR_NOT_SUPPORTED;
#endif
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_IDT_REMOVE):
        {
            /* validate */
            PSUPIDTREMOVE pReq = (PSUPIDTREMOVE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_IDT_REMOVE);

            /* execute */
#ifdef VBOX_WITH_IDT_PATCHING
            pReq->Hdr.rc = supdrvIOCtl_IdtRemoveAll(pDevExt, pSession);
#else
            pReq->Hdr.rc = VERR_NOT_SUPPORTED;
#endif
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_LOCK):
        {
            /* validate */
            PSUPPAGELOCK pReq = (PSUPPAGELOCK)pReqHdr;
            REQ_CHECK_SIZE_IN(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_IN);
            REQ_CHECK_SIZE_OUT(SUP_IOCTL_PAGE_LOCK, SUP_IOCTL_PAGE_LOCK_SIZE_OUT(pReq->u.In.cPages));
            REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.cPages > 0);
            REQ_CHECK_EXPR(SUP_IOCTL_PAGE_LOCK, pReq->u.In.pvR3 >= PAGE_SIZE);

            /* execute */
            pReq->Hdr.rc = SUPR0LockMem(pSession, pReq->u.In.pvR3, pReq->u.In.cPages, &pReq->u.Out.aPages[0]);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_UNLOCK):
        {
            /* validate */
            PSUPPAGEUNLOCK pReq = (PSUPPAGEUNLOCK)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_PAGE_UNLOCK);

            /* execute */
            pReq->Hdr.rc = SUPR0UnlockMem(pSession, pReq->u.In.pvR3);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_ALLOC):
        {
            /* validate */
            PSUPCONTALLOC pReq = (PSUPCONTALLOC)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_CONT_ALLOC);

            /* execute */
            pReq->Hdr.rc = SUPR0ContAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.HCPhys);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CONT_FREE):
        {
            /* validate */
            PSUPCONTFREE pReq = (PSUPCONTFREE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_CONT_FREE);

            /* execute */
            pReq->Hdr.rc = SUPR0ContFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_OPEN):
        {
            /* validate */
            PSUPLDROPEN pReq = (PSUPLDROPEN)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_LDR_OPEN);
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage > 0);
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.cbImage < _1M*16);
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, pReq->u.In.szName[0]);
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, memchr(pReq->u.In.szName, '\0', sizeof(pReq->u.In.szName)));
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_OPEN, !supdrvCheckInvalidChar(pReq->u.In.szName, ";:()[]{}/\\|&*%#@!~`\"'"));

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_LdrOpen(pDevExt, pSession, pReq);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_LOAD):
        {
            /* validate */
            PSUPLDRLOAD pReq = (PSUPLDRLOAD)pReqHdr;
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->Hdr.cbIn >= sizeof(*pReq));
            REQ_CHECK_SIZES_EX(SUP_IOCTL_LDR_LOAD, SUP_IOCTL_LDR_LOAD_SIZE_IN(pReq->u.In.cbImage), SUP_IOCTL_LDR_LOAD_SIZE_OUT);
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_LOAD, pReq->u.In.cSymbols <= 16384);
            REQ_CHECK_EXPR_FMT(     !pReq->u.In.cSymbols
                               ||   (   pReq->u.In.offSymbols < pReq->u.In.cbImage
                                     && pReq->u.In.offSymbols + pReq->u.In.cSymbols * sizeof(SUPLDRSYM) <= pReq->u.In.cbImage),
                               ("SUP_IOCTL_LDR_LOAD: offSymbols=%#lx cSymbols=%#lx cbImage=%#lx\n", (long)pReq->u.In.offSymbols,
                                (long)pReq->u.In.cSymbols, (long)pReq->u.In.cbImage));
            REQ_CHECK_EXPR_FMT(     !pReq->u.In.cbStrTab
                               ||   (   pReq->u.In.offStrTab < pReq->u.In.cbImage
                                     && pReq->u.In.offStrTab + pReq->u.In.cbStrTab <= pReq->u.In.cbImage
                                     && pReq->u.In.cbStrTab <= pReq->u.In.cbImage),
                               ("SUP_IOCTL_LDR_LOAD: offStrTab=%#lx cbStrTab=%#lx cbImage=%#lx\n", (long)pReq->u.In.offStrTab,
                                (long)pReq->u.In.cbStrTab, (long)pReq->u.In.cbImage));

            if (pReq->u.In.cSymbols)
            {
                uint32_t i;
                PSUPLDRSYM paSyms = (PSUPLDRSYM)&pReq->u.In.achImage[pReq->u.In.offSymbols];
                for (i = 0; i < pReq->u.In.cSymbols; i++)
                {
                    REQ_CHECK_EXPR_FMT(paSyms[i].offSymbol < pReq->u.In.cbImage,
                                       ("SUP_IOCTL_LDR_LOAD: sym #%ld: symb off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offSymbol, (long)pReq->u.In.cbImage));
                    REQ_CHECK_EXPR_FMT(paSyms[i].offName < pReq->u.In.cbStrTab,
                                       ("SUP_IOCTL_LDR_LOAD: sym #%ld: name off %#lx (max=%#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImage));
                    REQ_CHECK_EXPR_FMT(memchr(&pReq->u.In.achImage[pReq->u.In.offStrTab + paSyms[i].offName], '\0', pReq->u.In.cbStrTab - paSyms[i].offName),
                                       ("SUP_IOCTL_LDR_LOAD: sym #%ld: unterminated name! (%#lx / %#lx)\n", (long)i, (long)paSyms[i].offName, (long)pReq->u.In.cbImage));
                }
            }

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_LdrLoad(pDevExt, pSession, pReq);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_FREE):
        {
            /* validate */
            PSUPLDRFREE pReq = (PSUPLDRFREE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_LDR_FREE);

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_LdrFree(pDevExt, pSession, pReq);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LDR_GET_SYMBOL):
        {
            /* validate */
            PSUPLDRGETSYMBOL pReq = (PSUPLDRGETSYMBOL)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_LDR_GET_SYMBOL);
            REQ_CHECK_EXPR(SUP_IOCTL_LDR_GET_SYMBOL, memchr(pReq->u.In.szSymbol, '\0', sizeof(pReq->u.In.szSymbol)));

            /* execute */
            pReq->Hdr.rc = supdrvIOCtl_LdrGetSymbol(pDevExt, pSession, pReq);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_CALL_VMMR0(0)):
        {
            /* validate */
            PSUPCALLVMMR0 pReq = (PSUPCALLVMMR0)pReqHdr;
            Log4(("SUP_IOCTL_CALL_VMMR0: op=%u in=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                  pReq->u.In.uOperation, pReq->Hdr.cbIn, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));

            if (pReq->Hdr.cbIn == SUP_IOCTL_CALL_VMMR0_SIZE(0))
            {
                REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(0), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(0));

                /* execute */
                if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
                    pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, NULL, pReq->u.In.u64Arg);
                else
                    pReq->Hdr.rc = VERR_WRONG_ORDER;
            }
            else
            {
                PSUPVMMR0REQHDR pVMMReq = (PSUPVMMR0REQHDR)&pReq->abReqPkt[0];
                REQ_CHECK_EXPR_FMT(pReq->Hdr.cbIn >= SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR)),
                                   ("SUP_IOCTL_CALL_VMMR0: cbIn=%#x < %#x\n", pReq->Hdr.cbIn, SUP_IOCTL_CALL_VMMR0_SIZE(sizeof(SUPVMMR0REQHDR))));
                REQ_CHECK_EXPR(SUP_IOCTL_CALL_VMMR0, pVMMReq->u32Magic == SUPVMMR0REQHDR_MAGIC);
                REQ_CHECK_SIZES_EX(SUP_IOCTL_CALL_VMMR0, SUP_IOCTL_CALL_VMMR0_SIZE_IN(pVMMReq->cbReq), SUP_IOCTL_CALL_VMMR0_SIZE_OUT(pVMMReq->cbReq));

                /* execute */
                if (RT_LIKELY(pDevExt->pfnVMMR0EntryEx))
                    pReq->Hdr.rc = pDevExt->pfnVMMR0EntryEx(pReq->u.In.pVMR0, pReq->u.In.uOperation, pVMMReq, pReq->u.In.u64Arg);
                else
                    pReq->Hdr.rc = VERR_WRONG_ORDER;
            }

            if (    RT_FAILURE(pReq->Hdr.rc)
                &&  pReq->Hdr.rc != VERR_INTERRUPTED
                &&  pReq->Hdr.rc != VERR_TIMEOUT)
                Log(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                     pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
            else
                Log4(("SUP_IOCTL_CALL_VMMR0: rc=%Rrc op=%u out=%u arg=%RX64 p/t=%RTproc/%RTthrd\n",
                      pReq->Hdr.rc, pReq->u.In.uOperation, pReq->Hdr.cbOut, pReq->u.In.u64Arg, RTProcSelf(), RTThreadNativeSelf()));
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GET_PAGING_MODE):
        {
            /* validate */
            PSUPGETPAGINGMODE pReq = (PSUPGETPAGINGMODE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_GET_PAGING_MODE);

            /* execute */
            pReq->Hdr.rc = VINF_SUCCESS;
            pReq->u.Out.enmMode = supdrvIOCtl_GetPagingMode();
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_ALLOC):
        {
            /* validate */
            PSUPLOWALLOC pReq = (PSUPLOWALLOC)pReqHdr;
            REQ_CHECK_EXPR(SUP_IOCTL_LOW_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_LOW_ALLOC_SIZE_IN);
            REQ_CHECK_SIZES_EX(SUP_IOCTL_LOW_ALLOC, SUP_IOCTL_LOW_ALLOC_SIZE_IN, SUP_IOCTL_LOW_ALLOC_SIZE_OUT(pReq->u.In.cPages));

            /* execute */
            pReq->Hdr.rc = SUPR0LowAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR0, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_LOW_FREE):
        {
            /* validate */
            PSUPLOWFREE pReq = (PSUPLOWFREE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_LOW_FREE);

            /* execute */
            pReq->Hdr.rc = SUPR0LowFree(pSession, (RTHCUINTPTR)pReq->u.In.pvR3);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_MAP):
        {
            /* validate */
            PSUPGIPMAP pReq = (PSUPGIPMAP)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_GIP_MAP);

            /* execute */
            pReq->Hdr.rc = SUPR0GipMap(pSession, &pReq->u.Out.pGipR3, &pReq->u.Out.HCPhysGip);
            if (RT_SUCCESS(pReq->Hdr.rc))
                pReq->u.Out.pGipR0 = pDevExt->pGip;
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_GIP_UNMAP):
        {
            /* validate */
            PSUPGIPUNMAP pReq = (PSUPGIPUNMAP)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_GIP_UNMAP);

            /* execute */
            pReq->Hdr.rc = SUPR0GipUnmap(pSession);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_SET_VM_FOR_FAST):
        {
            /* validate */
            PSUPSETVMFORFAST pReq = (PSUPSETVMFORFAST)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_SET_VM_FOR_FAST);
            REQ_CHECK_EXPR_FMT(     !pReq->u.In.pVMR0
                               ||   (   VALID_PTR(pReq->u.In.pVMR0)
                                     && !((uintptr_t)pReq->u.In.pVMR0 & (PAGE_SIZE - 1))),
                               ("SUP_IOCTL_SET_VM_FOR_FAST: pVMR0=%p!\n", pReq->u.In.pVMR0));
            /* execute */
            pSession->pVM = pReq->u.In.pVMR0;
            pReq->Hdr.rc = VINF_SUCCESS;
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_ALLOC):
        {
            /* validate */
            PSUPPAGEALLOC pReq = (PSUPPAGEALLOC)pReqHdr;
            REQ_CHECK_EXPR(SUP_IOCTL_PAGE_ALLOC, pReq->Hdr.cbIn <= SUP_IOCTL_PAGE_ALLOC_SIZE_IN);
            REQ_CHECK_SIZES_EX(SUP_IOCTL_PAGE_ALLOC, SUP_IOCTL_PAGE_ALLOC_SIZE_IN, SUP_IOCTL_PAGE_ALLOC_SIZE_OUT(pReq->u.In.cPages));

            /* execute */
            pReq->Hdr.rc = SUPR0PageAlloc(pSession, pReq->u.In.cPages, &pReq->u.Out.pvR3, &pReq->u.Out.aPages[0]);
            if (RT_FAILURE(pReq->Hdr.rc))
                pReq->Hdr.cbOut = sizeof(pReq->Hdr);
            return 0;
        }

        case SUP_CTL_CODE_NO_SIZE(SUP_IOCTL_PAGE_FREE):
        {
            /* validate */
            PSUPPAGEFREE pReq = (PSUPPAGEFREE)pReqHdr;
            REQ_CHECK_SIZES(SUP_IOCTL_PAGE_FREE);

            /* execute */
            pReq->Hdr.rc = SUPR0PageFree(pSession, pReq->u.In.pvR3);
            return 0;
        }

        default:
            Log(("Unknown IOCTL %#lx\n", (long)uIOCtl));
            break;
    }
    return SUPDRV_ERR_GENERAL_FAILURE;
}
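
/*
 * Illustrative ring-3 sketch (hypothetical helper, not part of this file):
 * how a client would typically fill in the SUP_IOCTL_COOKIE request that the
 * switch above validates. The size and flag constants are assumed to come
 * from the SUPDrvIOC interface header.
 */
#if 0
static void supdrvExampleInitCookieReq(PSUPCOOKIE pReq)
{
    pReq->Hdr.u32Cookie        = SUPCOOKIE_INITIAL_COOKIE;
    pReq->Hdr.u32SessionCookie = 0;                         /* none yet */
    pReq->Hdr.cbIn             = SUP_IOCTL_COOKIE_SIZE_IN;
    pReq->Hdr.cbOut            = SUP_IOCTL_COOKIE_SIZE_OUT;
    pReq->Hdr.fFlags           = SUPREQHDR_FLAGS_MAGIC;     /* assuming no extra flags are needed */
    pReq->Hdr.rc               = VERR_INTERNAL_ERROR;
    memcpy(pReq->u.In.szMagic, SUPCOOKIE_MAGIC, sizeof(pReq->u.In.szMagic));
    pReq->u.In.u32ReqVersion   = SUPDRVIOC_VERSION;
    pReq->u.In.u32MinVersion   = SUPDRVIOC_VERSION & 0xffff0000;
}
#endif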


/**
 * Inter-Driver Communication (IDC) worker.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_NOT_SUPPORTED if the request isn't supported.
 * @retval  VERR_NOT_IMPLEMENTED while the request is still under development.
 * @retval  VERR_INVALID_PARAMETER if the request is invalid.
 *
 * @param   uReq        The request (function) code.
 * @param   pDevExt     Device extension.
 * @param   pSession    Session data.
 * @param   pReqHdr     The request header.
 */
int VBOXCALL supdrvIDC(uintptr_t uReq, PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPDRVIDCREQHDR pReqHdr)
{
    /*
     * The OS specific code has already validated the pSession
     * pointer, and the request size being greater than or equal to
     * the size of the header.
     *
     * So, just check that pSession is a kernel context session.
     */
    if (RT_UNLIKELY(    pSession
                    &&  pSession->R0Process != NIL_RTR0PROCESS))
        return VERR_INVALID_PARAMETER;

/*
 * Validation macro.
 */
#define REQ_CHECK_IDC_SIZE(Name, cbExpect) \
    do { \
        if (RT_UNLIKELY(pReqHdr->cb != (cbExpect))) \
        { \
            OSDBGPRINT(( #Name ": Invalid input/output sizes. cb=%ld expected %ld.\n", \
                        (long)pReqHdr->cb, (long)(cbExpect))); \
            return pReqHdr->rc = VERR_INVALID_PARAMETER; \
        } \
    } while (0)

    switch (uReq)
    {
        case SUPDRV_IDC_REQ_CONNECT:
        {
            PSUPDRVIDCREQCONNECT pReq = (PSUPDRVIDCREQCONNECT)pReqHdr;
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_CONNECT, sizeof(*pReq));

            return VERR_NOT_IMPLEMENTED;
        }

        case SUPDRV_IDC_REQ_DISCONNECT:
        {
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_DISCONNECT, sizeof(*pReqHdr));

            return VERR_NOT_IMPLEMENTED;
        }

        case SUPDRV_IDC_REQ_GET_SYMBOL:
        {
            PSUPDRVIDCREQGETSYM pReq;
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_GET_SYMBOL, sizeof(*pReq));

            return VERR_NOT_IMPLEMENTED;
        }

        case SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY:
        {
            PSUPDRVIDCREQCOMPREGFACTORY pReq;
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_REGISTER_FACTORY, sizeof(*pReq));

            return VERR_NOT_IMPLEMENTED;
        }

        case SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY:
        {
            PSUPDRVIDCREQCOMPDEREGFACTORY pReq;
            REQ_CHECK_IDC_SIZE(SUPDRV_IDC_REQ_COMPONENT_DEREGISTER_FACTORY, sizeof(*pReq));

            return VERR_NOT_IMPLEMENTED;
        }

        default:
            Log(("Unknown IDC %#lx\n", (long)uReq));
            break;
    }

#undef REQ_CHECK_IDC_SIZE
    return VERR_NOT_SUPPORTED;
}


/**
 * Register an object for reference counting.
 * The object is registered with one reference in the specified session.
 *
 * @returns Unique identifier on success (pointer).
 *          All future references must use this identifier.
 * @returns NULL on failure.
 * @param   pSession        The session registering the object.
 * @param   enmType         The object type.
 * @param   pfnDestructor   The destructor function which will be called when the reference count reaches 0.
 * @param   pvUser1         The first user argument.
 * @param   pvUser2         The second user argument.
 */
SUPR0DECL(void *) SUPR0ObjRegister(PSUPDRVSESSION pSession, SUPDRVOBJTYPE enmType, PFNSUPDRVDESTRUCTOR pfnDestructor, void *pvUser1, void *pvUser2)
{
    RTSPINLOCKTMP   SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
    PSUPDRVDEVEXT   pDevExt     = pSession->pDevExt;
    PSUPDRVOBJ      pObj;
    PSUPDRVUSAGE    pUsage;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), NULL);
    AssertReturn(enmType > SUPDRVOBJTYPE_INVALID && enmType < SUPDRVOBJTYPE_END, NULL);
    AssertPtrReturn(pfnDestructor, NULL);

    /*
     * Allocate and initialize the object.
     */
    pObj = (PSUPDRVOBJ)RTMemAlloc(sizeof(*pObj));
    if (!pObj)
        return NULL;
    pObj->u32Magic      = SUPDRVOBJ_MAGIC;
    pObj->enmType       = enmType;
    pObj->pNext         = NULL;
    pObj->cUsage        = 1;
    pObj->pfnDestructor = pfnDestructor;
    pObj->pvUser1       = pvUser1;
    pObj->pvUser2       = pvUser2;
    pObj->CreatorUid    = pSession->Uid;
    pObj->CreatorGid    = pSession->Gid;
    pObj->CreatorProcess= pSession->Process;
    supdrvOSObjInitCreator(pObj, pSession);

    /*
     * Allocate the usage record.
     * (We keep freed usage records around to simplify SUPR0ObjAddRef().)
     */
    RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);

    pUsage = pDevExt->pUsageFree;
    if (pUsage)
        pDevExt->pUsageFree = pUsage->pNext;
    else
    {
        RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
        pUsage = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsage));
        if (!pUsage)
        {
            RTMemFree(pObj);
            return NULL;
        }
        RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
    }

    /*
     * Insert the object and create the session usage record.
     */
    /* The object. */
    pObj->pNext         = pDevExt->pObjs;
    pDevExt->pObjs      = pObj;

    /* The session record. */
    pUsage->cUsage      = 1;
    pUsage->pObj        = pObj;
    pUsage->pNext       = pSession->pUsage;
    Log2(("SUPR0ObjRegister: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
    pSession->pUsage    = pUsage;

    RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);

    Log(("SUPR0ObjRegister: returns %p (pvUser1=%p, pvUser=%p)\n", pObj, pvUser1, pvUser2));
    return pObj;
}
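
/*
 * Illustrative usage sketch (hypothetical code, not part of this file):
 * registering a shared object with a destructor that frees its payload once
 * the last reference is released. SUPDRVOBJTYPE_VM is used purely as an
 * example type.
 */
#if 0
static DECLCALLBACK(void) supdrvExampleDtor(void *pvObj, void *pvUser1, void *pvUser2)
{
    RTMemFree(pvUser1);     /* the payload allocated by the creator */
    NOREF(pvObj); NOREF(pvUser2);
}

static void *supdrvExampleRegister(PSUPDRVSESSION pSession, void *pvPayload)
{
    return SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, supdrvExampleDtor, pvPayload, NULL);
}
#endif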


/**
 * Increment the reference counter for the object associating the reference
 * with the specified session.
 *
 * @returns IPRT status code.
 * @param   pvObj       The identifier returned by SUPR0ObjRegister().
 * @param   pSession    The session which is referencing the object.
 */
SUPR0DECL(int) SUPR0ObjAddRef(void *pvObj, PSUPDRVSESSION pSession)
{
    RTSPINLOCKTMP   SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
    PSUPDRVDEVEXT   pDevExt     = pSession->pDevExt;
    PSUPDRVOBJ      pObj        = (PSUPDRVOBJ)pvObj;
    PSUPDRVUSAGE    pUsagePre;
    PSUPDRVUSAGE    pUsage;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
                    ("Invalid pvObj=%p magic=%#x (expected %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
                    VERR_INVALID_PARAMETER);

    /*
     * Preallocate the usage record.
     */
    RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);

    pUsagePre = pDevExt->pUsageFree;
    if (pUsagePre)
        pDevExt->pUsageFree = pUsagePre->pNext;
    else
    {
        RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
        pUsagePre = (PSUPDRVUSAGE)RTMemAlloc(sizeof(*pUsagePre));
        if (!pUsagePre)
            return VERR_NO_MEMORY;
        RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
    }

    /*
     * Reference the object.
     */
    pObj->cUsage++;

    /*
     * Look for the session record.
     */
    for (pUsage = pSession->pUsage; pUsage; pUsage = pUsage->pNext)
    {
        Log(("SUPR0AddRef: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
        if (pUsage->pObj == pObj)
            break;
    }
    if (pUsage)
        pUsage->cUsage++;
    else
    {
        /* create a new session record. */
        pUsagePre->cUsage   = 1;
        pUsagePre->pObj     = pObj;
        pUsagePre->pNext    = pSession->pUsage;
        pSession->pUsage    = pUsagePre;
        Log(("SUPR0AddRef: pUsagePre=%p:{.pObj=%p, .pNext=%p}\n", pUsagePre, pUsagePre->pObj, pUsagePre->pNext));

        pUsagePre = NULL;
    }

    /*
     * Put any unused usage record into the free list.
     */
    if (pUsagePre)
    {
        pUsagePre->pNext = pDevExt->pUsageFree;
        pDevExt->pUsageFree = pUsagePre;
    }

    RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);

    return VINF_SUCCESS;
}


/**
 * Decrement / destroy a reference counter record for an object.
 *
 * The object is uniquely identified by pfnDestructor+pvUser1+pvUser2.
 *
 * @returns IPRT status code.
 * @param   pvObj       The identifier returned by SUPR0ObjRegister().
 * @param   pSession    The session which is referencing the object.
 */
SUPR0DECL(int) SUPR0ObjRelease(void *pvObj, PSUPDRVSESSION pSession)
{
    RTSPINLOCKTMP   SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
    PSUPDRVDEVEXT   pDevExt     = pSession->pDevExt;
    PSUPDRVOBJ      pObj        = (PSUPDRVOBJ)pvObj;
    bool            fDestroy    = false;
    PSUPDRVUSAGE    pUsage;
    PSUPDRVUSAGE    pUsagePrev;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
                    ("Invalid pvObj=%p magic=%#x (expected %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
                    VERR_INVALID_PARAMETER);

    /*
     * Acquire the spinlock and look for the usage record.
     */
    RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);

    for (pUsagePrev = NULL, pUsage = pSession->pUsage;
         pUsage;
         pUsagePrev = pUsage, pUsage = pUsage->pNext)
    {
        Log2(("SUPR0ObjRelease: pUsage=%p:{.pObj=%p, .pNext=%p}\n", pUsage, pUsage->pObj, pUsage->pNext));
        if (pUsage->pObj == pObj)
        {
            AssertMsg(pUsage->cUsage >= 1 && pObj->cUsage >= pUsage->cUsage, ("glob %d; sess %d\n", pObj->cUsage, pUsage->cUsage));
            if (pUsage->cUsage > 1)
            {
                pObj->cUsage--;
                pUsage->cUsage--;
            }
            else
            {
                /*
                 * Free the session record.
                 */
                if (pUsagePrev)
                    pUsagePrev->pNext = pUsage->pNext;
                else
                    pSession->pUsage = pUsage->pNext;
                pUsage->pNext = pDevExt->pUsageFree;
                pDevExt->pUsageFree = pUsage;

                /* What about the object? */
                if (pObj->cUsage > 1)
                    pObj->cUsage--;
                else
                {
                    /*
                     * Object is to be destroyed, unlink it.
                     */
                    pObj->u32Magic = SUPDRVOBJ_MAGIC + 1;
                    fDestroy = true;
                    if (pDevExt->pObjs == pObj)
                        pDevExt->pObjs = pObj->pNext;
                    else
                    {
                        PSUPDRVOBJ pObjPrev;
                        for (pObjPrev = pDevExt->pObjs; pObjPrev; pObjPrev = pObjPrev->pNext)
                            if (pObjPrev->pNext == pObj)
                            {
                                pObjPrev->pNext = pObj->pNext;
                                break;
                            }
                        Assert(pObjPrev);
                    }
                }
            }
            break;
        }
    }

    RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);

    /*
     * Call the destructor and free the object if required.
     */
    if (fDestroy)
    {
        Log(("SUPR0ObjRelease: destroying %p/%d (%p/%p) cpid=%RTproc pid=%RTproc dtor=%p\n",
             pObj, pObj->enmType, pObj->pvUser1, pObj->pvUser2, pObj->CreatorProcess, RTProcSelf(), pObj->pfnDestructor));
        if (pObj->pfnDestructor)
            pObj->pfnDestructor(pObj, pObj->pvUser1, pObj->pvUser2);
        RTMemFree(pObj);
    }

    AssertMsg(pUsage, ("pvObj=%p\n", pvObj));
    return pUsage ? VINF_SUCCESS : VERR_INVALID_PARAMETER;
}

/**
 * Verifies that the current process can access the specified object.
 *
 * @returns The following IPRT status code:
 * @retval  VINF_SUCCESS if access was granted.
 * @retval  VERR_PERMISSION_DENIED if access was denied.
 * @retval  VERR_INVALID_PARAMETER if invalid parameter.
 *
 * @param   pvObj       The identifier returned by SUPR0ObjRegister().
 * @param   pSession    The session which wishes to access the object.
 * @param   pszObjName  Object string name. This is optional and depends on the object type.
 *
 * @remark  The caller is responsible for making sure the object isn't removed while
 *          we're inside this function. If uncertain about this, just call AddRef before calling us.
 */
SUPR0DECL(int) SUPR0ObjVerifyAccess(void *pvObj, PSUPDRVSESSION pSession, const char *pszObjName)
{
    PSUPDRVOBJ pObj = (PSUPDRVOBJ)pvObj;
    int rc;

    /*
     * Validate the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertMsgReturn(VALID_PTR(pObj) && pObj->u32Magic == SUPDRVOBJ_MAGIC,
                    ("Invalid pvObj=%p magic=%#x (expected %#x)\n", pvObj, pObj ? pObj->u32Magic : 0, SUPDRVOBJ_MAGIC),
                    VERR_INVALID_PARAMETER);

    /*
     * Check access. (returns true if a decision has been made.)
     */
    rc = VERR_INTERNAL_ERROR;
    if (supdrvOSObjCanAccess(pObj, pSession, pszObjName, &rc))
        return rc;

    /*
     * Default policy is to allow the user to access his own
     * stuff but nothing else.
     */
    if (pObj->CreatorUid == pSession->Uid)
        return VINF_SUCCESS;
    return VERR_PERMISSION_DENIED;
}
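
/*
 * Illustrative sketch (hypothetical helper, not part of this file): a second
 * session attaching to a shared object per the @remark above - take a
 * reference first, then verify access, dropping the reference on denial.
 */
#if 0
static int supdrvExampleAttachObject(void *pvObj, PSUPDRVSESSION pSession)
{
    int rc = SUPR0ObjAddRef(pvObj, pSession);
    if (RT_SUCCESS(rc))
    {
        rc = SUPR0ObjVerifyAccess(pvObj, pSession, NULL /* pszObjName */);
        if (RT_FAILURE(rc))
            SUPR0ObjRelease(pvObj, pSession);
    }
    return rc;
}
#endif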


/**
 * Lock pages.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the locked memory should be associated.
 * @param   pvR3        Start of the memory range to lock.
 *                      This must be page aligned.
 * @param   cPages      Number of pages to lock.
 * @param   paPages     Where to put the physical addresses of the locked pages.
 */
SUPR0DECL(int) SUPR0LockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
{
    int             rc;
    SUPDRVMEMREF    Mem = {0};
    const size_t    cb = (size_t)cPages << PAGE_SHIFT;
    LogFlow(("SUPR0LockMem: pSession=%p pvR3=%p cPages=%d paPages=%p\n", pSession, (void *)pvR3, cPages, paPages));

    /*
     * Verify input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
    if (    RT_ALIGN_R3PT(pvR3, PAGE_SIZE, RTR3PTR) != pvR3
        ||  !pvR3)
    {
        Log(("pvR3 (%p) must be page aligned and not NULL!\n", (void *)pvR3));
        return VERR_INVALID_PARAMETER;
    }

#ifdef RT_OS_WINDOWS /* A temporary hack for windows, will be removed once all ring-3 code has been cleaned up. */
    /* First check if we allocated it using SUPPageAlloc; if so then we don't need to lock it again */
    rc = supdrvPageGetPhys(pSession, pvR3, cPages, paPages);
    if (RT_SUCCESS(rc))
        return rc;
#endif

    /*
     * Let IPRT do the job.
     */
    Mem.eType = MEMREF_TYPE_LOCKED;
    rc = RTR0MemObjLockUser(&Mem.MemObj, pvR3, cb, RTR0ProcHandleSelf());
    if (RT_SUCCESS(rc))
    {
        uint32_t iPage = cPages;
        AssertMsg(RTR0MemObjAddressR3(Mem.MemObj) == pvR3, ("%p == %p\n", RTR0MemObjAddressR3(Mem.MemObj), pvR3));
        AssertMsg(RTR0MemObjSize(Mem.MemObj) == cb, ("%x == %x\n", RTR0MemObjSize(Mem.MemObj), cb));

        while (iPage-- > 0)
        {
            paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
            if (RT_UNLIKELY(paPages[iPage] == NIL_RTCCPHYS))
            {
                AssertMsgFailed(("iPage=%d\n", iPage));
                rc = VERR_INTERNAL_ERROR;
                break;
            }
        }
        if (RT_SUCCESS(rc))
            rc = supdrvMemAdd(&Mem, pSession);
        if (RT_FAILURE(rc))
        {
            int rc2 = RTR0MemObjFree(Mem.MemObj, false);
            AssertRC(rc2);
        }
    }

    return rc;
}
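
/*
 * Illustrative sketch (hypothetical code, not part of this file): locking two
 * user pages, consuming their physical addresses, and unlocking again.
 */
#if 0
static int supdrvExampleLockPages(PSUPDRVSESSION pSession, RTR3PTR pvR3)
{
    RTHCPHYS aPhys[2];
    int rc = SUPR0LockMem(pSession, pvR3, 2 /* cPages */, &aPhys[0]);
    if (RT_SUCCESS(rc))
    {
        /* aPhys[0] and aPhys[1] now hold the page frame addresses. */
        rc = SUPR0UnlockMem(pSession, pvR3);
    }
    return rc;
}
#endif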


/**
 * Unlocks the memory pointed to by pv.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the memory was locked.
 * @param   pvR3        Memory to unlock.
 */
SUPR0DECL(int) SUPR0UnlockMem(PSUPDRVSESSION pSession, RTR3PTR pvR3)
{
    LogFlow(("SUPR0UnlockMem: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
#ifdef RT_OS_WINDOWS
    /*
     * Temporary hack for windows - SUPR0PageFree will unlock SUPR0PageAlloc
     * allocations; ignore this call.
     */
    if (supdrvPageWasLockedByPageAlloc(pSession, pvR3))
    {
        LogFlow(("Page will be unlocked in SUPR0PageFree -> ignore\n"));
        return VINF_SUCCESS;
    }
#endif
    return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED);
}
1663
1664
1665/**
1666 * Allocates a chunk of page aligned memory with contiguous and fixed physical
1667 * backing.
1668 *
1669 * @returns IPRT status code.
1670 * @param pSession Session data.
1671 * @param cb Number of bytes to allocate.
1672 * @param ppvR0 Where to put the address of Ring-0 mapping the allocated memory.
1673 * @param ppvR3 Where to put the address of Ring-3 mapping the allocated memory.
1674 * @param pHCPhys Where to put the physical address of allocated memory.
1675 */
1676SUPR0DECL(int) SUPR0ContAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS pHCPhys)
1677{
1678 int rc;
1679 SUPDRVMEMREF Mem = {0};
1680 LogFlow(("SUPR0ContAlloc: pSession=%p cPages=%d ppvR0=%p ppvR3=%p pHCPhys=%p\n", pSession, cPages, ppvR0, ppvR3, pHCPhys));
1681
1682 /*
1683 * Validate input.
1684 */
1685 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1686 if (!ppvR3 || !ppvR0 || !pHCPhys)
1687 {
1688 Log(("Null pointer. All of these should be set: pSession=%p ppvR0=%p ppvR3=%p pHCPhys=%p\n",
1689 pSession, ppvR0, ppvR3, pHCPhys));
1690 return VERR_INVALID_PARAMETER;
1692 }
1693 if (cPages < 1 || cPages >= 256)
1694 {
1695 Log(("Illegal request cPages=%d, must be greater than 0 and smaller than 256\n", cPages));
1696 return VERR_INVALID_PARAMETER;
1697 }
1698
1699 /*
1700 * Let IPRT do the job.
1701 */
1702 rc = RTR0MemObjAllocCont(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable R0 mapping */);
1703 if (RT_SUCCESS(rc))
1704 {
1705 int rc2;
1706 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1707 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1708 if (RT_SUCCESS(rc))
1709 {
1710 Mem.eType = MEMREF_TYPE_CONT;
1711 rc = supdrvMemAdd(&Mem, pSession);
1712 if (!rc)
1713 {
1714 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1715 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1716 *pHCPhys = RTR0MemObjGetPagePhysAddr(Mem.MemObj, 0);
1717 return VINF_SUCCESS;
1718 }
1719
1720 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1721 AssertRC(rc2);
1722 }
1723 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1724 AssertRC(rc2);
1725 }
1726
1727 return rc;
1728}
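
/*
 * Illustrative usage sketch (hypothetical caller, error handling abbreviated):
 * allocating a physically contiguous buffer shared with ring-3 and freeing it
 * again by either of its two mapping addresses:
 *
 * @code
 *      RTR0PTR pvR0; RTR3PTR pvR3; RTHCPHYS HCPhys;
 *      int rc = SUPR0ContAlloc(pSession, 4, &pvR0, &pvR3, &HCPhys); // 4 pages
 *      if (RT_SUCCESS(rc))
 *      {
 *          // HCPhys is the address of the first page; the rest follow contiguously.
 *          SUPR0ContFree(pSession, (RTHCUINTPTR)pvR0); // pvR3 would work too.
 *      }
 * @endcode
 */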
1729
1730
1731/**
1732 * Frees memory allocated using SUPR0ContAlloc().
1733 *
1734 * @returns IPRT status code.
1735 * @param pSession The session to which the memory was allocated.
1736 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1737 */
1738SUPR0DECL(int) SUPR0ContFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1739{
1740 LogFlow(("SUPR0ContFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1741 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1742 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_CONT);
1743}
1744
1745
1746/**
1747 * Allocates a chunk of page aligned memory with fixed physical backing below 4GB.
1748 *
1749 * The memory isn't zeroed.
1750 *
1751 * @returns IPRT status code.
1752 * @param pSession Session data.
1753 * @param cPages Number of pages to allocate.
1754 * @param ppvR0 Where to put the address of Ring-0 mapping of the allocated memory.
1755 * @param ppvR3 Where to put the address of Ring-3 mapping of the allocated memory.
1756 * @param paPages Where to put the physical addresses of allocated memory.
1757 */
1758SUPR0DECL(int) SUPR0LowAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR0PTR ppvR0, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1759{
1760 unsigned iPage;
1761 int rc;
1762 SUPDRVMEMREF Mem = {0};
1763 LogFlow(("SUPR0LowAlloc: pSession=%p cPages=%d ppvR3=%p ppvR0=%p paPages=%p\n", pSession, cPages, ppvR3, ppvR0, paPages));
1764
1765 /*
1766 * Validate input.
1767 */
1768 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1769 if (!ppvR3 || !ppvR0 || !paPages)
1770 {
1771 Log(("Null pointer. All of these should be set: pSession=%p ppvR3=%p ppvR0=%p paPages=%p\n",
1772 pSession, ppvR3, ppvR0, paPages));
1773 return VERR_INVALID_PARAMETER;
1775 }
1776 if (cPages < 1 || cPages > 256)
1777 {
1778 Log(("Illegal request cPages=%d, must be between 1 and 256.\n", cPages));
1779 return VERR_INVALID_PARAMETER;
1780 }
1781
1782 /*
1783 * Let IPRT do the work.
1784 */
1785 rc = RTR0MemObjAllocLow(&Mem.MemObj, cPages << PAGE_SHIFT, true /* executable ring-0 mapping */);
1786 if (RT_SUCCESS(rc))
1787 {
1788 int rc2;
1789 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1790 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1791 if (RT_SUCCESS(rc))
1792 {
1793 Mem.eType = MEMREF_TYPE_LOW;
1794 rc = supdrvMemAdd(&Mem, pSession);
1795 if (!rc)
1796 {
1797 for (iPage = 0; iPage < cPages; iPage++)
1798 {
1799 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MemObj, iPage);
1800 AssertMsg(!(paPages[iPage] & (PAGE_SIZE - 1)), ("iPage=%d Phys=%VHp\n", iPage, paPages[iPage]));
1801 }
1802 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1803 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1804 return VINF_SUCCESS;
1805 }
1806
1807 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1808 AssertRC(rc2);
1809 }
1810
1811 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1812 AssertRC(rc2);
1813 }
1814
1815 return rc;
1816}
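
/*
 * Illustrative usage sketch (hypothetical caller): SUPR0LowAlloc() differs
 * from SUPR0ContAlloc() in that the pages need not be contiguous, so the
 * physical address of every page is returned:
 *
 * @code
 *      RTR0PTR pvR0; RTR3PTR pvR3; RTHCPHYS aPhys[8];
 *      int rc = SUPR0LowAlloc(pSession, RT_ELEMENTS(aPhys), &pvR0, &pvR3, &aPhys[0]);
 *      if (RT_SUCCESS(rc))
 *          SUPR0LowFree(pSession, (RTHCUINTPTR)pvR3); // either mapping address works.
 * @endcode
 */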
1817
1818
1819/**
1820 * Frees memory allocated using SUPR0LowAlloc().
1821 *
1822 * @returns IPRT status code.
1823 * @param pSession The session to which the memory was allocated.
1824 * @param uPtr Pointer to the memory (ring-3 or ring-0).
1825 */
1826SUPR0DECL(int) SUPR0LowFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1827{
1828 LogFlow(("SUPR0LowFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1829 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1830 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_LOW);
1831}
1832
1833
1835/**
1836 * Allocates a chunk of memory with both R0 and R3 mappings.
1837 * The memory is fixed and it's possible to query the physical addresses using SUPR0MemGetPhys().
1838 *
1839 * @returns IPRT status code.
1840 * @param pSession The session to associate the allocation with.
1841 * @param cb Number of bytes to allocate.
1842 * @param ppvR0 Where to store the address of the Ring-0 mapping.
1843 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1844 */
1845SUPR0DECL(int) SUPR0MemAlloc(PSUPDRVSESSION pSession, uint32_t cb, PRTR0PTR ppvR0, PRTR3PTR ppvR3)
1846{
1847 int rc;
1848 SUPDRVMEMREF Mem = {0};
1849 LogFlow(("SUPR0MemAlloc: pSession=%p cb=%d ppvR0=%p ppvR3=%p\n", pSession, cb, ppvR0, ppvR3));
1850
1851 /*
1852 * Validate input.
1853 */
1854 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1855 AssertPtrReturn(ppvR0, VERR_INVALID_POINTER);
1856 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1857 if (cb < 1 || cb >= _4M)
1858 {
1859 Log(("Illegal request cb=%u; must be greater than 0 and smaller than 4MB.\n", cb));
1860 return VERR_INVALID_PARAMETER;
1861 }
1862
1863 /*
1864 * Let IPRT do the work.
1865 */
1866 rc = RTR0MemObjAllocPage(&Mem.MemObj, cb, true /* executable ring-0 mapping */);
1867 if (RT_SUCCESS(rc))
1868 {
1869 int rc2;
1870 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
1871 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
1872 if (RT_SUCCESS(rc))
1873 {
1874 Mem.eType = MEMREF_TYPE_MEM;
1875 rc = supdrvMemAdd(&Mem, pSession);
1876 if (!rc)
1877 {
1878 *ppvR0 = RTR0MemObjAddress(Mem.MemObj);
1879 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
1880 return VINF_SUCCESS;
1881 }
1882 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
1883 AssertRC(rc2);
1884 }
1885
1886 rc2 = RTR0MemObjFree(Mem.MemObj, false);
1887 AssertRC(rc2);
1888 }
1889
1890 return rc;
1891}
1892
1893
1894/**
1895 * Get the physical addresses of memory allocated using SUPR0MemAlloc().
1896 *
1897 * @returns IPRT status code.
1898 * @param pSession The session to which the memory was allocated.
1899 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1900 * @param paPages Where to store the physical addresses.
1901 */
1902SUPR0DECL(int) SUPR0MemGetPhys(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, PSUPPAGE paPages) /** @todo switch this bugger to RTHCPHYS */
1903{
1904 PSUPDRVBUNDLE pBundle;
1905 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
1906 LogFlow(("SUPR0MemGetPhys: pSession=%p uPtr=%p paPages=%p\n", pSession, (void *)uPtr, paPages));
1907
1908 /*
1909 * Validate input.
1910 */
1911 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1912 AssertPtrReturn(paPages, VERR_INVALID_POINTER);
1913 AssertReturn(uPtr, VERR_INVALID_PARAMETER);
1914
1915 /*
1916 * Search for the address.
1917 */
1918 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
1919 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
1920 {
1921 if (pBundle->cUsed > 0)
1922 {
1923 unsigned i;
1924 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
1925 {
1926 if ( pBundle->aMem[i].eType == MEMREF_TYPE_MEM
1927 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
1928 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
1929 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
1930 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr)
1931 )
1932 )
1933 {
1934 const unsigned cPages = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
1935 unsigned iPage;
1936 for (iPage = 0; iPage < cPages; iPage++)
1937 {
1938 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
1939 paPages[iPage].uReserved = 0;
1940 }
1941 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1942 return VINF_SUCCESS;
1943 }
1944 }
1945 }
1946 }
1947 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
1948 Log(("Failed to find %p!!!\n", (void *)uPtr));
1949 return VERR_INVALID_PARAMETER;
1950}
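
/*
 * Illustrative usage sketch combining SUPR0MemAlloc(), SUPR0MemGetPhys() and
 * SUPR0MemFree() (hypothetical caller; the SUPPAGE array must have one entry
 * per allocated page):
 *
 * @code
 *      RTR0PTR pvR0; RTR3PTR pvR3;
 *      SUPPAGE aPages[2];
 *      int rc = SUPR0MemAlloc(pSession, 2 * PAGE_SIZE, &pvR0, &pvR3);
 *      if (RT_SUCCESS(rc))
 *      {
 *          rc = SUPR0MemGetPhys(pSession, (RTHCUINTPTR)pvR0, &aPages[0]);
 *          // aPages[i].Phys now holds the physical address of page i.
 *          SUPR0MemFree(pSession, (RTHCUINTPTR)pvR0);
 *      }
 * @endcode
 */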
1951
1952
1953/**
1954 * Free memory allocated by SUPR0MemAlloc().
1955 *
1956 * @returns IPRT status code.
1957 * @param pSession The session owning the allocation.
1958 * @param uPtr The Ring-0 or Ring-3 address returned by SUPR0MemAlloc().
1959 */
1960SUPR0DECL(int) SUPR0MemFree(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr)
1961{
1962 LogFlow(("SUPR0MemFree: pSession=%p uPtr=%p\n", pSession, (void *)uPtr));
1963 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1964 return supdrvMemRelease(pSession, uPtr, MEMREF_TYPE_MEM);
1965}
1966
1967
1968/**
1969 * Allocates a chunk of memory with only a ring-3 mapping.
1970 * The memory is fixed and the physical addresses of the backing pages are returned in paPages.
1971 *
1972 * @returns IPRT status code.
1973 * @param pSession The session to associate the allocation with.
1974 * @param cPages The number of pages to allocate.
1975 * @param ppvR3 Where to store the address of the Ring-3 mapping.
1976 * @param paPages Where to store the physical addresses of the pages. Optional.
1977 */
1978SUPR0DECL(int) SUPR0PageAlloc(PSUPDRVSESSION pSession, uint32_t cPages, PRTR3PTR ppvR3, PRTHCPHYS paPages)
1979{
1980 int rc;
1981 SUPDRVMEMREF Mem = {0};
1982 LogFlow(("SUPR0PageAlloc: pSession=%p cPages=%d ppvR3=%p\n", pSession, cPages, ppvR3));
1983
1984 /*
1985 * Validate input. The allocation limit must be at least as large as the maximum guest VRAM size, hence the 128MB cap.
1986 */
1987 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
1988 AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
1989 if (cPages < 1 || cPages > (128 * _1M)/PAGE_SIZE)
1990 {
1991 Log(("SUPR0PageAlloc: Illegal request cPages=%u; must be between 1 and 32768 pages (128MB).\n", cPages));
1992 return VERR_INVALID_PARAMETER;
1993 }
1994
1995 /*
1996 * Let IPRT do the work.
1997 */
1998 rc = RTR0MemObjAllocPhysNC(&Mem.MemObj, (size_t)cPages * PAGE_SIZE, NIL_RTHCPHYS);
1999 if (RT_SUCCESS(rc))
2000 {
2001 int rc2;
2002 rc = RTR0MemObjMapUser(&Mem.MapObjR3, Mem.MemObj, (RTR3PTR)-1, 0,
2003 RTMEM_PROT_EXEC | RTMEM_PROT_WRITE | RTMEM_PROT_READ, RTR0ProcHandleSelf());
2004 if (RT_SUCCESS(rc))
2005 {
2006 Mem.eType = MEMREF_TYPE_LOCKED_SUP;
2007 rc = supdrvMemAdd(&Mem, pSession);
2008 if (!rc)
2009 {
2010 *ppvR3 = RTR0MemObjAddressR3(Mem.MapObjR3);
2011 if (paPages)
2012 {
2013 uint32_t iPage = cPages;
2014 while (iPage-- > 0)
2015 {
2016 paPages[iPage] = RTR0MemObjGetPagePhysAddr(Mem.MapObjR3, iPage);
2017 Assert(paPages[iPage] != NIL_RTHCPHYS);
2018 }
2019 }
2020 return VINF_SUCCESS;
2021 }
2022 rc2 = RTR0MemObjFree(Mem.MapObjR3, false);
2023 AssertRC(rc2);
2024 }
2025
2026 rc2 = RTR0MemObjFree(Mem.MemObj, false);
2027 AssertRC(rc2);
2028 }
2029 return rc;
2030}
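
/*
 * Illustrative usage sketch (hypothetical caller): a larger ring-3 only
 * allocation where the physical addresses are captured up front, released
 * again with SUPR0PageFree() further below:
 *
 * @code
 *      RTR3PTR pvR3;
 *      RTHCPHYS aPhys[16];
 *      int rc = SUPR0PageAlloc(pSession, RT_ELEMENTS(aPhys), &pvR3, &aPhys[0]);
 *      if (RT_SUCCESS(rc))
 *          SUPR0PageFree(pSession, pvR3);
 * @endcode
 */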
2031
2032
2033#ifdef RT_OS_WINDOWS
2034/**
2035 * Check if the pages were locked by SUPR0PageAlloc
2036 *
2037 * This function will be removed along with the lock/unlock hacks when
2038 * we've cleaned up the ring-3 code properly.
2039 *
2040 * @returns boolean
2041 * @param pSession The session to which the memory was allocated.
2042 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
2043 */
2044static bool supdrvPageWasLockedByPageAlloc(PSUPDRVSESSION pSession, RTR3PTR pvR3)
2045{
2046 PSUPDRVBUNDLE pBundle;
2047 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2048 LogFlow(("supdrvPageWasLockedByPageAlloc: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
2049
2050 /*
2051 * Search for the address.
2052 */
2053 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2054 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2055 {
2056 if (pBundle->cUsed > 0)
2057 {
2058 unsigned i;
2059 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2060 {
2061 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
2062 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2063 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2064 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
2065 {
2066 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2067 return true;
2068 }
2069 }
2070 }
2071 }
2072 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2073 return false;
2074}
2075
2076
2077/**
2078 * Get the physical addresses of memory allocated using SUPR0PageAlloc().
2079 *
2080 * This function will be removed along with the lock/unlock hacks when
2081 * we've cleaned up the ring-3 code properly.
2082 *
2083 * @returns IPRT status code.
2084 * @param pSession The session to which the memory was allocated.
2085 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
2086 * @param cPages Number of pages in paPages
2087 * @param paPages Where to store the physical addresses.
2088 */
2089static int supdrvPageGetPhys(PSUPDRVSESSION pSession, RTR3PTR pvR3, uint32_t cPages, PRTHCPHYS paPages)
2090{
2091 PSUPDRVBUNDLE pBundle;
2092 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2093 LogFlow(("supdrvPageGetPhys: pSession=%p pvR3=%p cPages=%#lx paPages=%p\n", pSession, (void *)pvR3, (long)cPages, paPages));
2094
2095 /*
2096 * Search for the address.
2097 */
2098 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2099 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2100 {
2101 if (pBundle->cUsed > 0)
2102 {
2103 unsigned i;
2104 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2105 {
2106 if ( pBundle->aMem[i].eType == MEMREF_TYPE_LOCKED_SUP
2107 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2108 && pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2109 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == pvR3)
2110 {
2111 uint32_t iPage = RTR0MemObjSize(pBundle->aMem[i].MemObj) >> PAGE_SHIFT;
2112 cPages = RT_MIN(iPage, cPages);
2113 for (iPage = 0; iPage < cPages; iPage++)
2114 paPages[iPage] = RTR0MemObjGetPagePhysAddr(pBundle->aMem[i].MemObj, iPage);
2115 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2116 return VINF_SUCCESS;
2117 }
2118 }
2119 }
2120 }
2121 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2122 return VERR_INVALID_PARAMETER;
2123}
2124#endif /* RT_OS_WINDOWS */
2125
2126
2127/**
2128 * Free memory allocated by SUPR0PageAlloc().
2129 *
2130 * @returns IPRT status code.
2131 * @param pSession The session owning the allocation.
2132 * @param pvR3 The Ring-3 address returned by SUPR0PageAlloc().
2133 */
2134SUPR0DECL(int) SUPR0PageFree(PSUPDRVSESSION pSession, RTR3PTR pvR3)
2135{
2136 LogFlow(("SUPR0PageFree: pSession=%p pvR3=%p\n", pSession, (void *)pvR3));
2137 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2138 return supdrvMemRelease(pSession, (RTHCUINTPTR)pvR3, MEMREF_TYPE_LOCKED_SUP);
2139}
2140
2141
2142/**
2143 * Maps the GIP into userspace and/or get the physical address of the GIP.
2144 *
2145 * @returns IPRT status code.
2146 * @param pSession Session to which the GIP mapping should belong.
2147 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
2148 * @param pHCPhysGip Where to store the physical address. (optional)
2149 *
2150 * @remark There is no reference counting on the mapping; any number of calls to this
2151 * function counts globally as a single reference. One call to SUPR0GipUnmap() will
2152 * unmap the GIP and remove the session as a GIP user.
2153 */
2154SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
2155{
2156 int rc = 0;
2157 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2158 RTR3PTR pGip = NIL_RTR3PTR;
2159 RTHCPHYS HCPhys = NIL_RTHCPHYS;
2160 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
2161
2162 /*
2163 * Validate
2164 */
2165 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2166 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
2167 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
2168
2169 RTSemFastMutexRequest(pDevExt->mtxGip);
2170 if (pDevExt->pGip)
2171 {
2172 /*
2173 * Map it?
2174 */
2175 if (ppGipR3)
2176 {
2177 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
2178 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
2179 RTMEM_PROT_READ, RTR0ProcHandleSelf());
2180 if (RT_SUCCESS(rc))
2181 {
2182 pGip = RTR0MemObjAddressR3(pSession->GipMapObjR3);
2183 rc = VINF_SUCCESS; /** @todo remove this and replace the !rc below with RT_SUCCESS(rc). */
2184 }
2185 }
2186
2187 /*
2188 * Get physical address.
2189 */
2190 if (pHCPhysGip && !rc)
2191 HCPhys = pDevExt->HCPhysGip;
2192
2193 /*
2194 * Reference globally.
2195 */
2196 if (!pSession->fGipReferenced && !rc)
2197 {
2198 pSession->fGipReferenced = 1;
2199 pDevExt->cGipUsers++;
2200 if (pDevExt->cGipUsers == 1)
2201 {
2202 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2203 unsigned i;
2204
2205 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
2206
2207 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
2208 ASMAtomicXchgU32(&pGip->aCPUs[i].u32TransactionId, pGip->aCPUs[i].u32TransactionId & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
2209 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, 0);
2210
2211 rc = RTTimerStart(pDevExt->pGipTimer, 0);
2212 AssertRC(rc); rc = VINF_SUCCESS;
2213 }
2214 }
2215 }
2216 else
2217 {
2218 rc = SUPDRV_ERR_GENERAL_FAILURE;
2219 Log(("SUPR0GipMap: GIP is not available!\n"));
2220 }
2221 RTSemFastMutexRelease(pDevExt->mtxGip);
2222
2223 /*
2224 * Write returns.
2225 */
2226 if (pHCPhysGip)
2227 *pHCPhysGip = HCPhys;
2228 if (ppGipR3)
2229 *ppGipR3 = pGip;
2230
2231#ifdef DEBUG_DARWIN_GIP
2232 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGip=%p GipMapObjR3=%p\n", rc, (unsigned long)HCPhys, pGip, pSession->GipMapObjR3));
2233#else
2234 LogFlow(("SUPR0GipMap: returns %d *pHCPhysGip=%lx *ppGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)(uintptr_t)pGip));
2235#endif
2236 return rc;
2237}
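
/*
 * Illustrative usage sketch (hypothetical caller): mapping the GIP into the
 * current process and releasing it again with SUPR0GipUnmap() below. Both
 * output parameters are optional, so a caller interested only in the
 * physical address may pass NULL for ppGipR3:
 *
 * @code
 *      RTR3PTR pGipR3;
 *      RTHCPHYS HCPhysGip;
 *      int rc = SUPR0GipMap(pSession, &pGipR3, &HCPhysGip);
 *      if (RT_SUCCESS(rc))
 *      {
 *          // ... ring-3 reads the GIP via pGipR3 ...
 *          rc = SUPR0GipUnmap(pSession);
 *      }
 * @endcode
 */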
2238
2239
2240/**
2241 * Unmaps any user mapping of the GIP and terminates all GIP access
2242 * from this session.
2243 *
2244 * @returns IPRT status code.
2245 * @param pSession Session to which the GIP mapping should belong.
2246 */
2247SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
2248{
2249 int rc = VINF_SUCCESS;
2250 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
2251#ifdef DEBUG_DARWIN_GIP
2252 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
2253 pSession,
2254 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
2255 pSession->GipMapObjR3));
2256#else
2257 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
2258#endif
2259 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
2260
2261 RTSemFastMutexRequest(pDevExt->mtxGip);
2262
2263 /*
2264 * Unmap anything?
2265 */
2266 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
2267 {
2268 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
2269 AssertRC(rc);
2270 if (RT_SUCCESS(rc))
2271 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
2272 }
2273
2274 /*
2275 * Dereference global GIP.
2276 */
2277 if (pSession->fGipReferenced && !rc)
2278 {
2279 pSession->fGipReferenced = 0;
2280 if ( pDevExt->cGipUsers > 0
2281 && !--pDevExt->cGipUsers)
2282 {
2283 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
2284 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = 0;
2285 }
2286 }
2287
2288 RTSemFastMutexRelease(pDevExt->mtxGip);
2289
2290 return rc;
2291}
2292
2293
2294/**
2295 * Adds a memory object to the session.
2296 *
2297 * @returns IPRT status code.
2298 * @param pMem Memory tracking structure containing the
2299 * information to track.
2300 * @param pSession The session.
2301 */
2302static int supdrvMemAdd(PSUPDRVMEMREF pMem, PSUPDRVSESSION pSession)
2303{
2304 PSUPDRVBUNDLE pBundle;
2305 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2306
2307 /*
2308 * Find free entry and record the allocation.
2309 */
2310 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2311 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2312 {
2313 if (pBundle->cUsed < RT_ELEMENTS(pBundle->aMem))
2314 {
2315 unsigned i;
2316 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2317 {
2318 if (pBundle->aMem[i].MemObj == NIL_RTR0MEMOBJ)
2319 {
2320 pBundle->cUsed++;
2321 pBundle->aMem[i] = *pMem;
2322 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2323 return VINF_SUCCESS;
2324 }
2325 }
2326 AssertFailed(); /* !!this can't be happening!!! */
2327 }
2328 }
2329 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2330
2331 /*
2332 * Need to allocate a new bundle.
2333 * Insert into the last entry in the bundle.
2334 */
2335 pBundle = (PSUPDRVBUNDLE)RTMemAllocZ(sizeof(*pBundle));
2336 if (!pBundle)
2337 return VERR_NO_MEMORY;
2338
2339 /* take last entry. */
2340 pBundle->cUsed++;
2341 pBundle->aMem[RT_ELEMENTS(pBundle->aMem) - 1] = *pMem;
2342
2343 /* insert into list. */
2344 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2345 pBundle->pNext = pSession->Bundle.pNext;
2346 pSession->Bundle.pNext = pBundle;
2347 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2348
2349 return VINF_SUCCESS;
2350}
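
/*
 * For orientation (illustrative diagram, not code): a session keeps its
 * memory references in a chain of fixed-size bundles hanging off
 * SUPDRVSESSION::Bundle, with new bundles pushed onto the front:
 *
 *      pSession->Bundle { aMem[], cUsed, pNext } --> { aMem[], cUsed, pNext } --> NULL
 *
 * Lookups (supdrvMemRelease, SUPR0MemGetPhys, ...) scan every bundle under
 * the session spinlock.
 */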
2351
2352
2353/**
2354 * Releases a memory object referenced by pointer and type.
2355 *
2356 * @returns IPRT status code.
2357 * @param pSession Session data.
2358 * @param uPtr Pointer to memory. This is matched against both the R0 and R3 addresses.
2359 * @param eType Memory type.
2360 */
2361static int supdrvMemRelease(PSUPDRVSESSION pSession, RTHCUINTPTR uPtr, SUPDRVMEMREFTYPE eType)
2362{
2363 PSUPDRVBUNDLE pBundle;
2364 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2365
2366 /*
2367 * Validate input.
2368 */
2369 if (!uPtr)
2370 {
2371 Log(("Illegal address %p\n", (void *)uPtr));
2372 return VERR_INVALID_PARAMETER;
2373 }
2374
2375 /*
2376 * Search for the address.
2377 */
2378 RTSpinlockAcquire(pSession->Spinlock, &SpinlockTmp);
2379 for (pBundle = &pSession->Bundle; pBundle; pBundle = pBundle->pNext)
2380 {
2381 if (pBundle->cUsed > 0)
2382 {
2383 unsigned i;
2384 for (i = 0; i < RT_ELEMENTS(pBundle->aMem); i++)
2385 {
2386 if ( pBundle->aMem[i].eType == eType
2387 && pBundle->aMem[i].MemObj != NIL_RTR0MEMOBJ
2388 && ( (RTHCUINTPTR)RTR0MemObjAddress(pBundle->aMem[i].MemObj) == uPtr
2389 || ( pBundle->aMem[i].MapObjR3 != NIL_RTR0MEMOBJ
2390 && RTR0MemObjAddressR3(pBundle->aMem[i].MapObjR3) == uPtr))
2391 )
2392 {
2393 /* Make a copy of it and release it outside the spinlock. */
2394 SUPDRVMEMREF Mem = pBundle->aMem[i];
2395 pBundle->aMem[i].eType = MEMREF_TYPE_UNUSED;
2396 pBundle->aMem[i].MemObj = NIL_RTR0MEMOBJ;
2397 pBundle->aMem[i].MapObjR3 = NIL_RTR0MEMOBJ;
2398 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2399
2400 if (Mem.MapObjR3 != NIL_RTR0MEMOBJ)
2401 {
2402 int rc = RTR0MemObjFree(Mem.MapObjR3, false);
2403 AssertRC(rc); /** @todo figure out how to handle this. */
2404 }
2405 if (Mem.MemObj != NIL_RTR0MEMOBJ)
2406 {
2407 int rc = RTR0MemObjFree(Mem.MemObj, false);
2408 AssertRC(rc); /** @todo figure out how to handle this. */
2409 }
2410 return VINF_SUCCESS;
2411 }
2412 }
2413 }
2414 }
2415 RTSpinlockRelease(pSession->Spinlock, &SpinlockTmp);
2416 Log(("Failed to find %p!!! (eType=%d)\n", (void *)uPtr, eType));
2417 return VERR_INVALID_PARAMETER;
2418}
2419
2420
2421#ifdef VBOX_WITH_IDT_PATCHING
2422/**
2423 * Install IDT for the current CPU.
2424 *
2425 * @returns One of the following IPRT status codes:
2426 * @retval VINF_SUCCESS on success.
2427 * @retval VERR_IDT_FAILED.
2428 * @retval VERR_NO_MEMORY.
2429 * @param pDevExt The device extension.
2430 * @param pSession The session data.
2431 * @param pReq The request.
2432 */
2433static int supdrvIOCtl_IdtInstall(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPIDTINSTALL pReq)
2434{
2435 PSUPDRVPATCHUSAGE pUsagePre;
2436 PSUPDRVPATCH pPatchPre;
2437 RTIDTR Idtr;
2438 PSUPDRVPATCH pPatch;
2439 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2440 LogFlow(("supdrvIOCtl_IdtInstall\n"));
2441
2442 /*
2443 * Preallocate the usage entry for this CPU because we don't want
2444 * to do that while holding the spinlock!
2445 */
2446 pUsagePre = (PSUPDRVPATCHUSAGE)RTMemAlloc(sizeof(*pUsagePre));
2447 if (!pUsagePre)
2448 return VERR_NO_MEMORY;
2449
2450 /*
2451 * Take the spinlock and see what we need to do.
2452 */
2453 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2454
2455 /* check if we already got a free patch. */
2456 if (!pDevExt->pIdtPatchesFree)
2457 {
2458 /*
2459 * Allocate a patch - outside the spinlock of course.
2460 */
2461 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2462
2463 pPatchPre = (PSUPDRVPATCH)RTMemExecAlloc(sizeof(*pPatchPre));
2464 if (!pPatchPre)
2465 return VERR_NO_MEMORY;
2466
2467 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2468 }
2469 else
2470 {
2471 pPatchPre = pDevExt->pIdtPatchesFree;
2472 pDevExt->pIdtPatchesFree = pPatchPre->pNext;
2473 }
2474
2475 /* look for matching patch entry */
2476 ASMGetIDTR(&Idtr);
2477 pPatch = pDevExt->pIdtPatches;
2478 while (pPatch && pPatch->pvIdt != (void *)Idtr.pIdt)
2479 pPatch = pPatch->pNext;
2480
2481 if (!pPatch)
2482 {
2483 /*
2484 * Create patch.
2485 */
2486 pPatch = supdrvIdtPatchOne(pDevExt, pPatchPre);
2487 if (pPatch)
2488 pPatchPre = NULL; /* mark as used. */
2489 }
2490 else
2491 {
2492 /*
2493 * Simply increment patch usage.
2494 */
2495 pPatch->cUsage++;
2496 }
2497
2498 if (pPatch)
2499 {
2500 /*
2501 * Increment and add if need be the session usage record for this patch.
2502 */
2503 PSUPDRVPATCHUSAGE pUsage = pSession->pPatchUsage;
2504 while (pUsage && pUsage->pPatch != pPatch)
2505 pUsage = pUsage->pNext;
2506
2507 if (!pUsage)
2508 {
2509 /*
2510 * Add usage record.
2511 */
2512 pUsagePre->cUsage = 1;
2513 pUsagePre->pPatch = pPatch;
2514 pUsagePre->pNext = pSession->pPatchUsage;
2515 pSession->pPatchUsage = pUsagePre;
2516 pUsagePre = NULL; /* mark as used. */
2517 }
2518 else
2519 {
2520 /*
2521 * Increment usage count.
2522 */
2523 pUsage->cUsage++;
2524 }
2525 }
2526
2527 /* Stash any unused preallocated patch on the free list - we accumulate them for paranoid safety reasons. */
2528 if (pPatchPre)
2529 {
2530 pPatchPre->pNext = pDevExt->pIdtPatchesFree;
2531 pDevExt->pIdtPatchesFree = pPatchPre;
2532 }
2533
2534 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2535
2536 /*
2537 * Free unused preallocated buffers.
2538 */
2539 if (pUsagePre)
2540 RTMemFree(pUsagePre);
2541
2542 pReq->u.Out.u8Idt = pDevExt->u8Idt;
2543
2544 return pPatch ? VINF_SUCCESS : VERR_IDT_FAILED;
2545}
2546
2547
2548/**
2549 * This creates an IDT patch entry.
2550 * If it is the first patch being installed, it'll also determine the IDT entry
2551 * to use.
2552 *
2553 * @returns pPatch on success.
2554 * @returns NULL on failure.
2555 * @param pDevExt Pointer to globals.
2556 * @param pPatch Patch entry to use.
2557 * This will be linked into SUPDRVDEVEXT::pIdtPatches on
2558 * successful return.
2559 * @remark Caller must own the SUPDRVDEVEXT::Spinlock!
2560 */
2561static PSUPDRVPATCH supdrvIdtPatchOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2562{
2563 RTIDTR Idtr;
2564 PSUPDRVIDTE paIdt;
2565 LogFlow(("supdrvIdtPatchOne: pPatch=%p\n", pPatch));
2566
2567 /*
2568 * Get IDT.
2569 */
2570 ASMGetIDTR(&Idtr);
2571 paIdt = (PSUPDRVIDTE)Idtr.pIdt;
2572 /*
2573 * Recent Linux kernels can be configured for a 1G user / 3G kernel split, so only addresses below 1GB are rejected as bogus.
2574 */
2575 if ((uintptr_t)paIdt < 0x40000000)
2576 {
2577 AssertMsgFailed(("bad paIdt=%p\n", paIdt));
2578 return NULL;
2579 }
2580
2581 if (!pDevExt->u8Idt)
2582 {
2583 /*
2584 * Test out the alternatives.
2585 *
2586 * At the moment we do not support chaining thus we ASSUME that one of
2587 * these 48 entries is unused (which is not a problem on Win32 and
2588 * Linux to my knowledge).
2589 */
2590 /** @todo we MUST change this detection to try grab an entry which is NOT in use. This can be
2591 * combined with gathering info about which guest system call gates we can hook up directly. */
2592 unsigned i;
2593 uint8_t u8Idt = 0;
2594 static uint8_t au8Ints[] =
2595 {
2596#ifdef RT_OS_WINDOWS /* We don't use 0xef and above because they are system stuff on linux (0xef is IPI,
2597 * local apic timer, or some other frequently firing thing). */
2598 0xef, 0xee, 0xed, 0xec,
2599#endif
2600 0xeb, 0xea, 0xe9, 0xe8,
2601 0xdf, 0xde, 0xdd, 0xdc,
2602 0x7b, 0x7a, 0x79, 0x78,
2603 0xbf, 0xbe, 0xbd, 0xbc,
2604 };
2605#if defined(RT_ARCH_AMD64) && defined(DEBUG)
2606 static int s_iWobble = 0;
2607 unsigned iMax = !(s_iWobble++ % 2) ? 0x80 : 0x100;
2608 Log2(("IDT: Idtr=%p:%#x\n", (void *)Idtr.pIdt, (unsigned)Idtr.cbIdt));
2609 for (i = iMax - 0x80; i*16+15 < Idtr.cbIdt && i < iMax; i++)
2610 {
2611 Log2(("%#x: %04x:%08x%04x%04x P=%d DPL=%d IST=%d Type2=%#x u32Reserved=%#x u5Reserved=%#x\n",
2612 i, paIdt[i].u16SegSel, paIdt[i].u32OffsetTop, paIdt[i].u16OffsetHigh, paIdt[i].u16OffsetLow,
2613 paIdt[i].u1Present, paIdt[i].u2DPL, paIdt[i].u3IST, paIdt[i].u5Type2,
2614 paIdt[i].u32Reserved, paIdt[i].u5Reserved));
2615 }
2616#endif
2617 /* look for entries which are not present or otherwise unused. */
2618 for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
2619 {
2620 u8Idt = au8Ints[i];
2621 if ( u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
2622 && ( !paIdt[u8Idt].u1Present
2623 || paIdt[u8Idt].u5Type2 == 0))
2624 break;
2625 u8Idt = 0;
2626 }
2627 if (!u8Idt)
2628 {
2629 /* try again, look for a compatible entry. */
2630 for (i = 0; i < sizeof(au8Ints) / sizeof(au8Ints[0]); i++)
2631 {
2632 u8Idt = au8Ints[i];
2633 if ( u8Idt * sizeof(SUPDRVIDTE) < Idtr.cbIdt
2634 && paIdt[u8Idt].u1Present
2635 && paIdt[u8Idt].u5Type2 == SUPDRV_IDTE_TYPE2_INTERRUPT_GATE
2636 && !(paIdt[u8Idt].u16SegSel & 3))
2637 break;
2638 u8Idt = 0;
2639 }
2640 if (!u8Idt)
2641 {
2642 Log(("Failed to find an appropriate IDT entry!!\n"));
2643 return NULL;
2644 }
2645 }
2646 pDevExt->u8Idt = u8Idt;
2647 LogFlow(("supdrvIdtPatchOne: u8Idt=%x\n", u8Idt));
2648 }
2649
2650 /*
2651 * Prepare the patch
2652 */
2653 memset(pPatch, 0, sizeof(*pPatch));
2654 pPatch->pvIdt = paIdt;
2655 pPatch->cUsage = 1;
2656 pPatch->pIdtEntry = &paIdt[pDevExt->u8Idt];
2657 pPatch->SavedIdt = paIdt[pDevExt->u8Idt];
2658 pPatch->ChangedIdt.u16OffsetLow = (uint32_t)((uintptr_t)&pPatch->auCode[0] & 0xffff);
2659 pPatch->ChangedIdt.u16OffsetHigh = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 16);
2660#ifdef RT_ARCH_AMD64
2661 pPatch->ChangedIdt.u32OffsetTop = (uint32_t)((uintptr_t)&pPatch->auCode[0] >> 32);
2662#endif
2663 pPatch->ChangedIdt.u16SegSel = ASMGetCS();
2664#ifdef RT_ARCH_AMD64
2665 pPatch->ChangedIdt.u3IST = 0;
2666 pPatch->ChangedIdt.u5Reserved = 0;
2667#else /* x86 */
2668 pPatch->ChangedIdt.u5Reserved = 0;
2669 pPatch->ChangedIdt.u3Type1 = 0;
2670#endif /* x86 */
2671 pPatch->ChangedIdt.u5Type2 = SUPDRV_IDTE_TYPE2_INTERRUPT_GATE;
2672 pPatch->ChangedIdt.u2DPL = 3;
2673 pPatch->ChangedIdt.u1Present = 1;
2674
2675 /*
2676 * Generate the patch code.
2677 */
2678 {
2679#ifdef RT_ARCH_AMD64
2680 union
2681 {
2682 uint8_t *pb;
2683 uint32_t *pu32;
2684 uint64_t *pu64;
2685 } u, uFixJmp, uFixCall, uNotNested;
2686 u.pb = &pPatch->auCode[0];
2687
2688 /* check the cookie */
2689 *u.pb++ = 0x3d; // cmp eax, GLOBALCOOKIE
2690 *u.pu32++ = pDevExt->u32Cookie;
2691
2692 *u.pb++ = 0x74; // jz @VBoxCall
2693 *u.pb++ = 2;
2694
2695 /* jump to forwarder code. */
2696 *u.pb++ = 0xeb;
2697 uFixJmp = u;
2698 *u.pb++ = 0xfe;
2699
2700 // @VBoxCall:
2701 *u.pb++ = 0x0f; // swapgs
2702 *u.pb++ = 0x01;
2703 *u.pb++ = 0xf8;
2704
2705 /*
2706 * Call VMMR0Entry
2707 * We don't have to push the arguments here, but we have to
2708 * reserve some stack space for the interrupt forwarding.
2709 */
2710# ifdef RT_OS_WINDOWS
2711 *u.pb++ = 0x50; // push rax ; alignment filler.
2712 *u.pb++ = 0x41; // push r8 ; uArg
2713 *u.pb++ = 0x50;
2714 *u.pb++ = 0x52; // push rdx ; uOperation
2715 *u.pb++ = 0x51; // push rcx ; pVM
2716# else
2717 *u.pb++ = 0x51; // push rcx ; alignment filler.
2718 *u.pb++ = 0x52; // push rdx ; uArg
2719 *u.pb++ = 0x56; // push rsi ; uOperation
2720 *u.pb++ = 0x57; // push rdi ; pVM
2721# endif
2722
2723 *u.pb++ = 0xff; // call qword [pfnVMMR0EntryInt wrt rip]
2724 *u.pb++ = 0x15;
2725 uFixCall = u;
2726 *u.pu32++ = 0;
2727
2728 *u.pb++ = 0x48; // add rsp, 20h ; remove call frame.
2729 *u.pb++ = 0x81;
2730 *u.pb++ = 0xc4;
2731 *u.pu32++ = 0x20;
2732
2733 *u.pb++ = 0x0f; // swapgs
2734 *u.pb++ = 0x01;
2735 *u.pb++ = 0xf8;
2736
2737 /* Return to R3. */
2738 uNotNested = u;
2739 *u.pb++ = 0x48; // iretq
2740 *u.pb++ = 0xcf;
2741
2742 while ((uintptr_t)u.pb & 0x7) // align 8
2743 *u.pb++ = 0xcc;
2744
2745 /* Pointer to the VMMR0Entry. */ // pfnVMMR0EntryInt dq StubVMMR0Entry
2746 *uFixCall.pu32 = (uint32_t)(u.pb - uFixCall.pb - 4); uFixCall.pb = NULL;
2747 pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
2748 *u.pu64++ = pDevExt->pvVMMR0 ? (uint64_t)pDevExt->pfnVMMR0EntryInt : (uint64_t)u.pb + 8;
2749
2750 /* stub entry. */ // StubVMMR0Entry:
2751 pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
2752 *u.pb++ = 0x33; // xor eax, eax
2753 *u.pb++ = 0xc0;
2754
2755 *u.pb++ = 0x48; // dec rax
2756 *u.pb++ = 0xff;
2757 *u.pb++ = 0xc8;
2758
2759 *u.pb++ = 0xc3; // ret
2760
2761 /* forward to the original handler using a retf. */
2762 *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1); uFixJmp.pb = NULL;
2763
2764 *u.pb++ = 0x68; // push <target cs>
2765 *u.pu32++ = !pPatch->SavedIdt.u5Type2 ? ASMGetCS() : pPatch->SavedIdt.u16SegSel;
2766
2767 *u.pb++ = 0x68; // push <low target rip>
2768 *u.pu32++ = !pPatch->SavedIdt.u5Type2
2769 ? (uint32_t)(uintptr_t)uNotNested.pb
2770 : (uint32_t)pPatch->SavedIdt.u16OffsetLow
2771 | (uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16;
2772
2773 *u.pb++ = 0xc7; // mov dword [rsp + 4], <high target rip>
2774 *u.pb++ = 0x44;
2775 *u.pb++ = 0x24;
2776 *u.pb++ = 0x04;
2777 *u.pu32++ = !pPatch->SavedIdt.u5Type2
2778 ? (uint32_t)((uint64_t)uNotNested.pb >> 32)
2779 : pPatch->SavedIdt.u32OffsetTop;
2780
2781 *u.pb++ = 0x48; // retf ; does this require prefix?
2782 *u.pb++ = 0xcb;
2783
2784#else /* RT_ARCH_X86 */
2785
2786 union
2787 {
2788 uint8_t *pb;
2789 uint16_t *pu16;
2790 uint32_t *pu32;
2791 } u, uFixJmpNotNested, uFixJmp, uFixCall, uNotNested;
2792 u.pb = &pPatch->auCode[0];
2793
2794 /* check the cookie */
2795 *u.pb++ = 0x81; // cmp esi, GLOBALCOOKIE
2796 *u.pb++ = 0xfe;
2797 *u.pu32++ = pDevExt->u32Cookie;
2798
2799 *u.pb++ = 0x74; // jz VBoxCall
2800 uFixJmp = u;
2801 *u.pb++ = 0;
2802
2803 /* jump (far) to the original handler / not-nested-stub. */
2804 *u.pb++ = 0xea; // jmp far NotNested
2805 uFixJmpNotNested = u;
2806 *u.pu32++ = 0;
2807 *u.pu16++ = 0;
2808
2809 /* save selector registers. */ // VBoxCall:
2810 *uFixJmp.pb = (uint8_t)(u.pb - uFixJmp.pb - 1);
2811 *u.pb++ = 0x0f; // push fs
2812 *u.pb++ = 0xa0;
2813
2814 *u.pb++ = 0x1e; // push ds
2815
2816 *u.pb++ = 0x06; // push es
2817
2818 /* call frame */
2819 *u.pb++ = 0x51; // push ecx
2820
2821 *u.pb++ = 0x52; // push edx
2822
2823 *u.pb++ = 0x50; // push eax
2824
2825 /* load ds, es and perhaps fs before call. */
2826 *u.pb++ = 0xb8; // mov eax, KernelDS
2827 *u.pu32++ = ASMGetDS();
2828
2829 *u.pb++ = 0x8e; // mov ds, eax
2830 *u.pb++ = 0xd8;
2831
2832 *u.pb++ = 0x8e; // mov es, eax
2833 *u.pb++ = 0xc0;
2834
2835#ifdef RT_OS_WINDOWS
2836 *u.pb++ = 0xb8; // mov eax, KernelFS
2837 *u.pu32++ = ASMGetFS();
2838
2839 *u.pb++ = 0x8e; // mov fs, eax
2840 *u.pb++ = 0xe0;
2841#endif
2842
2843 /* do the call. */
2844 *u.pb++ = 0xe8; // call _VMMR0Entry / StubVMMR0Entry
2845 uFixCall = u;
2846 pPatch->offVMMR0EntryFixup = (uint16_t)(u.pb - &pPatch->auCode[0]);
2847 *u.pu32++ = 0xfffffffb;
2848
2849 *u.pb++ = 0x83; // add esp, 0ch ; cdecl
2850 *u.pb++ = 0xc4;
2851 *u.pb++ = 0x0c;
2852
2853 /* restore selector registers. */
2854 *u.pb++ = 0x07; // pop es
2855
2856 *u.pb++ = 0x1f; // pop ds
2857
2858 *u.pb++ = 0x0f; // pop fs
2859 *u.pb++ = 0xa1;
2860
2861 uNotNested = u; // NotNested:
2862 *u.pb++ = 0xcf; // iretd
2863
2864 /* the stub VMMR0Entry. */ // StubVMMR0Entry:
2865 pPatch->offStub = (uint16_t)(u.pb - &pPatch->auCode[0]);
2866 *u.pb++ = 0x33; // xor eax, eax
2867 *u.pb++ = 0xc0;
2868
2869 *u.pb++ = 0x48; // dec eax
2870
2871 *u.pb++ = 0xc3; // ret
2872
2873 /* Fixup the VMMR0Entry call. */
2874 if (pDevExt->pvVMMR0)
2875 *uFixCall.pu32 = (uint32_t)pDevExt->pfnVMMR0EntryInt - (uint32_t)(uFixCall.pu32 + 1);
2876 else
2877 *uFixCall.pu32 = (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)(uFixCall.pu32 + 1);
2878
2879 /* Fixup the forward / nested far jump. */
2880 if (!pPatch->SavedIdt.u5Type2)
2881 {
2882 *uFixJmpNotNested.pu32++ = (uint32_t)uNotNested.pb;
2883 *uFixJmpNotNested.pu16++ = ASMGetCS();
2884 }
2885 else
2886 {
2887 *uFixJmpNotNested.pu32++ = ((uint32_t)pPatch->SavedIdt.u16OffsetHigh << 16) | pPatch->SavedIdt.u16OffsetLow;
2888 *uFixJmpNotNested.pu16++ = pPatch->SavedIdt.u16SegSel;
2889 }
2890#endif /* RT_ARCH_X86 */
2891 Assert(u.pb <= &pPatch->auCode[sizeof(pPatch->auCode)]);
2892#if 0
2893 /* dump the patch code */
2894 Log2(("patch code: %p\n", &pPatch->auCode[0]));
2895 for (uFixCall.pb = &pPatch->auCode[0]; uFixCall.pb < u.pb; uFixCall.pb++)
2896 Log2(("0x%02x,\n", *uFixCall.pb));
2897#endif
2898 }
2899
2900 /*
2901 * Install the patch.
2902 */
2903 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->ChangedIdt);
2904 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The stupid change code didn't work!!!!!\n"));
2905
2906 /*
2907 * Link in the patch.
2908 */
2909 pPatch->pNext = pDevExt->pIdtPatches;
2910 pDevExt->pIdtPatches = pPatch;
2911
2912 return pPatch;
2913}
2914
2915
2916/**
2917 * Removes the session's IDT references.
2918 * This will uninstall our IDT patch if it is left unreferenced.
2919 *
2920 * @returns VINF_SUCCESS.
2921 * @param pDevExt Device globals.
2922 * @param pSession Session data.
2923 */
2924static int supdrvIOCtl_IdtRemoveAll(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
2925{
2926 PSUPDRVPATCHUSAGE pUsage;
2927 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
2928 LogFlow(("supdrvIOCtl_IdtRemoveAll: pSession=%p\n", pSession));
2929
2930 /*
2931 * Take the spinlock.
2932 */
2933 RTSpinlockAcquireNoInts(pDevExt->Spinlock, &SpinlockTmp);
2934
2935 /*
2936 * Walk usage list, removing patches as their usage count reaches zero.
2937 */
2938 pUsage = pSession->pPatchUsage;
2939 while (pUsage)
2940 {
2941 if (pUsage->pPatch->cUsage <= pUsage->cUsage)
2942 supdrvIdtRemoveOne(pDevExt, pUsage->pPatch);
2943 else
2944 pUsage->pPatch->cUsage -= pUsage->cUsage;
2945
2946 /* next */
2947 pUsage = pUsage->pNext;
2948 }
2949
2950 /*
2951 * Empty the usage chain and we're done inside the spinlock.
2952 */
2953 pUsage = pSession->pPatchUsage;
2954 pSession->pPatchUsage = NULL;
2955
2956 RTSpinlockReleaseNoInts(pDevExt->Spinlock, &SpinlockTmp);
2957
2958 /*
2959 * Free usage entries.
2960 */
2961 while (pUsage)
2962 {
2963 void *pvToFree = pUsage;
2964 pUsage->cUsage = 0;
2965 pUsage->pPatch = NULL;
2966 pUsage = pUsage->pNext;
2967 RTMemFree(pvToFree);
2968 }
2969
2970 return VINF_SUCCESS;
2971}
2972
2973
2974/**
2975 * Remove one patch.
2976 *
2977 * Worker for supdrvIOCtl_IdtRemoveAll.
2978 *
2979 * @param pDevExt Device globals.
2980 * @param pPatch Patch entry to remove.
2981 * @remark Caller must own SUPDRVDEVEXT::Spinlock!
2982 */
2983static void supdrvIdtRemoveOne(PSUPDRVDEVEXT pDevExt, PSUPDRVPATCH pPatch)
2984{
2985 LogFlow(("supdrvIdtRemoveOne: pPatch=%p\n", pPatch));
2986
2987 pPatch->cUsage = 0;
2988
2989 /*
2990 * If the IDT entry was changed it has to stick around forever!
2991 * We will attempt to free it again later; perhaps next time we'll succeed :-)
2992 */
2993 if (memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)))
2994 {
2995 AssertMsgFailed(("The hijacked IDT entry has CHANGED!!!\n"));
2996 return;
2997 }
2998
2999 /*
3000 * Unlink it.
3001 */
3002 if (pDevExt->pIdtPatches != pPatch)
3003 {
3004 PSUPDRVPATCH pPatchPrev = pDevExt->pIdtPatches;
3005 while (pPatchPrev)
3006 {
3007 if (pPatchPrev->pNext == pPatch)
3008 {
3009 pPatchPrev->pNext = pPatch->pNext;
3010 break;
3011 }
3012 pPatchPrev = pPatchPrev->pNext;
3013 }
3014 Assert(pPatchPrev); /* the patch must be in the list */
3015 }
3016 else
3017 pDevExt->pIdtPatches = pPatch->pNext;
3018 pPatch->pNext = NULL;
3019
3020
3021 /*
3022 * Verify and restore the IDT.
3023 */
3024 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->ChangedIdt, sizeof(pPatch->ChangedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
3025 supdrvIdtWrite(pPatch->pIdtEntry, &pPatch->SavedIdt);
3026 AssertMsg(!memcmp((void *)pPatch->pIdtEntry, &pPatch->SavedIdt, sizeof(pPatch->SavedIdt)), ("The hijacked IDT entry has CHANGED!!!\n"));
3027
3028 /*
3029 * Put it in the free list.
3030 * (This free list stuff is to calm my paranoia.)
3031 */
3032 pPatch->pvIdt = NULL;
3033 pPatch->pIdtEntry = NULL;
3034
3035 pPatch->pNext = pDevExt->pIdtPatchesFree;
3036 pDevExt->pIdtPatchesFree = pPatch;
3037}
3038
3039
3040/**
3041 * Write to an IDT entry.
3042 *
3043 * @param pvIdtEntry Where to write.
3044 * @param pNewIDTEntry What to write.
3045 */
3046static void supdrvIdtWrite(volatile void *pvIdtEntry, const SUPDRVIDTE *pNewIDTEntry)
3047{
3048 RTR0UINTREG uCR0;
3049 RTR0UINTREG uFlags;
3050
3051 /*
3052 * On SMP machines (P4 hyperthreading included) we must perform a
3053 * 64-bit locked write when updating the IDT entry.
3054 *
3055 * The F00F bugfix for linux (and probably other OSes) causes
3056 * the IDT to be pointing to a read-only mapping. We get around that
3057 * by temporarily turning off WP. Since we're inside a spinlock at this
3058 * point, interrupts are disabled and there isn't any way the WP bit
3059 * flipping can cause any trouble.
3060 */
3061
3062 /* Save & Clear interrupt flag; Save & clear WP. */
3063 uFlags = ASMGetFlags();
3064 ASMSetFlags(uFlags & ~(RTR0UINTREG)(1 << 9)); /*X86_EFL_IF*/
3065 Assert(!(ASMGetFlags() & (1 << 9)));
3066 uCR0 = ASMGetCR0();
3067 ASMSetCR0(uCR0 & ~(RTR0UINTREG)(1 << 16)); /*X86_CR0_WP*/
3068
3069 /* Update IDT Entry */
3070#ifdef RT_ARCH_AMD64
3071 ASMAtomicXchgU128((volatile uint128_t *)pvIdtEntry, *(uint128_t *)(uintptr_t)pNewIDTEntry);
3072#else
3073 ASMAtomicXchgU64((volatile uint64_t *)pvIdtEntry, *(uint64_t *)(uintptr_t)pNewIDTEntry);
3074#endif
3075
3076 /* Restore CR0 & Flags */
3077 ASMSetCR0(uCR0);
3078 ASMSetFlags(uFlags);
3079}
3080#endif /* VBOX_WITH_IDT_PATCHING */
3081
3082
3083/**
3084 * Opens an image. If it's the first time the image is opened, the caller must
3085 * upload the bits using the supdrvIOCtl_LdrLoad() / SUP_IOCTL_LDR_LOAD function.
3086 *
3087 * This is the 1st step of the loading.
3088 *
3089 * @returns IPRT status code.
3090 * @param pDevExt Device globals.
3091 * @param pSession Session data.
3092 * @param pReq The open request.
3093 */
3094static int supdrvIOCtl_LdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDROPEN pReq)
3095{
3096 PSUPDRVLDRIMAGE pImage;
3097 unsigned cb;
3098 void *pv;
3099 LogFlow(("supdrvIOCtl_LdrOpen: szName=%s cbImage=%d\n", pReq->u.In.szName, pReq->u.In.cbImage));
3100
3101 /*
3102 * Check if we got an instance of the image already.
3103 */
3104 RTSemFastMutexRequest(pDevExt->mtxLdr);
3105 for (pImage = pDevExt->pLdrImages; pImage; pImage = pImage->pNext)
3106 {
3107 if (!strcmp(pImage->szName, pReq->u.In.szName))
3108 {
3109 pImage->cUsage++;
3110 pReq->u.Out.pvImageBase = pImage->pvImage;
3111 pReq->u.Out.fNeedsLoading = pImage->uState == SUP_IOCTL_LDR_OPEN;
3112 supdrvLdrAddUsage(pSession, pImage);
3113 RTSemFastMutexRelease(pDevExt->mtxLdr);
3114 return VINF_SUCCESS;
3115 }
3116 }
3117 /* (not found - add it!) */
3118
3119 /*
3120 * Allocate memory.
3121 */
3122 cb = pReq->u.In.cbImage + sizeof(SUPDRVLDRIMAGE) + 31;
3123 pv = RTMemExecAlloc(cb);
3124 if (!pv)
3125 {
3126 RTSemFastMutexRelease(pDevExt->mtxLdr);
3127 Log(("supdrvIOCtl_LdrOpen: RTMemExecAlloc(%u) failed\n", cb));
3128 return VERR_NO_MEMORY;
3129 }
3130
3131 /*
3132 * Setup and link in the LDR stuff.
3133 */
3134 pImage = (PSUPDRVLDRIMAGE)pv;
3135 pImage->pvImage = RT_ALIGN_P(pImage + 1, 32);
3136 pImage->cbImage = pReq->u.In.cbImage;
3137 pImage->pfnModuleInit = NULL;
3138 pImage->pfnModuleTerm = NULL;
3139 pImage->uState = SUP_IOCTL_LDR_OPEN;
3140 pImage->cUsage = 1;
3141 strcpy(pImage->szName, pReq->u.In.szName);
3142
3143 pImage->pNext = pDevExt->pLdrImages;
3144 pDevExt->pLdrImages = pImage;
3145
3146 supdrvLdrAddUsage(pSession, pImage);
3147
3148 pReq->u.Out.pvImageBase = pImage->pvImage;
3149 pReq->u.Out.fNeedsLoading = true;
3150 RTSemFastMutexRelease(pDevExt->mtxLdr);
3151 return VINF_SUCCESS;
3152}
3153
3154
3155/**
3156 * Loads the image bits.
3157 *
3158 * This is the 2nd step of the loading.
3159 *
3160 * @returns IPRT status code.
3161 * @param pDevExt Device globals.
3162 * @param pSession Session data.
3163 * @param pReq The request.
3164 */
3165static int supdrvIOCtl_LdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRLOAD pReq)
3166{
3167 PSUPDRVLDRUSAGE pUsage;
3168 PSUPDRVLDRIMAGE pImage;
3169 int rc;
3170 LogFlow(("supdrvIOCtl_LdrLoad: pvImageBase=%p cbImage=%d\n", pReq->u.In.pvImageBase, pReq->u.In.cbImage));
3171
3172 /*
3173 * Find the ldr image.
3174 */
3175 RTSemFastMutexRequest(pDevExt->mtxLdr);
3176 pUsage = pSession->pLdrUsage;
3177 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3178 pUsage = pUsage->pNext;
3179 if (!pUsage)
3180 {
3181 RTSemFastMutexRelease(pDevExt->mtxLdr);
3182 Log(("SUP_IOCTL_LDR_LOAD: couldn't find image!\n"));
3183 return VERR_INVALID_HANDLE;
3184 }
3185 pImage = pUsage->pImage;
3186 if (pImage->cbImage != pReq->u.In.cbImage)
3187 {
3188 RTSemFastMutexRelease(pDevExt->mtxLdr);
3189 Log(("SUP_IOCTL_LDR_LOAD: image size mismatch!! %d(prep) != %d(load)\n", pImage->cbImage, pReq->u.In.cbImage));
3190 return VERR_INVALID_HANDLE;
3191 }
3192 if (pImage->uState != SUP_IOCTL_LDR_OPEN)
3193 {
3194 unsigned uState = pImage->uState;
3195 RTSemFastMutexRelease(pDevExt->mtxLdr);
3196 if (uState != SUP_IOCTL_LDR_LOAD)
3197 AssertMsgFailed(("SUP_IOCTL_LDR_LOAD: invalid image state %d (%#x)!\n", uState, uState));
3198 return SUPDRV_ERR_ALREADY_LOADED;
3199 }
3200 switch (pReq->u.In.eEPType)
3201 {
3202 case SUPLDRLOADEP_NOTHING:
3203 break;
3204 case SUPLDRLOADEP_VMMR0:
3205 if ( !pReq->u.In.EP.VMMR0.pvVMMR0
3206 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryInt
3207 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryFast
3208 || !pReq->u.In.EP.VMMR0.pvVMMR0EntryEx)
3209 {
3210 RTSemFastMutexRelease(pDevExt->mtxLdr);
3211 Log(("NULL pointer: pvVMMR0=%p pvVMMR0EntryInt=%p pvVMMR0EntryFast=%p pvVMMR0EntryEx=%p!\n",
3212 pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3213 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
3214 return VERR_INVALID_PARAMETER;
3215 }
3216 if ( (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryInt - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
3217 || (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryFast - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage
3218 || (uintptr_t)pReq->u.In.EP.VMMR0.pvVMMR0EntryEx - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3219 {
3220 RTSemFastMutexRelease(pDevExt->mtxLdr);
3221 Log(("Out of range (%p LB %#x): pvVMMR0EntryInt=%p, pvVMMR0EntryFast=%p or pvVMMR0EntryEx=%p!\n",
3222 pImage->pvImage, pReq->u.In.cbImage, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3223 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx));
3224 return VERR_INVALID_PARAMETER;
3225 }
3226 break;
3227 default:
3228 RTSemFastMutexRelease(pDevExt->mtxLdr);
3229 Log(("Invalid eEPType=%d\n", pReq->u.In.eEPType));
3230 return VERR_INVALID_PARAMETER;
3231 }
3232 if ( pReq->u.In.pfnModuleInit
3233 && (uintptr_t)pReq->u.In.pfnModuleInit - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3234 {
3235 RTSemFastMutexRelease(pDevExt->mtxLdr);
3236 Log(("SUP_IOCTL_LDR_LOAD: pfnModuleInit=%p is outside the image (%p %d bytes)\n",
3237 pReq->u.In.pfnModuleInit, pImage->pvImage, pReq->u.In.cbImage));
3238 return VERR_INVALID_PARAMETER;
3239 }
3240 if ( pReq->u.In.pfnModuleTerm
3241 && (uintptr_t)pReq->u.In.pfnModuleTerm - (uintptr_t)pImage->pvImage >= pReq->u.In.cbImage)
3242 {
3243 RTSemFastMutexRelease(pDevExt->mtxLdr);
3244 Log(("SUP_IOCTL_LDR_LOAD: pfnModuleTerm=%p is outside the image (%p %d bytes)\n",
3245 pReq->u.In.pfnModuleTerm, pImage->pvImage, pReq->u.In.cbImage));
3246 return VERR_INVALID_PARAMETER;
3247 }
3248
3249 /*
3250 * Copy the memory.
3251 */
3252 /* no need to do try/except as this is a buffered request. */
3253 memcpy(pImage->pvImage, &pReq->u.In.achImage[0], pImage->cbImage);
3254 pImage->uState = SUP_IOCTL_LDR_LOAD;
3255 pImage->pfnModuleInit = pReq->u.In.pfnModuleInit;
3256 pImage->pfnModuleTerm = pReq->u.In.pfnModuleTerm;
3257 pImage->offSymbols = pReq->u.In.offSymbols;
3258 pImage->cSymbols = pReq->u.In.cSymbols;
3259 pImage->offStrTab = pReq->u.In.offStrTab;
3260 pImage->cbStrTab = pReq->u.In.cbStrTab;
3261
3262 /*
3263 * Update any entry points.
3264 */
3265 switch (pReq->u.In.eEPType)
3266 {
3267 default:
3268 case SUPLDRLOADEP_NOTHING:
3269 rc = VINF_SUCCESS;
3270 break;
3271 case SUPLDRLOADEP_VMMR0:
3272 rc = supdrvLdrSetR0EP(pDevExt, pReq->u.In.EP.VMMR0.pvVMMR0, pReq->u.In.EP.VMMR0.pvVMMR0EntryInt,
3273 pReq->u.In.EP.VMMR0.pvVMMR0EntryFast, pReq->u.In.EP.VMMR0.pvVMMR0EntryEx);
3274 break;
3275 }
3276
3277 /*
3278 * On success call the module initialization.
3279 */
3280 LogFlow(("supdrvIOCtl_LdrLoad: pfnModuleInit=%p\n", pImage->pfnModuleInit));
3281 if (RT_SUCCESS(rc) && pImage->pfnModuleInit)
3282 {
3283 Log(("supdrvIOCtl_LdrLoad: calling pfnModuleInit=%p\n", pImage->pfnModuleInit));
3284 rc = pImage->pfnModuleInit();
3285 if (rc && pDevExt->pvVMMR0 == pImage->pvImage)
3286 supdrvLdrUnsetR0EP(pDevExt);
3287 }
3288
3289 if (rc)
3290 pImage->uState = SUP_IOCTL_LDR_OPEN;
3291
3292 RTSemFastMutexRelease(pDevExt->mtxLdr);
3293 return rc;
3294}
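
/*
 * For orientation, the ring-3 side drives the two loader steps roughly like
 * this (a hedged sketch, not the actual SUPLib code; supIoCtl() is a
 * hypothetical helper wrapping the platform specific ioctl plumbing):
 *
 * @code
 *      SUPLDROPEN OpenReq;
 *      strcpy(OpenReq.u.In.szName, "VMMR0.r0");   // hypothetical module name
 *      OpenReq.u.In.cbImage = cbImage;
 *      rc = supIoCtl(SUP_IOCTL_LDR_OPEN, &OpenReq);      // -> supdrvIOCtl_LdrOpen
 *      if (RT_SUCCESS(rc) && OpenReq.u.Out.fNeedsLoading)
 *          rc = supIoCtl(SUP_IOCTL_LDR_LOAD, pLoadReq);  // -> supdrvIOCtl_LdrLoad
 * @endcode
 */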
3295
3296
3297/**
3298 * Frees a previously loaded (prep'ed) image.
3299 *
3300 * @returns IPRT status code.
3301 * @param pDevExt Device globals.
3302 * @param pSession Session data.
3303 * @param pReq The request.
3304 */
3305static int supdrvIOCtl_LdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRFREE pReq)
3306{
3307 int rc;
3308 PSUPDRVLDRUSAGE pUsagePrev;
3309 PSUPDRVLDRUSAGE pUsage;
3310 PSUPDRVLDRIMAGE pImage;
3311 LogFlow(("supdrvIOCtl_LdrFree: pvImageBase=%p\n", pReq->u.In.pvImageBase));
3312
3313 /*
3314 * Find the ldr image.
3315 */
3316 RTSemFastMutexRequest(pDevExt->mtxLdr);
3317 pUsagePrev = NULL;
3318 pUsage = pSession->pLdrUsage;
3319 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3320 {
3321 pUsagePrev = pUsage;
3322 pUsage = pUsage->pNext;
3323 }
3324 if (!pUsage)
3325 {
3326 RTSemFastMutexRelease(pDevExt->mtxLdr);
3327 Log(("SUP_IOCTL_LDR_FREE: couldn't find image!\n"));
3328 return VERR_INVALID_HANDLE;
3329 }
3330
3331 /*
3332 * Check if we can remove anything.
3333 */
3334 rc = VINF_SUCCESS;
3335 pImage = pUsage->pImage;
3336 if (pImage->cUsage <= 1 || pUsage->cUsage <= 1)
3337 {
3338 /*
3339 * Check if there are any objects with destructors in the image, if
3340 * so leave it for the session cleanup routine so we get a chance to
3341 * clean things up in the right order and not leave them all dangling.
3342 */
3343 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
3344 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
3345 if (pImage->cUsage <= 1)
3346 {
3347 PSUPDRVOBJ pObj;
3348 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
3349 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3350 {
3351 rc = VERR_SHARING_VIOLATION; /** @todo VERR_DANGLING_OBJECTS */
3352 break;
3353 }
3354 }
3355 else
3356 {
3357 PSUPDRVUSAGE pGenUsage;
3358 for (pGenUsage = pSession->pUsage; pGenUsage; pGenUsage = pGenUsage->pNext)
3359 if (RT_UNLIKELY((uintptr_t)pGenUsage->pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3360 {
3361 rc = VERR_SHARING_VIOLATION; /** @todo VERR_DANGLING_OBJECTS */
3362 break;
3363 }
3364 }
3365 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
3366 if (rc == VINF_SUCCESS)
3367 {
3368 /* unlink it */
3369 if (pUsagePrev)
3370 pUsagePrev->pNext = pUsage->pNext;
3371 else
3372 pSession->pLdrUsage = pUsage->pNext;
3373
3374 /* free it */
3375 pUsage->pImage = NULL;
3376 pUsage->pNext = NULL;
3377 RTMemFree(pUsage);
3378
3379 /*
3380 * Dereference the image.
3381 */
3382 if (pImage->cUsage <= 1)
3383 supdrvLdrFree(pDevExt, pImage);
3384 else
3385 pImage->cUsage--;
3386 }
3387 else
3388 Log(("supdrvIOCtl_LdrFree: Dangling objects in %p/%s!\n", pImage->pvImage, pImage->szName));
3389 }
3390 else
3391 {
3392 /*
3393 * Dereference both image and usage.
3394 */
3395 pImage->cUsage--;
3396 pUsage->cUsage--;
3397 }
3398
3399 RTSemFastMutexRelease(pDevExt->mtxLdr);
3400 return VINF_SUCCESS;
3401}
3402
3403
3404/**
3405 * Gets the address of a symbol in an open image.
3406 *
3407 * @returns IPRT status code.
3409 * @param pDevExt Device globals.
3410 * @param pSession Session data.
3411 * @param pReq The request buffer.
3412 */
3413static int supdrvIOCtl_LdrGetSymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPLDRGETSYMBOL pReq)
3414{
3415 PSUPDRVLDRIMAGE pImage;
3416 PSUPDRVLDRUSAGE pUsage;
3417 uint32_t i;
3418 PSUPLDRSYM paSyms;
3419 const char *pchStrings;
3420 const size_t cbSymbol = strlen(pReq->u.In.szSymbol) + 1;
3421 void *pvSymbol = NULL;
3422 int rc = VERR_GENERAL_FAILURE;
3423 Log3(("supdrvIOCtl_LdrGetSymbol: pvImageBase=%p szSymbol=\"%s\"\n", pReq->u.In.pvImageBase, pReq->u.In.szSymbol));
3424
3425 /*
3426 * Find the ldr image.
3427 */
3428 RTSemFastMutexRequest(pDevExt->mtxLdr);
3429 pUsage = pSession->pLdrUsage;
3430 while (pUsage && pUsage->pImage->pvImage != pReq->u.In.pvImageBase)
3431 pUsage = pUsage->pNext;
3432 if (!pUsage)
3433 {
3434 RTSemFastMutexRelease(pDevExt->mtxLdr);
3435 Log(("SUP_IOCTL_LDR_GET_SYMBOL: couldn't find image!\n"));
3436 return VERR_INVALID_HANDLE;
3437 }
3438 pImage = pUsage->pImage;
3439 if (pImage->uState != SUP_IOCTL_LDR_LOAD)
3440 {
3441 unsigned uState = pImage->uState;
3442 RTSemFastMutexRelease(pDevExt->mtxLdr);
3443 Log(("SUP_IOCTL_LDR_GET_SYMBOL: invalid image state %d (%#x)!\n", uState, uState)); NOREF(uState);
3444 return VERR_ALREADY_LOADED;
3445 }
3446
3447 /*
3448 * Search the symbol string.
3449 */
3450 pchStrings = (const char *)((uint8_t *)pImage->pvImage + pImage->offStrTab);
3451 paSyms = (PSUPLDRSYM)((uint8_t *)pImage->pvImage + pImage->offSymbols);
3452 for (i = 0; i < pImage->cSymbols; i++)
3453 {
3454 if ( paSyms[i].offSymbol < pImage->cbImage /* paranoia */
3455 && paSyms[i].offName + cbSymbol <= pImage->cbStrTab
3456 && !memcmp(pchStrings + paSyms[i].offName, pReq->u.In.szSymbol, cbSymbol))
3457 {
3458 pvSymbol = (uint8_t *)pImage->pvImage + paSyms[i].offSymbol;
3459 rc = VINF_SUCCESS;
3460 break;
3461 }
3462 }
3463 RTSemFastMutexRelease(pDevExt->mtxLdr);
3464 pReq->u.Out.pvSymbol = pvSymbol;
3465 return rc;
3466}
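
/*
 * Illustration of the symbol/string tables searched above. The offsets live
 * inside the image copied in by supdrvIOCtl_LdrLoad(); the concrete names
 * and numbers below are made up:
 *
 *      pvImage + offSymbols:  SUPLDRSYM[0] = { offName: 0,  offSymbol: 0x1000 }
 *                             SUPLDRSYM[1] = { offName: 11, offSymbol: 0x1240 }
 *      pvImage + offStrTab:   "ModuleInit\0ModuleTerm\0"
 *
 * A lookup of "ModuleTerm" thus returns pvImage + 0x1240 in pvSymbol.
 */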
3467
3468
3469/**
3470 * Updates the IDT patches to point to the specified VMM R0 entry
3471 * point (i.e. VMMR0Enter()).
3472 *
3473 * @returns IPRT status code.
3474 * @param pDevExt Device globals.
3475 * @param pvVMMR0 VMMR0 image handle.
3477 * @param pvVMMR0EntryInt VMMR0EntryInt address.
3478 * @param pvVMMR0EntryFast VMMR0EntryFast address.
3479 * @param pvVMMR0EntryEx VMMR0EntryEx address.
3480 * @remark Caller must own the loader mutex.
3481 */
3482static int supdrvLdrSetR0EP(PSUPDRVDEVEXT pDevExt, void *pvVMMR0, void *pvVMMR0EntryInt, void *pvVMMR0EntryFast, void *pvVMMR0EntryEx)
3483{
3484 int rc = VINF_SUCCESS;
3485 LogFlow(("supdrvLdrSetR0EP pvVMMR0=%p pvVMMR0EntryInt=%p\n", pvVMMR0, pvVMMR0EntryInt));
3486
3488 /*
3489 * Check if not yet set.
3490 */
3491 if (!pDevExt->pvVMMR0)
3492 {
3493#ifdef VBOX_WITH_IDT_PATCHING
3494 PSUPDRVPATCH pPatch;
3495#endif
3496
3497 /*
3498 * Set it and update IDT patch code.
3499 */
3500 pDevExt->pvVMMR0 = pvVMMR0;
3501 pDevExt->pfnVMMR0EntryInt = pvVMMR0EntryInt;
3502 pDevExt->pfnVMMR0EntryFast = pvVMMR0EntryFast;
3503 pDevExt->pfnVMMR0EntryEx = pvVMMR0EntryEx;
3504#ifdef VBOX_WITH_IDT_PATCHING
3505 for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
3506 {
3507# ifdef RT_ARCH_AMD64
3508 ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup], (uint64_t)pvVMMR0);
3509# else /* RT_ARCH_X86 */
3510 ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3511 (uint32_t)pvVMMR0 - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
3512# endif
3513 }
3514#endif /* VBOX_WITH_IDT_PATCHING */
3515 }
3516 else
3517 {
3518 /*
3519 * Return failure or success depending on whether the values match or not.
3520 */
3521 if ( pDevExt->pvVMMR0 != pvVMMR0
3522 || (void *)pDevExt->pfnVMMR0EntryInt != pvVMMR0EntryInt
3523 || (void *)pDevExt->pfnVMMR0EntryFast != pvVMMR0EntryFast
3524 || (void *)pDevExt->pfnVMMR0EntryEx != pvVMMR0EntryEx)
3525 {
3526 AssertMsgFailed(("SUP_IOCTL_LDR_SETR0EP: Already set pointing to a different module!\n"));
3527 rc = VERR_INVALID_PARAMETER;
3528 }
3529 }
3530 return rc;
3531}
3532
3533
3534/**
3535 * Unsets the R0 entry point installed by supdrvLdrSetR0EP.
3536 *
3537 * @param pDevExt Device globals.
3538 */
3539static void supdrvLdrUnsetR0EP(PSUPDRVDEVEXT pDevExt)
3540{
3541#ifdef VBOX_WITH_IDT_PATCHING
3542 PSUPDRVPATCH pPatch;
3543#endif
3544
3545 pDevExt->pvVMMR0 = NULL;
3546 pDevExt->pfnVMMR0EntryInt = NULL;
3547 pDevExt->pfnVMMR0EntryFast = NULL;
3548 pDevExt->pfnVMMR0EntryEx = NULL;
3549
3550#ifdef VBOX_WITH_IDT_PATCHING
3551 for (pPatch = pDevExt->pIdtPatches; pPatch; pPatch = pPatch->pNext)
3552 {
3553# ifdef RT_ARCH_AMD64
3554 ASMAtomicXchgU64((volatile uint64_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3555 (uint64_t)&pPatch->auCode[pPatch->offStub]);
3556# else /* RT_ARCH_X86 */
3557 ASMAtomicXchgU32((volatile uint32_t *)&pPatch->auCode[pPatch->offVMMR0EntryFixup],
3558 (uint32_t)&pPatch->auCode[pPatch->offStub] - (uint32_t)&pPatch->auCode[pPatch->offVMMR0EntryFixup + 4]);
3559# endif
3560 }
3561#endif /* VBOX_WITH_IDT_PATCHING */
3562}
3563
3564
3565/**
3566 * Adds a usage reference in the specified session of an image.
3567 *
3568 * @param pSession Session in question.
3569 * @param pImage Image which the session is using.
3570 */
3571static void supdrvLdrAddUsage(PSUPDRVSESSION pSession, PSUPDRVLDRIMAGE pImage)
3572{
3573 PSUPDRVLDRUSAGE pUsage;
3574 LogFlow(("supdrvLdrAddUsage: pImage=%p\n", pImage));
3575
3576 /*
3577 * Referenced it already?
3578 */
3579 pUsage = pSession->pLdrUsage;
3580 while (pUsage)
3581 {
3582 if (pUsage->pImage == pImage)
3583 {
3584 pUsage->cUsage++;
3585 return;
3586 }
3587 pUsage = pUsage->pNext;
3588 }
3589
3590 /*
3591 * Allocate new usage record.
3592 */
3593 pUsage = (PSUPDRVLDRUSAGE)RTMemAlloc(sizeof(*pUsage));
3594 Assert(pUsage);
3595 if (pUsage)
3596 {
3597 pUsage->cUsage = 1;
3598 pUsage->pImage = pImage;
3599 pUsage->pNext = pSession->pLdrUsage;
3600 pSession->pLdrUsage = pUsage;
3601 }
3602 /* ignore errors... */
3603}
3604
3605
3606/**
3607 * Frees a load image.
3608 *
3609 * @param pDevExt Pointer to device extension.
3610 * @param pImage Pointer to the image we're going to free.
3611 * This image must exist!
3612 * @remark The caller MUST own SUPDRVDEVEXT::mtxLdr!
3613 */
3614static void supdrvLdrFree(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
3615{
3616 PSUPDRVLDRIMAGE pImagePrev;
3617 LogFlow(("supdrvLdrFree: pImage=%p\n", pImage));
3618
3619 /* Find it - argh, we should've used a doubly linked list. */
3620 Assert(pDevExt->pLdrImages);
3621 pImagePrev = NULL;
3622 if (pDevExt->pLdrImages != pImage)
3623 {
3624 pImagePrev = pDevExt->pLdrImages;
3625 while (pImagePrev->pNext != pImage)
3626 pImagePrev = pImagePrev->pNext;
3627 Assert(pImagePrev->pNext == pImage);
3628 }
3629
3630 /* unlink */
3631 if (pImagePrev)
3632 pImagePrev->pNext = pImage->pNext;
3633 else
3634 pDevExt->pLdrImages = pImage->pNext;
3635
3636 /* check if this is VMMR0.r0 and fix the Idt patches if it is. */
3637 if (pDevExt->pvVMMR0 == pImage->pvImage)
3638 supdrvLdrUnsetR0EP(pDevExt);
3639
3640 /* check for objects with destructors in this image. (Shouldn't happen.) */
3641 if (pDevExt->pObjs)
3642 {
3643 unsigned cObjs = 0;
3644 PSUPDRVOBJ pObj;
3645 RTSPINLOCKTMP SpinlockTmp = RTSPINLOCKTMP_INITIALIZER;
3646 RTSpinlockAcquire(pDevExt->Spinlock, &SpinlockTmp);
3647 for (pObj = pDevExt->pObjs; pObj; pObj = pObj->pNext)
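 /* Note: the unsigned subtraction doubles as a range check; a destructor
    below pvImage wraps around to a huge value and fails the compare. */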
3648 if (RT_UNLIKELY((uintptr_t)pObj->pfnDestructor - (uintptr_t)pImage->pvImage < pImage->cbImage))
3649 {
3650 pObj->pfnDestructor = NULL;
3651 cObjs++;
3652 }
3653 RTSpinlockRelease(pDevExt->Spinlock, &SpinlockTmp);
3654 if (cObjs)
3655 OSDBGPRINT(("supdrvLdrFree: Image '%s' has %d dangling objects!\n", pImage->szName, cObjs));
3656 }
3657
3658 /* call termination function if fully loaded. */
3659 if ( pImage->pfnModuleTerm
3660 && pImage->uState == SUP_IOCTL_LDR_LOAD)
3661 {
3662 LogFlow(("supdrvIOCtl_LdrLoad: calling pfnModuleTerm=%p\n", pImage->pfnModuleTerm));
3663 pImage->pfnModuleTerm();
3664 }
3665
3666 /* free the image */
3667 pImage->cUsage = 0;
3668 pImage->pNext = NULL;
3669 pImage->uState = SUP_IOCTL_LDR_FREE;
3670 RTMemExecFree(pImage);
3671}
3672
3673
3674/**
3675 * Gets the current paging mode of the CPU and returns it.
3676 */
3677static SUPPAGINGMODE supdrvIOCtl_GetPagingMode(void)
3678{
3679 SUPPAGINGMODE enmMode;
3680
3681 RTR0UINTREG cr0 = ASMGetCR0();
3682 if ((cr0 & (X86_CR0_PG | X86_CR0_PE)) != (X86_CR0_PG | X86_CR0_PE))
3683 enmMode = SUPPAGINGMODE_INVALID;
3684 else
3685 {
3686 RTR0UINTREG cr4 = ASMGetCR4();
3687 uint32_t fNXEPlusLMA = 0;
3688 if (cr4 & X86_CR4_PAE)
3689 {
3690 uint32_t fAmdFeatures = ASMCpuId_EDX(0x80000001);
3691 if (fAmdFeatures & (X86_CPUID_AMD_FEATURE_EDX_NX | X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
3692 {
3693 uint64_t efer = ASMRdMsr(MSR_K6_EFER);
3694 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_NX) && (efer & MSR_K6_EFER_NXE))
3695 fNXEPlusLMA |= RT_BIT(0);
3696 if ((fAmdFeatures & X86_CPUID_AMD_FEATURE_EDX_LONG_MODE) && (efer & MSR_K6_EFER_LMA))
3697 fNXEPlusLMA |= RT_BIT(1);
3698 }
3699 }
3700
3701 switch ((cr4 & (X86_CR4_PAE | X86_CR4_PGE)) | fNXEPlusLMA)
3702 {
3703 case 0:
3704 enmMode = SUPPAGINGMODE_32_BIT;
3705 break;
3706
3707 case X86_CR4_PGE:
3708 enmMode = SUPPAGINGMODE_32_BIT_GLOBAL;
3709 break;
3710
3711 case X86_CR4_PAE:
3712 enmMode = SUPPAGINGMODE_PAE;
3713 break;
3714
3715 case X86_CR4_PAE | RT_BIT(0):
3716 enmMode = SUPPAGINGMODE_PAE_NX;
3717 break;
3718
3719 case X86_CR4_PAE | X86_CR4_PGE:
3720 enmMode = SUPPAGINGMODE_PAE_GLOBAL;
3721 break;
3722
3723 case X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3724 enmMode = SUPPAGINGMODE_PAE_GLOBAL_NX;
3725 break;
3726
3727 case RT_BIT(1) | X86_CR4_PAE:
3728 enmMode = SUPPAGINGMODE_AMD64;
3729 break;
3730
3731 case RT_BIT(1) | X86_CR4_PAE | RT_BIT(0):
3732 enmMode = SUPPAGINGMODE_AMD64_NX;
3733 break;
3734
3735 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE:
3736 enmMode = SUPPAGINGMODE_AMD64_GLOBAL;
3737 break;
3738
3739 case RT_BIT(1) | X86_CR4_PAE | X86_CR4_PGE | RT_BIT(0):
3740 enmMode = SUPPAGINGMODE_AMD64_GLOBAL_NX;
3741 break;
3742
3743 default:
3744 AssertMsgFailed(("Cannot happen! cr4=%#x fNXEPlusLMA=%d\n", cr4, fNXEPlusLMA));
3745 enmMode = SUPPAGINGMODE_INVALID;
3746 break;
3747 }
3748 }
3749 return enmMode;
3750}
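
/*
 * Editor's summary of the switch above. NXE and LMA come from EFER (folded
 * into bits 0 and 1 of fNXEPlusLMA, and left zero unless CR4.PAE is set),
 * PAE and PGE from CR4:
 *
 *   LMA PAE PGE NXE -> SUPPAGINGMODE_...
 *    0   0   0   0     32_BIT
 *    0   0   1   0     32_BIT_GLOBAL
 *    0   1   0   0     PAE
 *    0   1   0   1     PAE_NX
 *    0   1   1   0     PAE_GLOBAL
 *    0   1   1   1     PAE_GLOBAL_NX
 *    1   1   0   0     AMD64
 *    1   1   0   1     AMD64_NX
 *    1   1   1   0     AMD64_GLOBAL
 *    1   1   1   1     AMD64_GLOBAL_NX
 */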
3751
3752
3753/**
3754 * Creates the GIP.
3755 *
3756 * @returns VBox status code.
3757 * @param pDevExt Instance data. GIP stuff may be updated.
3758 */
3759static int supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
3760{
3761 PSUPGLOBALINFOPAGE pGip;
3762 RTHCPHYS HCPhysGip;
3763 uint32_t u32SystemResolution;
3764 uint32_t u32Interval;
3765 int rc;
3766
3767 LogFlow(("supdrvGipCreate:\n"));
3768
3769 /* assert order */
3770 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
3771 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
3772 Assert(!pDevExt->pGipTimer);
3773
3774 /*
3775 * Allocate a suitable page with a default kernel mapping.
3776 */
3777 rc = RTR0MemObjAllocLow(&pDevExt->GipMemObj, PAGE_SIZE, false);
3778 if (RT_FAILURE(rc))
3779 {
3780 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
3781 return rc;
3782 }
3783 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
3784 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
3785
3786#if 0 /** @todo Disabled this as we didn't do it before and it causes unnecessary stress on laptops.
3787 * It only applies to Windows and should probably be revisited later, if possible made part of the
3788 * timer code (return min granularity in RTTimerGetSystemGranularity and set it in RTTimerStart). */
3789 /*
3790 * Try bump up the system timer resolution.
3791 * The more interrupts the better...
3792 */
3793 if ( RT_SUCCESS(RTTimerRequestSystemGranularity( 488281 /* 2048 HZ */, &u32SystemResolution))
3794 || RT_SUCCESS(RTTimerRequestSystemGranularity( 500000 /* 2000 HZ */, &u32SystemResolution))
3795 || RT_SUCCESS(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
3796 || RT_SUCCESS(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
3797 || RT_SUCCESS(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
3798 || RT_SUCCESS(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
3799 || RT_SUCCESS(RTTimerRequestSystemGranularity( 3906250 /* 256 HZ */, &u32SystemResolution))
3800 || RT_SUCCESS(RTTimerRequestSystemGranularity( 4000000 /* 250 HZ */, &u32SystemResolution))
3801 || RT_SUCCESS(RTTimerRequestSystemGranularity( 7812500 /* 128 HZ */, &u32SystemResolution))
3802 || RT_SUCCESS(RTTimerRequestSystemGranularity(10000000 /* 100 HZ */, &u32SystemResolution))
3803 || RT_SUCCESS(RTTimerRequestSystemGranularity(15625000 /* 64 HZ */, &u32SystemResolution))
3804 || RT_SUCCESS(RTTimerRequestSystemGranularity(31250000 /* 32 HZ */, &u32SystemResolution))
3805 )
3806 {
3807 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
3808 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
3809 }
3810#endif
3811
3812 /*
3813 * Find a reasonable update interval and initialize the structure.
3814 */
3815 u32Interval = u32SystemResolution = RTTimerGetSystemGranularity();
3816 while (u32Interval < 10000000 /* 10 ms */)
3817 u32Interval += u32SystemResolution;
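 /* Worked example: a 1 ms system granularity yields exactly 10 ms (100 Hz
    GIP updates); a 15.625 ms granularity (64 Hz tick) already satisfies
    the 10 ms minimum, giving 15.625 ms intervals (64 Hz updates). */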
3818
3819 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), 1000000000 / u32Interval /*=Hz*/);
3820
3821 /*
3822 * Create the timer.
3823 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
3824 */
3825 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
3826 {
3827 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer, pDevExt);
3828 if (rc == VERR_NOT_SUPPORTED)
3829 {
3830 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
3831 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
3832 }
3833 }
3834 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
3835 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0, supdrvGipSyncTimer, pDevExt);
3836 if (RT_SUCCESS(rc))
3837 {
3838 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
3839 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
3840 if (RT_SUCCESS(rc))
3841 {
3842 /*
3843 * We're good.
3844 */
3845 dprintf(("supdrvGipCreate: %ld ns interval.\n", (long)u32Interval));
3846 return VINF_SUCCESS;
3847 }
3848
3849 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notification. rc=%d\n", rc));
3850 }
3851 else
3852 {
3853 OSDBGPRINT(("supdrvGipCreate: failed to create GIP timer at %ld ns interval. rc=%d\n", (long)u32Interval, rc));
3854 Assert(!pDevExt->pGipTimer);
3855 }
3856 supdrvGipDestroy(pDevExt);
3857 return rc;
3858}
3859
3860
3861/**
3862 * Terminates the GIP.
3863 *
3864 * @param pDevExt Instance data. GIP stuff may be updated.
3865 */
3866static void supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
3867{
3868 int rc;
3869#ifdef DEBUG_DARWIN_GIP
3870 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
3871 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
3872 pDevExt->pGipTimer, pDevExt->GipMemObj));
3873#endif
3874
3875 /*
3876 * Invalidate the GIP data.
3877 */
3878 if (pDevExt->pGip)
3879 {
3880 supdrvGipTerm(pDevExt->pGip);
3881 pDevExt->pGip = NULL;
3882 }
3883
3884 /*
3885 * Destroy the timer and free the GIP memory object.
3886 */
3887 if (pDevExt->pGipTimer)
3888 {
3889 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
3890 pDevExt->pGipTimer = NULL;
3891 }
3892
3893 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
3894 {
3895 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
3896 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
3897 }
3898
3899 /*
3900 * Finally, release the system timer resolution request if one succeeded.
3901 */
3902 if (pDevExt->u32SystemTimerGranularityGrant)
3903 {
3904 rc = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant); AssertRC(rc);
3905 pDevExt->u32SystemTimerGranularityGrant = 0;
3906 }
3907}
3908
3909
3910/**
3911 * Timer callback function for sync GIP mode.
3912 * @param pTimer The timer.
3913 * @param pvUser The device extension.
3914 */
3915static DECLCALLBACK(void) supdrvGipSyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
3916{
3917 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3918 supdrvGipUpdate(pDevExt->pGip, RTTimeSystemNanoTS());
3919}
3920
3921
3922/**
3923 * Timer callback function for async GIP mode.
3924 * @param pTimer The timer.
3925 * @param pvUser The device extension.
3926 */
3927static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
3928{
3929 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3930 RTCPUID idCpu = RTMpCpuId();
3931 uint64_t NanoTS = RTTimeSystemNanoTS();
3932
3933 /** @todo reset the transaction number and whatnot when iTick == 1. */
3934 if (pDevExt->idGipMaster == idCpu)
3935 supdrvGipUpdate(pDevExt->pGip, NanoTS);
3936 else
3937 supdrvGipUpdatePerCpu(pDevExt->pGip, NanoTS, ASMGetApicId());
3938}
3939
3940
3941/**
3942 * Multiprocessor event notification callback.
3943 *
3944 * This is used to make sure that the GIP master gets passed on to
3945 * another CPU.
3946 *
3947 * @param enmEvent The event.
3948 * @param idCpu The cpu it applies to.
3949 * @param pvUser Pointer to the device extension.
3950 */
3951static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
3952{
3953 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3954 if (enmEvent == RTMPEVENT_OFFLINE)
3955 {
3956 RTCPUID idGipMaster;
3957 ASMAtomicReadSize(&pDevExt->idGipMaster, &idGipMaster);
3958 if (idGipMaster == idCpu)
3959 {
3960 /*
3961 * Find a new GIP master.
3962 */
3963 bool fIgnored;
3964 unsigned i;
3965 RTCPUID idNewGipMaster = NIL_RTCPUID;
3966 RTCPUSET OnlineCpus;
3967 RTMpGetOnlineSet(&OnlineCpus);
3968
3969 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
3970 {
3971 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
3972 if ( RTCpuSetIsMember(&OnlineCpus, idCurCpu)
3973 && idCurCpu != idGipMaster)
3974 {
3975 idNewGipMaster = idCurCpu;
3976 break;
3977 }
3978 }
3979
3980 dprintf(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
3981 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
3982 NOREF(fIgnored);
3983 }
3984 }
3985}
3986
3987
3988/**
3989 * Initializes the GIP data.
3990 *
3991 * @returns IPRT status code.
3992 * @param pDevExt Pointer to the device instance data.
3993 * @param pGip Pointer to the read-write kernel mapping of the GIP.
3994 * @param HCPhys The physical address of the GIP.
3995 * @param u64NanoTS The current nanosecond timestamp.
3996 * @param uUpdateHz The update frequency.
3997 */
3998int VBOXCALL supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS, unsigned uUpdateHz)
3999{
4000 unsigned i;
4001#ifdef DEBUG_DARWIN_GIP
4002 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
4003#else
4004 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz));
4005#endif
4006
4007 /*
4008 * Initialize the structure.
4009 */
4010 memset(pGip, 0, PAGE_SIZE);
4011 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
4012 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
4013 pGip->u32Mode = supdrvGipDeterminTscMode(pDevExt);
4014 pGip->u32UpdateHz = uUpdateHz;
4015 pGip->u32UpdateIntervalNS = 1000000000 / uUpdateHz;
4016 pGip->u64NanoTSLastUpdateHz = u64NanoTS;
4017
4018 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
4019 {
4020 pGip->aCPUs[i].u32TransactionId = 2;
4021 pGip->aCPUs[i].u64NanoTS = u64NanoTS;
4022 pGip->aCPUs[i].u64TSC = ASMReadTSC();
4023
4024 /*
4025 * We don't know the following values until we've executed updates.
4026 * So, we'll just insert very high values.
4027 */
4028 pGip->aCPUs[i].u64CpuHz = _4G + 1;
4029 pGip->aCPUs[i].u32UpdateIntervalTSC = _2G / 4;
4030 pGip->aCPUs[i].au32TSCHistory[0] = _2G / 4;
4031 pGip->aCPUs[i].au32TSCHistory[1] = _2G / 4;
4032 pGip->aCPUs[i].au32TSCHistory[2] = _2G / 4;
4033 pGip->aCPUs[i].au32TSCHistory[3] = _2G / 4;
4034 pGip->aCPUs[i].au32TSCHistory[4] = _2G / 4;
4035 pGip->aCPUs[i].au32TSCHistory[5] = _2G / 4;
4036 pGip->aCPUs[i].au32TSCHistory[6] = _2G / 4;
4037 pGip->aCPUs[i].au32TSCHistory[7] = _2G / 4;
4038 }
4039
4040 /*
4041 * Link it to the device extension.
4042 */
4043 pDevExt->pGip = pGip;
4044 pDevExt->HCPhysGip = HCPhys;
4045 pDevExt->cGipUsers = 0;
4046
4047 return VINF_SUCCESS;
4048}
4049
4050
4051/**
4052 * Callback used by supdrvDetermineAsyncTsc to read the TSC on a CPU.
4053 *
4054 * @param idCpu Ignored.
4055 * @param pvUser1 Where to put the TSC.
4056 * @param pvUser2 Ignored.
4057 */
4058static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4059{
4060#if 1
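 /* The atomic write keeps the 64-bit store from tearing on 32-bit hosts. */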
4061 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
4062#else
4063 *(uint64_t *)pvUser1 = ASMReadTSC();
4064#endif
4065}
4066
4067
4068/**
4069 * Determine if Async GIP mode is required because of TSC drift.
4070 *
4071 * When using the default/normal timer code it is essential that the time stamp counter
4072 * (TSC) never runs backwards, that is, a read operation on the counter should return
4073 * a bigger value than any previous read operation. This is guaranteed by the latest
4074 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
4075 * case we have to choose the asynchronous timer mode.
4076 *
4077 * @param poffMin Pointer to the determined difference between different cores.
4078 * @return false if the time stamp counters appear to be synchronized, true otherwise.
4079 */
4080bool VBOXCALL supdrvDetermineAsyncTsc(uint64_t *poffMin)
4081{
4082 /*
4083 * Just iterate all the CPUs 8 times and make sure that the TSC is
4084 * strictly increasing. We don't bother taking TSC rollover into account.
4085 */
4086 RTCPUSET CpuSet;
4087 int iLastCpu = RTCpuLastIndex(RTMpGetSet(&CpuSet));
4088 int iCpu;
4089 int cLoops = 8;
4090 bool fAsync = false;
4091 int rc;
4092 uint64_t offMax = 0;
4093 uint64_t offMin = ~(uint64_t)0;
4094 uint64_t PrevTsc = ASMReadTSC();
4095
4096 while (cLoops-- > 0)
4097 {
4098 for (iCpu = 0; iCpu <= iLastCpu; iCpu++)
4099 {
4100 uint64_t CurTsc;
4101 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
4102 if (RT_SUCCESS(rc))
4103 {
4104 if (CurTsc <= PrevTsc)
4105 {
4106 fAsync = true;
4107 offMin = offMax = PrevTsc - CurTsc;
4108 dprintf(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
4109 iCpu, cLoops, CurTsc, PrevTsc));
4110 break;
4111 }
4112
4113 /* Gather statistics (except the first time). */
4114 if (iCpu != 0 || cLoops != 7)
4115 {
4116 uint64_t off = CurTsc - PrevTsc;
4117 if (off < offMin)
4118 offMin = off;
4119 if (off > offMax)
4120 offMax = off;
4121 dprintf2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
4122 }
4123
4124 /* Next */
4125 PrevTsc = CurTsc;
4126 }
4127 else if (rc == VERR_NOT_SUPPORTED)
4128 break;
4129 else
4130 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
4131 }
4132
4133 /* If we broke out of the inner loop, stop testing. */
4134 if (iCpu <= iLastCpu)
4135 break;
4136 }
4137
4138 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
4139 dprintf(("supdrvDetermineAsyncTsc: returns %d; iLastCpu=%d rc=%d offMin=%llx offMax=%llx\n",
4140 fAsync, iLastCpu, rc, offMin, offMax));
4141#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
4142 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
4143#endif
4144 return fAsync;
4145}
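
/*
 * Worked example of the detection above: if the loop reads TSC=1000 on one
 * CPU and the very next read on another CPU returns 990, CurTsc <= PrevTsc
 * and the counters cannot back a single global clock, so fAsync is set.
 * Otherwise offMin/offMax track the smallest/largest forward step observed,
 * roughly bounding the cross-CPU skew plus the RTMpOnSpecific overhead.
 */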
4146
4147
4148/**
4149 * Determine the GIP TSC mode.
4150 *
4151 * @returns The most suitable TSC mode.
4152 * @param pDevExt Pointer to the device instance data.
4153 */
4154static SUPGIPMODE supdrvGipDeterminTscMode(PSUPDRVDEVEXT pDevExt)
4155{
4156 /*
4157 * On SMP we're faced with two problems:
4158 * (1) There might be a skew between the CPUs, so that cpu0
4159 * returns a TSC that is slightly different from cpu1.
4160 * (2) Power management (and other things) may cause the TSC
4161 * to run at a non-constant speed, and cause the speed
4162 * to be different on the cpus. This will result in (1).
4163 *
4164 * So, on SMP systems we'll have to select the ASYNC update method
4165 * if there are symptoms of these problems.
4166 */
4167 if (RTMpGetCount() > 1)
4168 {
4169 uint32_t uEAX, uEBX, uECX, uEDX;
4170 uint64_t u64DiffCoresIgnored;
4171
4172 /* Permit the user and/or the OS specific bits to force async mode. */
4173 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
4174 return SUPGIPMODE_ASYNC_TSC;
4175
4176 /* Check for current differences between the CPUs. */
4177 if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
4178 return SUPGIPMODE_ASYNC_TSC;
4179
4180 /*
4181 * If the CPU supports power management and is an AMD one we
4182 * won't trust it unless the TscInvariant bit is set.
4183 */
4184 /* Check for "AuthenticAMD" */
4185 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
4186 if ( uEAX >= 1
4187 && uEBX == X86_CPUID_VENDOR_AMD_EBX
4188 && uECX == X86_CPUID_VENDOR_AMD_ECX
4189 && uEDX == X86_CPUID_VENDOR_AMD_EDX)
4190 {
4191 /* Check for APM support and that TscInvariant is cleared. */
4192 ASMCpuId(0x80000000, &uEAX, &uEBX, &uECX, &uEDX);
4193 if (uEAX >= 0x80000007)
4194 {
4195 ASMCpuId(0x80000007, &uEAX, &uEBX, &uECX, &uEDX);
4196 if ( !(uEDX & RT_BIT(8))/* TscInvariant */
4197 && (uEDX & 0x3e)) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
4198 return SUPGIPMODE_ASYNC_TSC;
4199 }
4200 }
4201 }
4202 return SUPGIPMODE_SYNC_TSC;
4203}
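
/*
 * Worked example of the AMD check above (illustrative values): a mobile K8
 * reporting EDX=0x0000003f for CPUID leaf 0x80000007 has the TscInvariant
 * bit (bit 8) clear while the FID/VID/THERMTRIP/TM/STC bits (mask 0x3e)
 * are set, so such a CPU forces SUPGIPMODE_ASYNC_TSC.
 */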
4204
4205
4206/**
4207 * Invalidates the GIP data upon termination.
4208 *
4209 * @param pGip Pointer to the read-write kernel mapping of the GIP.
4210 */
4211void VBOXCALL supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
4212{
4213 unsigned i;
4214 pGip->u32Magic = 0;
4215 for (i = 0; i < RT_ELEMENTS(pGip->aCPUs); i++)
4216 {
4217 pGip->aCPUs[i].u64NanoTS = 0;
4218 pGip->aCPUs[i].u64TSC = 0;
4219 pGip->aCPUs[i].iTSCHistoryHead = 0;
4220 }
4221}
4222
4223
4224/**
4225 * Worker routine for supdrvGipUpdate and supdrvGipUpdatePerCpu that
4226 * updates all the per cpu data except the transaction id.
4227 *
4228 * @param pGip The GIP.
4229 * @param pGipCpu Pointer to the per cpu data.
4230 * @param u64NanoTS The current time stamp.
4231 */
4232static void supdrvGipDoUpdateCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
4233{
4234 uint64_t u64TSC;
4235 uint64_t u64TSCDelta;
4236 uint32_t u32UpdateIntervalTSC;
4237 uint32_t u32UpdateIntervalTSCSlack;
4238 unsigned iTSCHistoryHead;
4239 uint64_t u64CpuHz;
4240
4241 /*
4242 * Update the NanoTS.
4243 */
4244 ASMAtomicXchgU64(&pGipCpu->u64NanoTS, u64NanoTS);
4245
4246 /*
4247 * Calc TSC delta.
4248 */
4249 /** @todo validate the NanoTS delta, don't trust the OS to call us when it should... */
4250 u64TSC = ASMReadTSC();
4251 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
4252 ASMAtomicXchgU64(&pGipCpu->u64TSC, u64TSC);
4253
4254 if (u64TSCDelta >> 32)
4255 {
4256 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
4257 pGipCpu->cErrors++;
4258 }
4259
4260 /*
4261 * TSC History.
4262 */
4263 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
4264
4265 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
4266 ASMAtomicXchgU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
4267 ASMAtomicXchgU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
4268
4269 /*
4270 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
4271 */
4272 if (pGip->u32UpdateHz >= 1000)
4273 {
4274 uint32_t u32;
4275 u32 = pGipCpu->au32TSCHistory[0];
4276 u32 += pGipCpu->au32TSCHistory[1];
4277 u32 += pGipCpu->au32TSCHistory[2];
4278 u32 += pGipCpu->au32TSCHistory[3];
4279 u32 >>= 2;
4280 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
4281 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
4282 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
4283 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
4284 u32UpdateIntervalTSC >>= 2;
4285 u32UpdateIntervalTSC += u32;
4286 u32UpdateIntervalTSC >>= 1;
4287
4288 /* Value chosen for a 2GHz Athlon64 running Linux 2.6.10/11. */
4289 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
4290 }
4291 else if (pGip->u32UpdateHz >= 90)
4292 {
4293 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
4294 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
4295 u32UpdateIntervalTSC >>= 1;
4296
4297 /* Value chosen on a 2GHz ThinkPad running Windows. */
4298 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
4299 }
4300 else
4301 {
4302 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
4303
4304 /* This value hasn't been checked yet... waiting for OS/2 and 33Hz timers... :-) */
4305 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
4306 }
4307 ASMAtomicXchgU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
4308
4309 /*
4310 * CpuHz.
4311 */
4312 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, pGip->u32UpdateHz);
4313 ASMAtomicXchgU64(&pGipCpu->u64CpuHz, u64CpuHz);
4314}
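
/*
 * Worked example for the >= 1000 Hz branch above: a 2 GHz CPU updated at
 * 1 kHz sees roughly 2,000,000 TSC ticks per interval. The four older and
 * four newer history entries are averaged separately, those two averages
 * are averaged again, and the stored interval is padded with interval/2^14
 * (~122 ticks) of slack. CpuHz then falls out as interval * update Hz,
 * i.e. about 2 GHz again.
 */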
4315
4316
4317/**
4318 * Updates the GIP.
4319 *
4320 * @param pGip Pointer to the GIP.
4321 * @param u64NanoTS The current nanosecond timestamp.
4322 */
4323void VBOXCALL supdrvGipUpdate(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS)
4324{
4325 /*
4326 * Determine the relevant CPU data.
4327 */
4328 PSUPGIPCPU pGipCpu;
4329 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
4330 pGipCpu = &pGip->aCPUs[0];
4331 else
4332 {
4333 unsigned iCpu = ASMGetApicId();
4334 if (RT_UNLIKELY(iCpu >= RT_ELEMENTS(pGip->aCPUs)))
4335 return;
4336 pGipCpu = &pGip->aCPUs[iCpu];
4337 }
4338
4339 /*
4340 * Start update transaction.
4341 */
4342 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
4343 {
4344 /* This can happen on win32 if we're taking too long and there are more CPUs around. Shouldn't happen though. */
4345 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
4346 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4347 pGipCpu->cErrors++;
4348 return;
4349 }
4350
4351 /*
4352 * Recalc the update frequency every 0x800th time.
4353 */
4354 if (!(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
4355 {
4356 if (pGip->u64NanoTSLastUpdateHz)
4357 {
4358#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
4359 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
4360 uint32_t u32UpdateHz = (uint32_t)((UINT64_C(1000000000) * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
4361 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
4362 {
4363 ASMAtomicXchgU32(&pGip->u32UpdateHz, u32UpdateHz);
4364 ASMAtomicXchgU32(&pGip->u32UpdateIntervalNS, 1000000000 / u32UpdateHz);
4365 }
4366#endif
4367 }
4368 ASMAtomicXchgU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS);
4369 }
4370
4371 /*
4372 * Update the data.
4373 */
4374 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
4375
4376 /*
4377 * Complete transaction.
4378 */
4379 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4380}
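
/*
 * Editor's sketch (not driver code) of a reader against the transaction id
 * protocol above: the id is odd while an update is in flight and even in
 * between, so a reader loops until it sees the same even value before and
 * after copying the fields. A real reader would also need compiler/memory
 * barriers; they are elided in this minimal sketch.
 */
#if 0 /* example only */
static uint64_t supExampleReadNanoTS(PSUPGIPCPU pGipCpu)
{
    uint32_t u32TransactionId;
    uint64_t u64NanoTS;
    do
    {
        u32TransactionId = pGipCpu->u32TransactionId;
        u64NanoTS        = pGipCpu->u64NanoTS;
    } while (   (u32TransactionId & 1)
             || pGipCpu->u32TransactionId != u32TransactionId);
    return u64NanoTS;
}
#endif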
4381
4382
4383/**
4384 * Updates the per cpu GIP data for the calling cpu.
4385 *
4386 * @param pGip Pointer to the GIP.
4387 * @param u64NanoTS The current nanosecond timestamp.
4388 * @param iCpu The CPU index.
4389 */
4390void VBOXCALL supdrvGipUpdatePerCpu(PSUPGLOBALINFOPAGE pGip, uint64_t u64NanoTS, unsigned iCpu)
4391{
4392 PSUPGIPCPU pGipCpu;
4393
4394 if (RT_LIKELY(iCpu < RT_ELEMENTS(pGip->aCPUs)))
4395 {
4396 pGipCpu = &pGip->aCPUs[iCpu];
4397
4398 /*
4399 * Start update transaction.
4400 */
4401 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
4402 {
4403 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
4404 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4405 pGipCpu->cErrors++;
4406 return;
4407 }
4408
4409 /*
4410 * Update the data.
4411 */
4412 supdrvGipDoUpdateCpu(pGip, pGipCpu, u64NanoTS);
4413
4414 /*
4415 * Complete transaction.
4416 */
4417 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
4418 }
4419}
4420
4421
4422#ifndef DEBUG /** @todo change #ifndef DEBUG -> #ifdef LOG_ENABLED */
4423/**
4424 * Stub function for non-debug builds.
4425 */
4426RTDECL(PRTLOGGER) RTLogDefaultInstance(void)
4427{
4428 return NULL;
4429}
4430
4431RTDECL(PRTLOGGER) RTLogRelDefaultInstance(void)
4432{
4433 return NULL;
4434}
4435
4436/**
4437 * Stub function for non-debug builds.
4438 */
4439RTDECL(int) RTLogSetDefaultInstanceThread(PRTLOGGER pLogger, uintptr_t uKey)
4440{
4441 return 0;
4442}
4443
4444/**
4445 * Stub function for non-debug builds.
4446 */
4447RTDECL(void) RTLogLogger(PRTLOGGER pLogger, void *pvCallerRet, const char *pszFormat, ...)
4448{
4449}
4450
4451/**
4452 * Stub function for non-debug builds.
4453 */
4454RTDECL(void) RTLogLoggerEx(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, ...)
4455{
4456}
4457
4458/**
4459 * Stub function for non-debug builds.
4460 */
4461RTDECL(void) RTLogLoggerExV(PRTLOGGER pLogger, unsigned fFlags, unsigned iGroup, const char *pszFormat, va_list args)
4462{
4463}
4464
4465/**
4466 * Stub function for non-debug builds.
4467 */
4468RTDECL(void) RTLogPrintf(const char *pszFormat, ...)
4469{
4470}
4471
4472/**
4473 * Stub function for non-debug builds.
4474 */
4475RTDECL(void) RTLogPrintfV(const char *pszFormat, va_list args)
4476{
4477}
4478#endif /* !DEBUG */
4479