VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp@ 54558

Last change on this file since 54558 was 54551, checked in by vboxsync, 10 years ago

HostDrivers/Support: doxygen nit.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 169.6 KB
Line 
1/* $Id: SUPDrvGip.cpp 54551 2015-02-27 13:09:10Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/uint128.h>
58#include <iprt/x86.h>
59
60#include <VBox/param.h>
61#include <VBox/log.h>
62#include <VBox/err.h>
63
64#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
65# include "dtrace/SUPDrv.h"
66#else
67/* ... */
68#endif
69
70
71/*******************************************************************************
72* Defined Constants And Macros *
73*******************************************************************************/
74/** The frequency by which we recalculate the u32UpdateHz and
75 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
76 *
77 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
78 */
79#define GIP_UPDATEHZ_RECALC_FREQ 0x800
80
81/** A reserved TSC value used for synchronization as well as measurement of
82 * TSC deltas. */
83#define GIP_TSC_DELTA_RSVD UINT64_MAX
84/** The number of TSC delta measurement loops in total (includes primer and
85 * read-time loops). */
86#define GIP_TSC_DELTA_LOOPS 96
87/** The number of cache primer loops. */
88#define GIP_TSC_DELTA_PRIMER_LOOPS 4
89/** The number of loops until we keep computing the minimum read time. */
90#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
91
92/** The TSC frequency refinement period in seconds.
93 * The timer fires after 200ms, then every second, this value just says when
94 * to stop it after that. */
95#define GIP_TSC_REFINE_PERIOD_IN_SECS 12
96/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
97#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
98/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
99#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
100/** The TSC delta value for the initial GIP master - 0 in regular builds.
101 * To test the delta code this can be set to a non-zero value. */
102#if 0
103# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
104#else
105# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
106#endif
107
108AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
109AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
110
111/** @def VBOX_SVN_REV
112 * The makefile should define this if it can. */
113#ifndef VBOX_SVN_REV
114# define VBOX_SVN_REV 0
115#endif
116
117#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
118# define DO_NOT_START_GIP
119#endif
120
121
122/*******************************************************************************
123* Internal Functions *
124*******************************************************************************/
125static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
126static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
127static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz);
128#ifdef SUPDRV_USE_TSC_DELTA_THREAD
129static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
130static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
131static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt);
132#else
133static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt);
134static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
135#endif
136
137
138/*******************************************************************************
139* Global Variables *
140*******************************************************************************/
141DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
142
143
144
145/*
146 *
147 * Misc Common GIP Code
148 * Misc Common GIP Code
149 * Misc Common GIP Code
150 *
151 *
152 */
153
154
155/**
156 * Finds the GIP CPU index corresponding to @a idCpu.
157 *
158 * @returns GIP CPU array index, UINT32_MAX if not found.
159 * @param pGip The GIP.
160 * @param idCpu The CPU ID.
161 */
162static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
163{
164 uint32_t i;
165 for (i = 0; i < pGip->cCpus; i++)
166 if (pGip->aCPUs[i].idCpu == idCpu)
167 return i;
168 return UINT32_MAX;
169}
170
171
172
173/*
174 *
175 * GIP Mapping and Unmapping Related Code.
176 * GIP Mapping and Unmapping Related Code.
177 * GIP Mapping and Unmapping Related Code.
178 *
179 *
180 */
181
182
183/**
184 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
185 * updating.
186 *
187 * @param pGip Pointer to the GIP.
188 * @param pGipCpu The per CPU structure for this CPU.
189 * @param u64NanoTS The current time.
190 */
191static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
192{
193 /*
194 * Here we don't really care about applying the TSC delta. The re-initialization of this
195 * value is not relevant especially while (re)starting the GIP as the first few ones will
196 * be ignored anyway, see supdrvGipDoUpdateCpu().
197 */
198 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
199 pGipCpu->u64NanoTS = u64NanoTS;
200}
201
202
203/**
204 * Set the current TSC and NanoTS value for the CPU.
205 *
206 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
207 * @param pvUser1 Pointer to the ring-0 GIP mapping.
208 * @param pvUser2 Pointer to the variable holding the current time.
209 */
210static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
211{
212 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
213 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
214
215 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
216 supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
217
218 NOREF(pvUser2);
219 NOREF(idCpu);
220}
221
222
223/**
224 * State structure for supdrvGipDetectGetGipCpuCallback.
225 */
226typedef struct SUPDRVGIPDETECTGETCPU
227{
228 /** Bitmap of APIC IDs that has been seen (initialized to zero).
229 * Used to detect duplicate APIC IDs (paranoia). */
230 uint8_t volatile bmApicId[256 / 8];
231 /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
232 * initially). The callback clears the methods not detected. */
233 uint32_t volatile fSupported;
234 /** The first callback detecting any kind of range issues (initialized to
235 * NIL_RTCPUID). */
236 RTCPUID volatile idCpuProblem;
237} SUPDRVGIPDETECTGETCPU;
238/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
239typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
240
241
242/**
243 * Checks for alternative ways of getting the CPU ID.
244 *
245 * This also checks the APIC ID, CPU ID and CPU set index values against the
246 * GIP tables.
247 *
248 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
249 * @param pvUser1 Pointer to the state structure.
250 * @param pvUser2 Pointer to the GIP.
251 */
252static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
253{
254 PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
255 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
256 uint32_t fSupported = 0;
257 uint16_t idApic;
258 int iCpuSet;
259
260 AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
261
262 /*
263 * Check that the CPU ID and CPU set index are interchangable.
264 */
265 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
266 if ((RTCPUID)iCpuSet == idCpu)
267 {
268 AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
269 if ( iCpuSet >= 0
270 && iCpuSet < RTCPUSET_MAX_CPUS
271 && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
272 {
273 /*
274 * Check whether the IDTR.LIMIT contains a CPU number.
275 */
276#ifdef RT_ARCH_X86
277 uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
278#else
279 uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
280#endif
281 RTIDTR Idtr;
282 ASMGetIDTR(&Idtr);
283 if (Idtr.cbIdt >= cbIdt)
284 {
285 uint32_t uTmp = Idtr.cbIdt - cbIdt;
286 uTmp &= RTCPUSET_MAX_CPUS - 1;
287 if (uTmp == idCpu)
288 {
289 RTIDTR Idtr2;
290 ASMGetIDTR(&Idtr2);
291 if (Idtr2.cbIdt == Idtr.cbIdt)
292 fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
293 }
294 }
295
296 /*
297 * Check whether RDTSCP is an option.
298 */
299 if (ASMHasCpuId())
300 {
301 if ( ASMIsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
302 && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
303 {
304 uint32_t uAux;
305 ASMReadTscWithAux(&uAux);
306 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
307 {
308 ASMNopPause();
309 ASMReadTscWithAux(&uAux);
310 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
311 fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
312 }
313 }
314 }
315 }
316 }
317
318 /*
319 * Check that the APIC ID is unique.
320 */
321 idApic = ASMGetApicId();
322 if (RT_LIKELY( idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)
323 && !ASMAtomicBitTestAndSet(pState->bmApicId, idApic)))
324 fSupported |= SUPGIPGETCPU_APIC_ID;
325 else
326 {
327 AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
328 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
329 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - duplicate APIC ID.\n",
330 idCpu, iCpuSet, idApic));
331 }
332
333 /*
334 * Check that the iCpuSet is within the expected range.
335 */
336 if (RT_UNLIKELY( iCpuSet < 0
337 || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
338 || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
339 {
340 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
341 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
342 idCpu, iCpuSet, idApic));
343 }
344 else
345 {
346 RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
347 if (RT_UNLIKELY(idCpu2 != idCpu))
348 {
349 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
350 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
351 idCpu, iCpuSet, idApic, idCpu2));
352 }
353 }
354
355 /*
356 * Update the supported feature mask before we return.
357 */
358 ASMAtomicAndU32(&pState->fSupported, fSupported);
359
360 NOREF(pvUser2);
361}
362
363
364/**
365 * Increase the timer freqency on hosts where this is possible (NT).
366 *
367 * The idea is that more interrupts is better for us... Also, it's better than
368 * we increase the timer frequence, because we might end up getting inaccurate
369 * callbacks if someone else does it.
370 *
371 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
372 */
373static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
374{
375 if (pDevExt->u32SystemTimerGranularityGrant == 0)
376 {
377 uint32_t u32SystemResolution;
378 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
379 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
380 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
381 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
382 )
383 {
384 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
385 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
386 }
387 }
388}
389
390
391/**
392 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
393 *
394 * @param pDevExt Clears u32SystemTimerGranularityGrant.
395 */
396static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
397{
398 if (pDevExt->u32SystemTimerGranularityGrant)
399 {
400 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
401 AssertRC(rc2);
402 pDevExt->u32SystemTimerGranularityGrant = 0;
403 }
404}
405
406
407/**
408 * Maps the GIP into userspace and/or get the physical address of the GIP.
409 *
410 * @returns IPRT status code.
411 * @param pSession Session to which the GIP mapping should belong.
412 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
413 * @param pHCPhysGip Where to store the physical address. (optional)
414 *
415 * @remark There is no reference counting on the mapping, so one call to this function
416 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
417 * and remove the session as a GIP user.
418 */
419SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
420{
421 int rc;
422 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
423 RTR3PTR pGipR3 = NIL_RTR3PTR;
424 RTHCPHYS HCPhys = NIL_RTHCPHYS;
425 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
426
427 /*
428 * Validate
429 */
430 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
431 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
432 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
433
434#ifdef SUPDRV_USE_MUTEX_FOR_GIP
435 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
436#else
437 RTSemFastMutexRequest(pDevExt->mtxGip);
438#endif
439 if (pDevExt->pGip)
440 {
441 /*
442 * Map it?
443 */
444 rc = VINF_SUCCESS;
445 if (ppGipR3)
446 {
447 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
448 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
449 RTMEM_PROT_READ, RTR0ProcHandleSelf());
450 if (RT_SUCCESS(rc))
451 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
452 }
453
454 /*
455 * Get physical address.
456 */
457 if (pHCPhysGip && RT_SUCCESS(rc))
458 HCPhys = pDevExt->HCPhysGip;
459
460 /*
461 * Reference globally.
462 */
463 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
464 {
465 pSession->fGipReferenced = 1;
466 pDevExt->cGipUsers++;
467 if (pDevExt->cGipUsers == 1)
468 {
469 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
470 uint64_t u64NanoTS;
471
472 /*
473 * GIP starts/resumes updating again. On windows we bump the
474 * host timer frequency to make sure we don't get stuck in guest
475 * mode and to get better timer (and possibly clock) accuracy.
476 */
477 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
478
479 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
480
481 /*
482 * document me
483 */
484 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
485 {
486 unsigned i;
487 for (i = 0; i < pGipR0->cCpus; i++)
488 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
489 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
490 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
491 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
492 }
493
494 /*
495 * document me
496 */
497 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
498 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
499 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
500 || RTMpGetOnlineCount() == 1)
501 supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
502 else
503 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
504
505 /*
506 * Detect alternative ways to figure the CPU ID in ring-3 and
507 * raw-mode context. Check the sanity of the APIC IDs, CPU IDs,
508 * and CPU set indexes while we're at it.
509 */
510 if (RT_SUCCESS(rc))
511 {
512 SUPDRVGIPDETECTGETCPU DetectState;
513 RT_BZERO((void *)&DetectState.bmApicId, sizeof(DetectState.bmApicId));
514 DetectState.fSupported = UINT32_MAX;
515 DetectState.idCpuProblem = NIL_RTCPUID;
516 rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, &DetectState, pGipR0);
517 if (DetectState.idCpuProblem == NIL_RTCPUID)
518 {
519 if ( DetectState.fSupported != UINT32_MAX
520 && DetectState.fSupported != 0)
521 {
522 if (pGipR0->fGetGipCpu != DetectState.fSupported)
523 {
524 pGipR0->fGetGipCpu = DetectState.fSupported;
525 LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", DetectState.fSupported));
526 }
527 }
528 else
529 {
530 LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
531 DetectState.fSupported));
532 rc = VERR_UNSUPPORTED_CPU;
533 }
534 }
535 else
536 {
537 LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
538 DetectState.idCpuProblem, DetectState.idCpuProblem));
539 rc = VERR_INVALID_CPU_ID;
540 }
541 }
542
543 /*
544 * Start the GIP timer if all is well..
545 */
546 if (RT_SUCCESS(rc))
547 {
548#ifndef DO_NOT_START_GIP
549 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
550#endif
551 rc = VINF_SUCCESS;
552 }
553
554 /*
555 * Bail out on error.
556 */
557 if (RT_FAILURE(rc))
558 {
559 LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
560 pDevExt->cGipUsers = 0;
561 pSession->fGipReferenced = 0;
562 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
563 {
564 int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
565 if (RT_SUCCESS(rc2))
566 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
567 }
568 HCPhys = NIL_RTHCPHYS;
569 pGipR3 = NIL_RTR3PTR;
570 }
571 }
572 }
573 }
574 else
575 {
576 rc = VERR_GENERAL_FAILURE;
577 Log(("SUPR0GipMap: GIP is not available!\n"));
578 }
579#ifdef SUPDRV_USE_MUTEX_FOR_GIP
580 RTSemMutexRelease(pDevExt->mtxGip);
581#else
582 RTSemFastMutexRelease(pDevExt->mtxGip);
583#endif
584
585 /*
586 * Write returns.
587 */
588 if (pHCPhysGip)
589 *pHCPhysGip = HCPhys;
590 if (ppGipR3)
591 *ppGipR3 = pGipR3;
592
593#ifdef DEBUG_DARWIN_GIP
594 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
595#else
596 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
597#endif
598 return rc;
599}
600
601
602/**
603 * Unmaps any user mapping of the GIP and terminates all GIP access
604 * from this session.
605 *
606 * @returns IPRT status code.
607 * @param pSession Session to which the GIP mapping should belong.
608 */
609SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
610{
611 int rc = VINF_SUCCESS;
612 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
613#ifdef DEBUG_DARWIN_GIP
614 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
615 pSession,
616 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
617 pSession->GipMapObjR3));
618#else
619 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
620#endif
621 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
622
623#ifdef SUPDRV_USE_MUTEX_FOR_GIP
624 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
625#else
626 RTSemFastMutexRequest(pDevExt->mtxGip);
627#endif
628
629 /*
630 * Unmap anything?
631 */
632 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
633 {
634 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
635 AssertRC(rc);
636 if (RT_SUCCESS(rc))
637 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
638 }
639
640 /*
641 * Dereference global GIP.
642 */
643 if (pSession->fGipReferenced && !rc)
644 {
645 pSession->fGipReferenced = 0;
646 if ( pDevExt->cGipUsers > 0
647 && !--pDevExt->cGipUsers)
648 {
649 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
650#ifndef DO_NOT_START_GIP
651 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
652#endif
653 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
654 }
655 }
656
657#ifdef SUPDRV_USE_MUTEX_FOR_GIP
658 RTSemMutexRelease(pDevExt->mtxGip);
659#else
660 RTSemFastMutexRelease(pDevExt->mtxGip);
661#endif
662
663 return rc;
664}
665
666
667/**
668 * Gets the GIP pointer.
669 *
670 * @returns Pointer to the GIP or NULL.
671 */
672SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
673{
674 return g_pSUPGlobalInfoPage;
675}
676
677
678
679
680
681/*
682 *
683 *
684 * GIP Initialization, Termination and CPU Offline / Online Related Code.
685 * GIP Initialization, Termination and CPU Offline / Online Related Code.
686 * GIP Initialization, Termination and CPU Offline / Online Related Code.
687 *
688 *
689 */
690
691/**
692 * Used by supdrvInitRefineInvariantTscFreqTimer and supdrvGipInitMeasureTscFreq
693 * to update the TSC frequency related GIP variables.
694 *
695 * @param pGip The GIP.
696 * @param nsElapsed The number of nano seconds elapsed.
697 * @param cElapsedTscTicks The corresponding number of TSC ticks.
698 * @param iTick The tick number for debugging.
699 */
700static void supdrvGipInitSetCpuFreq(PSUPGLOBALINFOPAGE pGip, uint64_t nsElapsed, uint64_t cElapsedTscTicks, uint32_t iTick)
701{
702 /*
703 * Calculate the frequency.
704 */
705 uint64_t uCpuHz;
706 if ( cElapsedTscTicks < UINT64_MAX / RT_NS_1SEC
707 && nsElapsed < UINT32_MAX)
708 uCpuHz = ASMMultU64ByU32DivByU32(cElapsedTscTicks, RT_NS_1SEC, (uint32_t)nsElapsed);
709 else
710 {
711 RTUINT128U CpuHz, Tmp, Divisor;
712 CpuHz.s.Lo = CpuHz.s.Hi = 0;
713 RTUInt128MulU64ByU64(&Tmp, cElapsedTscTicks, RT_NS_1SEC_64);
714 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, nsElapsed));
715 uCpuHz = CpuHz.s.Lo;
716 }
717
718 /*
719 * Update the GIP.
720 */
721 ASMAtomicWriteU64(&pGip->u64CpuHz, uCpuHz);
722 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
723 {
724 ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, uCpuHz);
725
726 /* For inspecting the frequency calcs using tstGIP-2, debugger or similar. */
727 if (iTick + 1 < pGip->cCpus)
728 ASMAtomicWriteU64(&pGip->aCPUs[iTick + 1].u64CpuHz, uCpuHz);
729 }
730}
731
732
733/**
734 * Timer callback function for TSC frequency refinement in invariant GIP mode.
735 *
736 * This is started during driver init and fires once
737 * GIP_TSC_REFINE_PERIOD_IN_SECS seconds later.
738 *
739 * @param pTimer The timer.
740 * @param pvUser Opaque pointer to the device instance data.
741 * @param iTick The timer tick.
742 */
743static DECLCALLBACK(void) supdrvInitRefineInvariantTscFreqTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
744{
745 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
746 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
747 RTCPUID idCpu;
748 uint64_t cNsElapsed;
749 uint64_t cTscTicksElapsed;
750 uint64_t nsNow;
751 uint64_t uTsc;
752 RTCCUINTREG fEFlags;
753
754 /* Paranoia. */
755 AssertReturnVoid(pGip);
756 AssertReturnVoid(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
757
758 /*
759 * If we got a power event, stop the refinement process.
760 */
761 if (pDevExt->fInvTscRefinePowerEvent)
762 {
763 int rc = RTTimerStop(pTimer); AssertRC(rc);
764 return;
765 }
766
767 /*
768 * Read the TSC and time, noting which CPU we are on.
769 *
770 * Don't bother spinning until RTTimeSystemNanoTS changes, since on
771 * systems where it matters we're in a context where we cannot waste that
772 * much time (DPC watchdog, called from clock interrupt).
773 */
774 fEFlags = ASMIntDisableFlags();
775 uTsc = ASMReadTSC();
776 nsNow = RTTimeSystemNanoTS();
777 idCpu = RTMpCpuId();
778 ASMSetFlags(fEFlags);
779
780 cNsElapsed = nsNow - pDevExt->nsStartInvarTscRefine;
781 cTscTicksElapsed = uTsc - pDevExt->uTscStartInvarTscRefine;
782
783 /*
784 * If the above measurement was taken on a different CPU than the one we
785 * started the process on, cTscTicksElapsed will need to be adjusted with
786 * the TSC deltas of both the CPUs.
787 *
788 * We ASSUME that the delta calculation process takes less time than the
789 * TSC frequency refinement timer. If it doesn't, we'll complain and
790 * drop the frequency refinement.
791 *
792 * Note! We cannot entirely trust enmUseTscDelta here because it's
793 * downgraded after each delta calculation.
794 */
795 if ( idCpu != pDevExt->idCpuInvarTscRefine
796 && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
797 {
798 uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine);
799 uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpu);
800 uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
801 ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
802 uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
803 ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
804 int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
805 int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
806 if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
807 {
808 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
809 {
810 /* cTscTicksElapsed = (uTsc - iStopTscDelta) - (pDevExt->uTscStartInvarTscRefine - iStartTscDelta); */
811 cTscTicksElapsed += iStartTscDelta - iStopTscDelta;
812 }
813 }
814 /*
815 * Allow 5 times the refinement period to elapse before we give up on the TSC delta
816 * calculations.
817 */
818 else if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * 5 * RT_NS_1SEC_64)
819 {
820 SUPR0Printf("vboxdrv: Failed to refine invariant TSC frequency because deltas are unavailable after %u (%u) seconds\n",
821 (uint32_t)(cNsElapsed / RT_NS_1SEC), GIP_TSC_REFINE_PERIOD_IN_SECS);
822 SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
823 iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
824 int rc = RTTimerStop(pTimer); AssertRC(rc);
825 return;
826 }
827 }
828
829 /*
830 * Calculate and update the CPU frequency variables in GIP.
831 *
832 * If there is a GIP user already and we've already refined the frequency
833 * a couple of times, don't update it as we want a stable frequency value
834 * for all VMs.
835 */
836 if ( pDevExt->cGipUsers == 0
837 || cNsElapsed < RT_NS_1SEC * 2)
838 {
839 supdrvGipInitSetCpuFreq(pGip, cNsElapsed, cTscTicksElapsed, (uint32_t)iTick);
840
841 /*
842 * Stop the timer once we've reached the defined refinement period.
843 */
844 if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * RT_NS_1SEC_64)
845 {
846 int rc = RTTimerStop(pTimer);
847 AssertRC(rc);
848 }
849 }
850 else
851 {
852 int rc = RTTimerStop(pTimer);
853 AssertRC(rc);
854 }
855}
856
857
858/**
859 * @callback_method_impl{FNRTPOWERNOTIFICATION}
860 */
861static DECLCALLBACK(void) supdrvGipPowerNotificationCallback(RTPOWEREVENT enmEvent, void *pvUser)
862{
863 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
864
865 /*
866 * If the TSC frequency refinement timer we need to cancel it so it doesn't screw
867 * up the frequency after a long suspend.
868 */
869 if ( enmEvent == RTPOWEREVENT_SUSPEND
870 || enmEvent == RTPOWEREVENT_RESUME)
871 ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
872}
873
874
875/**
876 * Start the TSC-frequency refinment timer for the invariant TSC GIP mode.
877 *
878 * We cannot use this in the synchronous and asynchronous tsc GIP modes because
879 * the CPU may change the TSC frequence between now and when the timer fires
880 * (supdrvInitAsyncRefineTscTimer).
881 *
882 * @param pDevExt Pointer to the device instance data.
883 * @param pGip Pointer to the GIP.
884 */
885static void supdrvGipInitStartTimerForRefiningInvariantTscFreq(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip)
886{
887 uint64_t u64NanoTS;
888 RTCCUINTREG fEFlags;
889 int rc;
890
891 /*
892 * Register a power management callback.
893 */
894 pDevExt->fInvTscRefinePowerEvent = false;
895 rc = RTPowerNotificationRegister(supdrvGipPowerNotificationCallback, pDevExt);
896 AssertRC(rc); /* ignore */
897
898 /*
899 * Record the TSC and NanoTS as the starting anchor point for refinement
900 * of the TSC. We try get as close to a clock tick as possible on systems
901 * which does not provide high resolution time.
902 */
903 u64NanoTS = RTTimeSystemNanoTS();
904 while (RTTimeSystemNanoTS() == u64NanoTS)
905 ASMNopPause();
906
907 fEFlags = ASMIntDisableFlags();
908 pDevExt->uTscStartInvarTscRefine = ASMReadTSC();
909 pDevExt->nsStartInvarTscRefine = RTTimeSystemNanoTS();
910 pDevExt->idCpuInvarTscRefine = RTMpCpuId();
911 ASMSetFlags(fEFlags);
912
913 /*
914 * Create a timer that runs on the same CPU so we won't have a depencency
915 * on the TSC-delta and can run in parallel to it. On systems that does not
916 * implement CPU specific timers we'll apply deltas in the timer callback,
917 * just like we do for CPUs going offline.
918 *
919 * The longer the refinement interval the better the accuracy, at least in
920 * theory. If it's too long though, ring-3 may already be starting its
921 * first VMs before we're done. On most systems we will be loading the
922 * support driver during boot and VMs won't be started for a while yet,
923 * it is really only a problem during development (especially with
924 * on-demand driver starting on windows).
925 *
926 * To avoid wasting time doing a long supdrvGipInitMeasureTscFreq() call
927 * to calculate the frequency during driver loading, the timer is set
928 * to fire after 200 ms the first time. It will then reschedule itself
929 * to fire every second until GIP_TSC_REFINE_PERIOD_IN_SECS has been
930 * reached or it notices that there is a user land client with GIP
931 * mapped (we want a stable frequency for all VMs).
932 */
933 rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC,
934 RTTIMER_FLAGS_CPU(RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine)),
935 supdrvInitRefineInvariantTscFreqTimer, pDevExt);
936 if (RT_SUCCESS(rc))
937 {
938 rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
939 if (RT_SUCCESS(rc))
940 return;
941 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
942 }
943
944 if (rc == VERR_CPU_OFFLINE || rc == VERR_NOT_SUPPORTED)
945 {
946 rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC, RTTIMER_FLAGS_CPU_ANY,
947 supdrvInitRefineInvariantTscFreqTimer, pDevExt);
948 if (RT_SUCCESS(rc))
949 {
950 rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
951 if (RT_SUCCESS(rc))
952 return;
953 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
954 }
955 }
956
957 pDevExt->pInvarTscRefineTimer = NULL;
958 OSDBGPRINT(("vboxdrv: Failed to create or start TSC frequency refinement timer: rc=%Rrc\n", rc));
959}
960
961
962/**
963 * @callback_method_impl{PFNRTMPWORKER,
964 * RTMpOnSpecific callback for reading TSC and time on the CPU we started
965 * the measurements on.}
966 */
967DECLCALLBACK(void) supdrvGipInitReadTscAndNanoTsOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
968{
969 RTCCUINTREG fEFlags = ASMIntDisableFlags();
970 uint64_t *puTscStop = (uint64_t *)pvUser1;
971 uint64_t *pnsStop = (uint64_t *)pvUser2;
972
973 *puTscStop = ASMReadTSC();
974 *pnsStop = RTTimeSystemNanoTS();
975
976 ASMSetFlags(fEFlags);
977}
978
979
/**
 * Measures the TSC frequency of the system.
 *
 * The TSC frequency can vary on systems which are not reported as invariant.
 * On such systems the object of this function is to find out what the nominal,
 * maximum TSC frequency under 'normal' CPU operation.
 *
 * Each attempt takes a (TSC, nanosecond timestamp) sample pair, waits a while,
 * takes a second pair and hands the two differences to
 * supdrvGipInitSetCpuFreq().  If the thread migrated to another CPU between
 * the two samples, the attempt is either accepted as is, corrected, retried
 * or redone via a cross call to the start CPU, depending on the GIP mode.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS when a measurement was made and passed on to
 *          supdrvGipInitSetCpuFreq().
 * @retval  VERR_INVALID_CPU_INDEX if the start or stop CPU could not be mapped
 *          to a GIP CPU entry (invariant mode, delta correction path).
 * @retval  VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED if all attempts were used
 *          up.  This does not happen for rough measurements, see the
 *          assertion at the end.
 *
 * @param   pDevExt     Pointer to the device instance (not referenced by the
 *                      code in this function).
 * @param   pGip        Pointer to the GIP.
 * @param   fRough      Set if we're doing the rough calculation that the
 *                      TSC measuring code needs, where accuracy isn't all
 *                      that important (too high is better than too low).
 *                      When clear we try for best accuracy that we can
 *                      achieve in reasonably short time.
 */
static int supdrvGipInitMeasureTscFreq(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, bool fRough)
{
    uint32_t nsTimerIncr = RTTimerGetSystemGranularity();
    /* Rough measurements get four attempts, accurate ones two. */
    int      cTriesLeft  = fRough ? 4 : 2;
    while (cTriesLeft-- > 0)
    {
        RTCCUINTREG fEFlags;
        uint64_t    nsStart;
        uint64_t    nsStop;
        uint64_t    uTscStart;
        uint64_t    uTscStop;
        RTCPUID     idCpuStart;
        RTCPUID     idCpuStop;

        /*
         * Synchronize with the host OS clock tick on systems without high
         * resolution time API (older Windows version for example).
         */
        nsStart = RTTimeSystemNanoTS();
        while (RTTimeSystemNanoTS() == nsStart)
            ASMNopPause();

        /*
         * Read the TSC and current time, noting which CPU we're on.
         * Interrupts are disabled so the three reads stay together.
         */
        fEFlags    = ASMIntDisableFlags();
        uTscStart  = ASMReadTSC();
        nsStart    = RTTimeSystemNanoTS();
        idCpuStart = RTMpCpuId();
        ASMSetFlags(fEFlags);

        /*
         * Delay for a while.
         */
        if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
        {
            /*
             * Sleep-wait since the TSC frequency is constant, it eases host load.
             * Shorter interval produces more variance in the frequency (esp. Windows).
             *
             * The delay is 16 ms (rough) or 200 ms rounded up to a whole number
             * of timer ticks, minus 100 us, converted to milliseconds.
             */
            uint64_t msElapsed = 0;
            uint64_t msDelay   = ( ((fRough ? 16 : 200) * RT_NS_1MS + nsTimerIncr - 1) / nsTimerIncr * nsTimerIncr - RT_NS_100US )
                               / RT_NS_1MS;
            /* Keep sleeping for the remainder until the full delay has elapsed. */
            do
            {
                RTThreadSleep((RTMSINTERVAL)(msDelay - msElapsed));
                nsStop    = RTTimeSystemNanoTS();
                msElapsed = (nsStop - nsStart) / RT_NS_1MS;
            } while (msElapsed < msDelay);

            /* Wait for the timestamp to change again before taking the stop sample. */
            while (RTTimeSystemNanoTS() == nsStop)
                ASMNopPause();
        }
        else
        {
            /*
             * Busy-wait keeping the frequency up.
             */
            do
            {
                ASMNopPause();
                nsStop = RTTimeSystemNanoTS();
            } while (nsStop - nsStart < RT_NS_100MS);
        }

        /*
         * Read the TSC and time again.
         */
        fEFlags   = ASMIntDisableFlags();
        uTscStop  = ASMReadTSC();
        nsStop    = RTTimeSystemNanoTS();
        idCpuStop = RTMpCpuId();
        ASMSetFlags(fEFlags);

        /*
         * If the CPU changes things get a bit complicated and what we
         * can get away with depends on the GIP mode / TSC reliability.
         */
        if (idCpuStop != idCpuStart)
        {
            bool fDoXCall = false;

            /*
             * Synchronous TSC mode: we're probably fine as it's unlikely
             * that we were rescheduled because of TSC throttling or power
             * management reasons, so just go ahead.
             */
            if (pGip->u32Mode == SUPGIPMODE_SYNC_TSC)
            {
                /* Probably ok, maybe we should retry once?. */
                Assert(pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_NOT_APPLICABLE);
            }
            /*
             * If we're just doing the rough measurement, do the cross call and
             * get on with things (we don't have deltas!).
             */
            else if (fRough)
                fDoXCall = true;
            /*
             * Invariant TSC mode: It doesn't matter if we have delta available
             * for both CPUs.  That is not something we can assume at this point.
             *
             * Note! We cannot necessarily trust enmUseTscDelta here because it's
             *       downgraded after each delta calculation and the delta
             *       calculations may not be complete yet.
             */
            else if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
            {
/** @todo This section of code is never reached atm, consider dropping it later on... */
                if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
                {
                    /* Map both CPU IDs to GIP CPU entries; UINT16_MAX / INT64_MAX mark "not available". */
                    uint32_t iStartCpuSet   = RTMpCpuIdToSetIndex(idCpuStart);
                    uint32_t iStopCpuSet    = RTMpCpuIdToSetIndex(idCpuStop);
                    uint16_t iStartGipCpu   = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                            ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
                    uint16_t iStopGipCpu    = iStopCpuSet  < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                            ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet]  : UINT16_MAX;
                    int64_t  iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
                    int64_t  iStopTscDelta  = iStopGipCpu  < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta  : INT64_MAX;
                    if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
                    {
                        /* Both deltas are known; apply them unless they are considered practically zero. */
                        if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
                        {
                            uTscStart -= iStartTscDelta;
                            uTscStop  -= iStopTscDelta;
                        }
                    }
                    /*
                     * Invalid CPU indexes are not caused by online/offline races, so
                     * we have to trigger driver load failure if that happens as GIP
                     * and IPRT assumptions are busted on this system.
                     */
                    else if (iStopGipCpu >= pGip->cCpus || iStartGipCpu >= pGip->cCpus)
                    {
                        SUPR0Printf("vboxdrv: Unexpected CPU index in supdrvGipInitMeasureTscFreq.\n");
                        SUPR0Printf("vboxdrv: start: %u, %u, %#llx  stop: %u, %u, %#llx\n",
                                    iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
                        return VERR_INVALID_CPU_INDEX;
                    }
                    /*
                     * No valid deltas.  We retry, if we're on our last retry
                     * we do the cross call instead just to get a result.  The
                     * frequency will be refined in a few seconds anyways.
                     */
                    else if (cTriesLeft > 0)
                        continue;
                    else
                        fDoXCall = true;
                }
            }
            /*
             * Asynchronous TSC mode: This is bad as the reason we usually
             * use this mode is to deal with variable TSC frequencies and
             * deltas.  So, we need to get the TSC from the same CPU as
             * started it, we also need to keep that CPU busy.  So, retry
             * and fall back to the cross call on the last attempt.
             */
            else
            {
                Assert(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC);
                if (cTriesLeft > 0)
                    continue;
                fDoXCall = true;
            }

            if (fDoXCall)
            {
                /*
                 * Try read the TSC and timestamp on the start CPU.
                 *
                 * On failure we retry, except for a rough measurement on its
                 * last attempt, which carries on with the samples taken above.
                 */
                int rc = RTMpOnSpecific(idCpuStart, supdrvGipInitReadTscAndNanoTsOnCpu, &uTscStop, &nsStop);
                if (RT_FAILURE(rc) && (!fRough || cTriesLeft > 0))
                    continue;
            }
        }

        /*
         * Calculate the TSC frequency and update it (shared with the refinement timer).
         */
        supdrvGipInitSetCpuFreq(pGip, nsStop - nsStart, uTscStop - uTscStart, 0);
        return VINF_SUCCESS;
    }

    /* Only accurate measurements can run out of attempts; the rough path never retries on its last try. */
    Assert(!fRough);
    return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
}
1182
1183
1184/**
1185 * Finds our (@a idCpu) entry, or allocates a new one if not found.
1186 *
1187 * @returns Index of the CPU in the cache set.
1188 * @param pGip The GIP.
1189 * @param idCpu The CPU ID.
1190 */
1191static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
1192{
1193 uint32_t i, cTries;
1194
1195 /*
1196 * ASSUMES that CPU IDs are constant.
1197 */
1198 for (i = 0; i < pGip->cCpus; i++)
1199 if (pGip->aCPUs[i].idCpu == idCpu)
1200 return i;
1201
1202 cTries = 0;
1203 do
1204 {
1205 for (i = 0; i < pGip->cCpus; i++)
1206 {
1207 bool fRc;
1208 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
1209 if (fRc)
1210 return i;
1211 }
1212 } while (cTries++ < 32);
1213 AssertReleaseFailed();
1214 return i - 1;
1215}
1216
1217
1218/**
1219 * The calling CPU should be accounted as online, update GIP accordingly.
1220 *
1221 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
1222 *
1223 * @param pDevExt The device extension.
1224 * @param idCpu The CPU ID.
1225 */
1226static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1227{
1228 int iCpuSet = 0;
1229 uint16_t idApic = UINT16_MAX;
1230 uint32_t i = 0;
1231 uint64_t u64NanoTS = 0;
1232 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1233
1234 AssertPtrReturnVoid(pGip);
1235 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1236 AssertRelease(idCpu == RTMpCpuId());
1237 Assert(pGip->cPossibleCpus == RTMpGetCount());
1238
1239 /*
1240 * Do this behind a spinlock with interrupts disabled as this can fire
1241 * on all CPUs simultaneously, see @bugref{6110}.
1242 */
1243 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1244
1245 /*
1246 * Update the globals.
1247 */
1248 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
1249 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
1250 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1251 if (iCpuSet >= 0)
1252 {
1253 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1254 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
1255 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
1256 }
1257
1258 /*
1259 * Update the entry.
1260 */
1261 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
1262 i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1263
1264 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, pGip->u64CpuHz);
1265
1266 idApic = ASMGetApicId();
1267 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
1268 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
1269 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
1270
1271 /*
1272 * Update the APIC ID and CPU set index mappings.
1273 */
1274 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
1275 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
1276
1277 /* Add this CPU to this set of CPUs we need to calculate the TSC-delta for. */
1278 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, RTMpCpuIdToSetIndex(idCpu));
1279
1280 /* Update the Mp online/offline counter. */
1281 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1282
1283 /* Commit it. */
1284 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
1285
1286 RTSpinlockRelease(pDevExt->hGipSpinlock);
1287}
1288
1289
1290/**
1291 * RTMpOnSpecific callback wrapper for supdrvGipMpEventOnlineOrInitOnCpu().
1292 *
1293 * @param idCpu The CPU ID we are running on.
1294 * @param pvUser1 Opaque pointer to the device instance data.
1295 * @param pvUser2 Not used.
1296 */
1297static DECLCALLBACK(void) supdrvGipMpEventOnlineCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1298{
1299 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
1300 NOREF(pvUser2);
1301 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1302}
1303
1304
1305/**
1306 * The CPU should be accounted as offline, update the GIP accordingly.
1307 *
1308 * This is used by supdrvGipMpEvent.
1309 *
1310 * @param pDevExt The device extension.
1311 * @param idCpu The CPU ID.
1312 */
1313static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1314{
1315 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1316 int iCpuSet;
1317 unsigned i;
1318
1319 AssertPtrReturnVoid(pGip);
1320 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1321
1322 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1323 AssertReturnVoid(iCpuSet >= 0);
1324
1325 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
1326 AssertReturnVoid(i < pGip->cCpus);
1327 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
1328
1329 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1330 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
1331
1332 /* Update the Mp online/offline counter. */
1333 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1334
1335 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1336 {
1337 /* Reset the TSC delta, we will recalculate it lazily. */
1338 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
1339 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
1340 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
1341 }
1342
1343 /* Commit it. */
1344 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
1345
1346 RTSpinlockRelease(pDevExt->hGipSpinlock);
1347}
1348
1349
1350/**
1351 * Multiprocessor event notification callback.
1352 *
1353 * This is used to make sure that the GIP master gets passed on to
1354 * another CPU. It also updates the associated CPU data.
1355 *
1356 * @param enmEvent The event.
1357 * @param idCpu The cpu it applies to.
1358 * @param pvUser Pointer to the device extension.
1359 */
1360static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
1361{
1362 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1363 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1364
1365 if (pGip)
1366 {
1367 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
1368 switch (enmEvent)
1369 {
1370 case RTMPEVENT_ONLINE:
1371 {
1372 RTThreadPreemptDisable(&PreemptState);
1373 if (idCpu == RTMpCpuId())
1374 {
1375 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1376 RTThreadPreemptRestore(&PreemptState);
1377 }
1378 else
1379 {
1380 RTThreadPreemptRestore(&PreemptState);
1381 RTMpOnSpecific(idCpu, supdrvGipMpEventOnlineCallback, pDevExt, NULL /* pvUser2 */);
1382 }
1383
1384 /*
1385 * Recompute TSC-delta for the newly online'd CPU.
1386 */
1387 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1388 {
1389#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1390 supdrvTscDeltaThreadStartMeasurement(pDevExt);
1391#else
1392 uint32_t iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1393 supdrvMeasureTscDeltaOne(pDevExt, iCpu);
1394#endif
1395 }
1396 break;
1397 }
1398
1399 case RTMPEVENT_OFFLINE:
1400 supdrvGipMpEventOffline(pDevExt, idCpu);
1401 break;
1402 }
1403 }
1404
1405 /*
1406 * Make sure there is a master GIP.
1407 */
1408 if (enmEvent == RTMPEVENT_OFFLINE)
1409 {
1410 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
1411 if (idGipMaster == idCpu)
1412 {
1413 /*
1414 * The GIP master is going offline, find a new one.
1415 */
1416 bool fIgnored;
1417 unsigned i;
1418 RTCPUID idNewGipMaster = NIL_RTCPUID;
1419 RTCPUSET OnlineCpus;
1420 RTMpGetOnlineSet(&OnlineCpus);
1421
1422 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
1423 if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
1424 {
1425 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
1426 if (idCurCpu != idGipMaster)
1427 {
1428 idNewGipMaster = idCurCpu;
1429 break;
1430 }
1431 }
1432
1433 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
1434 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
1435 NOREF(fIgnored);
1436 }
1437 }
1438}
1439
1440
1441/**
1442 * On CPU initialization callback for RTMpOnAll.
1443 *
1444 * @param idCpu The CPU ID.
1445 * @param pvUser1 The device extension.
1446 * @param pvUser2 The GIP.
1447 */
1448static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1449{
1450 /* This is good enough, even though it will update some of the globals a
1451 bit to much. */
1452 supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
1453}
1454
1455
1456/**
1457 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
1458 *
1459 * @param idCpu Ignored.
1460 * @param pvUser1 Where to put the TSC.
1461 * @param pvUser2 Ignored.
1462 */
1463static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1464{
1465 Assert(RTMpCpuIdToSetIndex(idCpu) == (intptr_t)pvUser2);
1466 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
1467}
1468
1469
1470/**
1471 * Determine if Async GIP mode is required because of TSC drift.
1472 *
1473 * When using the default/normal timer code it is essential that the time stamp counter
1474 * (TSC) runs never backwards, that is, a read operation to the counter should return
1475 * a bigger value than any previous read operation. This is guaranteed by the latest
1476 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
1477 * case we have to choose the asynchronous timer mode.
1478 *
1479 * @param poffMin Pointer to the determined difference between different
1480 * cores (optional, can be NULL).
1481 * @return false if the time stamp counters appear to be synchronized, true otherwise.
1482 */
1483static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
1484{
1485 /*
1486 * Just iterate all the cpus 8 times and make sure that the TSC is
1487 * ever increasing. We don't bother taking TSC rollover into account.
1488 */
1489 int iEndCpu = RTMpGetArraySize();
1490 int iCpu;
1491 int cLoops = 8;
1492 bool fAsync = false;
1493 int rc = VINF_SUCCESS;
1494 uint64_t offMax = 0;
1495 uint64_t offMin = ~(uint64_t)0;
1496 uint64_t PrevTsc = ASMReadTSC();
1497
1498 while (cLoops-- > 0)
1499 {
1500 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
1501 {
1502 uint64_t CurTsc;
1503 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker,
1504 &CurTsc, (void *)(uintptr_t)iCpu);
1505 if (RT_SUCCESS(rc))
1506 {
1507 if (CurTsc <= PrevTsc)
1508 {
1509 fAsync = true;
1510 offMin = offMax = PrevTsc - CurTsc;
1511 Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
1512 iCpu, cLoops, CurTsc, PrevTsc));
1513 break;
1514 }
1515
1516 /* Gather statistics (except the first time). */
1517 if (iCpu != 0 || cLoops != 7)
1518 {
1519 uint64_t off = CurTsc - PrevTsc;
1520 if (off < offMin)
1521 offMin = off;
1522 if (off > offMax)
1523 offMax = off;
1524 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
1525 }
1526
1527 /* Next */
1528 PrevTsc = CurTsc;
1529 }
1530 else if (rc == VERR_NOT_SUPPORTED)
1531 break;
1532 else
1533 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
1534 }
1535
1536 /* broke out of the loop. */
1537 if (iCpu < iEndCpu)
1538 break;
1539 }
1540
1541 if (poffMin)
1542 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
1543 Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
1544 fAsync, iEndCpu, rc, offMin, offMax));
1545#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
1546 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
1547#endif
1548 return fAsync;
1549}
1550
1551
1552/**
1553 * supdrvGipInit() worker that determines the GIP TSC mode.
1554 *
1555 * @returns The most suitable TSC mode.
1556 * @param pDevExt Pointer to the device instance data.
1557 */
1558static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
1559{
1560 uint64_t u64DiffCoresIgnored;
1561 uint32_t uEAX, uEBX, uECX, uEDX;
1562
1563 /*
1564 * Establish whether the CPU advertises TSC as invariant, we need that in
1565 * a couple of places below.
1566 */
1567 bool fInvariantTsc = false;
1568 if (ASMHasCpuId())
1569 {
1570 uEAX = ASMCpuId_EAX(0x80000000);
1571 if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
1572 {
1573 uEDX = ASMCpuId_EDX(0x80000007);
1574 if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
1575 fInvariantTsc = true;
1576 }
1577 }
1578
1579 /*
1580 * On single CPU systems, we don't need to consider ASYNC mode.
1581 */
1582 if (RTMpGetCount() <= 1)
1583 return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
1584
1585 /*
1586 * Allow the user and/or OS specific bits to force async mode.
1587 */
1588 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
1589 return SUPGIPMODE_ASYNC_TSC;
1590
1591 /*
1592 * Use invariant mode if the CPU says TSC is invariant.
1593 */
1594 if (fInvariantTsc)
1595 return SUPGIPMODE_INVARIANT_TSC;
1596
1597 /*
1598 * TSC is not invariant and we're on SMP, this presents two problems:
1599 *
1600 * (1) There might be a skew between the CPU, so that cpu0
1601 * returns a TSC that is slightly different from cpu1.
1602 * This screw may be due to (2), bad TSC initialization
1603 * or slightly different TSC rates.
1604 *
1605 * (2) Power management (and other things) may cause the TSC
1606 * to run at a non-constant speed, and cause the speed
1607 * to be different on the cpus. This will result in (1).
1608 *
1609 * If any of the above is detected, we will have to use ASYNC mode.
1610 */
1611 /* (1). Try check for current differences between the cpus. */
1612 if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
1613 return SUPGIPMODE_ASYNC_TSC;
1614
1615 /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
1616 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1617 if ( ASMIsValidStdRange(uEAX)
1618 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
1619 {
1620 /* Check for APM support. */
1621 uEAX = ASMCpuId_EAX(0x80000000);
1622 if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
1623 {
1624 uEDX = ASMCpuId_EDX(0x80000007);
1625 if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
1626 return SUPGIPMODE_ASYNC_TSC;
1627 }
1628 }
1629
1630 return SUPGIPMODE_SYNC_TSC;
1631}
1632
1633
1634/**
1635 * Initializes per-CPU GIP information.
1636 *
1637 * @param pGip Pointer to the GIP.
1638 * @param pCpu Pointer to which GIP CPU to initalize.
1639 * @param u64NanoTS The current nanosecond timestamp.
1640 * @param uCpuHz The CPU frequency to set, 0 if the caller doesn't know.
1641 */
1642static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz)
1643{
1644 pCpu->u32TransactionId = 2;
1645 pCpu->u64NanoTS = u64NanoTS;
1646 pCpu->u64TSC = ASMReadTSC();
1647 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
1648 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
1649
1650 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
1651 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
1652 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
1653 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
1654
1655 /*
1656 * The first time we're called, we don't have a CPU frequency handy,
1657 * so pretend it's a 4 GHz CPU. On CPUs that are online, we'll get
1658 * called again and at that point we have a more plausible CPU frequency
1659 * value handy. The frequency history will also be adjusted again on
1660 * the 2nd timer callout (maybe we can skip that now?).
1661 */
1662 if (!uCpuHz)
1663 {
1664 pCpu->u64CpuHz = _4G - 1;
1665 pCpu->u32UpdateIntervalTSC = (uint32_t)((_4G - 1) / pGip->u32UpdateHz);
1666 }
1667 else
1668 {
1669 pCpu->u64CpuHz = uCpuHz;
1670 pCpu->u32UpdateIntervalTSC = (uint32_t)(uCpuHz / pGip->u32UpdateHz);
1671 }
1672 pCpu->au32TSCHistory[0]
1673 = pCpu->au32TSCHistory[1]
1674 = pCpu->au32TSCHistory[2]
1675 = pCpu->au32TSCHistory[3]
1676 = pCpu->au32TSCHistory[4]
1677 = pCpu->au32TSCHistory[5]
1678 = pCpu->au32TSCHistory[6]
1679 = pCpu->au32TSCHistory[7]
1680 = pCpu->u32UpdateIntervalTSC;
1681}
1682
1683
1684/**
1685 * Initializes the GIP data.
1686 *
1687 * @param pDevExt Pointer to the device instance data.
1688 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1689 * @param HCPhys The physical address of the GIP.
1690 * @param u64NanoTS The current nanosecond timestamp.
1691 * @param uUpdateHz The update frequency.
1692 * @param uUpdateIntervalNS The update interval in nanoseconds.
1693 * @param cCpus The CPU count.
1694 */
1695static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
1696 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
1697{
1698 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
1699 unsigned i;
1700#ifdef DEBUG_DARWIN_GIP
1701 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1702#else
1703 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1704#endif
1705
1706 /*
1707 * Initialize the structure.
1708 */
1709 memset(pGip, 0, cbGip);
1710
1711 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
1712 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
1713 pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
1714 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
1715 /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
1716 pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
1717 ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
1718 else
1719 pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
1720 pGip->cCpus = (uint16_t)cCpus;
1721 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
1722 pGip->u32UpdateHz = uUpdateHz;
1723 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
1724 pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
1725 RTCpuSetEmpty(&pGip->OnlineCpuSet);
1726 RTCpuSetEmpty(&pGip->PresentCpuSet);
1727 RTMpGetSet(&pGip->PossibleCpuSet);
1728 pGip->cOnlineCpus = RTMpGetOnlineCount();
1729 pGip->cPresentCpus = RTMpGetPresentCount();
1730 pGip->cPossibleCpus = RTMpGetCount();
1731 pGip->idCpuMax = RTMpGetMaxCpuId();
1732 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
1733 pGip->aiCpuFromApicId[i] = UINT16_MAX;
1734 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
1735 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
1736 for (i = 0; i < cCpus; i++)
1737 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, 0 /*uCpuHz*/);
1738
1739 /*
1740 * Link it to the device extension.
1741 */
1742 pDevExt->pGip = pGip;
1743 pDevExt->HCPhysGip = HCPhys;
1744 pDevExt->cGipUsers = 0;
1745}
1746
1747
1748/**
1749 * Creates the GIP.
1750 *
1751 * @returns VBox status code.
1752 * @param pDevExt Instance data. GIP stuff may be updated.
1753 */
1754int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
1755{
1756 PSUPGLOBALINFOPAGE pGip;
1757 RTHCPHYS HCPhysGip;
1758 uint32_t u32SystemResolution;
1759 uint32_t u32Interval;
1760 uint32_t u32MinInterval;
1761 uint32_t uMod;
1762 unsigned cCpus;
1763 int rc;
1764
1765 LogFlow(("supdrvGipCreate:\n"));
1766
1767 /*
1768 * Assert order.
1769 */
1770 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
1771 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
1772 Assert(!pDevExt->pGipTimer);
1773#ifdef SUPDRV_USE_MUTEX_FOR_GIP
1774 Assert(pDevExt->mtxGip != NIL_RTSEMMUTEX);
1775 Assert(pDevExt->mtxTscDelta != NIL_RTSEMMUTEX);
1776#else
1777 Assert(pDevExt->mtxGip != NIL_RTSEMFASTMUTEX);
1778 Assert(pDevExt->mtxTscDelta != NIL_RTSEMFASTMUTEX);
1779#endif
1780
1781 /*
1782 * Check the CPU count.
1783 */
1784 cCpus = RTMpGetArraySize();
1785 if ( cCpus > RTCPUSET_MAX_CPUS
1786 || cCpus > 256 /* ApicId is used for the mappings */)
1787 {
1788 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
1789 return VERR_TOO_MANY_CPUS;
1790 }
1791
1792 /*
1793 * Allocate a contiguous set of pages with a default kernel mapping.
1794 */
1795 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
1796 if (RT_FAILURE(rc))
1797 {
1798 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
1799 return rc;
1800 }
1801 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
1802 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
1803
1804 /*
1805 * Find a reasonable update interval and initialize the structure.
1806 */
1807 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
1808 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
1809 * See @bugref{6710}. */
1810 u32MinInterval = RT_NS_10MS;
1811 u32SystemResolution = RTTimerGetSystemGranularity();
1812 u32Interval = u32MinInterval;
1813 uMod = u32MinInterval % u32SystemResolution;
1814 if (uMod)
1815 u32Interval += u32SystemResolution - uMod;
1816
1817 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
1818
1819 /*
1820 * Important sanity check...
1821 */
1822 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
1823 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
1824 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
1825 {
1826 /* Basically, invariant Windows boxes, should never be detected as async (i.e. TSC-deltas should be 0). */
1827 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
1828 return VERR_INTERNAL_ERROR_2;
1829 }
1830
1831 /*
1832 * Do the TSC frequency measurements.
1833 *
1834 * If we're in invariant TSC mode, just to a quick preliminary measurement
1835 * that the TSC-delta measurement code can use to yield cross calls.
1836 *
1837 * If we're in any of the other two modes, neither which require MP init,
1838 * notifications or deltas for the job, do the full measurement now so
1839 * that supdrvGipInitOnCpu() can populate the TSC interval and history
1840 * array with more reasonable values.
1841 */
1842 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1843 {
1844 rc = supdrvGipInitMeasureTscFreq(pDevExt, pGip, true /*fRough*/); /* cannot fail */
1845 supdrvGipInitStartTimerForRefiningInvariantTscFreq(pDevExt, pGip);
1846 }
1847 else
1848 rc = supdrvGipInitMeasureTscFreq(pDevExt, pGip, false /*fRough*/);
1849 if (RT_SUCCESS(rc))
1850 {
1851 /*
1852 * Start TSC-delta measurement thread before we start getting MP
1853 * events that will try kick it into action (includes the
1854 * RTMpOnAll/supdrvGipInitOnCpu call below).
1855 */
1856 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
1857 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
1858#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1859 if ( pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
1860 && pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1861 rc = supdrvTscDeltaThreadInit(pDevExt);
1862#endif
1863 if (RT_SUCCESS(rc))
1864 {
1865 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
1866 if (RT_SUCCESS(rc))
1867 {
1868 /*
1869 * Do GIP initialization on all online CPUs. Wake up the
1870 * TSC-delta thread afterwards.
1871 */
1872 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
1873 if (RT_SUCCESS(rc))
1874 {
1875#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1876 supdrvTscDeltaThreadStartMeasurement(pDevExt);
1877#else
1878 uint16_t iCpu;
1879 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1880 {
1881 /*
1882 * Measure the TSC deltas now that we have MP notifications.
1883 */
1884 int cTries = 5;
1885 do
1886 {
1887 rc = supdrvMeasureInitialTscDeltas(pDevExt);
1888 if ( rc != VERR_TRY_AGAIN
1889 && rc != VERR_CPU_OFFLINE)
1890 break;
1891 } while (--cTries > 0);
1892 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1893 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
1894 }
1895 else
1896 {
1897 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1898 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
1899 }
1900 if (RT_SUCCESS(rc))
1901#endif
1902 {
1903 /*
1904 * Create the timer.
1905 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
1906 */
1907 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
1908 {
1909 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
1910 supdrvGipAsyncTimer, pDevExt);
1911 if (rc == VERR_NOT_SUPPORTED)
1912 {
1913 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
1914 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
1915 }
1916 }
1917 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1918 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
1919 supdrvGipSyncAndInvariantTimer, pDevExt);
1920 if (RT_SUCCESS(rc))
1921 {
1922 /*
1923 * We're good.
1924 */
1925 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
1926 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
1927
1928 g_pSUPGlobalInfoPage = pGip;
1929 return VINF_SUCCESS;
1930 }
1931
1932 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
1933 Assert(!pDevExt->pGipTimer);
1934 }
1935 }
1936 else
1937 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
1938 }
1939 else
1940 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
1941 }
1942 else
1943 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
1944 }
1945 else
1946 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureInitialTscDeltas failed. rc=%Rrc\n", rc));
1947
1948 /* Releases timer frequency increase too. */
1949 supdrvGipDestroy(pDevExt);
1950 return rc;
1951}
1952
1953
1954/**
1955 * Invalidates the GIP data upon termination.
1956 *
1957 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1958 */
1959static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
1960{
1961 unsigned i;
1962 pGip->u32Magic = 0;
1963 for (i = 0; i < pGip->cCpus; i++)
1964 {
1965 pGip->aCPUs[i].u64NanoTS = 0;
1966 pGip->aCPUs[i].u64TSC = 0;
1967 pGip->aCPUs[i].iTSCHistoryHead = 0;
1968 pGip->aCPUs[i].u64TSCSample = 0;
1969 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
1970 }
1971}
1972
1973
/**
 * Terminates the GIP.
 *
 * Teardown happens in a fixed order: MP notifications are deregistered first,
 * then (in SUPDRV_USE_TSC_DELTA_THREAD builds) the TSC-delta measurement
 * thread is terminated, the invariant TSC refinement timer is destroyed, the
 * GIP data is invalidated and unpublished, the GIP update timer is destroyed,
 * the GIP memory object is freed, and finally any pending request for a
 * higher system timer frequency is released.
 *
 * The two timers, the GIP pointer and the memory object are only released
 * when they are set, and each one is reset afterwards.
 *
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
{
    int rc;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
                pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
                pDevExt->pGipTimer, pDevExt->GipMemObj));
#endif

    /*
     * Stop receiving MP notifications before tearing anything else down.
     */
    RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * Terminate the TSC-delta measurement thread and resources.
     */
    supdrvTscDeltaTerm(pDevExt);
#endif

    /*
     * Destroy the TSC-refinement timer (the status code is not checked here).
     */
    if (pDevExt->pInvarTscRefineTimer)
    {
        RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
        pDevExt->pInvarTscRefineTimer = NULL;
    }

    /*
     * Invalidate the GIP data and drop both references to it: the one in the
     * device extension and the global g_pSUPGlobalInfoPage.
     */
    if (pDevExt->pGip)
    {
        supdrvGipTerm(pDevExt->pGip);
        pDevExt->pGip = NULL;
    }
    g_pSUPGlobalInfoPage = NULL;

    /*
     * Destroy the GIP update timer and then free the GIP memory object,
     * including its mappings.
     */
    if (pDevExt->pGipTimer)
    {
        rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
        pDevExt->pGipTimer = NULL;
    }

    if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
        pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
    }

    /*
     * Finally, make sure we've released the system timer resolution request
     * if one actually succeeded and is still pending.
     */
    supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
}
2040
2041
2042
2043
2044/*
2045 *
2046 *
2047 * GIP Update Timer Related Code
2048 * GIP Update Timer Related Code
2049 * GIP Update Timer Related Code
2050 *
2051 *
2052 */
2053
2054
2055/**
2056 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
2057 * updates all the per cpu data except the transaction id.
2058 *
2059 * @param pDevExt The device extension.
2060 * @param pGipCpu Pointer to the per cpu data.
2061 * @param u64NanoTS The current time stamp.
2062 * @param u64TSC The current TSC.
2063 * @param iTick The current timer tick.
2064 *
2065 * @remarks Can be called with interrupts disabled!
2066 */
2067static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
2068{
2069 uint64_t u64TSCDelta;
2070 uint32_t u32UpdateIntervalTSC;
2071 uint32_t u32UpdateIntervalTSCSlack;
2072 unsigned iTSCHistoryHead;
2073 uint64_t u64CpuHz;
2074 uint32_t u32TransactionId;
2075
2076 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2077 AssertPtrReturnVoid(pGip);
2078
2079 /* Delta between this and the previous update. */
2080 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
2081
2082 /*
2083 * Update the NanoTS.
2084 */
2085 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
2086
2087 /*
2088 * Calc TSC delta.
2089 */
2090 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
2091 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
2092
2093 /*
2094 * We don't need to keep realculating the frequency when it's invariant, so
2095 * the remainder of this function is only for the sync and async TSC modes.
2096 */
2097 if (pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC)
2098 {
2099 if (u64TSCDelta >> 32)
2100 {
2101 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
2102 pGipCpu->cErrors++;
2103 }
2104
2105 /*
2106 * On the 2nd and 3rd callout, reset the history with the current TSC
2107 * interval since the values entered by supdrvGipInit are totally off.
2108 * The interval on the 1st callout completely unreliable, the 2nd is a bit
2109 * better, while the 3rd should be most reliable.
2110 */
2111 /** @todo Could we drop this now that we initializes the history
2112 * with nominal TSC frequency values? */
2113 u32TransactionId = pGipCpu->u32TransactionId;
2114 if (RT_UNLIKELY( ( u32TransactionId == 5
2115 || u32TransactionId == 7)
2116 && ( iTick == 2
2117 || iTick == 3) ))
2118 {
2119 unsigned i;
2120 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
2121 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
2122 }
2123
2124 /*
2125 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
2126 * Wait until we have at least one full history since the above history reset. The
2127 * assumption is that the majority of the previous history values will be tolerable.
2128 * See @bugref{6710} comment #67.
2129 */
2130 /** @todo Could we drop the fuding there now that we initializes the history
2131 * with nominal TSC frequency values? */
2132 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
2133 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2134 {
2135 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
2136 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
2137 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
2138 {
2139 uint32_t u32;
2140 u32 = pGipCpu->au32TSCHistory[0];
2141 u32 += pGipCpu->au32TSCHistory[1];
2142 u32 += pGipCpu->au32TSCHistory[2];
2143 u32 += pGipCpu->au32TSCHistory[3];
2144 u32 >>= 2;
2145 u64TSCDelta = pGipCpu->au32TSCHistory[4];
2146 u64TSCDelta += pGipCpu->au32TSCHistory[5];
2147 u64TSCDelta += pGipCpu->au32TSCHistory[6];
2148 u64TSCDelta += pGipCpu->au32TSCHistory[7];
2149 u64TSCDelta >>= 2;
2150 u64TSCDelta += u32;
2151 u64TSCDelta >>= 1;
2152 }
2153 }
2154
2155 /*
2156 * TSC History.
2157 */
2158 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
2159 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
2160 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
2161 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
2162
2163 /*
2164 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
2165 *
2166 * On Windows, we have an occasional (but recurring) sour value that messed up
2167 * the history but taking only 1 interval reduces the precision overall.
2168 */
2169 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
2170 || pGip->u32UpdateHz >= 1000)
2171 {
2172 uint32_t u32;
2173 u32 = pGipCpu->au32TSCHistory[0];
2174 u32 += pGipCpu->au32TSCHistory[1];
2175 u32 += pGipCpu->au32TSCHistory[2];
2176 u32 += pGipCpu->au32TSCHistory[3];
2177 u32 >>= 2;
2178 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
2179 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
2180 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
2181 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
2182 u32UpdateIntervalTSC >>= 2;
2183 u32UpdateIntervalTSC += u32;
2184 u32UpdateIntervalTSC >>= 1;
2185
2186 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
2187 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
2188 }
2189 else if (pGip->u32UpdateHz >= 90)
2190 {
2191 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2192 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
2193 u32UpdateIntervalTSC >>= 1;
2194
2195 /* value chosen on a 2GHz thinkpad running windows */
2196 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
2197 }
2198 else
2199 {
2200 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2201
2202 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
2203 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
2204 }
2205 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
2206
2207 /*
2208 * CpuHz.
2209 */
2210 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
2211 u64CpuHz /= pGip->u32UpdateIntervalNS;
2212 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
2213 }
2214}
2215
2216
2217/**
2218 * Updates the GIP.
2219 *
2220 * @param pDevExt The device extension.
2221 * @param u64NanoTS The current nanosecond timesamp.
2222 * @param u64TSC The current TSC timesamp.
2223 * @param idCpu The CPU ID.
2224 * @param iTick The current timer tick.
2225 *
2226 * @remarks Can be called with interrupts disabled!
2227 */
2228static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
2229{
2230 /*
2231 * Determine the relevant CPU data.
2232 */
2233 PSUPGIPCPU pGipCpu;
2234 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2235 AssertPtrReturnVoid(pGip);
2236
2237 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2238 pGipCpu = &pGip->aCPUs[0];
2239 else
2240 {
2241 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
2242 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
2243 return;
2244 pGipCpu = &pGip->aCPUs[iCpu];
2245 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
2246 return;
2247 }
2248
2249 /*
2250 * Start update transaction.
2251 */
2252 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2253 {
2254 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
2255 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2256 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2257 pGipCpu->cErrors++;
2258 return;
2259 }
2260
2261 /*
2262 * Recalc the update frequency every 0x800th time.
2263 */
2264 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariants hosts. */
2265 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
2266 {
2267 if (pGip->u64NanoTSLastUpdateHz)
2268 {
2269#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
2270 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
2271 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
2272 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
2273 {
2274 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
2275 * calculation on non-invariant hosts if it changes the history decision
2276 * taken in supdrvGipDoUpdateCpu(). */
2277 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
2278 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
2279 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
2280 }
2281#endif
2282 }
2283 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
2284 }
2285
2286 /*
2287 * Update the data.
2288 */
2289 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2290
2291 /*
2292 * Complete transaction.
2293 */
2294 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2295}
2296
2297
2298/**
2299 * Updates the per cpu GIP data for the calling cpu.
2300 *
2301 * @param pDevExt The device extension.
2302 * @param u64NanoTS The current nanosecond timesamp.
2303 * @param u64TSC The current TSC timesamp.
2304 * @param idCpu The CPU ID.
2305 * @param idApic The APIC id for the CPU index.
2306 * @param iTick The current timer tick.
2307 *
2308 * @remarks Can be called with interrupts disabled!
2309 */
2310static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
2311 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
2312{
2313 uint32_t iCpu;
2314 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2315
2316 /*
2317 * Avoid a potential race when a CPU online notification doesn't fire on
2318 * the onlined CPU but the tick creeps in before the event notification is
2319 * run.
2320 */
2321 if (RT_UNLIKELY(iTick == 1))
2322 {
2323 iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
2324 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
2325 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
2326 }
2327
2328 iCpu = pGip->aiCpuFromApicId[idApic];
2329 if (RT_LIKELY(iCpu < pGip->cCpus))
2330 {
2331 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
2332 if (pGipCpu->idCpu == idCpu)
2333 {
2334 /*
2335 * Start update transaction.
2336 */
2337 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2338 {
2339 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2340 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2341 pGipCpu->cErrors++;
2342 return;
2343 }
2344
2345 /*
2346 * Update the data.
2347 */
2348 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2349
2350 /*
2351 * Complete transaction.
2352 */
2353 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2354 }
2355 }
2356}
2357
2358
2359/**
2360 * Timer callback function for the sync and invariant GIP modes.
2361 *
2362 * @param pTimer The timer.
2363 * @param pvUser Opaque pointer to the device extension.
2364 * @param iTick The timer tick.
2365 */
2366static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2367{
2368 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2369 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2370 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2371 uint64_t u64TSC = ASMReadTSC();
2372 uint64_t u64NanoTS = RTTimeSystemNanoTS();
2373
2374 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
2375 {
2376 /*
2377 * The calculations in supdrvGipUpdate() is somewhat timing sensitive,
2378 * missing timer ticks is not an option for GIP because the GIP users
2379 * will end up incrementing the time in 1ns per time getter call until
2380 * there is a complete timer update. So, if the delta has yet to be
2381 * calculated, we just pretend it is zero for now (the GIP users
2382 * probably won't have it for a wee while either and will do the same).
2383 *
2384 * We could maybe on some platforms try cross calling a CPU with a
2385 * working delta here, but it's not worth the hassle since the
2386 * likelyhood of this happening is really low. On Windows, Linux, and
2387 * Solaris timers fire on the CPU they were registered/started on.
2388 * Darwin timers doesn't necessarily (they are high priority threads).
2389 */
2390 uint32_t iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
2391 uint16_t iGipCpu = RT_LIKELY(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
2392 ? pGip->aiCpuFromCpuSetIdx[iCpuSet] : UINT16_MAX;
2393 Assert(!ASMIntAreEnabled());
2394 if (RT_LIKELY(iGipCpu < pGip->cCpus))
2395 {
2396 int64_t iTscDelta = pGip->aCPUs[iGipCpu].i64TSCDelta;
2397 if (iTscDelta != INT64_MAX)
2398 u64TSC -= iTscDelta;
2399 }
2400 }
2401
2402 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
2403
2404 ASMSetFlags(fOldFlags);
2405}
2406
2407
2408/**
2409 * Timer callback function for async GIP mode.
2410 * @param pTimer The timer.
2411 * @param pvUser Opaque pointer to the device extension.
2412 * @param iTick The timer tick.
2413 */
2414static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2415{
2416 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2417 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2418 RTCPUID idCpu = RTMpCpuId();
2419 uint64_t u64TSC = ASMReadTSC();
2420 uint64_t NanoTS = RTTimeSystemNanoTS();
2421
2422 /** @todo reset the transaction number and whatnot when iTick == 1. */
2423 if (pDevExt->idGipMaster == idCpu)
2424 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
2425 else
2426 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
2427
2428 ASMSetFlags(fOldFlags);
2429}
2430
2431
2432
2433
2434/*
2435 *
2436 *
2437 * TSC Delta Measurements And Related Code
2438 * TSC Delta Measurements And Related Code
2439 * TSC Delta Measurements And Related Code
2440 *
2441 *
2442 */
2443
2444
/*
 * Select the TSC delta measurement algorithm.
 *
 * Exactly one of GIP_TSC_DELTA_METHOD_1 and GIP_TSC_DELTA_METHOD_2 gets
 * defined.  With the '#if 0' below, method 2 is the one selected; flip it to
 * '#if 1' to select method 1 instead.
 */
#if 0
# define GIP_TSC_DELTA_METHOD_1
#else
# define GIP_TSC_DELTA_METHOD_2
#endif

/** For padding variables to keep them away from other cache lines.  Better too
 * large than too small!  The value is in bytes; the padding arrays in the
 * structures below are sized by dividing it by the element size.
 * @remarks Current AMD64 and x86 CPUs seem to use 64 bytes.  There are claims
 *          that NetBurst had 128 byte cache lines while the 486 thru Pentium
 *          III had 32 byte cache lines. */
#define GIP_TSC_DELTA_CACHE_LINE_SIZE           128
2460
2461
/**
 * TSC delta measurement algorithm \#2 result entry.
 *
 * One element of SUPDRVTSCDELTAMETHOD2::aResults.
 *
 * @note    NOTE(review): the member descriptions below are inferred from the
 *          member names only; the code filling in the entries is not part of
 *          this section - confirm against it.
 */
typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
{
    /** Presumably the recording CPU's own sequence number - TODO confirm. */
    uint32_t    iSeqMine;
    /** Presumably the other CPU's sequence number as observed - TODO confirm. */
    uint32_t    iSeqOther;
    /** Presumably the TSC value that was read - TODO confirm. */
    uint64_t    uTsc;
} SUPDRVTSCDELTAMETHOD2ENTRY;
2471
/**
 * TSC delta measurement algorithm \#2 data.
 *
 * The sequence number is surrounded by padding: GIP_TSC_DELTA_CACHE_LINE_SIZE
 * bytes before it, and enough 32-bit words after it that the sequence number
 * plus the trailing padding add up to GIP_TSC_DELTA_CACHE_LINE_SIZE bytes.
 */
typedef struct SUPDRVTSCDELTAMETHOD2
{
    /** Padding to make sure the iCurSeqNo is in its own cache line. */
    uint64_t                    au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
    /** The current sequence number of this worker. */
    uint32_t volatile           iCurSeqNo;
    /** Padding to make sure the iCurSeqNo is in its own cache line.
     * @note The 'au64' prefix is a misnomer, the elements are 32-bit wide
     *       (one element less than a full cache line to make room for
     *       iCurSeqNo). */
    uint32_t                    au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint32_t) - 1];
    /** Result table. */
    SUPDRVTSCDELTAMETHOD2ENTRY  aResults[64];
} SUPDRVTSCDELTAMETHOD2;
/** Pointer to the data for TSC delta measurement algorithm \#2. */
typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
2488
2489
/**
 * The TSC delta synchronization struct, version 2.
 *
 * The synchronization variables are kept away from other data by cache line
 * sized padding (provided our max cache line size estimate is correct): a
 * full GIP_TSC_DELTA_CACHE_LINE_SIZE of padding precedes them, and the padding
 * following them is two 64-bit entries short of a full line.
 *
 * The compile time assertion below pins the total size to two cache line
 * sizes plus one 64-bit value.
 */
typedef struct SUPTSCDELTASYNC2
{
    /** Padding to make sure uSyncVar and uSyncSeq are in their own cache line. */
    uint64_t            au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC2_*. */
    volatile uint32_t   uSyncVar;
    /** Sequence synchronizing variable used for post 'GO' synchronization. */
    volatile uint32_t   uSyncSeq;

    /** Padding to make sure uSyncVar and uSyncSeq are in their own cache line. */
    uint64_t            au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 2];

    /** Start RDTSC value.  Put here mainly to save stack space. */
    uint64_t            uTscStart;
    /** Copy of SUPDRVGIPTSCDELTARGS::cMaxTscTicks. */
    uint64_t            cMaxTscTicks;
} SUPTSCDELTASYNC2;
AssertCompileSize(SUPTSCDELTASYNC2, GIP_TSC_DELTA_CACHE_LINE_SIZE * 2 + sizeof(uint64_t));
/** Pointer to a TSC delta synchronization struct, version 2. */
typedef SUPTSCDELTASYNC2 *PSUPTSCDELTASYNC2;
2516
/*
 * Values for SUPTSCDELTASYNC2::uSyncVar.
 */
/** Prestart wait. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT    UINT32_C(0x0ffe)
/** Prestart aborted. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT   UINT32_C(0x0fff)
/** Ready (on your mark). */
#define GIP_TSC_DELTA_SYNC2_READY            UINT32_C(0x1000)
/** Steady (get set). */
#define GIP_TSC_DELTA_SYNC2_STEADY           UINT32_C(0x1001)
/** Go! */
#define GIP_TSC_DELTA_SYNC2_GO               UINT32_C(0x1002)
/** Used by the verification test. */
#define GIP_TSC_DELTA_SYNC2_GO_GO            UINT32_C(0x1003)

/** We reached the time limit. */
#define GIP_TSC_DELTA_SYNC2_TIMEOUT          UINT32_C(0x1ffe)
/** The other party won't touch the sync struct ever again. */
#define GIP_TSC_DELTA_SYNC2_FINAL            UINT32_C(0x1fff)
2534
2535
/**
 * Argument package/state passed by supdrvMeasureTscDeltaOne() to the RTMpOn
 * callback worker.
 *
 * The members are split into a shared part, a master part and a worker
 * ("proletarian") part, with cache line sized padding between the parts.
 * @todo add
 */
typedef struct SUPDRVGIPTSCDELTARGS
{
    /** The device extension. */
    PSUPDRVDEVEXT               pDevExt;
    /** Pointer to the GIP CPU array entry for the worker. */
    PSUPGIPCPU                  pWorker;
    /** Pointer to the GIP CPU array entry for the master. */
    PSUPGIPCPU                  pMaster;
    /** The maximum number of ticks to spend in supdrvMeasureTscDeltaCallback.
     * (This is what we need a rough TSC frequency for.)  */
    uint64_t                    cMaxTscTicks;
    /** Used to abort synchronization setup. */
    bool volatile               fAbortSetup;

    /** Padding to make sure the master variables live in their own cache lines. */
    uint64_t                    au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** @name Master
     * @{ */
    /** The time the master spent in the MP worker.  */
    uint64_t                    cElapsedMasterTscTicks;
    /** The iTry value when stopped at. */
    uint32_t                    iTry;
    /** Set if the run timed out (set by supdrvTscDeltaSync2_Before on timeout). */
    bool volatile               fTimedOut;
    /** Pointer to the master's synchronization struct (on stack). */
    PSUPTSCDELTASYNC2 volatile  pSyncMaster;
    /** Master data union. */
    union
    {
        /** Data (master) for delta verification. */
        struct
        {
            /** Verification test TSC values for the master. */
            uint64_t volatile   auTscs[32];
        } Verify;
        /** Data (master) for measurement method \#2. */
        struct
        {
            /** Data and sequence number. */
            SUPDRVTSCDELTAMETHOD2 Data;
            /** The lag setting for the next run. */
            bool                fLag;
            /** Number of hits. */
            uint32_t            cHits;
        } M2;
    } uMaster;
    /** The verifier verdict, VINF_SUCCESS if ok, VERR_OUT_OF_RANGE if not,
     * VERR_TRY_AGAIN on timeout. */
    int32_t                     rcVerify;
#ifdef TSCDELTA_VERIFY_WITH_STATS
    /** The maximum difference between TSC read during delta verification. */
    int64_t                     cMaxVerifyTscTicks;
    /** The minimum difference between two TSC reads during verification. */
    int64_t                     cMinVerifyTscTicks;
    /** The bad TSC diff, worker relative to master (= worker - master).
     * Negative value means the worker is behind the master.  */
    int64_t                     iVerifyBadTscDiff;
#endif
    /** @} */

    /** Padding separating the master variables above from the worker
     * variables below by a cache line. */
    uint64_t                    au64CacheLinePaddingBetween[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** @name Proletarian
     * @{ */
    /** Pointer to the worker's synchronization struct (on stack). */
    PSUPTSCDELTASYNC2 volatile  pSyncWorker;
    /** The time the worker spent in the MP worker.  */
    uint64_t                    cElapsedWorkerTscTicks;
    /** Worker data union. */
    union
    {
        /** Data (worker) for delta verification. */
        struct
        {
            /** Verification test TSC values for the worker. */
            uint64_t volatile   auTscs[32];
        } Verify;
        /** Data (worker) for measurement method \#2. */
        struct
        {
            /** Data and sequence number. */
            SUPDRVTSCDELTAMETHOD2 Data;
            /** The lag setting for the next run (set by master). */
            bool                fLag;
        } M2;
    } uWorker;
    /** @} */

    /** Padding to make sure the above is in its own cache line. */
    uint64_t                    au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
} SUPDRVGIPTSCDELTARGS;
/** Pointer to a TSC delta measurement argument package. */
typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
2635
2636
/** @name Macros that implement the basic synchronization steps common to
 *        the algorithms.
 *
 * Must be used from a loop as the timeouts are implemented via 'break'
 * statements at the moment.
 *
 * @{
 */
/*
 * Spin loop debugging aid, only active in DEBUG_bird builds: a counter is
 * reset by TSCDELTA_DBG_START_LOOP and bumped by TSCDELTA_DBG_CHECK_LOOP,
 * which hits a breakpoint each time the low 25 bits of the counter are zero.
 * In all other builds the three macros expand to no-op expressions.
 */
#if defined(DEBUG_bird) /* || defined(VBOX_STRICT) */
# define TSCDELTA_DBG_VARS()            uint32_t iDbgCounter
# define TSCDELTA_DBG_START_LOOP()      do { iDbgCounter = 0; } while (0)
# define TSCDELTA_DBG_CHECK_LOOP() \
    do { iDbgCounter++; if ((iDbgCounter & UINT32_C(0x01ffffff)) == 0) RT_BREAKPOINT(); } while (0)
#else
# define TSCDELTA_DBG_VARS()            ((void)0)
# define TSCDELTA_DBG_START_LOOP()      ((void)0)
# define TSCDELTA_DBG_CHECK_LOOP()      ((void)0)
#endif
/*
 * Three independently switchable trace macros.  Each takes a parenthesized
 * SUPR0Printf argument list and is compiled out unless its '#if 0' is changed
 * to '#if 1'.
 */
#if 0
# define TSCDELTA_DBG_SYNC_MSG(a_Args)  SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG(a_Args)  ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) ((void)0)
#endif
2670
2671
/**
 * First half of the master/worker synchronization handshake: gets both
 * parties through the READY -> STEADY -> GO states and then tries to get them
 * executing in near lockstep.
 *
 * Each party spins on the uSyncVar of its own sync struct and signals the
 * other party by compare-and-exchanging the uSyncVar of the other struct.
 * Only the initial wait for STEADY is subject to the timeout
 * (pMySync->uTscStart / pMySync->cMaxTscTicks); on timeout the party moves
 * its own variable from READY to TIMEOUT, tries to move the other party's
 * from STEADY to TIMEOUT, and sets pArgs->fTimedOut.
 *
 * @returns true if the 'GO' handshake completed.  Interrupts are then
 *          disabled and @a *pfEFlags holds the flags to restore.  Note that
 *          the final lockstep attempt never fails the call, it returns true
 *          even when it gives up.
 * @returns false if the handshake was aborted or timed out.  Interrupts have
 *          been restored to the state found on entry in that case.
 * @param   pMySync     The caller's own synchronization structure.
 * @param   pOtherSync  The other party's synchronization structure.
 * @param   fIsMaster   Set if the caller is the master, clear if the worker.
 * @param   pfEFlags    Where to return the saved interrupt flags.  Always
 *                      initialized, also when returning false.
 * @param   pArgs       The argument/state package; only fTimedOut is written
 *                      here.
 */
static bool supdrvTscDeltaSync2_Before(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                       bool fIsMaster, PRTCCUINTREG pfEFlags, PSUPDRVGIPTSCDELTARGS pArgs)
{
    /* The master counts its lockstep sequence from 0 and the worker from 256, so the two ranges never overlap. */
    uint32_t        iMySeq = fIsMaster ? 0 : 256;
    uint32_t const  iMaxSeq = iMySeq + 16;  /* For the last loop, darn linux/freebsd C-ishness. */
    uint32_t        u32Tmp;
    uint32_t        iSync2Loops = 0;
    RTCCUINTREG     fEFlags;
    TSCDELTA_DBG_VARS();

    *pfEFlags = X86_EFL_IF | X86_EFL_1; /* should shut up most nagging compilers. */

    /*
     * The master tells the worker to get on its mark.
     */
    if (fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
        { /* likely*/ }
        else
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #1 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Wait for the on your mark signal (ack in the master case). We process timeouts here.
     *
     * Interrupts are disabled before each check of the variable and only
     * re-enabled when the check fails, so the loop is left via 'break' with
     * interrupts disabled.
     */
    ASMAtomicWriteU32(&(pMySync)->uSyncSeq, 0);
    for (;;)
    {
        fEFlags = ASMIntDisableFlags();
        u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY)
            break;
        ASMSetFlags(fEFlags);
        ASMNopPause();

        /* Abort? */
        if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY)
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #2 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
            return false;
        }

        /* Check for timeouts every so often (not every loop in case RDTSC is
           trapping or something).  Must check the first time around. */
#if 0 /* For debugging the timeout paths. */
        static uint32_t volatile xxx;
#endif
        if (   (   (iSync2Loops & 0x3ff) == 0
                && ASMReadTSC() - pMySync->uTscStart > pMySync->cMaxTscTicks)
#if 0 /* This is crazy, I know, but enable this code and the results are markedly better when enabled on the 1.4GHz AMD (debug). */
            || (!fIsMaster && (++xxx & 0xf) == 0)
#endif
           )
        {
            /* Try switch our own state into timeout mode so the master cannot tell us to 'GO',
               ignore the timeout if we've got the go ahead already (simpler). */
            if (ASMAtomicCmpXchgU32(&pMySync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_READY))
            {
                TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: timeout\n", fIsMaster ? "master" : "worker"));
                ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_STEADY);
                ASMAtomicWriteBool(&pArgs->fTimedOut, true);
                return false;
            }
        }
        iSync2Loops++;
    }

    /*
     * Interrupts are now disabled and will remain disabled until we do
     * TSCDELTA_MASTER_SYNC_AFTER / TSCDELTA_OTHER_SYNC_AFTER.
     */
    *pfEFlags = fEFlags;

    /*
     * The worker tells the master that it is on its mark and that the master
     * needs to get into position as well.
     */
    if (!fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #3 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * The master sends the 'go' to the worker and waits for the ACK.
     */
    if (fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #4 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Wait for the 'go' signal (ack in the master case).  No timeout here;
     * any state other than STEADY or GO aborts the handshake.
     */
    TSCDELTA_DBG_START_LOOP();
    for (;;)
    {
        u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (u32Tmp == GIP_TSC_DELTA_SYNC2_GO)
            break;
        if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #5 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
            return false;
        }

        TSCDELTA_DBG_CHECK_LOOP();
        ASMNopPause();
    }

    /*
     * The worker acks the 'go' (shouldn't fail).
     */
    if (!fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #6 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Try enter mostly lockstep execution with it.
     *
     * Each round we publish our sequence number in our own uSyncSeq, swap it
     * into the other party's uSyncSeq, and then re-read our own.  We're done
     * when the value swapped out of the other party's variable equals what
     * is now in ours.  UINT32_MAX is the 'gave up' marker, written after at
     * most 16 rounds.  Every exit from this loop returns true.
     */
    for (;;)
    {
        uint32_t iOtherSeq1, iOtherSeq2;
        ASMCompilerBarrier();
        ASMSerializeInstruction();

        ASMAtomicWriteU32(&pMySync->uSyncSeq, iMySeq);
        ASMNopPause();
        iOtherSeq1 = ASMAtomicXchgU32(&pOtherSync->uSyncSeq, iMySeq);
        ASMNopPause();
        iOtherSeq2 = ASMAtomicReadU32(&pMySync->uSyncSeq);

        ASMCompilerBarrier();
        if (iOtherSeq1 == iOtherSeq2)
            return true;

        /* Did the other guy give up? Should we give up? */
        if (   iOtherSeq1 == UINT32_MAX
            || iOtherSeq2 == UINT32_MAX)
            return true;
        if (++iMySeq >= iMaxSeq)
        {
            ASMAtomicWriteU32(&pMySync->uSyncSeq, UINT32_MAX);
            return true;
        }
        ASMNopPause();
    }
}
2848
/**
 * Master side wrapper around supdrvTscDeltaSync2_Before().
 *
 * Does a 'break' out of the enclosing loop when the synchronization fails,
 * so it must be used inside a loop.  The trailing 'else do {} while (0)'
 * makes the macro consume the semicolon following the invocation.
 */
#define TSCDELTA_MASTER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { /*likely*/ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/master: #89\n")); \
        break; \
    } else do {} while (0)
/**
 * Worker side wrapper around supdrvTscDeltaSync2_Before().
 *
 * Same as TSCDELTA_MASTER_SYNC_BEFORE except that fIsMaster is false: it
 * does a 'break' out of the enclosing loop when the synchronization fails.
 */
#define TSCDELTA_OTHER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { /*likely*/ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/other: #89\n")); \
        break; \
    } else do {} while (0)
2865
2866
2867static bool supdrvTscDeltaSync2_After(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
2868 bool fIsMaster, RTCCUINTREG fEFlags)
2869{
2870 TSCDELTA_DBG_VARS();
2871
2872 /*
2873 * Wait for the 'ready' signal. In the master's case, this means the
2874 * worker has completed its data collection, while in the worker's case it
2875 * means the master is done processing the data and it's time for the next
2876 * loop iteration (or whatever).
2877 */
2878 ASMSetFlags(fEFlags);
2879 TSCDELTA_DBG_START_LOOP();
2880 for (;;)
2881 {
2882 uint32_t u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
2883 if ( u32Tmp == GIP_TSC_DELTA_SYNC2_READY
2884 || (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY && !fIsMaster) /* kicked twice => race */ )
2885 return true;
2886 ASMNopPause();
2887 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_GO))
2888 { /* likely */}
2889 else
2890 {
2891 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #1 u32Tmp=%#x\n", u32Tmp));
2892 return false; /* shouldn't ever happen! */
2893 }
2894 TSCDELTA_DBG_CHECK_LOOP();
2895 ASMNopPause();
2896 }
2897}
2898
/**
 * Master side wrapper around supdrvTscDeltaSync2_After().
 *
 * When the call returns false, debug message \#97 is emitted and a 'break'
 * statement is executed, so the macro must be used directly inside the loop
 * that is to be left on failure.
 */
#define TSCDELTA_MASTER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_fEFlags))) \
    { /* likely */ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/after/master: #97\n")); \
        break; \
    } else do {} while (0)
2907
/**
 * Master only: releases the worker from its wait in supdrvTscDeltaSync2_After().
 *
 * Atomically switches the worker's sync variable from GO to READY.  If the
 * compare-exchange fails (the variable wasn't GO), a debug message is emitted
 * and a 'break' statement is executed, so the macro must be used directly
 * inside the loop that is to be left on failure.  a_pMySync is not used.
 */
#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pMySync, a_pOtherSync) \
    /* \
     * Tell the worker that we're done processing the data and ready for the next round. \
     */ \
    if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
    { /* likely */ } \
    else if (true)\
    { \
        TSCDELTA_DBG_SYNC_MSG(("sync/after/master: #99 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
        break; \
    } else do {} while (0)
2919
/**
 * Worker ("other") side end-of-round synchronization.
 *
 * First switches the master's sync variable from GO to READY, then waits in
 * supdrvTscDeltaSync2_After() for the master's reply.  Either step failing
 * executes a 'break', so the macro must be used directly inside the loop that
 * is to be left on failure.  When the first step fails the flags are set to
 * a_fEFlags here, since supdrvTscDeltaSync2_After() (which otherwise does
 * that) is not reached.
 */
#define TSCDELTA_OTHER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (true) { \
        /* \
         * Tell the master that we're done collecting data and wait for the next round to start. \
         */ \
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
        { /* likely */ } \
        else \
        { \
            ASMSetFlags(a_fEFlags); \
            TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #0 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
            break; \
        } \
        if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_fEFlags))) \
        { /* likely */ } \
        else \
        { \
            TSCDELTA_DBG_SYNC_MSG9(("sync/after/other: #98\n")); \
            break; \
        } \
    } else do {} while (0)
2941/** @} */
2942
2943
#ifdef GIP_TSC_DELTA_METHOD_1
/**
 * TSC delta measurement algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
 *
 *
 * We ignore the first few runs of the loop in order to prime the
 * cache. Also, we need to be careful about using 'pause' instruction
 * in critical busy-wait loops in this code - it can cause undesired
 * behaviour with hyperthreading.
 *
 * We try to minimize the measurement error by computing the minimum
 * read time of the compare statement in the worker by taking TSC
 * measurements across it.
 *
 * It must be noted that the computed minimum read time is mostly to
 * eliminate huge deltas when the worker is too early and doesn't by
 * itself help produce more accurate deltas. We allow two times the
 * computed minimum as an arbitrary acceptable threshold. Therefore,
 * it is still possible to get negative deltas where there are none
 * when the worker is earlier. As long as these occasional negative
 * deltas are lower than the time it takes to exit guest-context and
 * the OS to reschedule EMT on a different CPU we won't expose a TSC
 * that jumped backwards. It is because of the existence of the
 * negative deltas we don't recompute the delta with the master and
 * worker interchanged to eliminate the remaining measurement error.
 *
 * The result is stored by the master in pArgs->pWorker->i64TSCDelta; the
 * function itself returns nothing.  Any of the sync macros failing leaves
 * the measurement loop early.
 *
 * @param   pArgs               The argument/state data.
 * @param   pMySync             My synchronization structure.
 * @param   pOtherSync          My partner's synchronization structure.
 * @param   fIsMaster           Set if master, clear if worker.
 * @param   iTry                The attempt number (unused).
 */
static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                      bool fIsMaster, uint32_t iTry)
{
    PSUPGIPCPU  pGipCpuWorker   = pArgs->pWorker;
    PSUPGIPCPU  pGipCpuMaster   = pArgs->pMaster;
    uint64_t    uMinCmpReadTime = UINT64_MAX;   /* Only maintained and used by the worker. */
    unsigned    iLoop;
    NOREF(iTry);

    for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
    {
        RTCCUINTREG fEFlags;
        if (fIsMaster)
        {
            /*
             * The master.
             */
            AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
                      ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
                       pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /* Publish our TSC; retry in the case where the TSC value equals the
               reserved 'no sample' marker the worker is polling for. */
            do
            {
                ASMSerializeInstruction();
                ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);

            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /* Process the data. */
            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
            {
                /* The worker leaves the reserved marker when it rejected its sample. */
                if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
                {
                    int64_t iDelta = pGipCpuWorker->u64TSCSample
                                   - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
                    /* Non-negative candidates replace a larger stored value; negative ones
                       replace a smaller stored value or the INT64_MAX 'unset' value. */
                    if (  iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
                        ? iDelta < pGipCpuWorker->i64TSCDelta
                        : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
                        pGipCpuWorker->i64TSCDelta = iDelta;
                }
            }

            /* Reset our TSC sample and tell the worker to move on. */
            ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker.
             */
            uint64_t uTscWorker;
            uint64_t uTscWorkerFlushed;
            uint64_t uCmpReadTime;

            ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample);     /* Warm the cache line. */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /*
             * Keep reading the TSC until we notice that the master has read his. Reading
             * the TSC -after- the master has updated the memory is way too late. We thus
             * compensate by trying to measure how long it took for the worker to notice
             * the memory flushed from the master.
             */
            do
            {
                ASMSerializeInstruction();
                uTscWorker = ASMReadTSC();
            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
            ASMSerializeInstruction();
            uTscWorkerFlushed = ASMReadTSC();

            /* Time between the last TSC read inside the loop and the one right after it. */
            uCmpReadTime = uTscWorkerFlushed - uTscWorker;
            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
            {
                /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
                if (uCmpReadTime < (uMinCmpReadTime << 1))
                {
                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
                    if (uCmpReadTime < uMinCmpReadTime)
                        uMinCmpReadTime = uCmpReadTime;
                }
                else
                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
            }
            else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
            {
                /* Read-time calibration rounds: only track the minimum, publish no sample. */
                if (uCmpReadTime < uMinCmpReadTime)
                    uMinCmpReadTime = uCmpReadTime;
            }

            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
    }

    TSCDELTA_DBG_SYNC_MSG9(("sync/method1loop/%s: #92 iLoop=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iLoop,
                            pMySync->uSyncVar));

    /*
     * We must reset the worker TSC sample value in case it gets picked as a
     * GIP master later on (it's trashed above, naturally).
     */
    if (!fIsMaster)
        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
}
#endif /* GIP_TSC_DELTA_METHOD_1 */
3085
3086
3087#ifdef GIP_TSC_DELTA_METHOD_2
/*
 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
 */

/** Total number of iterations of the method 2 loop: 7 plus the primer loops. */
# define GIP_TSC_DELTA_M2_LOOPS             (7 + GIP_TSC_DELTA_M2_PRIMER_LOOPS)
/** Number of leading (primer) iterations of the method 2 loop whose data is
 *  not processed; zero disables the priming code paths. */
# define GIP_TSC_DELTA_M2_PRIMER_LOOPS      0
3094
3095
3096static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs, uint32_t iLoop)
3097{
3098 int64_t iMasterTscDelta = pArgs->pMaster->i64TSCDelta;
3099 int64_t iBestDelta = pArgs->pWorker->i64TSCDelta;
3100 uint32_t idxResult;
3101 uint32_t cHits = 0;
3102
3103 /*
3104 * Look for matching entries in the master and worker tables.
3105 */
3106 for (idxResult = 0; idxResult < RT_ELEMENTS(pArgs->uMaster.M2.Data.aResults); idxResult++)
3107 {
3108 uint32_t idxOther = pArgs->uMaster.M2.Data.aResults[idxResult].iSeqOther;
3109 if (idxOther & 1)
3110 {
3111 idxOther >>= 1;
3112 if (idxOther < RT_ELEMENTS(pArgs->uWorker.M2.Data.aResults))
3113 {
3114 if (pArgs->uWorker.M2.Data.aResults[idxOther].iSeqOther == pArgs->uMaster.M2.Data.aResults[idxResult].iSeqMine)
3115 {
3116 int64_t iDelta;
3117 iDelta = pArgs->uWorker.M2.Data.aResults[idxOther].uTsc
3118 - (pArgs->uMaster.M2.Data.aResults[idxResult].uTsc - iMasterTscDelta);
3119 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3120 ? iDelta < iBestDelta
3121 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
3122 iBestDelta = iDelta;
3123 cHits++;
3124 }
3125 }
3126 }
3127 }
3128
3129 /*
3130 * Save the results.
3131 */
3132 if (cHits > 2)
3133 pArgs->pWorker->i64TSCDelta = iBestDelta;
3134 pArgs->uMaster.M2.cHits += cHits;
3135}
3136
3137
/**
 * The core function of the 2nd TSC delta measurement algorithm.
 *
 * The idea here is that we have the two CPUs execute the exact same code
 * collecting a largish set of TSC samples.  The code has one data dependency on
 * the other CPU which intention it is to synchronize the execution as well as
 * help cross references the two sets of TSC samples (the sequence numbers).
 *
 * The @a fLag parameter is used to modify the execution a tiny bit on one or
 * both of the CPUs.  When @a fLag differs between the CPUs, it is thought that
 * it will help with making the CPUs enter lock step execution occasionally.
 *
 * @param   pMyData         My data: the sample table that gets filled and my
 *                          current sequence number, which is reset to zero
 *                          and then incremented twice per sample.
 * @param   piOtherSeqNo    The partner's current sequence number; read once
 *                          per sample and stored alongside it.
 * @param   fLag            Whether to execute a pause after storing each sample.
 */
static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
{
    SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
    uint32_t                    cLeft  = RT_ELEMENTS(pMyData->aResults);

    ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
    ASMSerializeInstruction();
    while (cLeft-- > 0)
    {
        uint64_t uTsc;
        /* First increment: the sequence number is odd while the TSC is being read. */
        uint32_t iSeqMine  = ASMAtomicIncU32(&pMyData->iCurSeqNo);
        uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
        ASMCompilerBarrier();
        ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
        uTsc = ASMReadTSC();
        /* Second increment: even again, this sample's TSC read is over. */
        ASMAtomicIncU32(&pMyData->iCurSeqNo);
        ASMCompilerBarrier();
        ASMSerializeInstruction();
        pEntry->iSeqMine  = iSeqMine;
        pEntry->iSeqOther = iSeqOther;
        pEntry->uTsc      = uTsc;
        pEntry++;
        ASMSerializeInstruction();
        if (fLag)
            ASMNopPause();
    }
}
3178
3179
3180/**
3181 * TSC delta measurment algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
3182 *
3183 * See supdrvTscDeltaMethod2CollectData for algorithm details.
3184 *
3185 * @param pArgs The argument/state data.
3186 * @param pMySync My synchronization structure.
3187 * @param pOtherSync My partner's synchronization structure.
3188 * @param fIsMaster Set if master, clear if worker.
3189 * @param iTry The attempt number.
3190 */
3191static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3192 bool fIsMaster, uint32_t iTry)
3193{
3194 unsigned iLoop;
3195
3196 for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++)
3197 {
3198 RTCCUINTREG fEFlags;
3199 if (fIsMaster)
3200 {
3201 /*
3202 * Adjust the loop lag fudge.
3203 */
3204# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3205 if (iLoop < GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3206 {
3207 /* Lag during the priming to be nice to everyone.. */
3208 pArgs->uMaster.M2.fLag = true;
3209 pArgs->uWorker.M2.fLag = true;
3210 }
3211 else
3212# endif
3213 if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4)
3214 {
3215 /* 25 % of the body without lagging. */
3216 pArgs->uMaster.M2.fLag = false;
3217 pArgs->uWorker.M2.fLag = false;
3218 }
3219 else if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4 * 2)
3220 {
3221 /* 25 % of the body with both lagging. */
3222 pArgs->uMaster.M2.fLag = true;
3223 pArgs->uWorker.M2.fLag = true;
3224 }
3225 else
3226 {
3227 /* 50% of the body with alternating lag. */
3228 pArgs->uMaster.M2.fLag = (iLoop & 1) == 0;
3229 pArgs->uWorker.M2.fLag= (iLoop & 1) == 1;
3230 }
3231
3232 /*
3233 * Sync up with the worker and collect data.
3234 */
3235 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3236 supdrvTscDeltaMethod2CollectData(&pArgs->uMaster.M2.Data, &pArgs->uWorker.M2.Data.iCurSeqNo, pArgs->uMaster.M2.fLag);
3237 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3238
3239 /*
3240 * Process the data.
3241 */
3242# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3243 if (iLoop >= GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3244# endif
3245 supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs, iLoop);
3246
3247 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3248 }
3249 else
3250 {
3251 /*
3252 * The worker.
3253 */
3254 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3255 supdrvTscDeltaMethod2CollectData(&pArgs->uWorker.M2.Data, &pArgs->uMaster.M2.Data.iCurSeqNo, pArgs->uWorker.M2.fLag);
3256 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3257 }
3258 }
3259}
3260
3261#endif /* GIP_TSC_DELTA_METHOD_2 */
3262
3263
3264
/**
 * Verifies a TSC delta value by having the master and the worker take turns
 * reading their TSCs and checking that the delta adjusted readings never go
 * backwards.
 *
 * The two CPUs ping-pong via each other's sync variable (toggling between
 * GO_GO and GO), the master reading first.  Afterwards the master walks the
 * two TSC arrays and stores the verdict in pArgs->rcVerify.
 *
 * @returns pArgs->rcVerify when the round was completed (the master sets it
 *          to VINF_SUCCESS or VERR_OUT_OF_RANGE before releasing the worker),
 *          VERR_TIMEOUT if one of the sync macros broke out of the round (in
 *          which case pArgs->rcVerify is set to VERR_TRY_AGAIN).
 * @param   pArgs               The argument/state data.
 * @param   pMySync             My synchronization structure.
 * @param   pOtherSync          My partner's synchronization structure.
 * @param   fIsMaster           Set if master, clear if worker.
 * @param   iWorkerTscDelta     The worker TSC delta value to verify; it is
 *                              subtracted from the worker's readings.
 */
static int supdrvTscDeltaVerify(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync,
                                PSUPTSCDELTASYNC2 pOtherSync, bool fIsMaster, int64_t iWorkerTscDelta)
{
    /*PSUPGIPCPU              pGipCpuWorker = pArgs->pWorker; - unused */
    PSUPGIPCPU              pGipCpuMaster = pArgs->pMaster;
    uint32_t                i;
    TSCDELTA_DBG_VARS();

    /* Not a real loop: the body always returns.  It only exists to give the
       sync macros something to 'break' out of on failure. */
    for (;;)
    {
        RTCCUINTREG fEFlags;
        AssertCompile((RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) & 1) == 0);
        AssertCompile(RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) == RT_ELEMENTS(pArgs->uWorker.Verify.auTscs));

        if (fIsMaster)
        {
            uint64_t uTscWorker;
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /*
             * Collect TSC, master goes first.
             */
            for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i += 2)
            {
                /* Read, kick & wait #1. */
                uint64_t register uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
                ASMSerializeInstruction();
                pArgs->uMaster.Verify.auTscs[i] = uTsc;
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }

                /* Read, kick & wait #2. */
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
                ASMSerializeInstruction();
                pArgs->uMaster.Verify.auTscs[i + 1] = uTsc;
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
            }

            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /*
             * Process the data.
             */
#ifdef TSCDELTA_VERIFY_WITH_STATS
            pArgs->cMaxVerifyTscTicks = INT64_MIN;
            pArgs->cMinVerifyTscTicks = INT64_MAX;
            pArgs->iVerifyBadTscDiff  = 0;
#endif
            ASMAtomicWriteS32(&pArgs->rcVerify, VINF_SUCCESS);
            uTscWorker = 0;
            for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i++)
            {
                /* Master vs previous worker entry. */
                uint64_t uTscMaster = pArgs->uMaster.Verify.auTscs[i] - pGipCpuMaster->i64TSCDelta;
                int64_t  iDiff;
                if (i > 0)
                {
                    /* The master read entry i after the worker read entry i-1, so this must not be negative. */
                    iDiff = uTscMaster - uTscWorker;
#ifdef TSCDELTA_VERIFY_WITH_STATS
                    if (iDiff > pArgs->cMaxVerifyTscTicks)
                        pArgs->cMaxVerifyTscTicks = iDiff;
                    if (iDiff < pArgs->cMinVerifyTscTicks)
                        pArgs->cMinVerifyTscTicks = iDiff;
#endif
                    if (iDiff < 0)
                    {
#ifdef TSCDELTA_VERIFY_WITH_STATS
                        pArgs->iVerifyBadTscDiff = -iDiff;
#endif
                        ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
                        break;
                    }
                }

                /* Worker vs master. */
                uTscWorker = pArgs->uWorker.Verify.auTscs[i] - iWorkerTscDelta;
                iDiff = uTscWorker - uTscMaster;
#ifdef TSCDELTA_VERIFY_WITH_STATS
                if (iDiff > pArgs->cMaxVerifyTscTicks)
                    pArgs->cMaxVerifyTscTicks = iDiff;
                if (iDiff < pArgs->cMinVerifyTscTicks)
                    pArgs->cMinVerifyTscTicks = iDiff;
#endif
                if (iDiff < 0)
                {
#ifdef TSCDELTA_VERIFY_WITH_STATS
                    pArgs->iVerifyBadTscDiff = iDiff;
#endif
                    ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
                    break;
                }
            }

            /* Done. */
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker, master leads.
             */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            for (i = 0; i < RT_ELEMENTS(pArgs->uWorker.Verify.auTscs); i += 2)
            {
                uint64_t register uTsc;

                /* Wait, Read and Kick #1. */
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
                ASMSerializeInstruction();
                pArgs->uWorker.Verify.auTscs[i] = uTsc;

                /* Wait, Read and Kick #2. */
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
                ASMSerializeInstruction();
                pArgs->uWorker.Verify.auTscs[i + 1] = uTsc;
            }

            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
        return pArgs->rcVerify;
    }

    /*
     * Timed out, please retry.
     */
    ASMAtomicWriteS32(&pArgs->rcVerify, VERR_TRY_AGAIN);
    return VERR_TIMEOUT;
}
3419
3420
3421
/**
 * Handles the special abort procedure during synchronization setup in
 * supdrvMeasureTscDeltaCallbackUnwrapped().
 *
 * Withdraws our sync structure pointer from @a pArgs, raises the abort (and
 * optionally the timeout) flag and then spins until the partner has
 * withdrawn its pointer as well.
 *
 * @returns 0 (dummy, ignored)
 * @param   pArgs               Pointer to argument/state data.
 * @param   pMySync             Pointer to my sync structure (not used here).
 * @param   fIsMaster           Set if we're the master, clear if worker.
 * @param   fTimeout            Set if it's a timeout.
 */
DECL_NO_INLINE(static, int)
supdrvMeasureTscDeltaCallbackAbortSyncSetup(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, bool fIsMaster, bool fTimeout)
{
    PSUPTSCDELTASYNC2 volatile *ppMySync    = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
    PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
    TSCDELTA_DBG_VARS();

    /*
     * Clear our sync pointer and make sure the abort flag is set.
     */
    ASMAtomicWriteNullPtr(ppMySync);
    ASMAtomicWriteBool(&pArgs->fAbortSetup, true);
    if (fTimeout)
        ASMAtomicWriteBool(&pArgs->fTimedOut, true);

    /*
     * Make sure the other party is out of there and won't be touching our
     * sync state again (would cause stack corruption).
     */
    TSCDELTA_DBG_START_LOOP();
    while (ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2) != NULL)
    {
        ASMNopPause();
        ASMNopPause();
        ASMNopPause();
        TSCDELTA_DBG_CHECK_LOOP();
    }

    return 0;
}
3462
3463
3464/**
3465 * This is used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
3466 * and compute the delta between them.
3467 *
3468 * To reduce code size a good when timeout handling was added, a dummy return
3469 * value had to be added (saves 1-3 lines per timeout case), thus this
3470 * 'Unwrapped' function and the dummy 0 return value.
3471 *
3472 * @returns 0 (dummy, ignored)
3473 * @param idCpu The CPU we are current scheduled on.
3474 * @param pArgs Pointer to a parameter package.
3475 *
3476 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
3477 * read the TSC at exactly the same time on both the master and the
3478 * worker CPUs. Due to DMA, bus arbitration, cache locality,
3479 * contention, SMI, pipelining etc. there is no guaranteed way of
3480 * doing this on x86 CPUs.
3481 */
3482static int supdrvMeasureTscDeltaCallbackUnwrapped(RTCPUID idCpu, PSUPDRVGIPTSCDELTARGS pArgs)
3483{
3484 PSUPDRVDEVEXT pDevExt = pArgs->pDevExt;
3485 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3486 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3487 bool const fIsMaster = idCpu == pGipCpuMaster->idCpu;
3488 uint32_t iTry;
3489 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3490 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3491 SUPTSCDELTASYNC2 MySync;
3492 PSUPTSCDELTASYNC2 pOtherSync;
3493 int rc;
3494 TSCDELTA_DBG_VARS();
3495
3496 /* A bit of paranoia first. */
3497 if (!pGipCpuMaster || !pGipCpuWorker)
3498 return 0;
3499
3500 /*
3501 * If the CPU isn't part of the measurement, return immediately.
3502 */
3503 if ( !fIsMaster
3504 && idCpu != pGipCpuWorker->idCpu)
3505 return 0;
3506
3507 /*
3508 * Set up my synchronization stuff and wait for the other party to show up.
3509 *
3510 * We don't wait forever since the other party may be off fishing (offline,
3511 * spinning with ints disables, whatever), we must play nice to the rest of
3512 * the system as this context generally isn't one in which we will get
3513 * preempted and we may hold up a number of lower priority interrupts.
3514 */
3515 ASMAtomicWriteU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT);
3516 ASMAtomicWritePtr(ppMySync, &MySync);
3517 MySync.uTscStart = ASMReadTSC();
3518 MySync.cMaxTscTicks = pArgs->cMaxTscTicks;
3519
3520 /* Look for the partner, might not be here yet... Special abort considerations. */
3521 iTry = 0;
3522 TSCDELTA_DBG_START_LOOP();
3523 while ((pOtherSync = ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2)) == NULL)
3524 {
3525 ASMNopPause();
3526 if ( ASMAtomicReadBool(&pArgs->fAbortSetup)
3527 || !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuWorker->idCpu) )
3528 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3529 if ( (iTry++ & 0xff) == 0
3530 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3531 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3532 TSCDELTA_DBG_CHECK_LOOP();
3533 ASMNopPause();
3534 }
3535
3536 /* I found my partner, waiting to be found... Special abort considerations. */
3537 if (fIsMaster)
3538 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* parnaoia */
3539 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3540
3541 iTry = 0;
3542 TSCDELTA_DBG_START_LOOP();
3543 while (ASMAtomicReadU32(&MySync.uSyncVar) == GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)
3544 {
3545 ASMNopPause();
3546 if (ASMAtomicReadBool(&pArgs->fAbortSetup))
3547 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3548 if ( (iTry++ & 0xff) == 0
3549 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3550 {
3551 if ( fIsMaster
3552 && !ASMAtomicCmpXchgU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_ABORT, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT))
3553 break; /* race #1: slave has moved on, handle timeout in loop instead. */
3554 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3555 }
3556 TSCDELTA_DBG_CHECK_LOOP();
3557 }
3558
3559 if (!fIsMaster)
3560 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */
3561 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3562
3563/** @todo Add a resumable state to pArgs so we don't waste time if we time
3564 * out or something. Timeouts are legit, any of the two CPUs may get
3565 * interrupted. */
3566
3567 /*
3568 * Start by seeing if we have a zero delta between the two CPUs.
3569 * This should normally be the case.
3570 */
3571 rc = supdrvTscDeltaVerify(pArgs, &MySync, pOtherSync, fIsMaster, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3572 if (RT_SUCCESS(rc))
3573 {
3574 if (fIsMaster)
3575 {
3576 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3577 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3578 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3579 }
3580 }
3581 /*
3582 * If the verification didn't time out, do regular delta measurements.
3583 * We retry this until we get a reasonable value.
3584 */
3585 else if (rc != VERR_TIMEOUT)
3586 {
3587 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
3588 for (iTry = 0; iTry < 12; iTry++)
3589 {
3590 /*
3591 * Check the state before we start.
3592 */
3593 uint32_t u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3594 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3595 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3596 {
3597 TSCDELTA_DBG_SYNC_MSG(("sync/loop/%s: #0 iTry=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iTry, u32Tmp));
3598 break;
3599 }
3600
3601 /*
3602 * Do the measurements.
3603 */
3604#ifdef GIP_TSC_DELTA_METHOD_1
3605 supdrvTscDeltaMethod1Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3606#elif defined(GIP_TSC_DELTA_METHOD_2)
3607 supdrvTscDeltaMethod2Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3608#else
3609# error "huh??"
3610#endif
3611
3612 /*
3613 * Check the state.
3614 */
3615 u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3616 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3617 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3618 {
3619 if (fIsMaster)
3620 TSCDELTA_DBG_SYNC_MSG(("sync/loop/master: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3621 else
3622 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/worker: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3623 break;
3624 }
3625
3626 /*
3627 * Success? If so, stop trying. Master decides.
3628 */
3629 if (fIsMaster)
3630 {
3631 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
3632 {
3633 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3634 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3635 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/master: #9 iTry=%u MyState=%#x\n", iTry, MySync.uSyncVar));
3636 break;
3637 }
3638 }
3639 }
3640 if (fIsMaster)
3641 pArgs->iTry = iTry;
3642 }
3643
3644 /*
3645 * End the synchroniziation dance. We tell the other that we're done,
3646 * then wait for the same kind of reply.
3647 */
3648 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_FINAL);
3649 ASMAtomicWriteNullPtr(ppMySync);
3650 iTry = 0;
3651 TSCDELTA_DBG_START_LOOP();
3652 while (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_FINAL)
3653 {
3654 iTry++;
3655 if ( iTry == 0
3656 && !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuWorker->idCpu))
3657 break; /* this really shouldn't happen. */
3658 TSCDELTA_DBG_CHECK_LOOP();
3659 ASMNopPause();
3660 }
3661
3662 /*
3663 * Collect some runtime stats.
3664 */
3665 if (fIsMaster)
3666 pArgs->cElapsedMasterTscTicks = ASMReadTSC() - MySync.uTscStart;
3667 else
3668 pArgs->cElapsedWorkerTscTicks = ASMReadTSC() - MySync.uTscStart;
3669 return 0;
3670}
3671
3672/**
3673 * Callback used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
3674 * and compute the delta between them.
3675 *
3676 * @param idCpu The CPU we are current scheduled on.
3677 * @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
3678 * @param pvUser2 Unused.
3679 */
3680static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3681{
3682 supdrvMeasureTscDeltaCallbackUnwrapped(idCpu, (PSUPDRVGIPTSCDELTARGS)pvUser1);
3683}
3684
3685
3686/**
3687 * Measures the TSC delta between the master GIP CPU and one specified worker
3688 * CPU.
3689 *
3690 * @returns VBox status code.
3691 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED on pure measurement
3692 * failure.
3693 * @param pDevExt Pointer to the device instance data.
3694 * @param idxWorker The index of the worker CPU from the GIP's array of
3695 * CPUs.
3696 *
3697 * @remarks This must be called with preemption enabled!
3698 */
3699static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
3700{
3701 int rc;
3702 int rc2;
3703 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3704 RTCPUID idMaster = pDevExt->idGipMaster;
3705 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
3706 PSUPGIPCPU pGipCpuMaster;
3707 uint32_t iGipCpuMaster;
3708
3709 /* Validate input a bit. */
3710 AssertReturn(pGip, VERR_INVALID_PARAMETER);
3711 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
3712 Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3713
3714 /*
3715 * Don't attempt measuring the delta for the GIP master.
3716 */
3717 if (pGipCpuWorker->idCpu == idMaster)
3718 {
3719 if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
3720 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3721 return VINF_SUCCESS;
3722 }
3723
3724 /*
3725 * One measurement at at time, at least for now. We might be using
3726 * broadcast IPIs so, so be nice to the rest of the system.
3727 */
3728#ifdef SUPDRV_USE_MUTEX_FOR_GIP
3729 rc = RTSemMutexRequest(pDevExt->mtxTscDelta, RT_INDEFINITE_WAIT);
3730#else
3731 rc = RTSemFastMutexRequest(pDevExt->mtxTscDelta);
3732#endif
3733 if (RT_FAILURE(rc))
3734 return rc;
3735
3736 /*
3737 * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
3738 * try pick a different master. (This fudge only works with multi core systems.)
3739 * ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core.
3740 *
3741 * We skip this on AMDs for now as their HTT is different from intel's and
3742 * it doesn't seem to have any favorable effect on the results.
3743 *
3744 * If the master is offline, we need a new master too, so share the code.
3745 */
3746 iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
3747 AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
3748 pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
3749 if ( ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
3750 && ASMHasCpuId()
3751 && ASMIsValidStdRange(ASMCpuId_EAX(0))
3752 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
3753 && !ASMIsAmdCpu()
3754 && pGip->cOnlineCpus > 2)
3755 || !RTMpIsCpuOnline(idMaster) )
3756 {
3757 uint32_t i;
3758 for (i = 0; i < pGip->cCpus; i++)
3759 if ( i != iGipCpuMaster
3760 && i != idxWorker
3761 && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
3762 && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
3763 && pGip->aCPUs[i].idCpu != NIL_RTCPUID
3764 && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
3765 && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
3766 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
3767 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic
3768 && RTMpIsCpuOnline(pGip->aCPUs[i].idCpu))
3769 {
3770 iGipCpuMaster = i;
3771 pGipCpuMaster = &pGip->aCPUs[i];
3772 idMaster = pGipCpuMaster->idCpu;
3773 break;
3774 }
3775 }
3776
3777 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
3778 {
3779 /*
3780 * Initialize data package for the RTMpOnPair callback.
3781 */
3782 PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)RTMemAllocZ(sizeof(*pArgs));
3783 if (pArgs)
3784 {
3785 pArgs->pWorker = pGipCpuWorker;
3786 pArgs->pMaster = pGipCpuMaster;
3787 pArgs->pDevExt = pDevExt;
3788 pArgs->pSyncMaster = NULL;
3789 pArgs->pSyncWorker = NULL;
3790 pArgs->cMaxTscTicks = ASMAtomicReadU64(&pGip->u64CpuHz) / 512; /* 1953 us */
3791
3792 /*
3793 * Do the RTMpOnPair call. We reset i64TSCDelta first so we
3794 * and supdrvMeasureTscDeltaCallback can use it as a success check.
3795 */
3796 /** @todo Store the i64TSCDelta result in pArgs first? Perhaps deals with
3797 * that when doing the restart loop reorg. */
3798 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
3799 rc = RTMpOnPair(pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, RTMPON_F_CONCURRENT_EXEC,
3800 supdrvMeasureTscDeltaCallback, pArgs, NULL);
3801 if (RT_SUCCESS(rc))
3802 {
3803#if 0
3804 SUPR0Printf("mponpair ticks: %9llu %9llu max: %9llu iTry: %u%s\n", pArgs->cElapsedMasterTscTicks,
3805 pArgs->cElapsedWorkerTscTicks, pArgs->cMaxTscTicks, pArgs->iTry,
3806 pArgs->fTimedOut ? " timed out" :"");
3807#endif
3808#if 0
3809 SUPR0Printf("rcVerify=%d iVerifyBadTscDiff=%lld cMinVerifyTscTicks=%lld cMaxVerifyTscTicks=%lld\n",
3810 pArgs->rcVerify, pArgs->iVerifyBadTscDiff, pArgs->cMinVerifyTscTicks, pArgs->cMaxVerifyTscTicks);
3811#endif
3812 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
3813 {
3814 /*
3815 * Work the TSC delta applicability rating. It starts
3816 * optimistic in supdrvGipInit, we downgrade it here.
3817 */
3818 SUPGIPUSETSCDELTA enmRating;
3819 if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
3820 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
3821 enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
3822 else if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
3823 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
3824 enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
3825 else
3826 enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
3827 if (pGip->enmUseTscDelta < enmRating)
3828 {
3829 AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
3830 ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
3831 }
3832 }
3833 else
3834 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
3835 }
3836 /** @todo return try-again if we get an offline CPU error. */
3837
3838 RTMemFree(pArgs);
3839 }
3840 else
3841 rc = VERR_NO_MEMORY;
3842 }
3843 else
3844 rc = VERR_CPU_OFFLINE;
3845
3846 /*
3847 * We're done now.
3848 */
3849#ifdef SUPDRV_USE_MUTEX_FOR_GIP
3850 rc2 = RTSemMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
3851#else
3852 rc2 = RTSemFastMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
3853#endif
3854 return rc;
3855}
3856
3857
3858/**
3859 * Clears TSC delta related variables.
3860 *
3861 * Clears all TSC samples as well as the delta synchronization variable on the
3862 * all the per-CPU structs. Optionally also clears the per-cpu deltas too.
3863 *
3864 * @param pDevExt Pointer to the device instance data.
3865 * @param fClearDeltas Whether the deltas are also to be cleared.
3866 */
3867static void supdrvClearTscSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas)
3868{
3869 unsigned iCpu;
3870 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3871 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
3872 {
3873 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
3874 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
3875 if (fClearDeltas)
3876 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
3877 }
3878}
3879
3880
3881/**
3882 * Performs the initial measurements of the TSC deltas between CPUs.
3883 *
3884 * This is called by supdrvGipCreate or triggered by it if threaded.
3885 *
3886 * @returns VBox status code.
3887 * @param pDevExt Pointer to the device instance data.
3888 *
3889 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
3890 * idCpu, GIP's online CPU set which are populated in
3891 * supdrvGipInitOnCpu().
3892 */
3893static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt)
3894{
3895 PSUPGIPCPU pGipCpuMaster;
3896 unsigned iCpu;
3897 unsigned iOddEven;
3898 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3899 uint32_t idxMaster = UINT32_MAX;
3900 int rc = VINF_SUCCESS;
3901 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
3902
3903 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
3904
3905 /*
3906 * Pick the first CPU online as the master TSC and make it the new GIP master based
3907 * on the APIC ID.
3908 *
3909 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
3910 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
3911 * master as this point since the sync/async timer isn't created yet.
3912 */
3913 supdrvClearTscSamples(pDevExt, true /* fClearDeltas */);
3914 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
3915 {
3916 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
3917 if (idxCpu != UINT16_MAX)
3918 {
3919 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
3920 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
3921 {
3922 idxMaster = idxCpu;
3923 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
3924 break;
3925 }
3926 }
3927 }
3928 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
3929 pGipCpuMaster = &pGip->aCPUs[idxMaster];
3930 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
3931
3932 /*
3933 * If there is only a single CPU online we have nothing to do.
3934 */
3935 if (pGip->cOnlineCpus <= 1)
3936 {
3937 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
3938 return VINF_SUCCESS;
3939 }
3940
3941 /*
3942 * Loop thru the GIP CPU array and get deltas for each CPU (except the
3943 * master). We do the CPUs with the even numbered APIC IDs first so that
3944 * we've got alternative master CPUs to pick from on hyper-threaded systems.
3945 */
3946 for (iOddEven = 0; iOddEven < 2; iOddEven++)
3947 {
3948 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
3949 {
3950 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
3951 if ( iCpu != idxMaster
3952 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
3953 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
3954 {
3955 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
3956 if (RT_FAILURE(rc))
3957 {
3958 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
3959 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
3960 break;
3961 }
3962
3963 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
3964 {
3965 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
3966 rc = VERR_TRY_AGAIN;
3967 break;
3968 }
3969 }
3970 }
3971 }
3972
3973 return rc;
3974}
3975
3976
3977#ifdef SUPDRV_USE_TSC_DELTA_THREAD
3978
3979/**
3980 * Switches the TSC-delta measurement thread into the butchered state.
3981 *
3982 * @returns VBox status code.
3983 * @param pDevExt Pointer to the device instance data.
3984 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
3985 * @param pszFailed An error message to log.
3986 * @param rcFailed The error code to exit the thread with.
3987 */
3988static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
3989{
3990 if (!fSpinlockHeld)
3991 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3992
3993 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
3994 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3995 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
3996 return rcFailed;
3997}
3998
3999
4000/**
4001 * The TSC-delta measurement thread.
4002 *
4003 * @returns VBox status code.
4004 * @param hThread The thread handle.
4005 * @param pvUser Opaque pointer to the device instance data.
4006 */
4007static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
4008{
4009 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
4010 bool fInitialMeasurement = true;
4011 uint32_t cConsecutiveTimeouts = 0;
4012 int rc = VERR_INTERNAL_ERROR_2;
4013 for (;;)
4014 {
4015 /*
4016 * Switch on the current state.
4017 */
4018 SUPDRVTSCDELTATHREADSTATE enmState;
4019 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4020 enmState = pDevExt->enmTscDeltaThreadState;
4021 switch (enmState)
4022 {
4023 case kTscDeltaThreadState_Creating:
4024 {
4025 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4026 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
4027 if (RT_FAILURE(rc))
4028 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4029 /* fall thru */
4030 }
4031
4032 case kTscDeltaThreadState_Listening:
4033 {
4034 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4035
4036 /* Simple adaptive timeout. */
4037 if (cConsecutiveTimeouts++ == 10)
4038 {
4039 if (pDevExt->cMsTscDeltaTimeout == 1) /* 10 ms */
4040 pDevExt->cMsTscDeltaTimeout = 10;
4041 else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
4042 pDevExt->cMsTscDeltaTimeout = 100;
4043 else if (pDevExt->cMsTscDeltaTimeout == 100) /* +1000 ms */
4044 pDevExt->cMsTscDeltaTimeout = 500;
4045 cConsecutiveTimeouts = 0;
4046 }
4047 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
4048 if ( RT_FAILURE(rc)
4049 && rc != VERR_TIMEOUT)
4050 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
4051 RTThreadUserReset(pDevExt->hTscDeltaThread);
4052 break;
4053 }
4054
4055 case kTscDeltaThreadState_WaitAndMeasure:
4056 {
4057 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
4058 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
4059 if (RT_FAILURE(rc))
4060 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4061 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4062 pDevExt->cMsTscDeltaTimeout = 1;
4063 RTThreadSleep(1);
4064 /* fall thru */
4065 }
4066
4067 case kTscDeltaThreadState_Measuring:
4068 {
4069 cConsecutiveTimeouts = 0;
4070 if (fInitialMeasurement)
4071 {
4072 int cTries = 8;
4073 int cMsWaitPerTry = 10;
4074 fInitialMeasurement = false;
4075 do
4076 {
4077 rc = supdrvMeasureInitialTscDeltas(pDevExt);
4078 if ( RT_SUCCESS(rc)
4079 || ( RT_FAILURE(rc)
4080 && rc != VERR_TRY_AGAIN
4081 && rc != VERR_CPU_OFFLINE))
4082 {
4083 break;
4084 }
4085 RTThreadSleep(cMsWaitPerTry);
4086 } while (cTries-- > 0);
4087 }
4088 else
4089 {
4090 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4091 unsigned iCpu;
4092
4093 /* Measure TSC-deltas only for the CPUs that are in the set. */
4094 rc = VINF_SUCCESS;
4095 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4096 {
4097 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4098 if (RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4099 {
4100 if (pGipCpuWorker->i64TSCDelta == INT64_MAX)
4101 {
4102 int rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
4103 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4104 rc = rc2;
4105 }
4106 else
4107 {
4108 /*
4109 * The thread/someone must've called SUPR0TscDeltaMeasureBySetIndex,
4110 * mark the delta as fine to get the timer thread off our back.
4111 */
4112 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
4113 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
4114 }
4115 }
4116 }
4117 }
4118 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4119 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4120 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4121 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4122 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as the initial value. */
4123 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
4124 break;
4125 }
4126
4127 case kTscDeltaThreadState_Terminating:
4128 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
4129 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4130 return VINF_SUCCESS;
4131
4132 case kTscDeltaThreadState_Butchered:
4133 default:
4134 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
4135 }
4136 }
4137
4138 return rc;
4139}
4140
4141
4142/**
4143 * Waits for the TSC-delta measurement thread to respond to a state change.
4144 *
4145 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
4146 * other error code on internal error.
4147 *
4148 * @param pThis Pointer to the grant service instance data.
4149 * @param enmCurState The current state.
4150 * @param enmNewState The new state we're waiting for it to enter.
4151 */
4152static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
4153 SUPDRVTSCDELTATHREADSTATE enmNewState)
4154{
4155 /*
4156 * Wait a short while for the expected state transition.
4157 */
4158 int rc;
4159 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
4160 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4161 if (pDevExt->enmTscDeltaThreadState == enmNewState)
4162 {
4163 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4164 rc = VINF_SUCCESS;
4165 }
4166 else if (pDevExt->enmTscDeltaThreadState == enmCurState)
4167 {
4168 /*
4169 * Wait longer if the state has not yet transitioned to the one we want.
4170 */
4171 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4172 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
4173 if ( RT_SUCCESS(rc)
4174 || rc == VERR_TIMEOUT)
4175 {
4176 /*
4177 * Check the state whether we've succeeded.
4178 */
4179 SUPDRVTSCDELTATHREADSTATE enmState;
4180 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4181 enmState = pDevExt->enmTscDeltaThreadState;
4182 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4183 if (enmState == enmNewState)
4184 rc = VINF_SUCCESS;
4185 else if (enmState == enmCurState)
4186 {
4187 rc = VERR_TIMEOUT;
4188 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
4189 enmNewState));
4190 }
4191 else
4192 {
4193 rc = VERR_INTERNAL_ERROR;
4194 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
4195 enmState, enmNewState));
4196 }
4197 }
4198 else
4199 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
4200 }
4201 else
4202 {
4203 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4204 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
4205 rc = VERR_INTERNAL_ERROR;
4206 }
4207
4208 return rc;
4209}
4210
4211
4212/**
4213 * Signals the TSC-delta thread to start measuring TSC-deltas.
4214 *
4215 * @param pDevExt Pointer to the device instance data.
4216 */
4217static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt)
4218{
4219 if (RT_LIKELY(pDevExt->hTscDeltaThread != NIL_RTTHREAD))
4220 {
4221 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4222 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4223 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4224 {
4225 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4226 }
4227 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4228 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4229 }
4230}
4231
4232
4233/**
4234 * Terminates the actual thread running supdrvTscDeltaThread().
4235 *
4236 * This is an internal worker function for supdrvTscDeltaThreadInit() and
4237 * supdrvTscDeltaTerm().
4238 *
4239 * @param pDevExt Pointer to the device instance data.
4240 */
4241static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
4242{
4243 int rc;
4244 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4245 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
4246 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4247 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4248 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
4249 if (RT_FAILURE(rc))
4250 {
4251 /* Signal a few more times before giving up. */
4252 int cTriesLeft = 5;
4253 while (--cTriesLeft > 0)
4254 {
4255 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4256 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
4257 if (rc != VERR_TIMEOUT)
4258 break;
4259 }
4260 }
4261}
4262
4263
4264/**
4265 * Initializes and spawns the TSC-delta measurement thread.
4266 *
4267 * A thread is required for servicing re-measurement requests from events like
4268 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
4269 * under all contexts on all OSs.
4270 *
4271 * @returns VBox status code.
4272 * @param pDevExt Pointer to the device instance data.
4273 *
4274 * @remarks Must only be called -after- initializing GIP and setting up MP
4275 * notifications!
4276 */
4277static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
4278{
4279 int rc;
4280 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4281 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
4282 if (RT_SUCCESS(rc))
4283 {
4284 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
4285 if (RT_SUCCESS(rc))
4286 {
4287 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
4288 pDevExt->cMsTscDeltaTimeout = 1;
4289 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
4290 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
4291 if (RT_SUCCESS(rc))
4292 {
4293 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
4294 if (RT_SUCCESS(rc))
4295 {
4296 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4297 return rc;
4298 }
4299
4300 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
4301 supdrvTscDeltaThreadTerminate(pDevExt);
4302 }
4303 else
4304 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
4305 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4306 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4307 }
4308 else
4309 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
4310 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4311 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4312 }
4313 else
4314 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
4315
4316 return rc;
4317}
4318
4319
4320/**
4321 * Terminates the TSC-delta measurement thread and cleanup.
4322 *
4323 * @param pDevExt Pointer to the device instance data.
4324 */
4325static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
4326{
4327 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
4328 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4329 {
4330 supdrvTscDeltaThreadTerminate(pDevExt);
4331 }
4332
4333 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
4334 {
4335 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4336 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4337 }
4338
4339 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4340 {
4341 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4342 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4343 }
4344
4345 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4346}
4347
4348#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4349
4350/**
4351 * Measure the TSC delta for the CPU given by its CPU set index.
4352 *
4353 * @returns VBox status code.
4354 * @retval VERR_INTERRUPTED if interrupted while waiting.
4355 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if we were unable to get a
4356 * measurment.
4357 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
4358 *
4359 * @param pSession The caller's session. GIP must've been mapped.
4360 * @param iCpuSet The CPU set index of the CPU to measure.
4361 * @param fFlags Flags, SUP_TSCDELTA_MEASURE_F_XXX.
4362 * @param cMsWaitRetry Number of milliseconds to wait between each retry.
4363 * @param cMsWaitThread Number of milliseconds to wait for the thread to get
4364 * ready.
4365 * @param cTries Number of times to try, pass 0 for the default.
4366 */
4367SUPR0DECL(int) SUPR0TscDeltaMeasureBySetIndex(PSUPDRVSESSION pSession, uint32_t iCpuSet, uint32_t fFlags,
4368 RTMSINTERVAL cMsWaitRetry, RTMSINTERVAL cMsWaitThread, uint32_t cTries)
4369{
4370 PSUPDRVDEVEXT pDevExt;
4371 PSUPGLOBALINFOPAGE pGip;
4372 uint16_t iGipCpu;
4373 int rc;
4374#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4375 uint64_t msTsStartWait;
4376 uint32_t iWaitLoop;
4377#endif
4378
4379 /*
4380 * Validate and adjust the input.
4381 */
4382 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4383 if (!pSession->fGipReferenced)
4384 return VERR_WRONG_ORDER;
4385
4386 pDevExt = pSession->pDevExt;
4387 AssertReturn(SUP_IS_DEVEXT_VALID(pDevExt), VERR_INVALID_PARAMETER);
4388
4389 pGip = pDevExt->pGip;
4390 AssertPtrReturn(pGip, VERR_INTERNAL_ERROR_2);
4391
4392 AssertReturn(iCpuSet < RTCPUSET_MAX_CPUS, VERR_INVALID_CPU_INDEX);
4393 AssertReturn(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), VERR_INVALID_CPU_INDEX);
4394 iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
4395 AssertReturn(iGipCpu < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4396
4397 if (fFlags & ~SUP_TSCDELTA_MEASURE_F_VALID_MASK)
4398 return VERR_INVALID_FLAGS;
4399
4400 /*
4401 * The request is a noop if the TSC delta isn't being used.
4402 */
4403 if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4404 return VINF_SUCCESS;
4405
4406 if (cTries == 0)
4407 cTries = 12;
4408 else if (cTries > 256)
4409 cTries = 256;
4410
4411 if (cMsWaitRetry == 0)
4412 cMsWaitRetry = 2;
4413 else if (cMsWaitRetry > 1000)
4414 cMsWaitRetry = 1000;
4415
4416#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4417 /*
4418 * Has the TSC already been measured and we're not forced to redo it?
4419 */
4420 if ( pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX
4421 && !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE))
4422 return VINF_SUCCESS;
4423
4424 /*
4425 * Asynchronous request? Forward it to the thread, no waiting.
4426 */
4427 if (fFlags & SUP_TSCDELTA_MEASURE_F_ASYNC)
4428 {
4429 /** @todo Async. doesn't implement options like retries, waiting. We'll need
4430 * to pass those options to the thread somehow and implement it in the
4431 * thread. Check if anyone uses/needs fAsync before implementing this. */
4432 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4433 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
4434 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4435 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4436 {
4437 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4438 rc = VINF_SUCCESS;
4439 }
4440 else if (pDevExt->enmTscDeltaThreadState != kTscDeltaThreadState_WaitAndMeasure)
4441 rc = VERR_THREAD_IS_DEAD;
4442 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4443 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4444 return VINF_SUCCESS;
4445 }
4446
4447 /*
4448 * If a TSC-delta measurement request is already being serviced by the thread,
4449 * wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
4450 */
4451 msTsStartWait = RTTimeSystemMilliTS();
4452 for (iWaitLoop = 0;; iWaitLoop++)
4453 {
4454 uint64_t cMsElapsed;
4455 SUPDRVTSCDELTATHREADSTATE enmState;
4456 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4457 enmState = pDevExt->enmTscDeltaThreadState;
4458 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4459
4460 if (enmState == kTscDeltaThreadState_Measuring)
4461 { /* Must wait, the thread is busy. */ }
4462 else if (enmState == kTscDeltaThreadState_WaitAndMeasure)
4463 { /* Must wait, this state only says what will happen next. */ }
4464 else if (enmState == kTscDeltaThreadState_Terminating)
4465 { /* Must wait, this state only says what should happen next. */ }
4466 else
4467 break; /* All other states, the thread is either idly listening or dead. */
4468
4469 /* Wait or fail. */
4470 if (cMsWaitThread == 0)
4471 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4472 cMsElapsed = RTTimeSystemMilliTS() - msTsStartWait;
4473 if (cMsElapsed >= cMsWaitThread)
4474 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4475
4476 rc = RTThreadSleep(RT_MIN((RTMSINTERVAL)(cMsWaitThread - cMsElapsed), RT_MIN(iWaitLoop + 1, 10)));
4477 if (rc == VERR_INTERRUPTED)
4478 return rc;
4479 }
4480#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4481
4482 /*
4483 * Try measure the TSC delta the given number of times.
4484 */
4485 for (;;)
4486 {
4487 /* Unless we're forced to measure the delta, check whether it's done already. */
4488 if ( !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE)
4489 && pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX)
4490 {
4491 rc = VINF_SUCCESS;
4492 break;
4493 }
4494
4495 /* Measure it. */
4496 rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
4497 if (rc != VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED)
4498 {
4499 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
4500 break;
4501 }
4502
4503 /* Retry? */
4504 if (cTries <= 1)
4505 break;
4506 cTries--;
4507
4508 /* Always delay between retries (be nice to the rest of the system
4509 and avoid the BSOD hounds). */
4510 rc = RTThreadSleep(cMsWaitRetry);
4511 if (rc == VERR_INTERRUPTED)
4512 break;
4513 }
4514
4515 return rc;
4516}
4517
4518
4519/**
4520 * Service a TSC-delta measurement request.
4521 *
4522 * @returns VBox status code.
4523 * @param pDevExt Pointer to the device instance data.
4524 * @param pSession The support driver session.
4525 * @param pReq Pointer to the TSC-delta measurement request.
4526 */
4527int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
4528{
4529 uint32_t cTries;
4530 uint32_t iCpuSet;
4531 uint32_t fFlags;
4532 RTMSINTERVAL cMsWaitRetry;
4533
4534 /*
4535 * Validate and adjust/resolve the input so they can be passed onto SUPR0TscDeltaMeasureBySetIndex.
4536 */
4537 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
4538
4539 if (pReq->u.In.idCpu == NIL_RTCPUID)
4540 return VERR_INVALID_CPU_ID;
4541 iCpuSet = RTMpCpuIdToSetIndex(pReq->u.In.idCpu);
4542 if (iCpuSet >= RTCPUSET_MAX_CPUS)
4543 return VERR_INVALID_CPU_ID;
4544
4545 cTries = pReq->u.In.cRetries == 0 ? 0 : (uint32_t)pReq->u.In.cRetries + 1;
4546
4547 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
4548
4549 fFlags = 0;
4550 if (pReq->u.In.fAsync)
4551 fFlags |= SUP_TSCDELTA_MEASURE_F_ASYNC;
4552 if (pReq->u.In.fForce)
4553 fFlags |= SUP_TSCDELTA_MEASURE_F_FORCE;
4554
4555 return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, fFlags, cMsWaitRetry,
4556 cTries == 0 ? 5 * RT_MS_1SEC : cMsWaitRetry * cTries /*cMsWaitThread*/,
4557 cTries);
4558}
4559
4560
4561/**
4562 * Reads TSC with delta applied.
4563 *
4564 * Will try to resolve delta value INT64_MAX before applying it. This is the
4565 * main purpose of this function, to handle the case where the delta needs to be
4566 * determined.
4567 *
4568 * @returns VBox status code.
4569 * @param pDevExt Pointer to the device instance data.
4570 * @param pSession The support driver session.
4571 * @param pReq Pointer to the TSC-read request.
4572 */
4573int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
4574{
4575 PSUPGLOBALINFOPAGE pGip;
4576 int rc;
4577
4578 /*
4579 * Validate. We require the client to have mapped GIP (no asserting on
4580 * ring-3 preconditions).
4581 */
4582 AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
4583 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4584 return VERR_WRONG_ORDER;
4585 pGip = pDevExt->pGip;
4586 AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
4587
4588 /*
4589 * We're usually here because we need to apply delta, but we shouldn't be
4590 * upset if the GIP is some different mode.
4591 */
4592 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4593 {
4594 uint32_t cTries = 0;
4595 for (;;)
4596 {
4597 /*
4598 * Start by gathering the data, using CLI for disabling preemption
4599 * while we do that.
4600 */
4601 RTCCUINTREG fEFlags = ASMIntDisableFlags();
4602 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
4603 int iGipCpu;
4604 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
4605 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
4606 {
4607 int64_t i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
4608 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
4609 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
4610 ASMSetFlags(fEFlags);
4611
4612 /*
4613 * If we're lucky we've got a delta, but no predicitions here
4614 * as this I/O control is normally only used when the TSC delta
4615 * is set to INT64_MAX.
4616 */
4617 if (i64Delta != INT64_MAX)
4618 {
4619 pReq->u.Out.u64AdjustedTsc -= i64Delta;
4620 rc = VINF_SUCCESS;
4621 break;
4622 }
4623
4624 /* Give up after a few times. */
4625 if (cTries >= 4)
4626 {
4627 rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
4628 break;
4629 }
4630
4631 /* Need to measure the delta an try again. */
4632 rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
4633 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
4634 /** @todo should probably delay on failure... dpc watchdogs */
4635 }
4636 else
4637 {
4638 /* This really shouldn't happen. */
4639 AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
4640 pReq->u.Out.idApic = ASMGetApicId();
4641 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
4642 ASMSetFlags(fEFlags);
4643 rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
4644 break;
4645 }
4646 }
4647 }
4648 else
4649 {
4650 /*
4651 * No delta to apply. Easy. Deal with preemption the lazy way.
4652 */
4653 RTCCUINTREG fEFlags = ASMIntDisableFlags();
4654 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
4655 int iGipCpu;
4656 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
4657 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
4658 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
4659 else
4660 pReq->u.Out.idApic = ASMGetApicId();
4661 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
4662 ASMSetFlags(fEFlags);
4663 rc = VINF_SUCCESS;
4664 }
4665
4666 return rc;
4667}
4668
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette