VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp@ 54578

Last change on this file since 54578 was 54578, checked in by vboxsync, 10 years ago

HostDrivers/Support: Host suspend/resume now recomputes TSC-deltas for all CPUs.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 171.9 KB
Line 
1/* $Id: SUPDrvGip.cpp 54578 2015-03-02 14:42:45Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/uint128.h>
58#include <iprt/x86.h>
59
60#include <VBox/param.h>
61#include <VBox/log.h>
62#include <VBox/err.h>
63
64#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
65# include "dtrace/SUPDrv.h"
66#else
67/* ... */
68#endif
69
70
71/*******************************************************************************
72* Defined Constants And Macros *
73*******************************************************************************/
74/** The frequency by which we recalculate the u32UpdateHz and
75 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
76 *
77 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
78 */
79#define GIP_UPDATEHZ_RECALC_FREQ 0x800
80
81/** A reserved TSC value used for synchronization as well as measurement of
82 * TSC deltas. */
83#define GIP_TSC_DELTA_RSVD UINT64_MAX
84/** The number of TSC delta measurement loops in total (includes primer and
85 * read-time loops). */
86#define GIP_TSC_DELTA_LOOPS 96
87/** The number of cache primer loops. */
88#define GIP_TSC_DELTA_PRIMER_LOOPS 4
/** The number of loops until we keep computing the minimum read time. */
90#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
91
92/** The TSC frequency refinement period in seconds.
93 * The timer fires after 200ms, then every second, this value just says when
94 * to stop it after that. */
95#define GIP_TSC_REFINE_PERIOD_IN_SECS 12
96/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
97#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
98/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
99#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
100/** The TSC delta value for the initial GIP master - 0 in regular builds.
101 * To test the delta code this can be set to a non-zero value. */
102#if 0
103# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
104#else
105# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
106#endif
107
108AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
109AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
110
111/** @def VBOX_SVN_REV
112 * The makefile should define this if it can. */
113#ifndef VBOX_SVN_REV
114# define VBOX_SVN_REV 0
115#endif
116
117#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
118# define DO_NOT_START_GIP
119#endif
120
121
122/*******************************************************************************
123* Internal Functions *
124*******************************************************************************/
125static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
126static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
127static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz);
128static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas);
129#ifdef SUPDRV_USE_TSC_DELTA_THREAD
130static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
131static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
132static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll);
133#else
134static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt);
135static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
136#endif
137
138
139/*******************************************************************************
140* Global Variables *
141*******************************************************************************/
142DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
143
144
145
146/*
147 *
148 * Misc Common GIP Code
149 * Misc Common GIP Code
150 * Misc Common GIP Code
151 *
152 *
153 */
154
155
156/**
157 * Finds the GIP CPU index corresponding to @a idCpu.
158 *
159 * @returns GIP CPU array index, UINT32_MAX if not found.
160 * @param pGip The GIP.
161 * @param idCpu The CPU ID.
162 */
163static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
164{
165 uint32_t i;
166 for (i = 0; i < pGip->cCpus; i++)
167 if (pGip->aCPUs[i].idCpu == idCpu)
168 return i;
169 return UINT32_MAX;
170}
171
172
173
174/*
175 *
176 * GIP Mapping and Unmapping Related Code.
177 * GIP Mapping and Unmapping Related Code.
178 * GIP Mapping and Unmapping Related Code.
179 *
180 *
181 */
182
183
/**
 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
 * updating.
 *
 * The TSC base is back-dated by one update interval so the first real update
 * looks like a normal interval.
 *
 * @param   pGip        Pointer to the GIP (currently unused here).
 * @param   pGipCpu     The per CPU structure for this CPU.
 * @param   u64NanoTS   The current time.
 */
static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
{
    /*
     * Here we don't really care about applying the TSC delta. The re-initialization of this
     * value is not relevant especially while (re)starting the GIP as the first few ones will
     * be ignored anyway, see supdrvGipDoUpdateCpu().
     */
    pGipCpu->u64TSC    = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
    pGipCpu->u64NanoTS = u64NanoTS;
}
202
203
/**
 * Set the current TSC and NanoTS value for the CPU.
 *
 * Multiprocessor callback (used via RTMpOnAll); locates the per-CPU GIP entry
 * through the APIC ID of the CPU we are executing on.
 *
 * @param   idCpu    The CPU ID. Unused - we have to use the APIC ID.
 * @param   pvUser1  Pointer to the ring-0 GIP mapping.
 * @param   pvUser2  Pointer to the variable holding the current time.
 */
static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPGLOBALINFOPAGE  pGip = (PSUPGLOBALINFOPAGE)pvUser1;
    unsigned            iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];

    /* Only touch the entry if the index is in range and really belongs to this CPU. */
    if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
        supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);

    NOREF(pvUser2);
    NOREF(idCpu);
}
222
223
/**
 * State structure for supdrvGipDetectGetGipCpuCallback.
 *
 * One instance is shared by all CPUs running the callback concurrently, hence
 * the volatile members and the atomic accesses in the callback.
 */
typedef struct SUPDRVGIPDETECTGETCPU
{
    /** Bitmap of APIC IDs that has been seen (initialized to zero).
     * Used to detect duplicate APIC IDs (paranoia). */
    uint8_t volatile bmApicId[256 / 8];
    /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
     * initially). The callback clears the methods not detected. */
    uint32_t volatile fSupported;
    /** The first callback detecting any kind of range issues (initialized to
     * NIL_RTCPUID). */
    RTCPUID volatile idCpuProblem;
} SUPDRVGIPDETECTGETCPU;
/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
241
242
243/**
244 * Checks for alternative ways of getting the CPU ID.
245 *
246 * This also checks the APIC ID, CPU ID and CPU set index values against the
247 * GIP tables.
248 *
249 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
250 * @param pvUser1 Pointer to the state structure.
251 * @param pvUser2 Pointer to the GIP.
252 */
253static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
254{
255 PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
256 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
257 uint32_t fSupported = 0;
258 uint16_t idApic;
259 int iCpuSet;
260
261 AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
262
263 /*
264 * Check that the CPU ID and CPU set index are interchangable.
265 */
266 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
267 if ((RTCPUID)iCpuSet == idCpu)
268 {
269 AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
270 if ( iCpuSet >= 0
271 && iCpuSet < RTCPUSET_MAX_CPUS
272 && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
273 {
274 /*
275 * Check whether the IDTR.LIMIT contains a CPU number.
276 */
277#ifdef RT_ARCH_X86
278 uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
279#else
280 uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
281#endif
282 RTIDTR Idtr;
283 ASMGetIDTR(&Idtr);
284 if (Idtr.cbIdt >= cbIdt)
285 {
286 uint32_t uTmp = Idtr.cbIdt - cbIdt;
287 uTmp &= RTCPUSET_MAX_CPUS - 1;
288 if (uTmp == idCpu)
289 {
290 RTIDTR Idtr2;
291 ASMGetIDTR(&Idtr2);
292 if (Idtr2.cbIdt == Idtr.cbIdt)
293 fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
294 }
295 }
296
297 /*
298 * Check whether RDTSCP is an option.
299 */
300 if (ASMHasCpuId())
301 {
302 if ( ASMIsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
303 && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
304 {
305 uint32_t uAux;
306 ASMReadTscWithAux(&uAux);
307 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
308 {
309 ASMNopPause();
310 ASMReadTscWithAux(&uAux);
311 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
312 fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
313 }
314 }
315 }
316 }
317 }
318
319 /*
320 * Check that the APIC ID is unique.
321 */
322 idApic = ASMGetApicId();
323 if (RT_LIKELY( idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)
324 && !ASMAtomicBitTestAndSet(pState->bmApicId, idApic)))
325 fSupported |= SUPGIPGETCPU_APIC_ID;
326 else
327 {
328 AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
329 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
330 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - duplicate APIC ID.\n",
331 idCpu, iCpuSet, idApic));
332 }
333
334 /*
335 * Check that the iCpuSet is within the expected range.
336 */
337 if (RT_UNLIKELY( iCpuSet < 0
338 || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
339 || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
340 {
341 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
342 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
343 idCpu, iCpuSet, idApic));
344 }
345 else
346 {
347 RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
348 if (RT_UNLIKELY(idCpu2 != idCpu))
349 {
350 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
351 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
352 idCpu, iCpuSet, idApic, idCpu2));
353 }
354 }
355
356 /*
357 * Update the supported feature mask before we return.
358 */
359 ASMAtomicAndU32(&pState->fSupported, fSupported);
360
361 NOREF(pvUser2);
362}
363
364
365/**
366 * Increase the timer freqency on hosts where this is possible (NT).
367 *
368 * The idea is that more interrupts is better for us... Also, it's better than
369 * we increase the timer frequence, because we might end up getting inaccurate
370 * callbacks if someone else does it.
371 *
372 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
373 */
374static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
375{
376 if (pDevExt->u32SystemTimerGranularityGrant == 0)
377 {
378 uint32_t u32SystemResolution;
379 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
380 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
381 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
382 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
383 )
384 {
385 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
386 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
387 }
388 }
389}
390
391
392/**
393 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
394 *
395 * @param pDevExt Clears u32SystemTimerGranularityGrant.
396 */
397static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
398{
399 if (pDevExt->u32SystemTimerGranularityGrant)
400 {
401 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
402 AssertRC(rc2);
403 pDevExt->u32SystemTimerGranularityGrant = 0;
404 }
405}
406
407
/**
 * Maps the GIP into userspace and/or get the physical address of the GIP.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 * @param   ppGipR3     Where to store the address of the ring-3 mapping. (optional)
 * @param   pHCPhysGip  Where to store the physical address. (optional)
 *
 * @remark  There is no reference counting on the mapping, so one call to this function
 *          counts globally as one reference.  One call to SUPR0GipUnmap() will unmap
 *          the GIP and remove the session as a GIP user.
 */
SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
{
    int             rc;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
    RTR3PTR         pGipR3  = NIL_RTR3PTR;
    RTHCPHYS        HCPhys  = NIL_RTHCPHYS;
    LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));

    /*
     * Validate
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
    AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif
    if (pDevExt->pGip)
    {
        /*
         * Map it?  (Reuse an existing ring-3 mapping if this session has one.)
         */
        rc = VINF_SUCCESS;
        if (ppGipR3)
        {
            if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
                rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
                                       RTMEM_PROT_READ, RTR0ProcHandleSelf());
            if (RT_SUCCESS(rc))
                pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
        }

        /*
         * Get physical address.
         */
        if (pHCPhysGip && RT_SUCCESS(rc))
            HCPhys = pDevExt->HCPhysGip;

        /*
         * Reference globally.  The first user (re)starts the GIP updating.
         */
        if (!pSession->fGipReferenced && RT_SUCCESS(rc))
        {
            pSession->fGipReferenced = 1;
            pDevExt->cGipUsers++;
            if (pDevExt->cGipUsers == 1)
            {
                PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
                uint64_t u64NanoTS;

                /*
                 * GIP starts/resumes updating again.  On windows we bump the
                 * host timer frequency to make sure we don't get stuck in guest
                 * mode and to get better timer (and possibly clock) accuracy.
                 */
                LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));

                supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);

                /*
                 * When resuming (i.e. this is not the initial start), skip each
                 * CPU's transaction counter ahead to the next multiple of
                 * GIP_UPDATEHZ_RECALC_FREQ * 2 and reset u64NanoTSLastUpdateHz,
                 * presumably so stale pre-suspend data doesn't skew the
                 * update-interval/Hz recalculation -- TODO confirm intent.
                 */
                if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
                {
                    unsigned i;
                    for (i = 0; i < pGipR0->cCpus; i++)
                        ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
                                            (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
                                            & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
                    ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
                }

                /*
                 * Re-initialize the per-CPU TSC and NanoTS base values, backdated
                 * by one update interval.  For the invariant and sync TSC modes
                 * (or with a single online CPU) updating aCPUs[0] suffices;
                 * otherwise every CPU must be visited via RTMpOnAll.
                 */
                u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
                if (   pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
                    || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
                    || RTMpGetOnlineCount() == 1)
                    supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
                else
                    RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);

                /*
                 * Detect alternative ways to figure the CPU ID in ring-3 and
                 * raw-mode context.  Check the sanity of the APIC IDs, CPU IDs,
                 * and CPU set indexes while we're at it.
                 */
                if (RT_SUCCESS(rc))
                {
                    SUPDRVGIPDETECTGETCPU DetectState;
                    RT_BZERO((void *)&DetectState.bmApicId, sizeof(DetectState.bmApicId));
                    DetectState.fSupported = UINT32_MAX;
                    DetectState.idCpuProblem = NIL_RTCPUID;
                    rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, &DetectState, pGipR0);
                    if (DetectState.idCpuProblem == NIL_RTCPUID)
                    {
                        if (   DetectState.fSupported != UINT32_MAX
                            && DetectState.fSupported != 0)
                        {
                            /* Publish the detected getter methods if they changed. */
                            if (pGipR0->fGetGipCpu != DetectState.fSupported)
                            {
                                pGipR0->fGetGipCpu = DetectState.fSupported;
                                LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", DetectState.fSupported));
                            }
                        }
                        else
                        {
                            LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
                                    DetectState.fSupported));
                            rc = VERR_UNSUPPORTED_CPU;
                        }
                    }
                    else
                    {
                        LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
                                DetectState.idCpuProblem, DetectState.idCpuProblem));
                        rc = VERR_INVALID_CPU_ID;
                    }
                }

                /*
                 * Start the GIP timer if all is well..
                 */
                if (RT_SUCCESS(rc))
                {
#ifndef DO_NOT_START_GIP
                    rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
#endif
                    rc = VINF_SUCCESS;
                }

                /*
                 * Bail out on error: undo the reference and the ring-3 mapping.
                 */
                if (RT_FAILURE(rc))
                {
                    LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
                    pDevExt->cGipUsers = 0;
                    pSession->fGipReferenced = 0;
                    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
                    {
                        int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
                        if (RT_SUCCESS(rc2))
                            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
                    }
                    HCPhys = NIL_RTHCPHYS;
                    pGipR3 = NIL_RTR3PTR;
                }
            }
        }
    }
    else
    {
        rc = VERR_GENERAL_FAILURE;
        Log(("SUPR0GipMap: GIP is not available!\n"));
    }
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    /*
     * Write returns.
     */
    if (pHCPhysGip)
        *pHCPhysGip = HCPhys;
    if (ppGipR3)
        *ppGipR3 = pGipR3;

#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#else
    LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#endif
    return rc;
}
601
602
/**
 * Unmaps any user mapping of the GIP and terminates all GIP access
 * from this session.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 */
SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
{
    int             rc = VINF_SUCCESS;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
                pSession,
                pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
                pSession->GipMapObjR3));
#else
    LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
#endif
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    /*
     * Unmap anything?
     */
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
        AssertRC(rc);
        if (RT_SUCCESS(rc))
            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    }

    /*
     * Dereference global GIP.  (Only done when the unmapping above, if any,
     * succeeded.)
     */
    if (pSession->fGipReferenced && !rc)
    {
        pSession->fGipReferenced = 0;
        if (   pDevExt->cGipUsers > 0
            && !--pDevExt->cGipUsers)
        {
            /* Last user gone: stop the GIP timer and restore the system timer resolution. */
            LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
#ifndef DO_NOT_START_GIP
            rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
#endif
            supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
        }
    }

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    return rc;
}
666
667
/**
 * Gets the GIP pointer.
 *
 * @returns Pointer to the GIP, or NULL if it hasn't been set up yet
 *          (g_pSUPGlobalInfoPage is initialized to NULL).
 */
SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
{
    return g_pSUPGlobalInfoPage;
}
677
678
679
680
681
682/*
683 *
684 *
685 * GIP Initialization, Termination and CPU Offline / Online Related Code.
686 * GIP Initialization, Termination and CPU Offline / Online Related Code.
687 * GIP Initialization, Termination and CPU Offline / Online Related Code.
688 *
689 *
690 */
691
/**
 * Used by supdrvInitRefineInvariantTscFreqTimer and supdrvGipInitMeasureTscFreq
 * to update the TSC frequency related GIP variables.
 *
 * Computes uCpuHz = cElapsedTscTicks * RT_NS_1SEC / nsElapsed and stores it in
 * the GIP.
 *
 * @param   pGip                The GIP.
 * @param   nsElapsed           The number of nano seconds elapsed.
 * @param   cElapsedTscTicks    The corresponding number of TSC ticks.
 * @param   iTick               The tick number for debugging.
 */
static void supdrvGipInitSetCpuFreq(PSUPGLOBALINFOPAGE pGip, uint64_t nsElapsed, uint64_t cElapsedTscTicks, uint32_t iTick)
{
    /*
     * Calculate the frequency.
     */
    uint64_t uCpuHz;
    if (   cElapsedTscTicks < UINT64_MAX / RT_NS_1SEC
        && nsElapsed < UINT32_MAX)
        /* Fast path: the multiplication cannot overflow 64 bits and the divisor fits in 32 bits. */
        uCpuHz = ASMMultU64ByU32DivByU32(cElapsedTscTicks, RT_NS_1SEC, (uint32_t)nsElapsed);
    else
    {
        /* Slow path: do the same calculation with 128-bit intermediates to avoid overflow. */
        RTUINT128U CpuHz, Tmp, Divisor;
        CpuHz.s.Lo = CpuHz.s.Hi = 0;
        RTUInt128MulU64ByU64(&Tmp, cElapsedTscTicks, RT_NS_1SEC_64);
        RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, nsElapsed));
        uCpuHz = CpuHz.s.Lo;
    }

    /*
     * Update the GIP.  In non-async modes CPU 0 carries the authoritative
     * frequency as well.
     */
    ASMAtomicWriteU64(&pGip->u64CpuHz, uCpuHz);
    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    {
        ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, uCpuHz);

        /* For inspecting the frequency calcs using tstGIP-2, debugger or similar. */
        if (iTick + 1 < pGip->cCpus)
            ASMAtomicWriteU64(&pGip->aCPUs[iTick + 1].u64CpuHz, uCpuHz);
    }
}
732
733
/**
 * Timer callback function for TSC frequency refinement in invariant GIP mode.
 *
 * This is started during driver init and fires once
 * GIP_TSC_REFINE_PERIOD_IN_SECS seconds later.
 *
 * @param   pTimer      The timer.
 * @param   pvUser      Opaque pointer to the device instance data.
 * @param   iTick       The timer tick.
 */
static DECLCALLBACK(void) supdrvInitRefineInvariantTscFreqTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PSUPDRVDEVEXT       pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE  pGip = pDevExt->pGip;
    RTCPUID             idCpu;
    uint64_t            cNsElapsed;
    uint64_t            cTscTicksElapsed;
    uint64_t            nsNow;
    uint64_t            uTsc;
    RTCCUINTREG         fEFlags;

    /* Paranoia. */
    AssertReturnVoid(pGip);
    AssertReturnVoid(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);

    /*
     * If we got a power event, stop the refinement process.
     */
    if (pDevExt->fInvTscRefinePowerEvent)
    {
        int rc = RTTimerStop(pTimer); AssertRC(rc);
        return;
    }

    /*
     * Read the TSC and time, noting which CPU we are on.
     *
     * Don't bother spinning until RTTimeSystemNanoTS changes, since on
     * systems where it matters we're in a context where we cannot waste that
     * much time (DPC watchdog, called from clock interrupt).
     */
    /* Interrupts off so the TSC, time and CPU ID are sampled back-to-back. */
    fEFlags = ASMIntDisableFlags();
    uTsc    = ASMReadTSC();
    nsNow   = RTTimeSystemNanoTS();
    idCpu   = RTMpCpuId();
    ASMSetFlags(fEFlags);

    cNsElapsed       = nsNow - pDevExt->nsStartInvarTscRefine;
    cTscTicksElapsed = uTsc  - pDevExt->uTscStartInvarTscRefine;

    /*
     * If the above measurement was taken on a different CPU than the one we
     * started the process on, cTscTicksElapsed will need to be adjusted with
     * the TSC deltas of both the CPUs.
     *
     * We ASSUME that the delta calculation process takes less time than the
     * TSC frequency refinement timer.  If it doesn't, we'll complain and
     * drop the frequency refinement.
     *
     * Note! We cannot entirely trust enmUseTscDelta here because it's
     *       downgraded after each delta calculation.
     */
    if (   idCpu != pDevExt->idCpuInvarTscRefine
        && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        /* Map both CPU IDs to GIP entries and fetch their deltas; INT64_MAX
           means the delta hasn't been measured yet. */
        uint32_t iStartCpuSet   = RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine);
        uint32_t iStopCpuSet    = RTMpCpuIdToSetIndex(idCpu);
        uint16_t iStartGipCpu   = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
        uint16_t iStopGipCpu    = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
        int64_t  iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
        int64_t  iStopTscDelta  = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
        if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
        {
            if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
            {
                /* cTscTicksElapsed = (uTsc - iStopTscDelta) - (pDevExt->uTscStartInvarTscRefine - iStartTscDelta); */
                cTscTicksElapsed += iStartTscDelta - iStopTscDelta;
            }
        }
        /*
         * Allow 5 times the refinement period to elapse before we give up on the TSC delta
         * calculations.
         */
        else if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * 5 * RT_NS_1SEC_64)
        {
            SUPR0Printf("vboxdrv: Failed to refine invariant TSC frequency because deltas are unavailable after %u (%u) seconds\n",
                        (uint32_t)(cNsElapsed / RT_NS_1SEC), GIP_TSC_REFINE_PERIOD_IN_SECS);
            SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
                        iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
            int rc = RTTimerStop(pTimer); AssertRC(rc);
            return;
        }
    }

    /*
     * Calculate and update the CPU frequency variables in GIP.
     *
     * If there is a GIP user already and we've already refined the frequency
     * a couple of times, don't update it as we want a stable frequency value
     * for all VMs.
     */
    if (   pDevExt->cGipUsers == 0
        || cNsElapsed < RT_NS_1SEC * 2)
    {
        supdrvGipInitSetCpuFreq(pGip, cNsElapsed, cTscTicksElapsed, (uint32_t)iTick);

        /*
         * Stop the timer once we've reached the defined refinement period.
         */
        if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * RT_NS_1SEC_64)
        {
            int rc = RTTimerStop(pTimer);
            AssertRC(rc);
        }
    }
    else
    {
        /* A GIP user showed up past the initial refinements: freeze the
           frequency by stopping the timer. */
        int rc = RTTimerStop(pTimer);
        AssertRC(rc);
    }
}
857
858
/**
 * @callback_method_impl{FNRTPOWERNOTIFICATION}
 *
 * On suspend: flags the TSC-frequency refinement timer to stop itself.
 * On resume: does the same and additionally kicks off re-measurement of all
 * TSC deltas.
 */
static DECLCALLBACK(void) supdrvGipPowerNotificationCallback(RTPOWEREVENT enmEvent, void *pvUser)
{
    PSUPDRVDEVEXT      pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip    = pDevExt->pGip;

    /*
     * If the TSC frequency refinement timer is running, we need to cancel it so it
     * doesn't screw up the frequency after a long suspend.
     *
     * Recalculate all TSC-deltas on host resume as it may have changed, seen
     * on Windows 7 running on the Dell Optiplex Intel Core i5-3570.
     */
    if (enmEvent == RTPOWEREVENT_RESUME)
    {
        ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
        if (   RT_LIKELY(pGip)
            && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
        {
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
            supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
#else
            /* Queue all online CPUs for delta measurement and run it inline. */
            RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
            supdrvMeasureInitialTscDeltas(pDevExt);
#endif
        }
    }
    else if (enmEvent == RTPOWEREVENT_SUSPEND)
        ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
}
891
892
893/**
894 * Start the TSC-frequency refinment timer for the invariant TSC GIP mode.
895 *
896 * We cannot use this in the synchronous and asynchronous tsc GIP modes because
897 * the CPU may change the TSC frequence between now and when the timer fires
898 * (supdrvInitAsyncRefineTscTimer).
899 *
900 * @param pDevExt Pointer to the device instance data.
901 * @param pGip Pointer to the GIP.
902 */
static void supdrvGipInitStartTimerForRefiningInvariantTscFreq(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip)
{
    uint64_t u64NanoTS;
    RTCCUINTREG fEFlags;
    int rc;

    /*
     * Register a power management callback.  Suspend/resume can invalidate
     * the refinement anchor point, so the callback needs to know about it
     * (fInvTscRefinePowerEvent is the flag it raises).
     */
    pDevExt->fInvTscRefinePowerEvent = false;
    rc = RTPowerNotificationRegister(supdrvGipPowerNotificationCallback, pDevExt);
    AssertRC(rc); /* ignore */

    /*
     * Record the TSC and NanoTS as the starting anchor point for refinement
     * of the TSC.  We try to get as close to a clock tick as possible on
     * systems which do not provide high resolution time.
     */
    u64NanoTS = RTTimeSystemNanoTS();
    while (RTTimeSystemNanoTS() == u64NanoTS)
        ASMNopPause();

    /* Disable interrupts so the TSC, timestamp and CPU ID are all sampled on
       the same CPU without the thread being rescheduled in between. */
    fEFlags = ASMIntDisableFlags();
    pDevExt->uTscStartInvarTscRefine = ASMReadTSC();
    pDevExt->nsStartInvarTscRefine = RTTimeSystemNanoTS();
    pDevExt->idCpuInvarTscRefine = RTMpCpuId();
    ASMSetFlags(fEFlags);

    /*
     * Create a timer that runs on the same CPU so we won't have a dependency
     * on the TSC-delta and can run in parallel to it.  On systems that do not
     * implement CPU specific timers we'll apply deltas in the timer callback,
     * just like we do for CPUs going offline.
     *
     * The longer the refinement interval the better the accuracy, at least in
     * theory.  If it's too long though, ring-3 may already be starting its
     * first VMs before we're done.  On most systems we will be loading the
     * support driver during boot and VMs won't be started for a while yet,
     * it is really only a problem during development (especially with
     * on-demand driver starting on windows).
     *
     * To avoid wasting time doing a long supdrvGipInitMeasureTscFreq() call
     * to calculate the frequency during driver loading, the timer is set
     * to fire after 200 ms the first time.  It will then reschedule itself
     * to fire every second until GIP_TSC_REFINE_PERIOD_IN_SECS has been
     * reached or it notices that there is a user land client with GIP
     * mapped (we want a stable frequency for all VMs).
     */
    rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC,
                         RTTIMER_FLAGS_CPU(RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine)),
                         supdrvInitRefineInvariantTscFreqTimer, pDevExt);
    if (RT_SUCCESS(rc))
    {
        rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
        if (RT_SUCCESS(rc))
            return;
        RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
    }

    /* Fall back to an any-CPU timer when CPU-specific timers aren't available
       (or the anchor CPU went offline); deltas are then applied in the callback. */
    if (rc == VERR_CPU_OFFLINE || rc == VERR_NOT_SUPPORTED)
    {
        rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC, RTTIMER_FLAGS_CPU_ANY,
                             supdrvInitRefineInvariantTscFreqTimer, pDevExt);
        if (RT_SUCCESS(rc))
        {
            rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
            if (RT_SUCCESS(rc))
                return;
            RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
        }
    }

    /* Refinement is best-effort: log the failure but don't fail driver init. */
    pDevExt->pInvarTscRefineTimer = NULL;
    OSDBGPRINT(("vboxdrv: Failed to create or start TSC frequency refinement timer: rc=%Rrc\n", rc));
}
978
979
980/**
981 * @callback_method_impl{PFNRTMPWORKER,
982 * RTMpOnSpecific callback for reading TSC and time on the CPU we started
983 * the measurements on.}
984 */
985DECLCALLBACK(void) supdrvGipInitReadTscAndNanoTsOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
986{
987 RTCCUINTREG fEFlags = ASMIntDisableFlags();
988 uint64_t *puTscStop = (uint64_t *)pvUser1;
989 uint64_t *pnsStop = (uint64_t *)pvUser2;
990
991 *puTscStop = ASMReadTSC();
992 *pnsStop = RTTimeSystemNanoTS();
993
994 ASMSetFlags(fEFlags);
995}
996
997
998/**
999 * Measures the TSC frequency of the system.
1000 *
1001 * The TSC frequency can vary on systems which are not reported as invariant.
1002 * On such systems the object of this function is to find out what the nominal,
1003 * maximum TSC frequency under 'normal' CPU operation.
1004 *
1005 * @returns VBox status code.
1006 * @param pDevExt Pointer to the device instance.
1007 * @param pGip Pointer to the GIP.
1008 * @param fRough Set if we're doing the rough calculation that the
1009 * TSC measuring code needs, where accuracy isn't all
1010 * that important (too high is better than to low).
1011 * When clear we try for best accuracy that we can
1012 * achieve in reasonably short time.
1013 */
1014static int supdrvGipInitMeasureTscFreq(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, bool fRough)
1015{
1016 uint32_t nsTimerIncr = RTTimerGetSystemGranularity();
1017 int cTriesLeft = fRough ? 4 : 2;
1018 while (cTriesLeft-- > 0)
1019 {
1020 RTCCUINTREG fEFlags;
1021 uint64_t nsStart;
1022 uint64_t nsStop;
1023 uint64_t uTscStart;
1024 uint64_t uTscStop;
1025 RTCPUID idCpuStart;
1026 RTCPUID idCpuStop;
1027
1028 /*
1029 * Synchronize with the host OS clock tick on systems without high
1030 * resolution time API (older Windows version for example).
1031 */
1032 nsStart = RTTimeSystemNanoTS();
1033 while (RTTimeSystemNanoTS() == nsStart)
1034 ASMNopPause();
1035
1036 /*
1037 * Read the TSC and current time, noting which CPU we're on.
1038 */
1039 fEFlags = ASMIntDisableFlags();
1040 uTscStart = ASMReadTSC();
1041 nsStart = RTTimeSystemNanoTS();
1042 idCpuStart = RTMpCpuId();
1043 ASMSetFlags(fEFlags);
1044
1045 /*
1046 * Delay for a while.
1047 */
1048 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1049 {
1050 /*
1051 * Sleep-wait since the TSC frequency is constant, it eases host load.
1052 * Shorter interval produces more variance in the frequency (esp. Windows).
1053 */
1054 uint64_t msElapsed = 0;
1055 uint64_t msDelay = ( ((fRough ? 16 : 200) * RT_NS_1MS + nsTimerIncr - 1) / nsTimerIncr * nsTimerIncr - RT_NS_100US )
1056 / RT_NS_1MS;
1057 do
1058 {
1059 RTThreadSleep((RTMSINTERVAL)(msDelay - msElapsed));
1060 nsStop = RTTimeSystemNanoTS();
1061 msElapsed = (nsStop - nsStart) / RT_NS_1MS;
1062 } while (msElapsed < msDelay);
1063
1064 while (RTTimeSystemNanoTS() == nsStop)
1065 ASMNopPause();
1066 }
1067 else
1068 {
1069 /*
1070 * Busy-wait keeping the frequency up.
1071 */
1072 do
1073 {
1074 ASMNopPause();
1075 nsStop = RTTimeSystemNanoTS();
1076 } while (nsStop - nsStart < RT_NS_100MS);
1077 }
1078
1079 /*
1080 * Read the TSC and time again.
1081 */
1082 fEFlags = ASMIntDisableFlags();
1083 uTscStop = ASMReadTSC();
1084 nsStop = RTTimeSystemNanoTS();
1085 idCpuStop = RTMpCpuId();
1086 ASMSetFlags(fEFlags);
1087
1088 /*
1089 * If the CPU changes things get a bit complicated and what we
1090 * can get away with depends on the GIP mode / TSC reliablity.
1091 */
1092 if (idCpuStop != idCpuStart)
1093 {
1094 bool fDoXCall = false;
1095
1096 /*
1097 * Synchronous TSC mode: we're probably fine as it's unlikely
1098 * that we were rescheduled because of TSC throttling or power
1099 * management reasons, so just go ahead.
1100 */
1101 if (pGip->u32Mode == SUPGIPMODE_SYNC_TSC)
1102 {
1103 /* Probably ok, maybe we should retry once?. */
1104 Assert(pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_NOT_APPLICABLE);
1105 }
1106 /*
1107 * If we're just doing the rough measurement, do the cross call and
1108 * get on with things (we don't have deltas!).
1109 */
1110 else if (fRough)
1111 fDoXCall = true;
1112 /*
1113 * Invariant TSC mode: It doesn't matter if we have delta available
1114 * for both CPUs. That is not something we can assume at this point.
1115 *
1116 * Note! We cannot necessarily trust enmUseTscDelta here because it's
1117 * downgraded after each delta calculation and the delta
1118 * calculations may not be complete yet.
1119 */
1120 else if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1121 {
1122/** @todo This section of code is never reached atm, consider dropping it later on... */
1123 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1124 {
1125 uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(idCpuStart);
1126 uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpuStop);
1127 uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1128 ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
1129 uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1130 ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
1131 int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
1132 int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
1133 if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
1134 {
1135 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
1136 {
1137 uTscStart -= iStartTscDelta;
1138 uTscStop -= iStopTscDelta;
1139 }
1140 }
1141 /*
1142 * Invalid CPU indexes are not caused by online/offline races, so
1143 * we have to trigger driver load failure if that happens as GIP
1144 * and IPRT assumptions are busted on this system.
1145 */
1146 else if (iStopGipCpu >= pGip->cCpus || iStartGipCpu >= pGip->cCpus)
1147 {
1148 SUPR0Printf("vboxdrv: Unexpected CPU index in supdrvGipInitMeasureTscFreq.\n");
1149 SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
1150 iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
1151 return VERR_INVALID_CPU_INDEX;
1152 }
1153 /*
1154 * No valid deltas. We retry, if we're on our last retry
1155 * we do the cross call instead just to get a result. The
1156 * frequency will be refined in a few seconds anyways.
1157 */
1158 else if (cTriesLeft > 0)
1159 continue;
1160 else
1161 fDoXCall = true;
1162 }
1163 }
1164 /*
1165 * Asynchronous TSC mode: This is bad as the reason we usually
1166 * use this mode is to deal with variable TSC frequencies and
1167 * deltas. So, we need to get the TSC from the same CPU as
1168 * started it, we also need to keep that CPU busy. So, retry
1169 * and fall back to the cross call on the last attempt.
1170 */
1171 else
1172 {
1173 Assert(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC);
1174 if (cTriesLeft > 0)
1175 continue;
1176 fDoXCall = true;
1177 }
1178
1179 if (fDoXCall)
1180 {
1181 /*
1182 * Try read the TSC and timestamp on the start CPU.
1183 */
1184 int rc = RTMpOnSpecific(idCpuStart, supdrvGipInitReadTscAndNanoTsOnCpu, &uTscStop, &nsStop);
1185 if (RT_FAILURE(rc) && (!fRough || cTriesLeft > 0))
1186 continue;
1187 }
1188 }
1189
1190 /*
1191 * Calculate the TSC frequency and update it (shared with the refinement timer).
1192 */
1193 supdrvGipInitSetCpuFreq(pGip, nsStop - nsStart, uTscStop - uTscStart, 0);
1194 return VINF_SUCCESS;
1195 }
1196
1197 Assert(!fRough);
1198 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
1199}
1200
1201
1202/**
1203 * Finds our (@a idCpu) entry, or allocates a new one if not found.
1204 *
1205 * @returns Index of the CPU in the cache set.
1206 * @param pGip The GIP.
1207 * @param idCpu The CPU ID.
1208 */
1209static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
1210{
1211 uint32_t i, cTries;
1212
1213 /*
1214 * ASSUMES that CPU IDs are constant.
1215 */
1216 for (i = 0; i < pGip->cCpus; i++)
1217 if (pGip->aCPUs[i].idCpu == idCpu)
1218 return i;
1219
1220 cTries = 0;
1221 do
1222 {
1223 for (i = 0; i < pGip->cCpus; i++)
1224 {
1225 bool fRc;
1226 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
1227 if (fRc)
1228 return i;
1229 }
1230 } while (cTries++ < 32);
1231 AssertReleaseFailed();
1232 return i - 1;
1233}
1234
1235
1236/**
1237 * The calling CPU should be accounted as online, update GIP accordingly.
1238 *
1239 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
1240 *
1241 * @param pDevExt The device extension.
1242 * @param idCpu The CPU ID.
1243 */
1244static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1245{
1246 int iCpuSet = 0;
1247 uint16_t idApic = UINT16_MAX;
1248 uint32_t i = 0;
1249 uint64_t u64NanoTS = 0;
1250 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1251
1252 AssertPtrReturnVoid(pGip);
1253 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1254 AssertRelease(idCpu == RTMpCpuId());
1255 Assert(pGip->cPossibleCpus == RTMpGetCount());
1256
1257 /*
1258 * Do this behind a spinlock with interrupts disabled as this can fire
1259 * on all CPUs simultaneously, see @bugref{6110}.
1260 */
1261 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1262
1263 /*
1264 * Update the globals.
1265 */
1266 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
1267 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
1268 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1269 if (iCpuSet >= 0)
1270 {
1271 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1272 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
1273 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
1274 }
1275
1276 /*
1277 * Update the entry.
1278 */
1279 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
1280 i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1281
1282 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, pGip->u64CpuHz);
1283
1284 idApic = ASMGetApicId();
1285 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
1286 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
1287 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
1288
1289 /*
1290 * Update the APIC ID and CPU set index mappings.
1291 */
1292 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
1293 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
1294
1295 /* Add this CPU to this set of CPUs we need to calculate the TSC-delta for. */
1296 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, RTMpCpuIdToSetIndex(idCpu));
1297
1298 /* Update the Mp online/offline counter. */
1299 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1300
1301 /* Commit it. */
1302 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
1303
1304 RTSpinlockRelease(pDevExt->hGipSpinlock);
1305}
1306
1307
1308/**
1309 * RTMpOnSpecific callback wrapper for supdrvGipMpEventOnlineOrInitOnCpu().
1310 *
1311 * @param idCpu The CPU ID we are running on.
1312 * @param pvUser1 Opaque pointer to the device instance data.
1313 * @param pvUser2 Not used.
1314 */
1315static DECLCALLBACK(void) supdrvGipMpEventOnlineCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1316{
1317 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
1318 NOREF(pvUser2);
1319 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1320}
1321
1322
1323/**
1324 * The CPU should be accounted as offline, update the GIP accordingly.
1325 *
1326 * This is used by supdrvGipMpEvent.
1327 *
1328 * @param pDevExt The device extension.
1329 * @param idCpu The CPU ID.
1330 */
1331static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1332{
1333 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1334 int iCpuSet;
1335 unsigned i;
1336
1337 AssertPtrReturnVoid(pGip);
1338 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1339
1340 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1341 AssertReturnVoid(iCpuSet >= 0);
1342
1343 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
1344 AssertReturnVoid(i < pGip->cCpus);
1345 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
1346
1347 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1348 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
1349
1350 /* Update the Mp online/offline counter. */
1351 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1352
1353 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1354 {
1355 /* Reset the TSC delta, we will recalculate it lazily. */
1356 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
1357 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
1358 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
1359 }
1360
1361 /* Commit it. */
1362 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
1363
1364 RTSpinlockRelease(pDevExt->hGipSpinlock);
1365}
1366
1367
1368/**
1369 * Multiprocessor event notification callback.
1370 *
1371 * This is used to make sure that the GIP master gets passed on to
1372 * another CPU. It also updates the associated CPU data.
1373 *
1374 * @param enmEvent The event.
1375 * @param idCpu The cpu it applies to.
1376 * @param pvUser Pointer to the device extension.
1377 */
1378static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
1379{
1380 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1381 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1382
1383 if (pGip)
1384 {
1385 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
1386 switch (enmEvent)
1387 {
1388 case RTMPEVENT_ONLINE:
1389 {
1390 RTThreadPreemptDisable(&PreemptState);
1391 if (idCpu == RTMpCpuId())
1392 {
1393 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1394 RTThreadPreemptRestore(&PreemptState);
1395 }
1396 else
1397 {
1398 RTThreadPreemptRestore(&PreemptState);
1399 RTMpOnSpecific(idCpu, supdrvGipMpEventOnlineCallback, pDevExt, NULL /* pvUser2 */);
1400 }
1401
1402 /*
1403 * Recompute TSC-delta for the newly online'd CPU.
1404 */
1405 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1406 {
1407#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1408 supdrvTscDeltaThreadStartMeasurement(pDevExt, false /* fForceAll */);
1409#else
1410 uint32_t iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1411 supdrvMeasureTscDeltaOne(pDevExt, iCpu);
1412#endif
1413 }
1414 break;
1415 }
1416
1417 case RTMPEVENT_OFFLINE:
1418 supdrvGipMpEventOffline(pDevExt, idCpu);
1419 break;
1420 }
1421 }
1422
1423 /*
1424 * Make sure there is a master GIP.
1425 */
1426 if (enmEvent == RTMPEVENT_OFFLINE)
1427 {
1428 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
1429 if (idGipMaster == idCpu)
1430 {
1431 /*
1432 * The GIP master is going offline, find a new one.
1433 */
1434 bool fIgnored;
1435 unsigned i;
1436 RTCPUID idNewGipMaster = NIL_RTCPUID;
1437 RTCPUSET OnlineCpus;
1438 RTMpGetOnlineSet(&OnlineCpus);
1439
1440 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
1441 if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
1442 {
1443 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
1444 if (idCurCpu != idGipMaster)
1445 {
1446 idNewGipMaster = idCurCpu;
1447 break;
1448 }
1449 }
1450
1451 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
1452 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
1453 NOREF(fIgnored);
1454 }
1455 }
1456}
1457
1458
1459/**
1460 * On CPU initialization callback for RTMpOnAll.
1461 *
1462 * @param idCpu The CPU ID.
1463 * @param pvUser1 The device extension.
1464 * @param pvUser2 The GIP.
1465 */
1466static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1467{
1468 /* This is good enough, even though it will update some of the globals a
1469 bit to much. */
1470 supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
1471}
1472
1473
1474/**
1475 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
1476 *
1477 * @param idCpu Ignored.
1478 * @param pvUser1 Where to put the TSC.
1479 * @param pvUser2 Ignored.
1480 */
1481static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1482{
1483 Assert(RTMpCpuIdToSetIndex(idCpu) == (intptr_t)pvUser2);
1484 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
1485}
1486
1487
1488/**
1489 * Determine if Async GIP mode is required because of TSC drift.
1490 *
1491 * When using the default/normal timer code it is essential that the time stamp counter
1492 * (TSC) runs never backwards, that is, a read operation to the counter should return
1493 * a bigger value than any previous read operation. This is guaranteed by the latest
1494 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
1495 * case we have to choose the asynchronous timer mode.
1496 *
1497 * @param poffMin Pointer to the determined difference between different
1498 * cores (optional, can be NULL).
1499 * @return false if the time stamp counters appear to be synchronized, true otherwise.
1500 */
1501static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
1502{
1503 /*
1504 * Just iterate all the cpus 8 times and make sure that the TSC is
1505 * ever increasing. We don't bother taking TSC rollover into account.
1506 */
1507 int iEndCpu = RTMpGetArraySize();
1508 int iCpu;
1509 int cLoops = 8;
1510 bool fAsync = false;
1511 int rc = VINF_SUCCESS;
1512 uint64_t offMax = 0;
1513 uint64_t offMin = ~(uint64_t)0;
1514 uint64_t PrevTsc = ASMReadTSC();
1515
1516 while (cLoops-- > 0)
1517 {
1518 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
1519 {
1520 uint64_t CurTsc;
1521 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker,
1522 &CurTsc, (void *)(uintptr_t)iCpu);
1523 if (RT_SUCCESS(rc))
1524 {
1525 if (CurTsc <= PrevTsc)
1526 {
1527 fAsync = true;
1528 offMin = offMax = PrevTsc - CurTsc;
1529 Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
1530 iCpu, cLoops, CurTsc, PrevTsc));
1531 break;
1532 }
1533
1534 /* Gather statistics (except the first time). */
1535 if (iCpu != 0 || cLoops != 7)
1536 {
1537 uint64_t off = CurTsc - PrevTsc;
1538 if (off < offMin)
1539 offMin = off;
1540 if (off > offMax)
1541 offMax = off;
1542 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
1543 }
1544
1545 /* Next */
1546 PrevTsc = CurTsc;
1547 }
1548 else if (rc == VERR_NOT_SUPPORTED)
1549 break;
1550 else
1551 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
1552 }
1553
1554 /* broke out of the loop. */
1555 if (iCpu < iEndCpu)
1556 break;
1557 }
1558
1559 if (poffMin)
1560 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
1561 Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
1562 fAsync, iEndCpu, rc, offMin, offMax));
1563#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
1564 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
1565#endif
1566 return fAsync;
1567}
1568
1569
1570/**
1571 * supdrvGipInit() worker that determines the GIP TSC mode.
1572 *
1573 * @returns The most suitable TSC mode.
1574 * @param pDevExt Pointer to the device instance data.
1575 */
1576static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
1577{
1578 uint64_t u64DiffCoresIgnored;
1579 uint32_t uEAX, uEBX, uECX, uEDX;
1580
1581 /*
1582 * Establish whether the CPU advertises TSC as invariant, we need that in
1583 * a couple of places below.
1584 */
1585 bool fInvariantTsc = false;
1586 if (ASMHasCpuId())
1587 {
1588 uEAX = ASMCpuId_EAX(0x80000000);
1589 if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
1590 {
1591 uEDX = ASMCpuId_EDX(0x80000007);
1592 if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
1593 fInvariantTsc = true;
1594 }
1595 }
1596
1597 /*
1598 * On single CPU systems, we don't need to consider ASYNC mode.
1599 */
1600 if (RTMpGetCount() <= 1)
1601 return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
1602
1603 /*
1604 * Allow the user and/or OS specific bits to force async mode.
1605 */
1606 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
1607 return SUPGIPMODE_ASYNC_TSC;
1608
1609 /*
1610 * Use invariant mode if the CPU says TSC is invariant.
1611 */
1612 if (fInvariantTsc)
1613 return SUPGIPMODE_INVARIANT_TSC;
1614
1615 /*
1616 * TSC is not invariant and we're on SMP, this presents two problems:
1617 *
1618 * (1) There might be a skew between the CPU, so that cpu0
1619 * returns a TSC that is slightly different from cpu1.
1620 * This screw may be due to (2), bad TSC initialization
1621 * or slightly different TSC rates.
1622 *
1623 * (2) Power management (and other things) may cause the TSC
1624 * to run at a non-constant speed, and cause the speed
1625 * to be different on the cpus. This will result in (1).
1626 *
1627 * If any of the above is detected, we will have to use ASYNC mode.
1628 */
1629 /* (1). Try check for current differences between the cpus. */
1630 if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
1631 return SUPGIPMODE_ASYNC_TSC;
1632
1633 /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
1634 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1635 if ( ASMIsValidStdRange(uEAX)
1636 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
1637 {
1638 /* Check for APM support. */
1639 uEAX = ASMCpuId_EAX(0x80000000);
1640 if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
1641 {
1642 uEDX = ASMCpuId_EDX(0x80000007);
1643 if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
1644 return SUPGIPMODE_ASYNC_TSC;
1645 }
1646 }
1647
1648 return SUPGIPMODE_SYNC_TSC;
1649}
1650
1651
1652/**
1653 * Initializes per-CPU GIP information.
1654 *
1655 * @param pGip Pointer to the GIP.
1656 * @param pCpu Pointer to which GIP CPU to initalize.
1657 * @param u64NanoTS The current nanosecond timestamp.
1658 * @param uCpuHz The CPU frequency to set, 0 if the caller doesn't know.
1659 */
1660static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz)
1661{
1662 pCpu->u32TransactionId = 2;
1663 pCpu->u64NanoTS = u64NanoTS;
1664 pCpu->u64TSC = ASMReadTSC();
1665 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
1666 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
1667
1668 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
1669 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
1670 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
1671 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
1672
1673 /*
1674 * The first time we're called, we don't have a CPU frequency handy,
1675 * so pretend it's a 4 GHz CPU. On CPUs that are online, we'll get
1676 * called again and at that point we have a more plausible CPU frequency
1677 * value handy. The frequency history will also be adjusted again on
1678 * the 2nd timer callout (maybe we can skip that now?).
1679 */
1680 if (!uCpuHz)
1681 {
1682 pCpu->u64CpuHz = _4G - 1;
1683 pCpu->u32UpdateIntervalTSC = (uint32_t)((_4G - 1) / pGip->u32UpdateHz);
1684 }
1685 else
1686 {
1687 pCpu->u64CpuHz = uCpuHz;
1688 pCpu->u32UpdateIntervalTSC = (uint32_t)(uCpuHz / pGip->u32UpdateHz);
1689 }
1690 pCpu->au32TSCHistory[0]
1691 = pCpu->au32TSCHistory[1]
1692 = pCpu->au32TSCHistory[2]
1693 = pCpu->au32TSCHistory[3]
1694 = pCpu->au32TSCHistory[4]
1695 = pCpu->au32TSCHistory[5]
1696 = pCpu->au32TSCHistory[6]
1697 = pCpu->au32TSCHistory[7]
1698 = pCpu->u32UpdateIntervalTSC;
1699}
1700
1701
1702/**
1703 * Initializes the GIP data.
1704 *
1705 * @param pDevExt Pointer to the device instance data.
1706 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1707 * @param HCPhys The physical address of the GIP.
1708 * @param u64NanoTS The current nanosecond timestamp.
1709 * @param uUpdateHz The update frequency.
1710 * @param uUpdateIntervalNS The update interval in nanoseconds.
1711 * @param cCpus The CPU count.
1712 */
1713static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
1714 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
1715{
1716 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
1717 unsigned i;
1718#ifdef DEBUG_DARWIN_GIP
1719 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1720#else
1721 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1722#endif
1723
1724 /*
1725 * Initialize the structure.
1726 */
1727 memset(pGip, 0, cbGip);
1728
1729 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
1730 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
1731 pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
1732 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
1733 /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
1734 pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
1735 ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
1736 else
1737 pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
1738 pGip->cCpus = (uint16_t)cCpus;
1739 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
1740 pGip->u32UpdateHz = uUpdateHz;
1741 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
1742 pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
1743 RTCpuSetEmpty(&pGip->OnlineCpuSet);
1744 RTCpuSetEmpty(&pGip->PresentCpuSet);
1745 RTMpGetSet(&pGip->PossibleCpuSet);
1746 pGip->cOnlineCpus = RTMpGetOnlineCount();
1747 pGip->cPresentCpus = RTMpGetPresentCount();
1748 pGip->cPossibleCpus = RTMpGetCount();
1749 pGip->idCpuMax = RTMpGetMaxCpuId();
1750 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
1751 pGip->aiCpuFromApicId[i] = UINT16_MAX;
1752 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
1753 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
1754 for (i = 0; i < cCpus; i++)
1755 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, 0 /*uCpuHz*/);
1756
1757 /*
1758 * Link it to the device extension.
1759 */
1760 pDevExt->pGip = pGip;
1761 pDevExt->HCPhysGip = HCPhys;
1762 pDevExt->cGipUsers = 0;
1763}
1764
1765
1766/**
1767 * Creates the GIP.
1768 *
1769 * @returns VBox status code.
1770 * @param pDevExt Instance data. GIP stuff may be updated.
1771 */
1772int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
1773{
1774 PSUPGLOBALINFOPAGE pGip;
1775 RTHCPHYS HCPhysGip;
1776 uint32_t u32SystemResolution;
1777 uint32_t u32Interval;
1778 uint32_t u32MinInterval;
1779 uint32_t uMod;
1780 unsigned cCpus;
1781 int rc;
1782
1783 LogFlow(("supdrvGipCreate:\n"));
1784
1785 /*
1786 * Assert order.
1787 */
1788 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
1789 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
1790 Assert(!pDevExt->pGipTimer);
1791#ifdef SUPDRV_USE_MUTEX_FOR_GIP
1792 Assert(pDevExt->mtxGip != NIL_RTSEMMUTEX);
1793 Assert(pDevExt->mtxTscDelta != NIL_RTSEMMUTEX);
1794#else
1795 Assert(pDevExt->mtxGip != NIL_RTSEMFASTMUTEX);
1796 Assert(pDevExt->mtxTscDelta != NIL_RTSEMFASTMUTEX);
1797#endif
1798
1799 /*
1800 * Check the CPU count.
1801 */
1802 cCpus = RTMpGetArraySize();
1803 if ( cCpus > RTCPUSET_MAX_CPUS
1804 || cCpus > 256 /* ApicId is used for the mappings */)
1805 {
1806 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
1807 return VERR_TOO_MANY_CPUS;
1808 }
1809
1810 /*
1811 * Allocate a contiguous set of pages with a default kernel mapping.
1812 */
1813 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
1814 if (RT_FAILURE(rc))
1815 {
1816 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
1817 return rc;
1818 }
1819 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
1820 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
1821
1822 /*
1823 * Find a reasonable update interval and initialize the structure.
1824 */
1825 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
1826 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
1827 * See @bugref{6710}. */
1828 u32MinInterval = RT_NS_10MS;
1829 u32SystemResolution = RTTimerGetSystemGranularity();
1830 u32Interval = u32MinInterval;
1831 uMod = u32MinInterval % u32SystemResolution;
1832 if (uMod)
1833 u32Interval += u32SystemResolution - uMod;
1834
1835 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
1836
1837 /*
1838 * Important sanity check...
1839 */
1840 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
1841 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
1842 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
1843 {
1844 OSDBGPRINT(("supdrvGipCreate: Host-OS/user claims the TSC-deltas are zero but we detected async. TSC! Bad.\n"));
1845 return VERR_INTERNAL_ERROR_2;
1846 }
1847
1848 /* It doesn't make sense to do TSC-delta detection on systems we detect as async. */
1849 AssertReturn( pGip->u32Mode != SUPGIPMODE_ASYNC_TSC
1850 || pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED, VERR_INTERNAL_ERROR_3);
1851
1852 /*
1853 * Do the TSC frequency measurements.
1854 *
1855 * If we're in invariant TSC mode, just to a quick preliminary measurement
1856 * that the TSC-delta measurement code can use to yield cross calls.
1857 *
1858 * If we're in any of the other two modes, neither which require MP init,
1859 * notifications or deltas for the job, do the full measurement now so
1860 * that supdrvGipInitOnCpu() can populate the TSC interval and history
1861 * array with more reasonable values.
1862 */
1863 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1864 {
1865 rc = supdrvGipInitMeasureTscFreq(pDevExt, pGip, true /*fRough*/); /* cannot fail */
1866 supdrvGipInitStartTimerForRefiningInvariantTscFreq(pDevExt, pGip);
1867 }
1868 else
1869 rc = supdrvGipInitMeasureTscFreq(pDevExt, pGip, false /*fRough*/);
1870 if (RT_SUCCESS(rc))
1871 {
1872 /*
1873 * Start TSC-delta measurement thread before we start getting MP
1874 * events that will try kick it into action (includes the
1875 * RTMpOnAll/supdrvGipInitOnCpu call below).
1876 */
1877 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
1878 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
1879#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1880 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1881 rc = supdrvTscDeltaThreadInit(pDevExt);
1882#endif
1883 if (RT_SUCCESS(rc))
1884 {
1885 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
1886 if (RT_SUCCESS(rc))
1887 {
1888 /*
1889 * Do GIP initialization on all online CPUs. Wake up the
1890 * TSC-delta thread afterwards.
1891 */
1892 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
1893 if (RT_SUCCESS(rc))
1894 {
1895#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1896 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
1897#else
1898 uint16_t iCpu;
1899 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1900 {
1901 /*
1902 * Measure the TSC deltas now that we have MP notifications.
1903 */
1904 int cTries = 5;
1905 do
1906 {
1907 rc = supdrvMeasureInitialTscDeltas(pDevExt);
1908 if ( rc != VERR_TRY_AGAIN
1909 && rc != VERR_CPU_OFFLINE)
1910 break;
1911 } while (--cTries > 0);
1912 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1913 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
1914 }
1915 else
1916 {
1917 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1918 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
1919 }
1920 if (RT_SUCCESS(rc))
1921#endif
1922 {
1923 /*
1924 * Create the timer.
1925 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
1926 */
1927 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
1928 {
1929 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
1930 supdrvGipAsyncTimer, pDevExt);
1931 if (rc == VERR_NOT_SUPPORTED)
1932 {
1933 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
1934 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
1935 }
1936 }
1937 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1938 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
1939 supdrvGipSyncAndInvariantTimer, pDevExt);
1940 if (RT_SUCCESS(rc))
1941 {
1942 /*
1943 * We're good.
1944 */
1945 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
1946 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
1947
1948 g_pSUPGlobalInfoPage = pGip;
1949 return VINF_SUCCESS;
1950 }
1951
1952 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
1953 Assert(!pDevExt->pGipTimer);
1954 }
1955 }
1956 else
1957 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
1958 }
1959 else
1960 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
1961 }
1962 else
1963 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
1964 }
1965 else
1966 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureInitialTscDeltas failed. rc=%Rrc\n", rc));
1967
1968 /* Releases timer frequency increase too. */
1969 supdrvGipDestroy(pDevExt);
1970 return rc;
1971}
1972
1973
1974/**
1975 * Invalidates the GIP data upon termination.
1976 *
1977 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1978 */
1979static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
1980{
1981 unsigned i;
1982 pGip->u32Magic = 0;
1983 for (i = 0; i < pGip->cCpus; i++)
1984 {
1985 pGip->aCPUs[i].u64NanoTS = 0;
1986 pGip->aCPUs[i].u64TSC = 0;
1987 pGip->aCPUs[i].iTSCHistoryHead = 0;
1988 pGip->aCPUs[i].u64TSCSample = 0;
1989 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
1990 }
1991}
1992
1993
/**
 * Terminates the GIP.
 *
 * Tears things down in roughly the reverse order of creation: MP event
 * notifications are deregistered first so nothing can kick the TSC-delta
 * thread or timers while they are being destroyed, then the delta thread,
 * the timers, the GIP data itself, and finally the backing memory object.
 *
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
{
    int rc;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
                pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
                pDevExt->pGipTimer, pDevExt->GipMemObj));
#endif

    /*
     * Stop receiving MP notifications before tearing anything else down.
     */
    RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * Terminate the TSC-delta measurement thread and resources.
     */
    supdrvTscDeltaTerm(pDevExt);
#endif

    /*
     * Destroy the TSC-refinement timer.
     */
    if (pDevExt->pInvarTscRefineTimer)
    {
        RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
        pDevExt->pInvarTscRefineTimer = NULL;
    }

    /*
     * Invalidate the GIP data and drop the global pointer so user mappings
     * can no longer be handed out.
     */
    if (pDevExt->pGip)
    {
        supdrvGipTerm(pDevExt->pGip);
        pDevExt->pGip = NULL;
    }
    g_pSUPGlobalInfoPage = NULL;

    /*
     * Destroy the timer and free the GIP memory object.
     */
    if (pDevExt->pGipTimer)
    {
        rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
        pDevExt->pGipTimer = NULL;
    }

    if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
        pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
    }

    /*
     * Finally, make sure we've released the system timer resolution request
     * if one actually succeeded and is still pending.
     */
    supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
}
2060
2061
2062
2063
2064/*
2065 *
2066 *
2067 * GIP Update Timer Related Code
2068 * GIP Update Timer Related Code
2069 * GIP Update Timer Related Code
2070 *
2071 *
2072 */
2073
2074
/**
 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
 * updates all the per cpu data except the transaction id.
 *
 * Writes the new NanoTS/TSC samples, and for the non-invariant modes also
 * maintains the TSC interval history and re-derives the CPU frequency.
 *
 * @param   pDevExt         The device extension.
 * @param   pGipCpu         Pointer to the per cpu data.
 * @param   u64NanoTS       The current time stamp.
 * @param   u64TSC          The current TSC.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
{
    uint64_t    u64TSCDelta;
    uint32_t    u32UpdateIntervalTSC;
    uint32_t    u32UpdateIntervalTSCSlack;
    unsigned    iTSCHistoryHead;
    uint64_t    u64CpuHz;
    uint32_t    u32TransactionId;

    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    /* Delta between this and the previous update. */
    ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));

    /*
     * Update the NanoTS.
     */
    ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);

    /*
     * Calc TSC delta.
     */
    u64TSCDelta = u64TSC - pGipCpu->u64TSC;
    ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);

    /*
     * We don't need to keep recalculating the frequency when it's invariant, so
     * the remainder of this function is only for the sync and async TSC modes.
     */
    if (pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC)
    {
        /* Paranoia: a delta that doesn't fit in 32 bits is bogus; substitute
           the previous interval and count it as an error. */
        if (u64TSCDelta >> 32)
        {
            u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
            pGipCpu->cErrors++;
        }

        /*
         * On the 2nd and 3rd callout, reset the history with the current TSC
         * interval since the values entered by supdrvGipInit are totally off.
         * The interval on the 1st callout is completely unreliable, the 2nd is a
         * bit better, while the 3rd should be most reliable.
         */
        /** @todo Could we drop this now that we initialize the history
         *        with nominal TSC frequency values? */
        u32TransactionId = pGipCpu->u32TransactionId;
        if (RT_UNLIKELY(   (   u32TransactionId == 5
                            || u32TransactionId == 7)
                        && (   iTick == 2
                            || iTick == 3) ))
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
                ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
        }

        /*
         * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
         * Wait until we have at least one full history since the above history reset.  The
         * assumption is that the majority of the previous history values will be tolerable.
         * See @bugref{6710} comment #67.
         */
        /** @todo Could we drop the fudging here now that we initialize the history
         *        with nominal TSC frequency values? */
        if (   u32TransactionId > 23 /* 7 + (8 * 2) */
            && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        {
            uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
            if (   pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
                || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
            {
                /* The timer interval was way off, so don't trust the fresh
                   delta; use a weighted average over the whole history instead
                   (mean of entries 0..3, mean of entries 4..7, then the mean
                   of the two means). */
                uint32_t u32;
                u32  = pGipCpu->au32TSCHistory[0];
                u32 += pGipCpu->au32TSCHistory[1];
                u32 += pGipCpu->au32TSCHistory[2];
                u32 += pGipCpu->au32TSCHistory[3];
                u32 >>= 2;
                u64TSCDelta  = pGipCpu->au32TSCHistory[4];
                u64TSCDelta += pGipCpu->au32TSCHistory[5];
                u64TSCDelta += pGipCpu->au32TSCHistory[6];
                u64TSCDelta += pGipCpu->au32TSCHistory[7];
                u64TSCDelta >>= 2;
                u64TSCDelta += u32;
                u64TSCDelta >>= 1;
            }
        }

        /*
         * TSC History.  (8-entry ring buffer; the '& 7' wrap relies on that.)
         */
        Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
        iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
        ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
        ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);

        /*
         * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
         *
         * On Windows, we have an occasional (but recurring) sour value that messed up
         * the history but taking only 1 interval reduces the precision overall.
         */
        /* NOTE(review): the INVARIANT_TSC check below is always false here since
           we're inside the != SUPGIPMODE_INVARIANT_TSC branch; it looks like a
           leftover from before the early invariant bail-out — confirm. */
        if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
            || pGip->u32UpdateHz >= 1000)
        {
            uint32_t u32;
            u32  = pGipCpu->au32TSCHistory[0];
            u32 += pGipCpu->au32TSCHistory[1];
            u32 += pGipCpu->au32TSCHistory[2];
            u32 += pGipCpu->au32TSCHistory[3];
            u32 >>= 2;
            u32UpdateIntervalTSC  = pGipCpu->au32TSCHistory[4];
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
            u32UpdateIntervalTSC >>= 2;
            u32UpdateIntervalTSC += u32;
            u32UpdateIntervalTSC >>= 1;

            /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
            u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
        }
        else if (pGip->u32UpdateHz >= 90)
        {
            /* Average of the current and the previous interval only. */
            u32UpdateIntervalTSC  = (uint32_t)u64TSCDelta;
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
            u32UpdateIntervalTSC >>= 1;

            /* value chosen on a 2GHz thinkpad running windows */
            u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
        }
        else
        {
            /* Low update frequency: use the current interval as-is. */
            u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;

            /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
            u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
        }
        ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);

        /*
         * CpuHz: scale ticks-per-interval up to ticks-per-second.
         */
        u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
        u64CpuHz /= pGip->u32UpdateIntervalNS;
        ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
    }
}
2235
2236
/**
 * Updates the GIP.
 *
 * Picks the relevant per-CPU entry (entry 0 unless in async mode), opens an
 * update transaction, periodically recalculates the update frequency, and
 * delegates the actual sample update to supdrvGipDoUpdateCpu().
 *
 * @param   pDevExt         The device extension.
 * @param   u64NanoTS       The current nanosecond timestamp.
 * @param   u64TSC          The current TSC timestamp.
 * @param   idCpu           The CPU ID.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
{
    /*
     * Determine the relevant CPU data.
     */
    PSUPGIPCPU pGipCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        pGipCpu = &pGip->aCPUs[0];
    else
    {
        /* Async mode: each CPU has its own entry.  Bail out quietly if the
           APIC id doesn't resolve to a valid, matching entry (can happen
           around CPU online/offline transitions). */
        unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
        if (RT_UNLIKELY(iCpu >= pGip->cCpus))
            return;
        pGipCpu = &pGip->aCPUs[iCpu];
        if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
            return;
    }

    /*
     * Start update transaction.  An odd transaction id signals
     * update-in-progress to readers.
     */
    if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
    {
        /* this can happen on win32 if we're taking too long and there are more CPUs around. shouldn't happen though. */
        AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
        ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        pGipCpu->cErrors++;
        return;
    }

    /*
     * Recalc the update frequency every 0x800th time.
     */
    if (   pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC   /* cuz we're not recalculating the frequency on invariant hosts. */
        && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
    {
        if (pGip->u64NanoTSLastUpdateHz)
        {
#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
            uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
            uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
            /* Only accept plausible rates (30..2000 Hz). */
            if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
            {
                /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
                 *        calculation on non-invariant hosts if it changes the history decision
                 *        taken in supdrvGipDoUpdateCpu(). */
                uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
                ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
                ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
            }
#endif
        }
        /* The '| 1' keeps the stamp non-zero so the branch above is taken next time. */
        ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
    }

    /*
     * Update the data.
     */
    supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

    /*
     * Complete transaction (id becomes even again).
     */
    ASMAtomicIncU32(&pGipCpu->u32TransactionId);
}
2316
2317
/**
 * Updates the per cpu GIP data for the calling cpu.
 *
 * Used in async TSC mode for CPUs other than the GIP master.  Resolves the
 * per-CPU entry via the APIC id and performs a transacted sample update.
 *
 * @param   pDevExt         The device extension.
 * @param   u64NanoTS       The current nanosecond timestamp.
 * @param   u64TSC          The current TSC timestamp.
 * @param   idCpu           The CPU ID.
 * @param   idApic          The APIC id for the CPU index.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
                                  RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
{
    uint32_t iCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    /*
     * Avoid a potential race when a CPU online notification doesn't fire on
     * the onlined CPU but the tick creeps in before the event notification is
     * run.
     */
    if (RT_UNLIKELY(iTick == 1))
    {
        iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
        if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
            supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
    }

    /* Silently skip the update if the entry doesn't (yet) match this CPU. */
    iCpu = pGip->aiCpuFromApicId[idApic];
    if (RT_LIKELY(iCpu < pGip->cCpus))
    {
        PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
        if (pGipCpu->idCpu == idCpu)
        {
            /*
             * Start update transaction.  An odd transaction id signals
             * update-in-progress to readers.
             */
            if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
            {
                AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
                ASMAtomicIncU32(&pGipCpu->u32TransactionId);
                pGipCpu->cErrors++;
                return;
            }

            /*
             * Update the data.
             */
            supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

            /*
             * Complete transaction (id becomes even again).
             */
            ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        }
    }
}
2377
2378
/**
 * Timer callback function for the sync and invariant GIP modes.
 *
 * Reads the current TSC and system time with interrupts disabled, applies
 * the per-CPU TSC delta when one is known, and feeds the sample into
 * supdrvGipUpdate().
 *
 * @param   pTimer      The timer.
 * @param   pvUser      Opaque pointer to the device extension.
 * @param   iTick       The timer tick.
 */
static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PSUPDRVDEVEXT      pDevExt   = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip      = pDevExt->pGip;
    RTCCUINTREG        fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
    uint64_t           u64TSC    = ASMReadTSC();
    uint64_t           u64NanoTS = RTTimeSystemNanoTS();

    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
    {
        /*
         * The calculations in supdrvGipUpdate() is somewhat timing sensitive,
         * missing timer ticks is not an option for GIP because the GIP users
         * will end up incrementing the time in 1ns per time getter call until
         * there is a complete timer update.  So, if the delta has yet to be
         * calculated, we just pretend it is zero for now (the GIP users
         * probably won't have it for a wee while either and will do the same).
         *
         * We could maybe on some platforms try cross calling a CPU with a
         * working delta here, but it's not worth the hassle since the
         * likelihood of this happening is really low.  On Windows, Linux, and
         * Solaris timers fire on the CPU they were registered/started on.
         * Darwin timers don't necessarily (they are high priority threads).
         */
        uint32_t iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
        uint16_t iGipCpu = RT_LIKELY(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
                         ? pGip->aiCpuFromCpuSetIdx[iCpuSet] : UINT16_MAX;
        Assert(!ASMIntAreEnabled());
        if (RT_LIKELY(iGipCpu < pGip->cCpus))
        {
            /* Normalize the TSC to the master's timeline when the delta is known. */
            int64_t iTscDelta = pGip->aCPUs[iGipCpu].i64TSCDelta;
            if (iTscDelta != INT64_MAX)
                u64TSC -= iTscDelta;
        }
    }

    supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);

    ASMSetFlags(fOldFlags);
}
2426
2427
2428/**
2429 * Timer callback function for async GIP mode.
2430 * @param pTimer The timer.
2431 * @param pvUser Opaque pointer to the device extension.
2432 * @param iTick The timer tick.
2433 */
2434static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2435{
2436 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2437 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2438 RTCPUID idCpu = RTMpCpuId();
2439 uint64_t u64TSC = ASMReadTSC();
2440 uint64_t NanoTS = RTTimeSystemNanoTS();
2441
2442 /** @todo reset the transaction number and whatnot when iTick == 1. */
2443 if (pDevExt->idGipMaster == idCpu)
2444 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
2445 else
2446 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
2447
2448 ASMSetFlags(fOldFlags);
2449}
2450
2451
2452
2453
2454/*
2455 *
2456 *
2457 * TSC Delta Measurements And Related Code
2458 * TSC Delta Measurements And Related Code
2459 * TSC Delta Measurements And Related Code
2460 *
2461 *
2462 */
2463
2464
/*
 * Select TSC delta measurement algorithm (method 1 or method 2; method 2 is
 * the default, flip the #if to switch).
 */
#if 0
# define GIP_TSC_DELTA_METHOD_1
#else
# define GIP_TSC_DELTA_METHOD_2
#endif

/** For padding variables to keep them away from other cache lines.  Better too
 * large than too small!
 * @remarks Current AMD64 and x86 CPUs seem to use 64 bytes.  There are claims
 *          that NetBurst had 128 byte cache lines while the 486 thru Pentium
 *          III had 32 bytes cache lines. */
#define GIP_TSC_DELTA_CACHE_LINE_SIZE           128
2480
2481
/**
 * TSC delta measurement algorithm \#2 result entry.
 */
typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
{
    /** Our own sequence number when this sample was taken. */
    uint32_t        iSeqMine;
    /** The sequence number last observed from the other CPU. */
    uint32_t        iSeqOther;
    /** The TSC reading for this sample. */
    uint64_t        uTsc;
} SUPDRVTSCDELTAMETHOD2ENTRY;

/**
 * TSC delta measurement algorithm \#2 Data.
 */
typedef struct SUPDRVTSCDELTAMETHOD2
{
    /** Padding to make sure the iCurSeqNo is in its own cache line. */
    uint64_t                    au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
    /** The current sequence number of this worker. */
    uint32_t volatile           iCurSeqNo;
    /** Padding to make sure the iCurSeqNo is in its own cache line.
     * (NOTE(review): elements are uint32_t despite the 'au64' prefix.) */
    uint32_t                    au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint32_t) - 1];
    /** Result table. */
    SUPDRVTSCDELTAMETHOD2ENTRY  aResults[64];
} SUPDRVTSCDELTAMETHOD2;
/** Pointer to the data for TSC delta measurement algorithm \#2 .*/
typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
2508
2509
/**
 * The TSC delta synchronization struct, version 2.
 *
 * The synchronization variable is completely isolated in its own cache line
 * (provided our max cache line size estimate is correct).
 */
typedef struct SUPTSCDELTASYNC2
{
    /** Padding to make sure the uSyncVar is in its own cache line. */
    uint64_t            au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
    volatile uint32_t   uSyncVar;
    /** Sequence synchronizing variable used for post 'GO' synchronization. */
    volatile uint32_t   uSyncSeq;

    /** Padding to make sure the uSyncVar is in its own cache line. */
    uint64_t            au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 2];

    /** Start RDTSC value.  Put here mainly to save stack space. */
    uint64_t            uTscStart;
    /** Copy of SUPDRVGIPTSCDELTARGS::cMaxTscTicks. */
    uint64_t            cMaxTscTicks;
} SUPTSCDELTASYNC2;
AssertCompileSize(SUPTSCDELTASYNC2, GIP_TSC_DELTA_CACHE_LINE_SIZE * 2 + sizeof(uint64_t));
typedef SUPTSCDELTASYNC2 *PSUPTSCDELTASYNC2;

/* uSyncVar state values.  The 0x0ffe/0x0fff pair is the pre-start handshake,
   0x1000-0x1003 the normal ready/steady/go progression, and 0x1ffe/0x1fff the
   terminal states. */
/** Prestart wait. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT       UINT32_C(0x0ffe)
/** Prestart aborted. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT      UINT32_C(0x0fff)
/** Ready (on your mark). */
#define GIP_TSC_DELTA_SYNC2_READY               UINT32_C(0x1000)
/** Steady (get set). */
#define GIP_TSC_DELTA_SYNC2_STEADY              UINT32_C(0x1001)
/** Go! */
#define GIP_TSC_DELTA_SYNC2_GO                  UINT32_C(0x1002)
/** Used by the verification test. */
#define GIP_TSC_DELTA_SYNC2_GO_GO               UINT32_C(0x1003)

/** We reached the time limit. */
#define GIP_TSC_DELTA_SYNC2_TIMEOUT             UINT32_C(0x1ffe)
/** The other party won't touch the sync struct ever again. */
#define GIP_TSC_DELTA_SYNC2_FINAL               UINT32_C(0x1fff)
2554
2555
/**
 * Argument package/state passed by supdrvMeasureTscDeltaOne() to the RTMpOn
 * callback worker.
 *
 * The master and worker sections are kept in separate cache lines so the two
 * CPUs hammering on them during measurement don't false-share.
 */
typedef struct SUPDRVGIPTSCDELTARGS
{
    /** The device extension. */
    PSUPDRVDEVEXT               pDevExt;
    /** Pointer to the GIP CPU array entry for the worker. */
    PSUPGIPCPU                  pWorker;
    /** Pointer to the GIP CPU array entry for the master. */
    PSUPGIPCPU                  pMaster;
    /** The maximum number of ticks to spend in supdrvMeasureTscDeltaCallback.
     * (This is what we need a rough TSC frequency for.)  */
    uint64_t                    cMaxTscTicks;
    /** Used to abort synchronization setup. */
    bool volatile               fAbortSetup;

    /** Padding to make sure the master variables live in their own cache lines. */
    uint64_t                    au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** @name Master
     * @{ */
    /** The time the master spent in the MP worker.  */
    uint64_t                    cElapsedMasterTscTicks;
    /** The iTry value when stopped at. */
    uint32_t                    iTry;
    /** Set if the run timed out.   */
    bool volatile               fTimedOut;
    /** Pointer to the master's synchronization struct (on stack). */
    PSUPTSCDELTASYNC2 volatile  pSyncMaster;
    /** Master data union. */
    union
    {
        /** Data (master) for delta verification. */
        struct
        {
            /** Verification test TSC values for the master. */
            uint64_t volatile   auTscs[32];
        } Verify;
        /** Data (master) for measurement method \#2. */
        struct
        {
            /** Data and sequence number. */
            SUPDRVTSCDELTAMETHOD2   Data;
            /** The lag setting for the next run. */
            bool                fLag;
            /** Number of hits. */
            uint32_t            cHits;
        } M2;
    } uMaster;
    /** The verifier verdict, VINF_SUCCESS if ok, VERR_OUT_OF_RANGE if not,
     * VERR_TRY_AGAIN on timeout. */
    int32_t                     rcVerify;
#ifdef TSCDELTA_VERIFY_WITH_STATS
    /** The maximum difference between TSC read during delta verification. */
    int64_t                     cMaxVerifyTscTicks;
    /** The minimum difference between two TSC reads during verification. */
    int64_t                     cMinVerifyTscTicks;
    /** The bad TSC diff, worker relative to master (= worker - master).
     * Negative value means the worker is behind the master. */
    int64_t                     iVerifyBadTscDiff;
#endif
    /** @} */

    /** Padding to make sure the worker variables live in their own cache lines. */
    uint64_t                    au64CacheLinePaddingBetween[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** @name Proletarian
     * @{ */
    /** Pointer to the worker's synchronization struct (on stack). */
    PSUPTSCDELTASYNC2 volatile  pSyncWorker;
    /** The time the worker spent in the MP worker.  */
    uint64_t                    cElapsedWorkerTscTicks;
    /** Worker data union. */
    union
    {
        /** Data (worker) for delta verification. */
        struct
        {
            /** Verification test TSC values for the worker. */
            uint64_t volatile   auTscs[32];
        } Verify;
        /** Data (worker) for measurement method \#2. */
        struct
        {
            /** Data and sequence number. */
            SUPDRVTSCDELTAMETHOD2   Data;
            /** The lag setting for the next run (set by master). */
            bool                fLag;
        } M2;
    } uWorker;
    /** @} */

    /** Padding to make sure the above is in its own cache line. */
    uint64_t                    au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
} SUPDRVGIPTSCDELTARGS;
typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
2655
2656
/** @name Macros that implements the basic synchronization steps common to
 * the algorithms.
 *
 * Must be used from loop as the timeouts are implemented via 'break' statements
 * at the moment.
 *
 * @{
 */
#if defined(DEBUG_bird) /* || defined(VBOX_STRICT) */
/* Loop watchdog: counts iterations and breakpoints if a sync loop spins for
   ~32M iterations (debug builds for DEBUG_bird only). */
# define TSCDELTA_DBG_VARS()            uint32_t iDbgCounter
# define TSCDELTA_DBG_START_LOOP()      do { iDbgCounter = 0; } while (0)
# define TSCDELTA_DBG_CHECK_LOOP() \
    do { iDbgCounter++; if ((iDbgCounter & UINT32_C(0x01ffffff)) == 0) RT_BREAKPOINT(); } while (0)
#else
# define TSCDELTA_DBG_VARS()            ((void)0)
# define TSCDELTA_DBG_START_LOOP()      ((void)0)
# define TSCDELTA_DBG_CHECK_LOOP()      ((void)0)
#endif
/* Sync-protocol debug logging at three verbosity levels; flip the '#if 0' to
   route the messages through SUPR0Printf when debugging. */
#if 0
# define TSCDELTA_DBG_SYNC_MSG(a_Args)  SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG(a_Args)  ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) ((void)0)
#endif
2690
2691
/**
 * Performs the pre-measurement synchronization between the master and worker
 * CPU taking part in a TSC-delta measurement.
 *
 * Runs the ready/steady/go handshake over the two SUPTSCDELTASYNC2
 * structures and then attempts to get the two CPUs into mostly lockstep
 * execution via the uSyncSeq exchange.  On success, interrupts are left
 * DISABLED and the caller must restore them via the flags returned in
 * @a pfEFlags (normally done by the matching _SYNC_AFTER step).
 *
 * @returns true on successful synchronization, false on timeout or abort
 *          (callers break out of their measurement loop on false).
 * @param   pMySync     Our own synchronization structure.
 * @param   pOtherSync  The other party's synchronization structure.
 * @param   fIsMaster   Set if we are the master, clear if the worker.
 * @param   pfEFlags    Where to return the saved interrupt flag state
 *                      (only meaningful on success).
 * @param   pArgs       The measurement argument package; fTimedOut is set
 *                      here when we hit the cMaxTscTicks limit.
 */
static bool supdrvTscDeltaSync2_Before(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                       bool fIsMaster, PRTCCUINTREG pfEFlags, PSUPDRVGIPTSCDELTARGS pArgs)
{
    uint32_t        iMySeq  = fIsMaster ? 0 : 256;
    uint32_t const  iMaxSeq = iMySeq + 16;  /* For the last loop, darn linux/freebsd C-ishness. */
    uint32_t        u32Tmp;
    uint32_t        iSync2Loops = 0;
    RTCCUINTREG     fEFlags;
    TSCDELTA_DBG_VARS();

    *pfEFlags = X86_EFL_IF | X86_EFL_1; /* should shut up most nagging compilers. */

    /*
     * The master tells the worker to get on its mark.
     */
    if (fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
        { /* likely*/ }
        else
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #1 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Wait for the on your mark signal (ack in the master case).  We process timeouts here.
     */
    ASMAtomicWriteU32(&(pMySync)->uSyncSeq, 0);
    for (;;)
    {
        fEFlags = ASMIntDisableFlags();
        u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY)
            break;
        ASMSetFlags(fEFlags);
        ASMNopPause();

        /* Abort? */
        if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY)
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #2 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
            return false;
        }

        /* Check for timeouts every so often (not every loop in case RDTSC is
           trapping or something).  Must check the first time around. */
#if 0 /* For debugging the timeout paths. */
        static uint32_t volatile xxx;
#endif
        if (   (   (iSync2Loops & 0x3ff) == 0
                && ASMReadTSC() - pMySync->uTscStart > pMySync->cMaxTscTicks)
#if 0 /* This is crazy, I know, but enable this code and the results are markedly better when enabled on the 1.4GHz AMD (debug). */
            || (!fIsMaster && (++xxx & 0xf) == 0)
#endif
           )
        {
            /* Try switch our own state into timeout mode so the master cannot tell us to 'GO',
               ignore the timeout if we've got the go ahead already (simpler). */
            if (ASMAtomicCmpXchgU32(&pMySync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_READY))
            {
                TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: timeout\n", fIsMaster ? "master" : "worker"));
                ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_STEADY);
                ASMAtomicWriteBool(&pArgs->fTimedOut, true);
                return false;
            }
        }
        iSync2Loops++;
    }

    /*
     * Interrupts are now disabled and will remain disabled until we do
     * TSCDELTA_MASTER_SYNC_AFTER / TSCDELTA_OTHER_SYNC_AFTER.
     */
    *pfEFlags = fEFlags;

    /*
     * The worker tells the master that it is on its mark and that the master
     * need to get into position as well.
     */
    if (!fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #3 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * The master sends the 'go' to the worker and wait for ACK.
     */
    if (fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #4 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Wait for the 'go' signal (ack in the master case).
     */
    TSCDELTA_DBG_START_LOOP();
    for (;;)
    {
        u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (u32Tmp == GIP_TSC_DELTA_SYNC2_GO)
            break;
        if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #5 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
            return false;
        }

        TSCDELTA_DBG_CHECK_LOOP();
        ASMNopPause();
    }

    /*
     * The worker acks the 'go' (shouldn't fail).
     */
    if (!fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #6 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Try enter mostly lockstep execution with it.  Each side publishes its
     * own sequence number and checks whether the exchange with the other side
     * is stable; UINT32_MAX signals giving up after iMaxSeq attempts.
     */
    for (;;)
    {
        uint32_t iOtherSeq1, iOtherSeq2;
        ASMCompilerBarrier();
        ASMSerializeInstruction();

        ASMAtomicWriteU32(&pMySync->uSyncSeq, iMySeq);
        ASMNopPause();
        iOtherSeq1 = ASMAtomicXchgU32(&pOtherSync->uSyncSeq, iMySeq);
        ASMNopPause();
        iOtherSeq2 = ASMAtomicReadU32(&pMySync->uSyncSeq);

        ASMCompilerBarrier();
        if (iOtherSeq1 == iOtherSeq2)
            return true;

        /* Did the other guy give up? Should we give up? */
        if (   iOtherSeq1 == UINT32_MAX
            || iOtherSeq2 == UINT32_MAX)
            return true;
        if (++iMySeq >= iMaxSeq)
        {
            ASMAtomicWriteU32(&pMySync->uSyncSeq, UINT32_MAX);
            return true;
        }
        ASMNopPause();
    }
}
2868
/** Invokes supdrvTscDeltaSync2_Before() as the master.
 * @remarks Must be used inside a loop: it breaks out of the loop on failure
 *          (timeout or abort). */
#define TSCDELTA_MASTER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { /*likely*/ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/master: #89\n")); \
        break; \
    } else do {} while (0)
/** Invokes supdrvTscDeltaSync2_Before() as the worker ('other' party).
 * @remarks Must be used inside a loop: it breaks out of the loop on failure
 *          (timeout or abort). */
#define TSCDELTA_OTHER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { /*likely*/ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/other: #89\n")); \
        break; \
    } else do {} while (0)
2885
2886
2887static bool supdrvTscDeltaSync2_After(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
2888 bool fIsMaster, RTCCUINTREG fEFlags)
2889{
2890 TSCDELTA_DBG_VARS();
2891
2892 /*
2893 * Wait for the 'ready' signal. In the master's case, this means the
2894 * worker has completed its data collection, while in the worker's case it
2895 * means the master is done processing the data and it's time for the next
2896 * loop iteration (or whatever).
2897 */
2898 ASMSetFlags(fEFlags);
2899 TSCDELTA_DBG_START_LOOP();
2900 for (;;)
2901 {
2902 uint32_t u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
2903 if ( u32Tmp == GIP_TSC_DELTA_SYNC2_READY
2904 || (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY && !fIsMaster) /* kicked twice => race */ )
2905 return true;
2906 ASMNopPause();
2907 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_GO))
2908 { /* likely */}
2909 else
2910 {
2911 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #1 u32Tmp=%#x\n", u32Tmp));
2912 return false; /* shouldn't ever happen! */
2913 }
2914 TSCDELTA_DBG_CHECK_LOOP();
2915 ASMNopPause();
2916 }
2917}
2918
/** @def TSCDELTA_MASTER_SYNC_AFTER
 * Master side of the post-measurement rendezvous; 'break's out of the
 * caller's loop on failure (see TSCDELTA_MASTER_SYNC_BEFORE for the shape
 * rationale). */
#define TSCDELTA_MASTER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_fEFlags))) \
    { /* likely */ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/after/master: #97\n")); \
        break; \
    } else do {} while (0)

/** @def TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER
 * Master: release the worker from its post-measurement wait by flipping the
 * worker's sync variable GO -> READY; 'break's out of the caller's loop when
 * the variable isn't in the expected GO state. */
#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pMySync, a_pOtherSync) \
    /* \
     * Tell the worker that we're done processing the data and ready for the next round. \
     */ \
    if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
    { /* likely */ } \
    else if (true)\
    { \
        TSCDELTA_DBG_SYNC_MSG(("sync/after/master: #99 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
        break; \
    } else do {} while (0)

/** @def TSCDELTA_OTHER_SYNC_AFTER
 * Worker side of the post-measurement rendezvous: signal the master that data
 * collection is done (GO -> READY on the master's variable), then wait for
 * the master to kick off the next round; 'break's out of the caller's loop
 * on either failure. */
#define TSCDELTA_OTHER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (true) { \
        /* \
         * Tell the master that we're done collecting data and wait for the next round to start. \
         */ \
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
        { /* likely */ } \
        else \
        { \
            ASMSetFlags(a_fEFlags); \
            TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #0 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
            break; \
        } \
        if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_fEFlags))) \
        { /* likely */ } \
        else \
        { \
            TSCDELTA_DBG_SYNC_MSG9(("sync/after/other: #98\n")); \
            break; \
        } \
    } else do {} while (0)
/** @} */
2962
2963
2964#ifdef GIP_TSC_DELTA_METHOD_1
2965/**
 * TSC delta measurement algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
2967 *
2968 *
2969 * We ignore the first few runs of the loop in order to prime the
2970 * cache. Also, we need to be careful about using 'pause' instruction
2971 * in critical busy-wait loops in this code - it can cause undesired
2972 * behaviour with hyperthreading.
2973 *
2974 * We try to minimize the measurement error by computing the minimum
2975 * read time of the compare statement in the worker by taking TSC
2976 * measurements across it.
2977 *
2978 * It must be noted that the computed minimum read time is mostly to
2979 * eliminate huge deltas when the worker is too early and doesn't by
2980 * itself help produce more accurate deltas. We allow two times the
 * computed minimum as an arbitrary acceptable threshold. Therefore,
2982 * it is still possible to get negative deltas where there are none
2983 * when the worker is earlier. As long as these occasional negative
2984 * deltas are lower than the time it takes to exit guest-context and
2985 * the OS to reschedule EMT on a different CPU we won't expose a TSC
2986 * that jumped backwards. It is because of the existence of the
2987 * negative deltas we don't recompute the delta with the master and
2988 * worker interchanged to eliminate the remaining measurement error.
2989 *
2990 *
2991 * @param pArgs The argument/state data.
2992 * @param pMySync My synchronization structure.
2993 * @param pOtherSync My partner's synchronization structure.
2994 * @param fIsMaster Set if master, clear if worker.
2995 * @param iTry The attempt number.
2996 */
static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                      bool fIsMaster, uint32_t iTry)
{
    PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
    PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
    uint64_t uMinCmpReadTime = UINT64_MAX;   /* Smallest observed worker compare/read time, used as plausibility filter. */
    unsigned iLoop;
    NOREF(iTry);

    for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
    {
        RTCCUINTREG fEFlags;
        if (fIsMaster)
        {
            /*
             * The master.
             */
            AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
                      ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
                       pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /* Publish our TSC; loop in the (astronomically unlikely) case the
               value read happens to equal the GIP_TSC_DELTA_RSVD marker. */
            do
            {
                ASMSerializeInstruction();
                ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);

            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /* Process the data. */
            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
            {
                /* Only use rounds after cache priming and read-time calibration,
                   and only when the worker actually published a sample. */
                if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
                {
                    /* Candidate delta = worker TSC minus master TSC adjusted by the master's own delta. */
                    int64_t iDelta = pGipCpuWorker->u64TSCSample
                                   - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
                    if (  iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
                        ? iDelta < pGipCpuWorker->i64TSCDelta
                        : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
                        pGipCpuWorker->i64TSCDelta = iDelta;
                }
            }

            /* Reset our TSC sample and tell the worker to move on. */
            ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker.
             */
            uint64_t uTscWorker;
            uint64_t uTscWorkerFlushed;
            uint64_t uCmpReadTime;

            ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample);     /* Warm the cache line. */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /*
             * Keep reading the TSC until we notice that the master has read his. Reading
             * the TSC -after- the master has updated the memory is way too late. We thus
             * compensate by trying to measure how long it took for the worker to notice
             * the memory flushed from the master.
             */
            do
            {
                ASMSerializeInstruction();
                uTscWorker = ASMReadTSC();
            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
            ASMSerializeInstruction();
            uTscWorkerFlushed = ASMReadTSC();

            /* How long it took us to notice the master's store. */
            uCmpReadTime = uTscWorkerFlushed - uTscWorker;
            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
            {
                /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
                if (uCmpReadTime < (uMinCmpReadTime << 1))
                {
                    /* Plausible sample (within 2x the best read time): publish it for the master. */
                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
                    if (uCmpReadTime < uMinCmpReadTime)
                        uMinCmpReadTime = uCmpReadTime;
                }
                else
                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
            }
            else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
            {
                /* Still in the read-time calibration phase: only track the minimum. */
                if (uCmpReadTime < uMinCmpReadTime)
                    uMinCmpReadTime = uCmpReadTime;
            }

            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
    }

    TSCDELTA_DBG_SYNC_MSG9(("sync/method1loop/%s: #92 iLoop=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iLoop,
                            pMySync->uSyncVar));

    /*
     * We must reset the worker TSC sample value in case it gets picked as a
     * GIP master later on (it's trashed above, naturally).
     */
    if (!fIsMaster)
        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
}
3104#endif /* GIP_TSC_DELTA_METHOD_1 */
3105
3106
3107#ifdef GIP_TSC_DELTA_METHOD_2
3108/*
3109 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
3110 */
3111
/** Total number of data collection loops for method \#2 (7 body loops plus the primer loops). */
# define GIP_TSC_DELTA_M2_LOOPS (7 + GIP_TSC_DELTA_M2_PRIMER_LOOPS)
/** Number of initial cache-priming loops for method \#2 (currently zero, i.e. disabled). */
# define GIP_TSC_DELTA_M2_PRIMER_LOOPS 0
3114
3115
3116static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs, uint32_t iLoop)
3117{
3118 int64_t iMasterTscDelta = pArgs->pMaster->i64TSCDelta;
3119 int64_t iBestDelta = pArgs->pWorker->i64TSCDelta;
3120 uint32_t idxResult;
3121 uint32_t cHits = 0;
3122
3123 /*
3124 * Look for matching entries in the master and worker tables.
3125 */
3126 for (idxResult = 0; idxResult < RT_ELEMENTS(pArgs->uMaster.M2.Data.aResults); idxResult++)
3127 {
3128 uint32_t idxOther = pArgs->uMaster.M2.Data.aResults[idxResult].iSeqOther;
3129 if (idxOther & 1)
3130 {
3131 idxOther >>= 1;
3132 if (idxOther < RT_ELEMENTS(pArgs->uWorker.M2.Data.aResults))
3133 {
3134 if (pArgs->uWorker.M2.Data.aResults[idxOther].iSeqOther == pArgs->uMaster.M2.Data.aResults[idxResult].iSeqMine)
3135 {
3136 int64_t iDelta;
3137 iDelta = pArgs->uWorker.M2.Data.aResults[idxOther].uTsc
3138 - (pArgs->uMaster.M2.Data.aResults[idxResult].uTsc - iMasterTscDelta);
3139 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3140 ? iDelta < iBestDelta
3141 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
3142 iBestDelta = iDelta;
3143 cHits++;
3144 }
3145 }
3146 }
3147 }
3148
3149 /*
3150 * Save the results.
3151 */
3152 if (cHits > 2)
3153 pArgs->pWorker->i64TSCDelta = iBestDelta;
3154 pArgs->uMaster.M2.cHits += cHits;
3155}
3156
3157
3158/**
 * The core function of the 2nd TSC delta measurement algorithm.
3160 *
3161 * The idea here is that we have the two CPUs execute the exact same code
3162 * collecting a largish set of TSC samples. The code has one data dependency on
3163 * the other CPU which intention it is to synchronize the execution as well as
3164 * help cross references the two sets of TSC samples (the sequence numbers).
3165 *
3166 * The @a fLag parameter is used to modify the execution a tiny bit on one or
3167 * both of the CPUs. When @a fLag differs between the CPUs, it is thought that
 * it will help with making the CPUs enter lock step execution occasionally.
3169 *
3170 */
static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
{
    SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
    uint32_t cLeft = RT_ELEMENTS(pMyData->aResults);

    /* Start from sequence number zero; it is bumped twice per iteration below,
       so an odd value signals "mid TSC read" to the partner CPU. */
    ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
    ASMSerializeInstruction();
    while (cLeft-- > 0)
    {
        uint64_t uTsc;
        uint32_t iSeqMine = ASMAtomicIncU32(&pMyData->iCurSeqNo);   /* first bump: our number is now odd */
        uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);        /* sample the partner's progress (the data dependency) */
        ASMCompilerBarrier();
        ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
        uTsc = ASMReadTSC();
        ASMAtomicIncU32(&pMyData->iCurSeqNo);                       /* second bump: back to even, TSC read done */
        ASMCompilerBarrier();
        ASMSerializeInstruction();
        pEntry->iSeqMine = iSeqMine;
        pEntry->iSeqOther = iSeqOther;
        pEntry->uTsc = uTsc;
        pEntry++;
        ASMSerializeInstruction();
        if (fLag)
            ASMNopPause();  /* Optional lag fudge, see the doc comment above & caller. */
    }
}
3198
3199
3200/**
 * TSC delta measurement algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
3202 *
3203 * See supdrvTscDeltaMethod2CollectData for algorithm details.
3204 *
3205 * @param pArgs The argument/state data.
3206 * @param pMySync My synchronization structure.
3207 * @param pOtherSync My partner's synchronization structure.
3208 * @param fIsMaster Set if master, clear if worker.
3209 * @param iTry The attempt number.
3210 */
static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                      bool fIsMaster, uint32_t iTry)
{
    unsigned iLoop;

    /* Note! The sync macros below expand to 'break' statements targeting this
             loop, so a sync failure aborts the remaining iterations. */
    for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++)
    {
        RTCCUINTREG fEFlags;
        if (fIsMaster)
        {
            /*
             * Adjust the loop lag fudge.  (The master decides the lag for
             * both sides; the worker just reads its fLag flag.)
             */
# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
            if (iLoop < GIP_TSC_DELTA_M2_PRIMER_LOOPS)
            {
                /* Lag during the priming to be nice to everyone.. */
                pArgs->uMaster.M2.fLag = true;
                pArgs->uWorker.M2.fLag = true;
            }
            else
# endif
            if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4)
            {
                /* 25 % of the body without lagging. */
                pArgs->uMaster.M2.fLag = false;
                pArgs->uWorker.M2.fLag = false;
            }
            else if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4 * 2)
            {
                /* 25 % of the body with both lagging. */
                pArgs->uMaster.M2.fLag = true;
                pArgs->uWorker.M2.fLag = true;
            }
            else
            {
                /* 50% of the body with alternating lag. */
                pArgs->uMaster.M2.fLag = (iLoop & 1) == 0;
                pArgs->uWorker.M2.fLag= (iLoop & 1) == 1;
            }

            /*
             * Sync up with the worker and collect data.
             */
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
            supdrvTscDeltaMethod2CollectData(&pArgs->uMaster.M2.Data, &pArgs->uWorker.M2.Data.iCurSeqNo, pArgs->uMaster.M2.fLag);
            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /*
             * Process the data.
             */
# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
            if (iLoop >= GIP_TSC_DELTA_M2_PRIMER_LOOPS)
# endif
                supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs, iLoop);

            /* Release the worker for the next round. */
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker.
             */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
            supdrvTscDeltaMethod2CollectData(&pArgs->uWorker.M2.Data, &pArgs->uMaster.M2.Data.iCurSeqNo, pArgs->uWorker.M2.fLag);
            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
    }
}
3280
3281#endif /* GIP_TSC_DELTA_METHOD_2 */
3282
3283
3284
/**
 * Verifies that the TSCs of the master and worker CPUs interleave correctly
 * when @a iWorkerTscDelta is applied, i.e. that delta-adjusted TSC values
 * read in strict alternation between the two CPUs never go backwards.
 *
 * The two CPUs ping-pong via the sync variables (GO / GO_GO) so the samples
 * are taken in lockstep; the master then checks the combined, adjusted
 * sequence for monotonicity.
 *
 * @returns pArgs->rcVerify (VINF_SUCCESS or VERR_OUT_OF_RANGE) when the
 *          collection completed; VERR_TIMEOUT if the synchronization dance
 *          was broken off (rcVerify is then set to VERR_TRY_AGAIN so the
 *          caller retries).
 * @param   pArgs            The argument/state data.
 * @param   pMySync          My synchronization structure.
 * @param   pOtherSync       My partner's synchronization structure.
 * @param   fIsMaster        Set if master, clear if worker.
 * @param   iWorkerTscDelta  The worker TSC delta value to verify.
 */
static int supdrvTscDeltaVerify(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync,
                                PSUPTSCDELTASYNC2 pOtherSync, bool fIsMaster, int64_t iWorkerTscDelta)
{
    /*PSUPGIPCPU pGipCpuWorker = pArgs->pWorker; - unused */
    PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
    uint32_t i;
    TSCDELTA_DBG_VARS();

    /* Note! This 'loop' runs its body at most once; the construct only exists
             so the sync macros can 'break' to the timeout handling below. */
    for (;;)
    {
        RTCCUINTREG fEFlags;
        AssertCompile((RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) & 1) == 0);
        AssertCompile(RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) == RT_ELEMENTS(pArgs->uWorker.Verify.auTscs));

        if (fIsMaster)
        {
            uint64_t uTscWorker;
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /*
             * Collect TSC, master goes first.
             */
            for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i += 2)
            {
                /* Read, kick & wait #1. */
                uint64_t register uTsc = ASMReadTSC(); /* NOTE(review): 'register' is deprecated in modern C++; harmless here. */
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
                ASMSerializeInstruction();
                pArgs->uMaster.Verify.auTscs[i] = uTsc;
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }

                /* Read, kick & wait #2. */
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
                ASMSerializeInstruction();
                pArgs->uMaster.Verify.auTscs[i + 1] = uTsc;
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
            }

            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /*
             * Process the data.
             */
#ifdef TSCDELTA_VERIFY_WITH_STATS
            pArgs->cMaxVerifyTscTicks = INT64_MIN;
            pArgs->cMinVerifyTscTicks = INT64_MAX;
            pArgs->iVerifyBadTscDiff = 0;
#endif
            ASMAtomicWriteS32(&pArgs->rcVerify, VINF_SUCCESS);
            uTscWorker = 0;
            for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i++)
            {
                /* Master vs previous worker entry. */
                uint64_t uTscMaster = pArgs->uMaster.Verify.auTscs[i] - pGipCpuMaster->i64TSCDelta;
                int64_t iDiff;
                if (i > 0)
                {
                    iDiff = uTscMaster - uTscWorker;
#ifdef TSCDELTA_VERIFY_WITH_STATS
                    if (iDiff > pArgs->cMaxVerifyTscTicks)
                        pArgs->cMaxVerifyTscTicks = iDiff;
                    if (iDiff < pArgs->cMinVerifyTscTicks)
                        pArgs->cMinVerifyTscTicks = iDiff;
#endif
                    if (iDiff < 0)
                    {
                        /* Adjusted master TSC went backwards relative to the preceding worker read. */
#ifdef TSCDELTA_VERIFY_WITH_STATS
                        pArgs->iVerifyBadTscDiff = -iDiff;
#endif
                        ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
                        break;
                    }
                }

                /* Worker vs master. */
                uTscWorker = pArgs->uWorker.Verify.auTscs[i] - iWorkerTscDelta;
                iDiff = uTscWorker - uTscMaster;
#ifdef TSCDELTA_VERIFY_WITH_STATS
                if (iDiff > pArgs->cMaxVerifyTscTicks)
                    pArgs->cMaxVerifyTscTicks = iDiff;
                if (iDiff < pArgs->cMinVerifyTscTicks)
                    pArgs->cMinVerifyTscTicks = iDiff;
#endif
                if (iDiff < 0)
                {
                    /* Adjusted worker TSC went backwards relative to the master read. */
#ifdef TSCDELTA_VERIFY_WITH_STATS
                    pArgs->iVerifyBadTscDiff = iDiff;
#endif
                    ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
                    break;
                }
            }

            /* Done. */
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker, master leads.
             */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            for (i = 0; i < RT_ELEMENTS(pArgs->uWorker.Verify.auTscs); i += 2)
            {
                uint64_t register uTsc;

                /* Wait, Read and Kick #1. */
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
                ASMSerializeInstruction();
                pArgs->uWorker.Verify.auTscs[i] = uTsc;

                /* Wait, Read and Kick #2. */
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
                ASMSerializeInstruction();
                pArgs->uWorker.Verify.auTscs[i + 1] = uTsc;
            }

            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
        return pArgs->rcVerify;
    }

    /*
     * Timed out, please retry.
     */
    ASMAtomicWriteS32(&pArgs->rcVerify, VERR_TRY_AGAIN);
    return VERR_TIMEOUT;
}
3439
3440
3441
3442/**
3443 * Handles the special abort procedure during synchronization setup in
3444 * supdrvMeasureTscDeltaCallbackUnwrapped().
3445 *
3446 * @returns 0 (dummy, ignored)
3447 * @param pArgs Pointer to argument/state data.
3448 * @param pMySync Pointer to my sync structure.
3449 * @param fIsMaster Set if we're the master, clear if worker.
3450 * @param fTimeout Set if it's a timeout.
3451 */
DECL_NO_INLINE(static, int)
supdrvMeasureTscDeltaCallbackAbortSyncSetup(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, bool fIsMaster, bool fTimeout)
{
    /* Select my/partner sync-pointer slots according to our role.
       (pMySync itself is not referenced here; our slot is simply cleared.) */
    PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
    PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
    TSCDELTA_DBG_VARS();

    /*
     * Clear our sync pointer and make sure the abort flag is set.
     */
    ASMAtomicWriteNullPtr(ppMySync);
    ASMAtomicWriteBool(&pArgs->fAbortSetup, true);
    if (fTimeout)
        ASMAtomicWriteBool(&pArgs->fTimedOut, true);

    /*
     * Make sure the other party is out of there and won't be touching our
     * sync state again (would cause stack corruption).
     */
    TSCDELTA_DBG_START_LOOP();
    while (ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2) != NULL)
    {
        ASMNopPause();
        ASMNopPause();
        ASMNopPause();  /* NOTE(review): three pauses per poll looks like deliberate back-off - confirm before changing. */
        TSCDELTA_DBG_CHECK_LOOP();
    }

    /* Dummy return value so the callers can 'return' on one line (see the doc comment above). */
    return 0;
}
3482
3483
3484/**
3485 * This is used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
3486 * and compute the delta between them.
3487 *
 * To reduce code size a good deal when timeout handling was added, a dummy return
3489 * value had to be added (saves 1-3 lines per timeout case), thus this
3490 * 'Unwrapped' function and the dummy 0 return value.
3491 *
3492 * @returns 0 (dummy, ignored)
3493 * @param idCpu The CPU we are current scheduled on.
3494 * @param pArgs Pointer to a parameter package.
3495 *
3496 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
3497 * read the TSC at exactly the same time on both the master and the
3498 * worker CPUs. Due to DMA, bus arbitration, cache locality,
3499 * contention, SMI, pipelining etc. there is no guaranteed way of
3500 * doing this on x86 CPUs.
3501 */
3502static int supdrvMeasureTscDeltaCallbackUnwrapped(RTCPUID idCpu, PSUPDRVGIPTSCDELTARGS pArgs)
3503{
3504 PSUPDRVDEVEXT pDevExt = pArgs->pDevExt;
3505 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3506 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3507 bool const fIsMaster = idCpu == pGipCpuMaster->idCpu;
3508 uint32_t iTry;
3509 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3510 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3511 SUPTSCDELTASYNC2 MySync;
3512 PSUPTSCDELTASYNC2 pOtherSync;
3513 int rc;
3514 TSCDELTA_DBG_VARS();
3515
3516 /* A bit of paranoia first. */
3517 if (!pGipCpuMaster || !pGipCpuWorker)
3518 return 0;
3519
3520 /*
3521 * If the CPU isn't part of the measurement, return immediately.
3522 */
3523 if ( !fIsMaster
3524 && idCpu != pGipCpuWorker->idCpu)
3525 return 0;
3526
3527 /*
3528 * Set up my synchronization stuff and wait for the other party to show up.
3529 *
3530 * We don't wait forever since the other party may be off fishing (offline,
3531 * spinning with ints disables, whatever), we must play nice to the rest of
3532 * the system as this context generally isn't one in which we will get
3533 * preempted and we may hold up a number of lower priority interrupts.
3534 */
3535 ASMAtomicWriteU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT);
3536 ASMAtomicWritePtr(ppMySync, &MySync);
3537 MySync.uTscStart = ASMReadTSC();
3538 MySync.cMaxTscTicks = pArgs->cMaxTscTicks;
3539
3540 /* Look for the partner, might not be here yet... Special abort considerations. */
3541 iTry = 0;
3542 TSCDELTA_DBG_START_LOOP();
3543 while ((pOtherSync = ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2)) == NULL)
3544 {
3545 ASMNopPause();
3546 if ( ASMAtomicReadBool(&pArgs->fAbortSetup)
3547 || !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuWorker->idCpu) )
3548 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3549 if ( (iTry++ & 0xff) == 0
3550 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3551 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3552 TSCDELTA_DBG_CHECK_LOOP();
3553 ASMNopPause();
3554 }
3555
3556 /* I found my partner, waiting to be found... Special abort considerations. */
3557 if (fIsMaster)
3558 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* parnaoia */
3559 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3560
3561 iTry = 0;
3562 TSCDELTA_DBG_START_LOOP();
3563 while (ASMAtomicReadU32(&MySync.uSyncVar) == GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)
3564 {
3565 ASMNopPause();
3566 if (ASMAtomicReadBool(&pArgs->fAbortSetup))
3567 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3568 if ( (iTry++ & 0xff) == 0
3569 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3570 {
3571 if ( fIsMaster
3572 && !ASMAtomicCmpXchgU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_ABORT, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT))
3573 break; /* race #1: slave has moved on, handle timeout in loop instead. */
3574 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3575 }
3576 TSCDELTA_DBG_CHECK_LOOP();
3577 }
3578
3579 if (!fIsMaster)
3580 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */
3581 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3582
3583/** @todo Add a resumable state to pArgs so we don't waste time if we time
3584 * out or something. Timeouts are legit, any of the two CPUs may get
3585 * interrupted. */
3586
3587 /*
3588 * Start by seeing if we have a zero delta between the two CPUs.
3589 * This should normally be the case.
3590 */
3591 rc = supdrvTscDeltaVerify(pArgs, &MySync, pOtherSync, fIsMaster, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3592 if (RT_SUCCESS(rc))
3593 {
3594 if (fIsMaster)
3595 {
3596 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3597 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3598 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3599 }
3600 }
3601 /*
3602 * If the verification didn't time out, do regular delta measurements.
3603 * We retry this until we get a reasonable value.
3604 */
3605 else if (rc != VERR_TIMEOUT)
3606 {
3607 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
3608 for (iTry = 0; iTry < 12; iTry++)
3609 {
3610 /*
3611 * Check the state before we start.
3612 */
3613 uint32_t u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3614 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3615 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3616 {
3617 TSCDELTA_DBG_SYNC_MSG(("sync/loop/%s: #0 iTry=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iTry, u32Tmp));
3618 break;
3619 }
3620
3621 /*
3622 * Do the measurements.
3623 */
3624#ifdef GIP_TSC_DELTA_METHOD_1
3625 supdrvTscDeltaMethod1Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3626#elif defined(GIP_TSC_DELTA_METHOD_2)
3627 supdrvTscDeltaMethod2Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3628#else
3629# error "huh??"
3630#endif
3631
3632 /*
3633 * Check the state.
3634 */
3635 u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3636 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3637 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3638 {
3639 if (fIsMaster)
3640 TSCDELTA_DBG_SYNC_MSG(("sync/loop/master: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3641 else
3642 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/worker: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3643 break;
3644 }
3645
3646 /*
3647 * Success? If so, stop trying. Master decides.
3648 */
3649 if (fIsMaster)
3650 {
3651 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
3652 {
3653 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3654 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3655 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/master: #9 iTry=%u MyState=%#x\n", iTry, MySync.uSyncVar));
3656 break;
3657 }
3658 }
3659 }
3660 if (fIsMaster)
3661 pArgs->iTry = iTry;
3662 }
3663
3664 /*
3665 * End the synchroniziation dance. We tell the other that we're done,
3666 * then wait for the same kind of reply.
3667 */
3668 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_FINAL);
3669 ASMAtomicWriteNullPtr(ppMySync);
3670 iTry = 0;
3671 TSCDELTA_DBG_START_LOOP();
3672 while (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_FINAL)
3673 {
3674 iTry++;
3675 if ( iTry == 0
3676 && !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuWorker->idCpu))
3677 break; /* this really shouldn't happen. */
3678 TSCDELTA_DBG_CHECK_LOOP();
3679 ASMNopPause();
3680 }
3681
3682 /*
3683 * Collect some runtime stats.
3684 */
3685 if (fIsMaster)
3686 pArgs->cElapsedMasterTscTicks = ASMReadTSC() - MySync.uTscStart;
3687 else
3688 pArgs->cElapsedWorkerTscTicks = ASMReadTSC() - MySync.uTscStart;
3689 return 0;
3690}
3691
3692/**
3693 * Callback used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
3694 * and compute the delta between them.
3695 *
3696 * @param idCpu The CPU we are current scheduled on.
3697 * @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
3698 * @param pvUser2 Unused.
3699 */
3700static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3701{
3702 supdrvMeasureTscDeltaCallbackUnwrapped(idCpu, (PSUPDRVGIPTSCDELTARGS)pvUser1);
3703}
3704
3705
3706/**
3707 * Measures the TSC delta between the master GIP CPU and one specified worker
3708 * CPU.
3709 *
3710 * @returns VBox status code.
3711 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED on pure measurement
3712 * failure.
3713 * @param pDevExt Pointer to the device instance data.
3714 * @param idxWorker The index of the worker CPU from the GIP's array of
3715 * CPUs.
3716 *
3717 * @remarks This must be called with preemption enabled!
3718 */
3719static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
3720{
3721 int rc;
3722 int rc2;
3723 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3724 RTCPUID idMaster = pDevExt->idGipMaster;
3725 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
3726 PSUPGIPCPU pGipCpuMaster;
3727 uint32_t iGipCpuMaster;
3728
3729 /* Validate input a bit. */
3730 AssertReturn(pGip, VERR_INVALID_PARAMETER);
3731 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
3732 Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3733
3734 /*
3735 * Don't attempt measuring the delta for the GIP master.
3736 */
3737 if (pGipCpuWorker->idCpu == idMaster)
3738 {
3739 if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
3740 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3741 return VINF_SUCCESS;
3742 }
3743
3744 /*
     * One measurement at a time, at least for now.  We might be using
     * broadcast IPIs, so be nice to the rest of the system.
3747 */
3748#ifdef SUPDRV_USE_MUTEX_FOR_GIP
3749 rc = RTSemMutexRequest(pDevExt->mtxTscDelta, RT_INDEFINITE_WAIT);
3750#else
3751 rc = RTSemFastMutexRequest(pDevExt->mtxTscDelta);
3752#endif
3753 if (RT_FAILURE(rc))
3754 return rc;
3755
3756 /*
3757 * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
3758 * try pick a different master. (This fudge only works with multi core systems.)
3759 * ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core.
3760 *
3761 * We skip this on AMDs for now as their HTT is different from intel's and
3762 * it doesn't seem to have any favorable effect on the results.
3763 *
3764 * If the master is offline, we need a new master too, so share the code.
3765 */
3766 iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
3767 AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
3768 pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
3769 if ( ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
3770 && ASMHasCpuId()
3771 && ASMIsValidStdRange(ASMCpuId_EAX(0))
3772 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
3773 && !ASMIsAmdCpu()
3774 && pGip->cOnlineCpus > 2)
3775 || !RTMpIsCpuOnline(idMaster) )
3776 {
3777 uint32_t i;
3778 for (i = 0; i < pGip->cCpus; i++)
3779 if ( i != iGipCpuMaster
3780 && i != idxWorker
3781 && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
3782 && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
3783 && pGip->aCPUs[i].idCpu != NIL_RTCPUID
3784 && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
3785 && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
3786 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
3787 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic
3788 && RTMpIsCpuOnline(pGip->aCPUs[i].idCpu))
3789 {
3790 iGipCpuMaster = i;
3791 pGipCpuMaster = &pGip->aCPUs[i];
3792 idMaster = pGipCpuMaster->idCpu;
3793 break;
3794 }
3795 }
3796
3797 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
3798 {
3799 /*
3800 * Initialize data package for the RTMpOnPair callback.
3801 */
3802 PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)RTMemAllocZ(sizeof(*pArgs));
3803 if (pArgs)
3804 {
3805 pArgs->pWorker = pGipCpuWorker;
3806 pArgs->pMaster = pGipCpuMaster;
3807 pArgs->pDevExt = pDevExt;
3808 pArgs->pSyncMaster = NULL;
3809 pArgs->pSyncWorker = NULL;
3810 pArgs->cMaxTscTicks = ASMAtomicReadU64(&pGip->u64CpuHz) / 512; /* 1953 us */
3811
3812 /*
3813 * Do the RTMpOnPair call. We reset i64TSCDelta first so we
3814 * and supdrvMeasureTscDeltaCallback can use it as a success check.
3815 */
3816 /** @todo Store the i64TSCDelta result in pArgs first? Perhaps deals with
3817 * that when doing the restart loop reorg. */
3818 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
3819 rc = RTMpOnPair(pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, RTMPON_F_CONCURRENT_EXEC,
3820 supdrvMeasureTscDeltaCallback, pArgs, NULL);
3821 if (RT_SUCCESS(rc))
3822 {
3823#if 0
3824 SUPR0Printf("mponpair ticks: %9llu %9llu max: %9llu iTry: %u%s\n", pArgs->cElapsedMasterTscTicks,
3825 pArgs->cElapsedWorkerTscTicks, pArgs->cMaxTscTicks, pArgs->iTry,
3826 pArgs->fTimedOut ? " timed out" :"");
3827#endif
3828#if 0
3829 SUPR0Printf("rcVerify=%d iVerifyBadTscDiff=%lld cMinVerifyTscTicks=%lld cMaxVerifyTscTicks=%lld\n",
3830 pArgs->rcVerify, pArgs->iVerifyBadTscDiff, pArgs->cMinVerifyTscTicks, pArgs->cMaxVerifyTscTicks);
3831#endif
3832 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
3833 {
3834 /*
3835 * Work the TSC delta applicability rating. It starts
3836 * optimistic in supdrvGipInit, we downgrade it here.
3837 */
3838 SUPGIPUSETSCDELTA enmRating;
3839 if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
3840 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
3841 enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
3842 else if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
3843 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
3844 enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
3845 else
3846 enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
3847 if (pGip->enmUseTscDelta < enmRating)
3848 {
3849 AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
3850 ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
3851 }
3852 }
3853 else
3854 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
3855 }
3856 /** @todo return try-again if we get an offline CPU error. */
3857
3858 RTMemFree(pArgs);
3859 }
3860 else
3861 rc = VERR_NO_MEMORY;
3862 }
3863 else
3864 rc = VERR_CPU_OFFLINE;
3865
3866 /*
3867 * We're done now.
3868 */
3869#ifdef SUPDRV_USE_MUTEX_FOR_GIP
3870 rc2 = RTSemMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
3871#else
3872 rc2 = RTSemFastMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
3873#endif
3874 return rc;
3875}
3876
3877
3878/**
3879 * Resets the TSC-delta related TSC samples and optionally the deltas
3880 * themselves.
3881 *
3882 * @param pDevExt Pointer to the device instance data.
3883 * @param fResetTscDeltas Whether the TSC-deltas are also to be reset.
3884 *
3885 * @remarks This might be called while holding a spinlock!
3886 */
3887static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fResetTscDeltas)
3888{
3889 unsigned iCpu;
3890 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3891 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
3892 {
3893 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
3894 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
3895 if (fResetTscDeltas)
3896 {
3897 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpu->iCpuSet);
3898 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
3899 }
3900 }
3901}
3902
3903
3904/**
3905 * Picks an online CPU as the master TSC for TSC-delta computations.
3906 *
3907 * @returns VBox status code.
3908 * @param pDevExt Pointer to the device instance data.
3909 * @param pidxMaster Where to store the CPU array index of the chosen
3910 * master. Optional, can be NULL.
3911 */
3912static int supdrvTscPickMaster(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
3913{
3914 /*
3915 * Pick the first CPU online as the master TSC and make it the new GIP master based
3916 * on the APIC ID.
3917 *
3918 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
3919 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
3920 * master as this point since the sync/async timer isn't created yet.
3921 */
3922 unsigned iCpu;
3923 uint32_t idxMaster = UINT32_MAX;
3924 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3925 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
3926 {
3927 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
3928 if (idxCpu != UINT16_MAX)
3929 {
3930 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
3931 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
3932 {
3933 idxMaster = idxCpu;
3934 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
3935 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpu->idCpu);
3936 if (pidxMaster)
3937 *pidxMaster = idxMaster;
3938 return VINF_SUCCESS;
3939 }
3940 }
3941 }
3942 return VERR_CPU_OFFLINE;
3943}
3944
3945
3946/**
3947 * Performs the initial measurements of the TSC deltas between CPUs.
3948 *
3949 * This is called by supdrvGipCreate(), supdrvGipPowerNotificationCallback() or
3950 * triggered by it if threaded.
3951 *
3952 * @returns VBox status code.
3953 * @param pDevExt Pointer to the device instance data.
3954 *
3955 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
3956 * idCpu, GIP's online CPU set which are populated in
3957 * supdrvGipInitOnCpu().
3958 */
3959static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt)
3960{
3961 PSUPGIPCPU pGipCpuMaster;
3962 unsigned iCpu;
3963 unsigned iOddEven;
3964 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3965 uint32_t idxMaster = UINT32_MAX;
3966 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
3967
3968 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
3969 supdrvTscResetSamples(pDevExt, true /* fClearDeltas */);
3970 int rc = supdrvTscPickMaster(pDevExt, &idxMaster);
3971 if (RT_FAILURE(rc))
3972 {
3973 SUPR0Printf("Failed to pick a CPU master for TSC-delta measurements rc=%Rrc\n", rc);
3974 return rc;
3975 }
3976 AssertReturn(idxMaster < pGip->cCpus, VERR_INVALID_CPU_INDEX);
3977 pGipCpuMaster = &pGip->aCPUs[idxMaster];
3978
3979 /*
3980 * If there is only a single CPU online we have nothing to do.
3981 */
3982 if (pGip->cOnlineCpus <= 1)
3983 {
3984 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
3985 return VINF_SUCCESS;
3986 }
3987
3988 /*
3989 * Loop thru the GIP CPU array and get deltas for each CPU (except the
3990 * master). We do the CPUs with the even numbered APIC IDs first so that
3991 * we've got alternative master CPUs to pick from on hyper-threaded systems.
3992 */
3993 for (iOddEven = 0; iOddEven < 2; iOddEven++)
3994 {
3995 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
3996 {
3997 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
3998 if ( iCpu != idxMaster
3999 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
4000 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4001 {
4002 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
4003 if (RT_FAILURE(rc))
4004 {
4005 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
4006 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
4007 break;
4008 }
4009
4010 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
4011 {
4012 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
4013 rc = VERR_TRY_AGAIN;
4014 break;
4015 }
4016 }
4017 }
4018 }
4019
4020 return rc;
4021}
4022
4023
4024#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4025
4026/**
4027 * Switches the TSC-delta measurement thread into the butchered state.
4028 *
4029 * @returns VBox status code.
4030 * @param pDevExt Pointer to the device instance data.
4031 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
4032 * @param pszFailed An error message to log.
4033 * @param rcFailed The error code to exit the thread with.
4034 */
4035static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
4036{
4037 if (!fSpinlockHeld)
4038 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4039
4040 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
4041 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4042 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
4043 return rcFailed;
4044}
4045
4046
/**
 * The TSC-delta measurement thread.
 *
 * Services deferred and periodic TSC-delta measurement requests; state
 * transitions are protected by pDevExt->hTscDeltaSpinlock.
 *
 * @returns VBox status code. Returns on state transition to
 *          kTscDeltaThreadState_Terminating/_Butchered; the trailing
 *          "return rc" is unreachable.
 * @param   hThread     The thread handle.
 * @param   pvUser      Opaque pointer to the device instance data.
 */
static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
{
    PSUPDRVDEVEXT     pDevExt = (PSUPDRVDEVEXT)pvUser;
    uint32_t          cConsecutiveTimeouts = 0;
    int               rc = VERR_INTERNAL_ERROR_2;
    for (;;)
    {
        /*
         * Switch on the current state.
         */
        SUPDRVTSCDELTATHREADSTATE enmState;
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        enmState = pDevExt->enmTscDeltaThreadState;
        switch (enmState)
        {
            case kTscDeltaThreadState_Creating:
            {
                /* Tell supdrvTscDeltaThreadInit() we're up and running, then behave as Listening. */
                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
                if (RT_FAILURE(rc))
                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
                /* fall thru */
            }

            case kTscDeltaThreadState_Listening:
            {
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);

                /* Simple adaptive timeout: back off 1 -> 10 -> 100 -> 500 ms after
                   every 10 consecutive timeouts so an idle thread wakes up less often. */
                if (cConsecutiveTimeouts++ == 10)
                {
                    if (pDevExt->cMsTscDeltaTimeout == 1)       /* 10 ms */
                        pDevExt->cMsTscDeltaTimeout = 10;
                    else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
                        pDevExt->cMsTscDeltaTimeout = 100;
                    else if (pDevExt->cMsTscDeltaTimeout == 100) /* +500 ms */
                        pDevExt->cMsTscDeltaTimeout = 500;
                    cConsecutiveTimeouts = 0;
                }
                rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
                if (   RT_FAILURE(rc)
                    && rc != VERR_TIMEOUT)
                    return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
                RTThreadUserReset(pDevExt->hTscDeltaThread);
                break;
            }

            case kTscDeltaThreadState_WaitAndMeasure:
            {
                /* Acknowledge the requester, give it a moment to continue, then measure. */
                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
                if (RT_FAILURE(rc))
                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                pDevExt->cMsTscDeltaTimeout = 1;
                RTThreadSleep(1);
                /* fall thru */
            }

            case kTscDeltaThreadState_Measuring:
            {
                cConsecutiveTimeouts = 0;
                if (pDevExt->fTscThreadRecomputeAllDeltas)
                {
                    /* Recompute deltas for every online CPU (e.g. after suspend/resume),
                       retrying a few times if CPUs come/go while we measure. */
                    int cTries = 8;
                    int cMsWaitPerTry = 10;
                    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
                    Assert(pGip);
                    do
                    {
                        RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
                        rc = supdrvMeasureInitialTscDeltas(pDevExt);
                        if (   RT_SUCCESS(rc)
                            || (   RT_FAILURE(rc)
                                && rc != VERR_TRY_AGAIN
                                && rc != VERR_CPU_OFFLINE))
                        {
                            break;
                        }
                        RTThreadSleep(cMsWaitPerTry);
                    } while (cTries-- > 0);
                    pDevExt->fTscThreadRecomputeAllDeltas = false;
                }
                else
                {
                    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
                    unsigned iCpu;

                    /* Measure TSC-deltas only for the CPUs that are in the set. */
                    rc = VINF_SUCCESS;
                    for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
                    {
                        PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
                        if (RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
                        {
                            if (pGipCpuWorker->i64TSCDelta == INT64_MAX)
                            {
                                /* Keep measuring the remaining CPUs on failure, but report the first error. */
                                int rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
                                if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
                                    rc = rc2;
                            }
                            else
                            {
                                /*
                                 * The thread/someone must've called SUPR0TscDeltaMeasureBySetIndex(),
                                 * mark the delta as fine to get the timer thread off our back.
                                 */
                                RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
                                RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
                            }
                        }
                    }
                }
                RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
                if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
                    pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                Assert(rc != VERR_NOT_AVAILABLE);  /* VERR_NOT_AVAILABLE is used as init value, see supdrvTscDeltaThreadInit(). */
                ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
                break;
            }

            case kTscDeltaThreadState_Terminating:
                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                return VINF_SUCCESS;

            case kTscDeltaThreadState_Butchered:
            default:
                return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
        }
    }

    return rc; /* not reached */
}
4189
4190
/**
 * Waits for the TSC-delta measurement thread to respond to a state change.
 *
 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
 *          other error code on internal error.
 *
 * @param   pDevExt         Pointer to the device instance data.
 * @param   enmCurState     The current state.
 * @param   enmNewState     The new state we're waiting for it to enter.
 */
static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
                                    SUPDRVTSCDELTATHREADSTATE enmNewState)
{
    /*
     * Wait a short while for the expected state transition.
     */
    int rc;
    RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
    RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    if (pDevExt->enmTscDeltaThreadState == enmNewState)
    {
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        rc = VINF_SUCCESS;
    }
    else if (pDevExt->enmTscDeltaThreadState == enmCurState)
    {
        /*
         * Wait longer if the state has not yet transitioned to the one we want.
         */
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
        if (   RT_SUCCESS(rc)
            || rc == VERR_TIMEOUT)
        {
            /*
             * Check the state whether we've succeeded.
             */
            SUPDRVTSCDELTATHREADSTATE enmState;
            RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
            enmState = pDevExt->enmTscDeltaThreadState;
            RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
            if (enmState == enmNewState)
                rc = VINF_SUCCESS;
            else if (enmState == enmCurState)
            {
                /* Still stuck in the old state after the long wait: give up. */
                rc = VERR_TIMEOUT;
                OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
                            enmNewState));
            }
            else
            {
                /* The thread moved to some third state we did not expect. */
                rc = VERR_INTERNAL_ERROR;
                OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
                            enmState, enmNewState));
            }
        }
        else
            OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
    }
    else
    {
        /* Neither the old nor the new state: unexpected transition. */
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
        rc = VERR_INTERNAL_ERROR;
    }

    return rc;
}
4259
4260
4261/**
4262 * Signals the TSC-delta thread to start measuring TSC-deltas.
4263 *
4264 * @param pDevExt Pointer to the device instance data.
4265 * @param fForceAll Force re-calculating TSC-deltas on all CPUs.
4266 */
4267static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll)
4268{
4269 if (pDevExt->hTscDeltaThread != NIL_RTTHREAD)
4270 {
4271 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4272 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4273 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4274 {
4275 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4276 if (fForceAll)
4277 pDevExt->fTscThreadRecomputeAllDeltas = true;
4278 }
4279 else if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_WaitAndMeasure
4280 && fForceAll)
4281 pDevExt->fTscThreadRecomputeAllDeltas = true;
4282 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4283 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4284 }
4285}
4286
4287
4288/**
4289 * Terminates the actual thread running supdrvTscDeltaThread().
4290 *
4291 * This is an internal worker function for supdrvTscDeltaThreadInit() and
4292 * supdrvTscDeltaTerm().
4293 *
4294 * @param pDevExt Pointer to the device instance data.
4295 */
4296static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
4297{
4298 int rc;
4299 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4300 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
4301 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4302 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4303 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
4304 if (RT_FAILURE(rc))
4305 {
4306 /* Signal a few more times before giving up. */
4307 int cTriesLeft = 5;
4308 while (--cTriesLeft > 0)
4309 {
4310 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4311 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
4312 if (rc != VERR_TIMEOUT)
4313 break;
4314 }
4315 }
4316}
4317
4318
4319/**
4320 * Initializes and spawns the TSC-delta measurement thread.
4321 *
4322 * A thread is required for servicing re-measurement requests from events like
4323 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
4324 * under all contexts on all OSs.
4325 *
4326 * @returns VBox status code.
4327 * @param pDevExt Pointer to the device instance data.
4328 *
4329 * @remarks Must only be called -after- initializing GIP and setting up MP
4330 * notifications!
4331 */
4332static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
4333{
4334 int rc;
4335 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4336 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
4337 if (RT_SUCCESS(rc))
4338 {
4339 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
4340 if (RT_SUCCESS(rc))
4341 {
4342 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
4343 pDevExt->cMsTscDeltaTimeout = 1;
4344 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
4345 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
4346 if (RT_SUCCESS(rc))
4347 {
4348 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
4349 if (RT_SUCCESS(rc))
4350 {
4351 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4352 return rc;
4353 }
4354
4355 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
4356 supdrvTscDeltaThreadTerminate(pDevExt);
4357 }
4358 else
4359 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
4360 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4361 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4362 }
4363 else
4364 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
4365 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4366 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4367 }
4368 else
4369 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
4370
4371 return rc;
4372}
4373
4374
4375/**
4376 * Terminates the TSC-delta measurement thread and cleanup.
4377 *
4378 * @param pDevExt Pointer to the device instance data.
4379 */
4380static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
4381{
4382 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
4383 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4384 {
4385 supdrvTscDeltaThreadTerminate(pDevExt);
4386 }
4387
4388 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
4389 {
4390 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4391 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4392 }
4393
4394 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4395 {
4396 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4397 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4398 }
4399
4400 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4401}
4402
4403#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4404
4405/**
4406 * Measure the TSC delta for the CPU given by its CPU set index.
4407 *
4408 * @returns VBox status code.
4409 * @retval VERR_INTERRUPTED if interrupted while waiting.
4410 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if we were unable to get a
4411 * measurment.
4412 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
4413 *
4414 * @param pSession The caller's session. GIP must've been mapped.
4415 * @param iCpuSet The CPU set index of the CPU to measure.
4416 * @param fFlags Flags, SUP_TSCDELTA_MEASURE_F_XXX.
4417 * @param cMsWaitRetry Number of milliseconds to wait between each retry.
4418 * @param cMsWaitThread Number of milliseconds to wait for the thread to get
4419 * ready.
4420 * @param cTries Number of times to try, pass 0 for the default.
4421 */
4422SUPR0DECL(int) SUPR0TscDeltaMeasureBySetIndex(PSUPDRVSESSION pSession, uint32_t iCpuSet, uint32_t fFlags,
4423 RTMSINTERVAL cMsWaitRetry, RTMSINTERVAL cMsWaitThread, uint32_t cTries)
4424{
4425 PSUPDRVDEVEXT pDevExt;
4426 PSUPGLOBALINFOPAGE pGip;
4427 uint16_t iGipCpu;
4428 int rc;
4429#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4430 uint64_t msTsStartWait;
4431 uint32_t iWaitLoop;
4432#endif
4433
4434 /*
4435 * Validate and adjust the input.
4436 */
4437 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4438 if (!pSession->fGipReferenced)
4439 return VERR_WRONG_ORDER;
4440
4441 pDevExt = pSession->pDevExt;
4442 AssertReturn(SUP_IS_DEVEXT_VALID(pDevExt), VERR_INVALID_PARAMETER);
4443
4444 pGip = pDevExt->pGip;
4445 AssertPtrReturn(pGip, VERR_INTERNAL_ERROR_2);
4446
4447 AssertReturn(iCpuSet < RTCPUSET_MAX_CPUS, VERR_INVALID_CPU_INDEX);
4448 AssertReturn(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), VERR_INVALID_CPU_INDEX);
4449 iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
4450 AssertReturn(iGipCpu < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4451
4452 if (fFlags & ~SUP_TSCDELTA_MEASURE_F_VALID_MASK)
4453 return VERR_INVALID_FLAGS;
4454
4455 /*
4456 * The request is a noop if the TSC delta isn't being used.
4457 */
4458 if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4459 return VINF_SUCCESS;
4460
4461 if (cTries == 0)
4462 cTries = 12;
4463 else if (cTries > 256)
4464 cTries = 256;
4465
4466 if (cMsWaitRetry == 0)
4467 cMsWaitRetry = 2;
4468 else if (cMsWaitRetry > 1000)
4469 cMsWaitRetry = 1000;
4470
4471#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4472 /*
4473 * Has the TSC already been measured and we're not forced to redo it?
4474 */
4475 if ( pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX
4476 && !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE))
4477 return VINF_SUCCESS;
4478
4479 /*
4480 * Asynchronous request? Forward it to the thread, no waiting.
4481 */
4482 if (fFlags & SUP_TSCDELTA_MEASURE_F_ASYNC)
4483 {
4484 /** @todo Async. doesn't implement options like retries, waiting. We'll need
4485 * to pass those options to the thread somehow and implement it in the
4486 * thread. Check if anyone uses/needs fAsync before implementing this. */
4487 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4488 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
4489 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4490 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4491 {
4492 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4493 rc = VINF_SUCCESS;
4494 }
4495 else if (pDevExt->enmTscDeltaThreadState != kTscDeltaThreadState_WaitAndMeasure)
4496 rc = VERR_THREAD_IS_DEAD;
4497 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4498 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4499 return VINF_SUCCESS;
4500 }
4501
4502 /*
4503 * If a TSC-delta measurement request is already being serviced by the thread,
4504 * wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
4505 */
4506 msTsStartWait = RTTimeSystemMilliTS();
4507 for (iWaitLoop = 0;; iWaitLoop++)
4508 {
4509 uint64_t cMsElapsed;
4510 SUPDRVTSCDELTATHREADSTATE enmState;
4511 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4512 enmState = pDevExt->enmTscDeltaThreadState;
4513 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4514
4515 if (enmState == kTscDeltaThreadState_Measuring)
4516 { /* Must wait, the thread is busy. */ }
4517 else if (enmState == kTscDeltaThreadState_WaitAndMeasure)
4518 { /* Must wait, this state only says what will happen next. */ }
4519 else if (enmState == kTscDeltaThreadState_Terminating)
4520 { /* Must wait, this state only says what should happen next. */ }
4521 else
4522 break; /* All other states, the thread is either idly listening or dead. */
4523
4524 /* Wait or fail. */
4525 if (cMsWaitThread == 0)
4526 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4527 cMsElapsed = RTTimeSystemMilliTS() - msTsStartWait;
4528 if (cMsElapsed >= cMsWaitThread)
4529 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4530
4531 rc = RTThreadSleep(RT_MIN((RTMSINTERVAL)(cMsWaitThread - cMsElapsed), RT_MIN(iWaitLoop + 1, 10)));
4532 if (rc == VERR_INTERRUPTED)
4533 return rc;
4534 }
4535#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4536
4537 /*
4538 * Try measure the TSC delta the given number of times.
4539 */
4540 for (;;)
4541 {
4542 /* Unless we're forced to measure the delta, check whether it's done already. */
4543 if ( !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE)
4544 && pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX)
4545 {
4546 rc = VINF_SUCCESS;
4547 break;
4548 }
4549
4550 /* Measure it. */
4551 rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
4552 if (rc != VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED)
4553 {
4554 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
4555 break;
4556 }
4557
4558 /* Retry? */
4559 if (cTries <= 1)
4560 break;
4561 cTries--;
4562
4563 /* Always delay between retries (be nice to the rest of the system
4564 and avoid the BSOD hounds). */
4565 rc = RTThreadSleep(cMsWaitRetry);
4566 if (rc == VERR_INTERRUPTED)
4567 break;
4568 }
4569
4570 return rc;
4571}
4572
4573
4574/**
4575 * Service a TSC-delta measurement request.
4576 *
4577 * @returns VBox status code.
4578 * @param pDevExt Pointer to the device instance data.
4579 * @param pSession The support driver session.
4580 * @param pReq Pointer to the TSC-delta measurement request.
4581 */
4582int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
4583{
4584 uint32_t cTries;
4585 uint32_t iCpuSet;
4586 uint32_t fFlags;
4587 RTMSINTERVAL cMsWaitRetry;
4588
4589 /*
4590 * Validate and adjust/resolve the input so they can be passed onto SUPR0TscDeltaMeasureBySetIndex.
4591 */
4592 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
4593
4594 if (pReq->u.In.idCpu == NIL_RTCPUID)
4595 return VERR_INVALID_CPU_ID;
4596 iCpuSet = RTMpCpuIdToSetIndex(pReq->u.In.idCpu);
4597 if (iCpuSet >= RTCPUSET_MAX_CPUS)
4598 return VERR_INVALID_CPU_ID;
4599
4600 cTries = pReq->u.In.cRetries == 0 ? 0 : (uint32_t)pReq->u.In.cRetries + 1;
4601
4602 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
4603
4604 fFlags = 0;
4605 if (pReq->u.In.fAsync)
4606 fFlags |= SUP_TSCDELTA_MEASURE_F_ASYNC;
4607 if (pReq->u.In.fForce)
4608 fFlags |= SUP_TSCDELTA_MEASURE_F_FORCE;
4609
4610 return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, fFlags, cMsWaitRetry,
4611 cTries == 0 ? 5 * RT_MS_1SEC : cMsWaitRetry * cTries /*cMsWaitThread*/,
4612 cTries);
4613}
4614
4615
/**
 * Reads TSC with delta applied.
 *
 * Will try to resolve delta value INT64_MAX before applying it.  This is the
 * main purpose of this function, to handle the case where the delta needs to be
 * determined.
 *
 * @returns VBox status code.
 * @param   pDevExt         Pointer to the device instance data.
 * @param   pSession        The support driver session.
 * @param   pReq            Pointer to the TSC-read request.
 */
int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
{
    PSUPGLOBALINFOPAGE pGip;
    int rc;

    /*
     * Validate.  We require the client to have mapped GIP (no asserting on
     * ring-3 preconditions).
     */
    AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
    if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
        return VERR_WRONG_ORDER;
    pGip = pDevExt->pGip;
    AssertReturn(pGip, VERR_INTERNAL_ERROR_2);

    /*
     * We're usually here because we need to apply delta, but we shouldn't be
     * upset if the GIP is some different mode.
     */
    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        uint32_t cTries = 0;
        for (;;)
        {
            /*
             * Start by gathering the data, using CLI for disabling preemption
             * while we do that.
             */
            RTCCUINTREG fEFlags = ASMIntDisableFlags();
            int         iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
            int         iGipCpu;
            if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                          && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
            {
                int64_t i64Delta   = pGip->aCPUs[iGipCpu].i64TSCDelta;
                pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
                pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
                ASMSetFlags(fEFlags);   /* Re-enable interrupts before the slow paths below. */

                /*
                 * If we're lucky we've got a delta, but no predictions here
                 * as this I/O control is normally only used when the TSC delta
                 * is set to INT64_MAX.
                 */
                if (i64Delta != INT64_MAX)
                {
                    pReq->u.Out.u64AdjustedTsc -= i64Delta;
                    rc = VINF_SUCCESS;
                    break;
                }

                /* Give up after a few times. */
                if (cTries >= 4)
                {
                    rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
                    break;
                }

                /* Need to measure the delta and try again. */
                rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
                Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
                /** @todo should probably delay on failure... dpc watchdogs */
            }
            else
            {
                /* This really shouldn't happen. */
                AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
                pReq->u.Out.idApic = ASMGetApicId();
                pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
                ASMSetFlags(fEFlags);
                rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
                break;
            }
        }
    }
    else
    {
        /*
         * No delta to apply.  Easy.  Deal with preemption the lazy way.
         */
        RTCCUINTREG fEFlags = ASMIntDisableFlags();
        int         iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
        int         iGipCpu;
        if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                      && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
            pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
        else
            pReq->u.Out.idApic = ASMGetApicId();
        pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
        ASMSetFlags(fEFlags);
        rc = VINF_SUCCESS;
    }

    return rc;
}
4723
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette