VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp@106625

Last change on this file since 106625 was 106625, checked in by vboxsync, 5 months ago

SUPDrv: Making it build on win.arm64... jiraref:VBP-1253

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 188.4 KB
 
/* $Id: SUPDrvGip.cpp 106625 2024-10-23 15:45:04Z vboxsync $ */
/** @file
 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
 */

/*
 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * The contents of this file may alternatively be used under the terms
 * of the Common Development and Distribution License Version 1.0
 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
 * in the VirtualBox distribution, in which case the provisions of the
 * CDDL are applicable instead of those of the GPL.
 *
 * You may elect to license modified versions of this file under the
 * terms and conditions of either the GPL or the CDDL or both.
 *
 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#define LOG_GROUP LOG_GROUP_SUP_DRV
#define SUPDRV_AGNOSTIC
#include "SUPDrvInternal.h"
#ifndef PAGE_SHIFT
# include <iprt/param.h>
#endif
#include <iprt/asm.h>
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
# include <iprt/asm-amd64-x86.h>
#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
# include <iprt/asm-arm.h>
#else
# error "Port me!"
#endif
#include <iprt/asm-math.h>
#include <iprt/cpuset.h>
#include <iprt/handletable.h>
#include <iprt/mem.h>
#include <iprt/mp.h>
#include <iprt/power.h>
#include <iprt/process.h>
#include <iprt/semaphore.h>
#include <iprt/spinlock.h>
#include <iprt/thread.h>
#include <iprt/uuid.h>
#include <iprt/net.h>
#include <iprt/crc.h>
#include <iprt/string.h>
#include <iprt/timer.h>
#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
# include <iprt/rand.h>
# include <iprt/path.h>
#endif
#include <iprt/uint128.h>
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
# include <iprt/x86.h>
#elif defined(RT_ARCH_ARM64)
# include <iprt/armv8.h>
#endif

#include <VBox/param.h>
#include <VBox/log.h>
#include <VBox/err.h>

#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
# include "dtrace/SUPDrv.h"
#else
/* ... */
#endif


/*********************************************************************************************************************************
*   Defined Constants And Macros                                                                                                 *
*********************************************************************************************************************************/
/** The frequency by which we recalculate the u32UpdateHz and
 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
 *
 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
 */
#define GIP_UPDATEHZ_RECALC_FREQ 0x800

/** A reserved TSC value used for synchronization as well as measurement of
 * TSC deltas. */
#define GIP_TSC_DELTA_RSVD UINT64_MAX
/** The number of TSC delta measurement loops in total (includes primer and
 * read-time loops). */
#define GIP_TSC_DELTA_LOOPS 96
/** The number of cache primer loops. */
#define GIP_TSC_DELTA_PRIMER_LOOPS 4
/** The number of loops during which we keep computing the minimum read time. */
#define GIP_TSC_DELTA_READ_TIME_LOOPS 24

/** The TSC frequency refinement period in seconds.
 * The timer fires after 200ms, then every second, this value just says when
 * to stop it after that. */
#define GIP_TSC_REFINE_PERIOD_IN_SECS 12
/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
/** The TSC delta value for the initial GIP master - 0 in regular builds.
 * To test the delta code this can be set to a non-zero value. */
#if 0
# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
#else
# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
#endif

AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);

/** @def VBOX_SVN_REV
 * The makefile should define this if it can. */
#ifndef VBOX_SVN_REV
# define VBOX_SVN_REV 0
#endif

#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
# define DO_NOT_START_GIP
#endif


/*********************************************************************************************************************************
*   Internal Functions                                                                                                           *
*********************************************************************************************************************************/
static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask);
static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz);
static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas);
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll);
#else
static int supdrvTscMeasureInitialDeltas(PSUPDRVDEVEXT pDevExt);
static int supdrvTscMeasureDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
#endif


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
SUPR0_EXPORT_SYMBOL(g_pSUPGlobalInfoPage);



/*
 *
 * Misc Common GIP Code
 * Misc Common GIP Code
 * Misc Common GIP Code
 *
 *
 */


/**
 * Finds the GIP CPU index corresponding to @a idCpu.
 *
 * @returns GIP CPU array index, UINT32_MAX if not found.
 * @param pGip The GIP.
 * @param idCpu The CPU ID.
 */
static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
{
    uint32_t i;
    for (i = 0; i < pGip->cCpus; i++)
        if (pGip->aCPUs[i].idCpu == idCpu)
            return i;
    return UINT32_MAX;
}


/**
 * Gets the APIC ID using the best available method.
 *
 * @returns APIC ID.
 * @param pGip The GIP, for SUPGIPGETCPU_XXX.
 *
 * @note APIC ID == CPU ID on non-x86 platforms.
 */
DECLINLINE(uint32_t) supdrvGipGetApicId(PSUPGLOBALINFOPAGE pGip)
{
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    if (pGip->fGetGipCpu & SUPGIPGETCPU_APIC_ID_EXT_0B)
        return ASMGetApicIdExt0B();
    if (pGip->fGetGipCpu & SUPGIPGETCPU_APIC_ID_EXT_8000001E)
        return ASMGetApicIdExt8000001E();
    return ASMGetApicId();

#elif defined(RT_ARCH_ARM64) && defined(RT_OS_WINDOWS)
    RT_NOREF(pGip);
    return (uint32_t)ASMGetThreadIdRoEL0();
#else
# error "port me"
#endif
}


/**
 * Gets the APIC ID using the best available method, slow version.
 *
 * @note APIC ID == CPU ID on non-x86 platforms.
 */
static uint32_t supdrvGipGetApicIdSlow(void)
{
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    uint32_t const idApic = ASMGetApicId();

    /* The Intel CPU topology leaf: */
    uint32_t uOther = ASMCpuId_EAX(0);
    if (uOther >= UINT32_C(0xb) && RTX86IsValidStdRange(uOther))
    {
        uint32_t uEax = 0;
        uint32_t uEbx = 0;
        uint32_t uEcx = 0;
        uint32_t uEdx = 0;
# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
        ASMCpuId_Idx_ECX(0xb, 0, &uEax, &uEbx, &uEcx, &uEdx);
# else
        ASMCpuIdExSlow(0xb, 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
# endif
        if ((uEcx >> 8) != 0) /* level type != invalid */
        {
            if ((uEdx & 0xff) == idApic)
                return uEdx;
            AssertMsgFailed(("ASMGetApicIdExt0B=>%#x idApic=%#x\n", uEdx, idApic));
        }
    }

    /* The AMD leaf: */
    uOther = ASMCpuId_EAX(UINT32_C(0x80000000));
    if (uOther >= UINT32_C(0x8000001e) && RTX86IsValidExtRange(uOther))
    {
        uOther = ASMGetApicIdExt8000001E();
        if ((uOther & 0xff) == idApic)
            return uOther;
        AssertMsgFailed(("ASMGetApicIdExt8000001E=>%#x idApic=%#x\n", uOther, idApic));
    }
    return idApic;

#elif defined(RT_ARCH_ARM64) && defined(RT_OS_WINDOWS)
    return (uint32_t)ASMGetThreadIdRoEL0();
#else
# error "port me"
#endif
}



/*
 *
 * GIP Mapping and Unmapping Related Code.
 * GIP Mapping and Unmapping Related Code.
 * GIP Mapping and Unmapping Related Code.
 *
 *
 */


/**
 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
 * updating.
 *
 * @param pGipCpu The per CPU structure for this CPU.
 * @param u64NanoTS The current time.
 */
static void supdrvGipReInitCpu(PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
{
    /*
     * Here we don't really care about applying the TSC delta. The re-initialization of this
     * value is not relevant especially while (re)starting the GIP as the first few ones will
     * be ignored anyway, see supdrvGipDoUpdateCpu().
     */
    pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
    pGipCpu->u64NanoTS = u64NanoTS;
}


/**
 * Set the current TSC and NanoTS value for the CPU.
 *
 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
 * @param pvUser1 Pointer to the ring-0 GIP mapping.
 * @param pvUser2 Pointer to the variable holding the current time.
 */
static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
    uint32_t const idApic = supdrvGipGetApicId(pGip);
    if (idApic < RT_ELEMENTS(pGip->aiCpuFromApicId))
    {
        unsigned const iCpu = pGip->aiCpuFromApicId[idApic];

        if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
            supdrvGipReInitCpu(&pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
        else
            LogRelMax(64, ("supdrvGipReInitCpuCallback: iCpu=%#x out of bounds (%#zx, idApic=%#x)\n",
                           iCpu, RT_ELEMENTS(pGip->aiCpuFromApicId), idApic));
    }
    else
        LogRelMax(64, ("supdrvGipReInitCpuCallback: idApic=%#x out of bounds (%#zx)\n",
                       idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)));

    NOREF(pvUser2);
}


/**
 * State structure for supdrvGipDetectGetGipCpuCallback.
 */
typedef struct SUPDRVGIPDETECTGETCPU
{
    /** Bitmap of APIC IDs that has been seen (initialized to zero).
     * Used to detect duplicate APIC IDs (paranoia). */
    uint8_t volatile bmApicId[4096 / 8];
    /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
     * initially). The callback clears the methods not detected. */
    uint32_t volatile fSupported;
    /** The first callback detecting any kind of range issues (initialized to
     * NIL_RTCPUID). */
    RTCPUID volatile idCpuProblem;
} SUPDRVGIPDETECTGETCPU;
/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;


/**
 * Checks for alternative ways of getting the CPU ID.
 *
 * This also checks the APIC ID, CPU ID and CPU set index values against the
 * GIP tables.
 *
 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
 * @param pvUser1 Pointer to the state structure.
 * @param pvUser2 Pointer to the GIP.
 */
static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
    PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
    int const iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    uint32_t fSupported = 0;
    uint32_t idApic;
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    uint32_t uEax, uEbx, uEcx, uEdx;
#else
    uint32_t const uEax = 0; /* Dummy for LogRel. */
#endif
    NOREF(pGip);

    AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */


#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    /*
     * Check that the CPU ID and CPU set index are interchangeable.
     */
    if ((RTCPUID)iCpuSet == idCpu)
    {
        AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
        if (   iCpuSet >= 0
            && iCpuSet < RTCPUSET_MAX_CPUS
            && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
        {
            PSUPGIPCPU pGipCpu = SUPGetGipCpuBySetIndex(pGip, iCpuSet);

            /*
             * Check whether the IDTR.LIMIT contains a CPU number.
             */
# ifdef RT_ARCH_X86
            uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
# else
            uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
# endif
            RTIDTR Idtr;
            ASMGetIDTR(&Idtr);
            if (Idtr.cbIdt >= cbIdt)
            {
                uint32_t uTmp = Idtr.cbIdt - cbIdt;
                uTmp &= RTCPUSET_MAX_CPUS - 1;
                if (uTmp == idCpu)
                {
                    RTIDTR Idtr2;
                    ASMGetIDTR(&Idtr2);
                    if (Idtr2.cbIdt == Idtr.cbIdt)
                        fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
                }
            }

            /*
             * Check whether RDTSCP is an option.
             */
            if (ASMHasCpuId())
            {
                if (   RTX86IsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
                    && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
                {
                    uint32_t uAux;
                    ASMReadTscWithAux(&uAux);
                    if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
                    {
                        ASMNopPause();
                        ASMReadTscWithAux(&uAux);
                        if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
                            fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
                    }

                    if (pGipCpu)
                    {
                        uint32_t const uGroupedAux = (uint8_t)pGipCpu->iCpuGroupMember | ((uint32_t)pGipCpu->iCpuGroup << 8);
                        if (   (uAux & UINT16_MAX) == uGroupedAux
                            && pGipCpu->iCpuGroupMember <= UINT8_MAX)
                        {
                            ASMNopPause();
                            ASMReadTscWithAux(&uAux);
                            if ((uAux & UINT16_MAX) == uGroupedAux)
                                fSupported |= SUPGIPGETCPU_RDTSCP_GROUP_IN_CH_NUMBER_IN_CL;
                        }
                    }
                }
            }
        }
    }

    /*
     * Check for extended APIC ID methods.
     */
    idApic = UINT32_MAX;
    uEax = ASMCpuId_EAX(0);
    if (uEax >= UINT32_C(0xb) && RTX86IsValidStdRange(uEax))
    {
# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
        ASMCpuId_Idx_ECX(0xb, 0, &uEax, &uEbx, &uEcx, &uEdx);
# else
        ASMCpuIdExSlow(0xb, 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
# endif
        if ((uEcx >> 8) != 0) /* level type != invalid */
        {
            if (RT_LIKELY(   uEdx < RT_ELEMENTS(pGip->aiCpuFromApicId)
                          && !ASMBitTest(pState->bmApicId, uEdx)))
            {
                if (uEdx == ASMGetApicIdExt0B())
                {
                    idApic = uEdx;
                    fSupported |= SUPGIPGETCPU_APIC_ID_EXT_0B;
                }
                else
                    AssertMsgFailed(("%#x vs %#x\n", uEdx, ASMGetApicIdExt0B()));
            }
        }
    }

    uEax = ASMCpuId_EAX(UINT32_C(0x80000000));
    if (uEax >= UINT32_C(0x8000001e) && RTX86IsValidExtRange(uEax))
    {
# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
        ASMCpuId_Idx_ECX(UINT32_C(0x8000001e), 0, &uEax, &uEbx, &uEcx, &uEdx);
# else
        ASMCpuIdExSlow(UINT32_C(0x8000001e), 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
# endif
        if (uEax || uEbx || uEcx || uEdx)
        {
            if (RT_LIKELY(   uEax < RT_ELEMENTS(pGip->aiCpuFromApicId)
                          && (   idApic == UINT32_MAX
                              || idApic == uEax)
                          && !ASMBitTest(pState->bmApicId, uEax)))
            {
                if (uEax == ASMGetApicIdExt8000001E())
                {
                    idApic = uEax;
                    fSupported |= SUPGIPGETCPU_APIC_ID_EXT_8000001E;
                }
                else
                    AssertMsgFailed(("%#x vs %#x\n", uEax, ASMGetApicIdExt8000001E()));
            }
        }
    }

#else  /* !defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86) */
    idApic = supdrvGipGetApicIdSlow();
#endif /* !defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86) */

    /*
     * Check that the APIC ID is unique.
     */
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    uEax = ASMGetApicId();
    if (RT_LIKELY(   uEax < RT_ELEMENTS(pGip->aiCpuFromApicId)
                  && (   idApic == UINT32_MAX
                      || idApic == uEax)
                  && !ASMAtomicBitTestAndSet(pState->bmApicId, uEax)))
    {
        idApic = uEax;
        fSupported |= SUPGIPGETCPU_APIC_ID;
    }
    else
#endif /* defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) */
    if (   idApic == UINT32_MAX
        || idApic >= RT_ELEMENTS(pGip->aiCpuFromApicId) /* paranoia */
        || ASMAtomicBitTestAndSet(pState->bmApicId, idApic))
    {
        AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
        ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
        LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x/%#x - duplicate APIC ID.\n",
                idCpu, iCpuSet, uEax, idApic));
    }

    /*
     * Check that the iCpuSet is within the expected range.
     */
    if (RT_UNLIKELY(   iCpuSet < 0
                    || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
                    || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
    {
        ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
        LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
                idCpu, iCpuSet, idApic));
    }
    else
    {
        RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
        if (RT_UNLIKELY(idCpu2 != idCpu))
        {
            ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
            LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
                    idCpu, iCpuSet, idApic, idCpu2));
        }
    }

    /*
     * Update the supported feature mask before we return.
     */
    ASMAtomicAndU32(&pState->fSupported, fSupported);

    NOREF(pvUser2);
}
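
/*
 * Illustrative sketch (not from the original sources): the RDTSCP probes
 * above accept two IA32_TSC_AUX encodings. Assuming the same masks and
 * field layout the callback uses, a matcher would look like this:
 *
 * @code
 *  static bool supdrvExampleMatchTscAux(uint32_t uAux, PSUPGIPCPU pGipCpu, RTCPUID idCpu)
 *  {
 *      // Plain encoding: the low bits of IA32_TSC_AUX hold the CPU set index.
 *      if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
 *          return true;
 *      // Windows-style encoding: CH = processor group, CL = member number.
 *      uint32_t const uGroupedAux = (uint8_t)pGipCpu->iCpuGroupMember | ((uint32_t)pGipCpu->iCpuGroup << 8);
 *      return (uAux & UINT16_MAX) == uGroupedAux;
 *  }
 * @endcode
 */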


/**
 * Increase the timer frequency on hosts where this is possible (NT).
 *
 * The idea is that more interrupts is better for us... Also, it's better that
 * we increase the timer frequency ourselves, because we might end up getting
 * inaccurate callbacks if someone else does it.
 *
 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
 */
static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
{
    if (pDevExt->u32SystemTimerGranularityGrant == 0)
    {
        uint32_t u32SystemResolution;
        if (   RT_SUCCESS_NP(RTTimerRequestSystemGranularity(  976563 /* 1024 HZ */, &u32SystemResolution))
            || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
            || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /*  512 HZ */, &u32SystemResolution))
            || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /*  500 HZ */, &u32SystemResolution))
           )
        {
#if 0 /* def VBOX_STRICT - this somehow triggers bogus assertions on windows 10 */
            uint32_t u32After = RTTimerGetSystemGranularity();
            AssertMsg(u32After <= u32SystemResolution, ("u32After=%u u32SystemResolution=%u\n", u32After, u32SystemResolution));
#endif
            pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
        }
    }
}


/**
 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
 *
 * @param pDevExt Clears u32SystemTimerGranularityGrant.
 */
static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
{
    if (pDevExt->u32SystemTimerGranularityGrant)
    {
        int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
        AssertRC(rc2);
        pDevExt->u32SystemTimerGranularityGrant = 0;
    }
}


/**
 * Maps the GIP into userspace and/or get the physical address of the GIP.
 *
 * @returns IPRT status code.
 * @param pSession Session to which the GIP mapping should belong.
 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
 * @param pHCPhysGip Where to store the physical address. (optional)
 *
 * @remark There is no reference counting on the mapping, so one call to this function
 *         counts globally as one reference. One call to SUPR0GipUnmap() will unmap the
 *         GIP and remove the session as a GIP user.
 */
SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
{
    int rc;
    PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
    RTR3PTR pGipR3 = NIL_RTR3PTR;
    RTHCPHYS HCPhys = NIL_RTHCPHYS;
    LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));

    /*
     * Validate
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
    AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif
    if (pDevExt->pGip)
    {
        /*
         * Map it?
         */
        rc = VINF_SUCCESS;
        if (ppGipR3)
        {
            if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
                rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
                                       RTMEM_PROT_READ, NIL_RTR0PROCESS);
            if (RT_SUCCESS(rc))
                pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
        }

        /*
         * Get physical address.
         */
        if (pHCPhysGip && RT_SUCCESS(rc))
            HCPhys = pDevExt->HCPhysGip;

        /*
         * Reference globally.
         */
        if (!pSession->fGipReferenced && RT_SUCCESS(rc))
        {
            pSession->fGipReferenced = 1;
            pDevExt->cGipUsers++;
            if (pDevExt->cGipUsers == 1)
            {
                PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
                uint64_t u64NanoTS;

                /*
                 * GIP starts/resumes updating again. On windows we bump the
                 * host timer frequency to make sure we don't get stuck in guest
                 * mode and to get better timer (and possibly clock) accuracy.
                 */
                LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));

                supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);

                /*
                 * document me
                 */
                if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
                {
                    unsigned i;
                    for (i = 0; i < pGipR0->cCpus; i++)
                        ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
                                            (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
                                            & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
                    ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
                }

                /*
                 * document me
                 */
                u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
                if (   pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
                    || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
                    || RTMpGetOnlineCount() == 1)
                    supdrvGipReInitCpu(&pGipR0->aCPUs[0], u64NanoTS);
                else
                    RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);

                /*
                 * Detect alternative ways to figure the CPU ID in ring-3 and
                 * raw-mode context. Check the sanity of the APIC IDs, CPU IDs,
                 * and CPU set indexes while we're at it.
                 */
                if (RT_SUCCESS(rc))
                {
                    PSUPDRVGIPDETECTGETCPU pDetectState = (PSUPDRVGIPDETECTGETCPU)RTMemTmpAllocZ(sizeof(*pDetectState));
                    if (pDetectState)
                    {
                        pDetectState->fSupported = UINT32_MAX;
                        pDetectState->idCpuProblem = NIL_RTCPUID;
                        rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, pDetectState, pGipR0);
                        if (pDetectState->idCpuProblem == NIL_RTCPUID)
                        {
                            if (   pDetectState->fSupported != UINT32_MAX
                                && pDetectState->fSupported != 0)
                            {
                                if (pGipR0->fGetGipCpu != pDetectState->fSupported)
                                {
                                    pGipR0->fGetGipCpu = pDetectState->fSupported;
                                    LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", pDetectState->fSupported));
                                }
                            }
                            else
                            {
                                LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
                                        pDetectState->fSupported));
                                rc = VERR_UNSUPPORTED_CPU;
                            }
                        }
                        else
                        {
                            LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
                                    pDetectState->idCpuProblem, pDetectState->idCpuProblem));
                            rc = VERR_INVALID_CPU_ID;
                        }
                        RTMemTmpFree(pDetectState);
                    }
                    else
                        rc = VERR_NO_TMP_MEMORY;
                }

                /*
                 * Start the GIP timer if all is well..
                 */
                if (RT_SUCCESS(rc))
                {
#ifndef DO_NOT_START_GIP
                    rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
#endif
                    rc = VINF_SUCCESS;
                }

                /*
                 * Bail out on error.
                 */
                if (RT_FAILURE(rc))
                {
                    LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
                    pDevExt->cGipUsers = 0;
                    pSession->fGipReferenced = 0;
                    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
                    {
                        int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
                        if (RT_SUCCESS(rc2))
                            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
                    }
                    HCPhys = NIL_RTHCPHYS;
                    pGipR3 = NIL_RTR3PTR;
                }
            }
        }
    }
    else
    {
        rc = VERR_GENERAL_FAILURE;
        Log(("SUPR0GipMap: GIP is not available!\n"));
    }
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    /*
     * Write returns.
     */
    if (pHCPhysGip)
        *pHCPhysGip = HCPhys;
    if (ppGipR3)
        *ppGipR3 = pGipR3;

#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#else
    LogFlow(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#endif
    return rc;
}
SUPR0_EXPORT_SYMBOL(SUPR0GipMap);
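
/*
 * A minimal ring-0 usage sketch for SUPR0GipMap/SUPR0GipUnmap (hypothetical
 * caller, not part of this file; assumes a valid session):
 *
 * @code
 *  static int exampleUseGip(PSUPDRVSESSION pSession)
 *  {
 *      RTR3PTR pGipR3 = NIL_RTR3PTR;
 *      RTHCPHYS HCPhysGip = NIL_RTHCPHYS;
 *      int rc = SUPR0GipMap(pSession, &pGipR3, &HCPhysGip);
 *      if (RT_SUCCESS(rc))
 *      {
 *          // ... hand pGipR3 / HCPhysGip to the client ...
 *          rc = SUPR0GipUnmap(pSession); // no ref counting: one unmap per session
 *      }
 *      return rc;
 *  }
 * @endcode
 */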


/**
 * Unmaps any user mapping of the GIP and terminates all GIP access
 * from this session.
 *
 * @returns IPRT status code.
 * @param pSession Session to which the GIP mapping should belong.
 */
SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
{
    int rc = VINF_SUCCESS;
    PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
                pSession,
                pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
                pSession->GipMapObjR3));
#else
    LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
#endif
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    /*
     * GIP test-mode session?
     */
    if (   pSession->fGipTestMode
        && pDevExt->pGip)
    {
        supdrvGipSetFlags(pDevExt, pSession, 0, ~SUPGIP_FLAGS_TESTING_ENABLE);
        Assert(!pSession->fGipTestMode);
    }

    /*
     * Unmap anything?
     */
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
        AssertRC(rc);
        if (RT_SUCCESS(rc))
            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    }

    /*
     * Dereference global GIP.
     */
    if (pSession->fGipReferenced && !rc)
    {
        pSession->fGipReferenced = 0;
        if (   pDevExt->cGipUsers > 0
            && !--pDevExt->cGipUsers)
        {
            LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
#ifndef DO_NOT_START_GIP
            rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
#endif
            supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
        }
    }

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    return rc;
}
SUPR0_EXPORT_SYMBOL(SUPR0GipUnmap);


/**
 * Gets the GIP pointer.
 *
 * @returns Pointer to the GIP or NULL.
 */
SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
{
    return g_pSUPGlobalInfoPage;
}
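
/*
 * Sketch of a consistent GIP per-CPU read (illustrative; assumes the updater's
 * even/odd u32TransactionId scheme, where the ID is odd while an update is in
 * progress and changes across every update):
 *
 * @code
 *  DECLINLINE(uint64_t) exampleGipReadNanoTS(PSUPGLOBALINFOPAGE pGip, uint32_t iCpu)
 *  {
 *      uint64_t u64NanoTS;
 *      uint32_t u32Id;
 *      do
 *      {
 *          u32Id = pGip->aCPUs[iCpu].u32TransactionId;
 *          ASMCompilerBarrier();
 *          u64NanoTS = pGip->aCPUs[iCpu].u64NanoTS;
 *          ASMCompilerBarrier();
 *      } while (   (u32Id & 1)                                   // update in flight
 *               || pGip->aCPUs[iCpu].u32TransactionId != u32Id); // changed underneath us
 *      return u64NanoTS;
 *  }
 * @endcode
 */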





/*
 *
 *
 * GIP Initialization, Termination and CPU Offline / Online Related Code.
 * GIP Initialization, Termination and CPU Offline / Online Related Code.
 * GIP Initialization, Termination and CPU Offline / Online Related Code.
 *
 *
 */

/**
 * Used by supdrvGipInitRefineInvariantTscFreqTimer and supdrvGipInitMeasureTscFreq
 * to update the TSC frequency related GIP variables.
 *
 * @param pGip The GIP.
 * @param nsElapsed The number of nanoseconds elapsed.
 * @param cElapsedTscTicks The corresponding number of TSC ticks.
 * @param iTick The tick number for debugging.
 */
static void supdrvGipInitSetCpuFreq(PSUPGLOBALINFOPAGE pGip, uint64_t nsElapsed, uint64_t cElapsedTscTicks, uint32_t iTick)
{
    /*
     * Calculate the frequency.
     */
    uint64_t uCpuHz;
    if (   cElapsedTscTicks < UINT64_MAX / RT_NS_1SEC
        && nsElapsed < UINT32_MAX)
        uCpuHz = ASMMultU64ByU32DivByU32(cElapsedTscTicks, RT_NS_1SEC, (uint32_t)nsElapsed);
    else
    {
        RTUINT128U CpuHz, Tmp, Divisor;
        CpuHz.s.Lo = CpuHz.s.Hi = 0;
        RTUInt128MulU64ByU64(&Tmp, cElapsedTscTicks, RT_NS_1SEC_64);
        RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, nsElapsed));
        uCpuHz = CpuHz.s.Lo;
    }

    /*
     * Update the GIP.
     */
    ASMAtomicWriteU64(&pGip->u64CpuHz, uCpuHz);
    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    {
        ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, uCpuHz);

        /* For inspecting the frequency calcs using tstGIP-2, debugger or similar. */
        if (iTick + 1 < pGip->cCpus)
            ASMAtomicWriteU64(&pGip->aCPUs[iTick + 1].u64CpuHz, uCpuHz);
    }
}
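
/*
 * Worked example for the fast path above: cElapsedTscTicks = 520 000 000 over
 * nsElapsed = 200 000 000 gives uCpuHz = 520e6 * 1e9 / 2e8 = 2 600 000 000,
 * i.e. a 2.6 GHz TSC. The 128-bit fallback only exists for inputs where the
 * 64-bit intermediate product cElapsedTscTicks * RT_NS_1SEC would overflow.
 */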


/**
 * Timer callback function for TSC frequency refinement in invariant GIP mode.
 *
 * This is started during driver init and fires once
 * GIP_TSC_REFINE_PERIOD_IN_SECS seconds later.
 *
 * @param pTimer The timer.
 * @param pvUser Opaque pointer to the device instance data.
 * @param iTick The timer tick.
 */
static DECLCALLBACK(void) supdrvGipInitRefineInvariantTscFreqTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    RTCPUID idCpu;
    uint64_t cNsElapsed;
    uint64_t cTscTicksElapsed;
    uint64_t nsNow;
    uint64_t uTsc;
    RTCCUINTREG fEFlags;

    /* Paranoia. */
    AssertReturnVoid(pGip);
    AssertReturnVoid(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);

    /*
     * If we got a power event, stop the refinement process.
     */
    if (pDevExt->fInvTscRefinePowerEvent)
    {
        int rc = RTTimerStop(pTimer); AssertRC(rc);
        return;
    }

    /*
     * Read the TSC and time, noting which CPU we are on.
     *
     * Don't bother spinning until RTTimeSystemNanoTS changes, since on
     * systems where it matters we're in a context where we cannot waste that
     * much time (DPC watchdog, called from clock interrupt).
     */
    fEFlags = ASMIntDisableFlags();
    uTsc = ASMReadTSC();
    nsNow = RTTimeSystemNanoTS();
    idCpu = RTMpCpuId();
    ASMSetFlags(fEFlags);

    cNsElapsed = nsNow - pDevExt->nsStartInvarTscRefine;
    cTscTicksElapsed = uTsc - pDevExt->uTscStartInvarTscRefine;

    /*
     * If the above measurement was taken on a different CPU than the one we
     * started the process on, cTscTicksElapsed will need to be adjusted with
     * the TSC deltas of both the CPUs.
     *
     * We ASSUME that the delta calculation process takes less time than the
     * TSC frequency refinement timer. If it doesn't, we'll complain and
     * drop the frequency refinement.
     *
     * Note! We cannot entirely trust enmUseTscDelta here because it's
     *       downgraded after each delta calculation.
     */
    if (   idCpu != pDevExt->idCpuInvarTscRefine
        && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine);
        uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpu);
        uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                              ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
        uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                             ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
        int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
        int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
        if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
        {
            if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
            {
                /* cTscTicksElapsed = (uTsc - iStopTscDelta) - (pDevExt->uTscStartInvarTscRefine - iStartTscDelta); */
                cTscTicksElapsed += iStartTscDelta - iStopTscDelta;
            }
        }
        /*
         * Allow 5 times the refinement period to elapse before we give up on the TSC delta
         * calculations.
         */
        else if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * 5 * RT_NS_1SEC_64)
        {
            SUPR0Printf("vboxdrv: Failed to refine invariant TSC frequency because deltas are unavailable after %u (%u) seconds\n",
                        (uint32_t)(cNsElapsed / RT_NS_1SEC), GIP_TSC_REFINE_PERIOD_IN_SECS);
            SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
                        iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
            int rc = RTTimerStop(pTimer); AssertRC(rc);
            return;
        }
    }

    /*
     * Calculate and update the CPU frequency variables in GIP.
     *
     * If there is a GIP user already and we've already refined the frequency
     * a couple of times, don't update it as we want a stable frequency value
     * for all VMs.
     */
    if (   pDevExt->cGipUsers == 0
        || cNsElapsed < RT_NS_1SEC * 2)
    {
        supdrvGipInitSetCpuFreq(pGip, cNsElapsed, cTscTicksElapsed, (uint32_t)iTick);

        /*
         * Stop the timer once we've reached the defined refinement period.
         */
        if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * RT_NS_1SEC_64)
        {
            int rc = RTTimerStop(pTimer);
            AssertRC(rc);
        }
    }
    else
    {
        int rc = RTTimerStop(pTimer);
        AssertRC(rc);
    }
}


/**
 * @callback_method_impl{FNRTPOWERNOTIFICATION}
 */
static DECLCALLBACK(void) supdrvGipPowerNotificationCallback(RTPOWEREVENT enmEvent, void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    /*
     * If the TSC frequency refinement timer is running, we need to cancel it so it
     * doesn't screw up the frequency after a long suspend.
     *
     * Recalculate all TSC-deltas on host resume as it may have changed, seen
     * on Windows 7 running on the Dell Optiplex Intel Core i5-3570.
     */
    if (enmEvent == RTPOWEREVENT_RESUME)
    {
        ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
        if (   RT_LIKELY(pGip)
            && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
            && !supdrvOSAreCpusOfflinedOnSuspend())
        {
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
            supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
#else
            RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
            supdrvTscMeasureInitialDeltas(pDevExt);
#endif
        }
    }
    else if (enmEvent == RTPOWEREVENT_SUSPEND)
        ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
}


/**
 * Start the TSC-frequency refinement timer for the invariant TSC GIP mode.
 *
 * We cannot use this in the synchronous and asynchronous TSC GIP modes because
 * the CPU may change the TSC frequency between now and when the timer fires
 * (supdrvInitAsyncRefineTscTimer).
 *
 * @param pDevExt Pointer to the device instance data.
 */
static void supdrvGipInitStartTimerForRefiningInvariantTscFreq(PSUPDRVDEVEXT pDevExt)
{
    uint64_t u64NanoTS;
    RTCCUINTREG fEFlags;
    int rc;

    /*
     * Register a power management callback.
     */
    pDevExt->fInvTscRefinePowerEvent = false;
    rc = RTPowerNotificationRegister(supdrvGipPowerNotificationCallback, pDevExt);
    AssertRC(rc); /* ignore */

    /*
     * Record the TSC and NanoTS as the starting anchor point for refinement
     * of the TSC. We try to get as close to a clock tick as possible on
     * systems which do not provide high-resolution time.
     */
    u64NanoTS = RTTimeSystemNanoTS();
    while (RTTimeSystemNanoTS() == u64NanoTS)
        ASMNopPause();

    fEFlags = ASMIntDisableFlags();
    pDevExt->uTscStartInvarTscRefine = ASMReadTSC();
    pDevExt->nsStartInvarTscRefine = RTTimeSystemNanoTS();
    pDevExt->idCpuInvarTscRefine = RTMpCpuId();
    ASMSetFlags(fEFlags);

    /*
     * Create a timer that runs on the same CPU so we won't have a dependency
     * on the TSC-delta and can run in parallel to it. On systems that do not
     * implement CPU-specific timers we'll apply deltas in the timer callback,
     * just like we do for CPUs going offline.
     *
     * The longer the refinement interval the better the accuracy, at least in
     * theory. If it's too long though, ring-3 may already be starting its
     * first VMs before we're done. On most systems we will be loading the
     * support driver during boot and VMs won't be started for a while yet,
     * it is really only a problem during development (especially with
     * on-demand driver starting on Windows).
     *
     * To avoid wasting time doing a long supdrvGipInitMeasureTscFreq() call
     * to calculate the frequency during driver loading, the timer is set
     * to fire after 200 ms the first time. It will then reschedule itself
     * to fire every second until GIP_TSC_REFINE_PERIOD_IN_SECS has been
     * reached or it notices that there is a user land client with GIP
     * mapped (we want a stable frequency for all VMs).
     */
    rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC,
                         RTTIMER_FLAGS_CPU(RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine)),
                         supdrvGipInitRefineInvariantTscFreqTimer, pDevExt);
    if (RT_SUCCESS(rc))
    {
        rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
        if (RT_SUCCESS(rc))
            return;
        RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
    }

    if (rc == VERR_CPU_OFFLINE || rc == VERR_NOT_SUPPORTED)
    {
        rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC, RTTIMER_FLAGS_CPU_ANY,
                             supdrvGipInitRefineInvariantTscFreqTimer, pDevExt);
        if (RT_SUCCESS(rc))
        {
            rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
            if (RT_SUCCESS(rc))
                return;
            RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
        }
    }

    pDevExt->pInvarTscRefineTimer = NULL;
    OSDBGPRINT(("vboxdrv: Failed to create or start TSC frequency refinement timer: rc=%Rrc\n", rc));
}


/**
 * @callback_method_impl{PFNRTMPWORKER,
 *      RTMpOnSpecific callback for reading TSC and time on the CPU we started
 *      the measurements on.}
 */
static DECLCALLBACK(void) supdrvGipInitReadTscAndNanoTsOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    RTCCUINTREG fEFlags = ASMIntDisableFlags();
    uint64_t *puTscStop = (uint64_t *)pvUser1;
    uint64_t *pnsStop = (uint64_t *)pvUser2;
    RT_NOREF1(idCpu);

    *puTscStop = ASMReadTSC();
    *pnsStop = RTTimeSystemNanoTS();

    ASMSetFlags(fEFlags);
}


/**
 * Measures the TSC frequency of the system.
 *
 * The TSC frequency can vary on systems which are not reported as invariant.
 * On such systems the object of this function is to find out what the nominal,
 * maximum TSC frequency under 'normal' CPU operation is.
 *
 * @returns VBox status code.
 * @param pGip Pointer to the GIP.
 * @param fRough Set if we're doing the rough calculation that the
 *               TSC measuring code needs, where accuracy isn't all
 *               that important (too high is better than too low).
 *               When clear we try for best accuracy that we can
 *               achieve in reasonably short time.
 */
static int supdrvGipInitMeasureTscFreq(PSUPGLOBALINFOPAGE pGip, bool fRough)
{
    uint32_t nsTimerIncr = RTTimerGetSystemGranularity();
    int cTriesLeft = fRough ? 4 : 2;
    while (cTriesLeft-- > 0)
    {
        RTCCUINTREG fEFlags;
        uint64_t nsStart;
        uint64_t nsStop;
        uint64_t uTscStart;
        uint64_t uTscStop;
        RTCPUID idCpuStart;
        RTCPUID idCpuStop;

        /*
         * Synchronize with the host OS clock tick on systems without high
         * resolution time API (older Windows version for example).
         */
        nsStart = RTTimeSystemNanoTS();
        while (RTTimeSystemNanoTS() == nsStart)
            ASMNopPause();

        /*
         * Read the TSC and current time, noting which CPU we're on.
         */
        fEFlags = ASMIntDisableFlags();
        uTscStart = ASMReadTSC();
        nsStart = RTTimeSystemNanoTS();
        idCpuStart = RTMpCpuId();
        ASMSetFlags(fEFlags);

        /*
         * Delay for a while.
         */
        if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
        {
            /*
             * Sleep-wait since the TSC frequency is constant, it eases host load.
             * Shorter interval produces more variance in the frequency (esp. Windows).
             */
            uint64_t msElapsed = 0;
            uint64_t msDelay = ( ((fRough ? 16 : 200) * RT_NS_1MS + nsTimerIncr - 1) / nsTimerIncr * nsTimerIncr - RT_NS_100US )
                             / RT_NS_1MS;
            do
            {
                RTThreadSleep((RTMSINTERVAL)(msDelay - msElapsed));
                nsStop = RTTimeSystemNanoTS();
                msElapsed = (nsStop - nsStart) / RT_NS_1MS;
            } while (msElapsed < msDelay);

            while (RTTimeSystemNanoTS() == nsStop)
                ASMNopPause();
        }
        else
        {
            /*
             * Busy-wait keeping the frequency up.
             */
            do
            {
                ASMNopPause();
                nsStop = RTTimeSystemNanoTS();
            } while (nsStop - nsStart < RT_NS_100MS);
        }

        /*
         * Read the TSC and time again.
         */
        fEFlags = ASMIntDisableFlags();
        uTscStop = ASMReadTSC();
        nsStop = RTTimeSystemNanoTS();
        idCpuStop = RTMpCpuId();
        ASMSetFlags(fEFlags);

        /*
         * If the CPU changes, things get a bit complicated and what we
         * can get away with depends on the GIP mode / TSC reliability.
         */
        if (idCpuStop != idCpuStart)
        {
            bool fDoXCall = false;

            /*
             * Synchronous TSC mode: we're probably fine as it's unlikely
             * that we were rescheduled because of TSC throttling or power
             * management reasons, so just go ahead.
             */
            if (pGip->u32Mode == SUPGIPMODE_SYNC_TSC)
            {
                /* Probably ok, maybe we should retry once? */
                Assert(pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_NOT_APPLICABLE);
            }
            /*
             * If we're just doing the rough measurement, do the cross call and
             * get on with things (we don't have deltas!).
             */
            else if (fRough)
                fDoXCall = true;
            /*
             * Invariant TSC mode: It doesn't matter if we have delta available
             * for both CPUs. That is not something we can assume at this point.
             *
             * Note! We cannot necessarily trust enmUseTscDelta here because it's
             *       downgraded after each delta calculation and the delta
             *       calculations may not be complete yet.
             */
            else if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
            {
/** @todo This section of code is never reached atm, consider dropping it later on... */
                if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
                {
                    uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(idCpuStart);
                    uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpuStop);
                    uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                          ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
                    uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                         ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
                    int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
                    int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
                    if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
                    {
                        if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
                        {
                            uTscStart -= iStartTscDelta;
                            uTscStop -= iStopTscDelta;
                        }
                    }
                    /*
                     * Invalid CPU indexes are not caused by online/offline races, so
                     * we have to trigger driver load failure if that happens as GIP
                     * and IPRT assumptions are busted on this system.
                     */
                    else if (iStopGipCpu >= pGip->cCpus || iStartGipCpu >= pGip->cCpus)
                    {
                        SUPR0Printf("vboxdrv: Unexpected CPU index in supdrvGipInitMeasureTscFreq.\n");
                        SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
                                    iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
                        return VERR_INVALID_CPU_INDEX;
                    }
                    /*
                     * No valid deltas. We retry, if we're on our last retry
                     * we do the cross call instead just to get a result. The
                     * frequency will be refined in a few seconds anyway.
                     */
                    else if (cTriesLeft > 0)
                        continue;
                    else
                        fDoXCall = true;
                }
            }
            /*
             * Asynchronous TSC mode: This is bad, as the reason we usually
             * use this mode is to deal with variable TSC frequencies and
             * deltas. So, we need to get the TSC from the same CPU as
             * started it, we also need to keep that CPU busy. So, retry
             * and fall back to the cross call on the last attempt.
             */
            else
            {
                Assert(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC);
                if (cTriesLeft > 0)
                    continue;
                fDoXCall = true;
            }

            if (fDoXCall)
            {
                /*
                 * Try read the TSC and timestamp on the start CPU.
                 */
                int rc = RTMpOnSpecific(idCpuStart, supdrvGipInitReadTscAndNanoTsOnCpu, &uTscStop, &nsStop);
                if (RT_FAILURE(rc) && (!fRough || cTriesLeft > 0))
                    continue;
            }
        }

        /*
         * Calculate the TSC frequency and update it (shared with the refinement timer).
         */
        supdrvGipInitSetCpuFreq(pGip, nsStop - nsStart, uTscStop - uTscStart, 0);
        return VINF_SUCCESS;
    }

    Assert(!fRough);
    return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
}
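
/*
 * Worked example for the sleep-wait above, assuming a Windows-like 64 Hz
 * system timer (nsTimerIncr = 15 625 000) and fRough == false:
 *   msDelay = (ceil(200ms / 15.625ms) * 15.625ms - 100us) / 1ms
 *           = (13 * 15 625 000 - 100 000) / 1 000 000 = 203 ms,
 * i.e. the delay is rounded up to whole timer ticks minus a small margin so
 * the final tick-edge spin stays short.
 */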


/**
 * Finds our (@a idCpu) entry, or allocates a new one if not found.
 *
 * @returns Index of the CPU in the cache set.
 * @param pGip The GIP.
 * @param idCpu The CPU ID.
 */
static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
{
    uint32_t i, cTries;

    /*
     * ASSUMES that CPU IDs are constant.
     */
    for (i = 0; i < pGip->cCpus; i++)
        if (pGip->aCPUs[i].idCpu == idCpu)
            return i;

    cTries = 0;
    do
    {
        for (i = 0; i < pGip->cCpus; i++)
        {
            bool fRc;
            ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
            if (fRc)
                return i;
        }
    } while (cTries++ < 32);
    AssertReleaseFailed();
    return i - 1;
}


/**
 * The calling CPU should be accounted as online, update GIP accordingly.
 *
 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
 *
 * @param pDevExt The device extension.
 * @param idCpu The CPU ID.
 */
static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    int iCpuSet = 0;
    uint32_t idApic;
    uint32_t i = 0;
    uint64_t u64NanoTS = 0;

    AssertPtrReturnVoid(pGip);
    Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
    AssertRelease(idCpu == RTMpCpuId());
    Assert(pGip->cPossibleCpus == RTMpGetCount());

    /*
     * Do this behind a spinlock with interrupts disabled as this can fire
     * on all CPUs simultaneously, see @bugref{6110}.
     */
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    /*
     * Update the globals.
     */
    ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
    ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    if (iCpuSet >= 0)
    {
        Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
        RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
        RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
    }

    /*
     * Update the entry.
     */
    u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
    i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);

    supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, pGip->u64CpuHz);

    idApic = supdrvGipGetApicIdSlow();
    ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
    ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
    ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);

    pGip->aCPUs[i].iCpuGroup = 0;
    pGip->aCPUs[i].iCpuGroupMember = iCpuSet;
#ifdef RT_OS_WINDOWS
    supdrvOSGipInitGroupBitsForCpu(pDevExt, pGip, &pGip->aCPUs[i]);
#endif

    /*
     * Update the APIC ID and CPU set index mappings.
     */
    if (idApic < RT_ELEMENTS(pGip->aiCpuFromApicId))
        ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
    else
        LogRelMax(64, ("supdrvGipMpEventOnlineOrInitOnCpu: idApic=%#x is out of bounds (%#zx, i=%u, iCpuSet=%d)\n",
                       idApic, RT_ELEMENTS(pGip->aiCpuFromApicId), i, iCpuSet));
    if ((unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
        ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
    else
        LogRelMax(64, ("supdrvGipMpEventOnlineOrInitOnCpu: iCpuSet=%d is out of bounds (%#zx, i=%u, idApic=%d)\n",
                       iCpuSet, RT_ELEMENTS(pGip->aiCpuFromApicId), i, idApic));

    /* Add this CPU to this set of CPUs we need to calculate the TSC-delta for. */
    RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, RTMpCpuIdToSetIndex(idCpu));

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);

    /* Commit it. */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}


/**
 * RTMpOnSpecific callback wrapper for supdrvGipMpEventOnlineOrInitOnCpu().
 *
 * @param idCpu The CPU ID we are running on.
 * @param pvUser1 Opaque pointer to the device instance data.
 * @param pvUser2 Not used.
 */
static DECLCALLBACK(void) supdrvGipMpEventOnlineCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
    NOREF(pvUser2);
    supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
}


/**
 * The CPU should be accounted as offline, update the GIP accordingly.
 *
 * This is used by supdrvGipMpEvent.
 *
 * @param pDevExt The device extension.
 * @param idCpu The CPU ID.
 */
static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    int iCpuSet;
    unsigned i;

    AssertPtrReturnVoid(pGip);
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    AssertReturnVoid(iCpuSet >= 0);

    i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    AssertReturnVoid(i < pGip->cCpus);
    AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);

    Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
    RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);

    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        /* Reset the TSC delta, we will recalculate it lazily. */
        ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
        /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
        RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
    }

    /* Commit it. */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}


/**
 * Multiprocessor event notification callback.
 *
 * This is used to make sure that the GIP master gets passed on to
 * another CPU. It also updates the associated CPU data.
 *
 * @param enmEvent The event.
 * @param idCpu The cpu it applies to.
 * @param pvUser Pointer to the device extension.
 */
static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    if (pGip)
    {
        RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
        switch (enmEvent)
        {
            case RTMPEVENT_ONLINE:
            {
                RTThreadPreemptDisable(&PreemptState);
                if (idCpu == RTMpCpuId())
                {
                    supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
                    RTThreadPreemptRestore(&PreemptState);
                }
                else
                {
                    RTThreadPreemptRestore(&PreemptState);
                    RTMpOnSpecific(idCpu, supdrvGipMpEventOnlineCallback, pDevExt, NULL /* pvUser2 */);
                }

                /*
                 * Recompute TSC-delta for the newly online'd CPU.
                 */
                if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
                {
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
                    supdrvTscDeltaThreadStartMeasurement(pDevExt, false /* fForceAll */);
#else
                    uint32_t iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
                    supdrvTscMeasureDeltaOne(pDevExt, iCpu);
#endif
                }
                break;
            }

            case RTMPEVENT_OFFLINE:
                supdrvGipMpEventOffline(pDevExt, idCpu);
                break;
        }
    }

    /*
     * Make sure there is a master GIP.
     */
    if (enmEvent == RTMPEVENT_OFFLINE)
    {
        RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
        if (idGipMaster == idCpu)
        {
            /*
             * The GIP master is going offline, find a new one.
             */
            bool fIgnored;
            unsigned i;
            RTCPUID idNewGipMaster = NIL_RTCPUID;
            RTCPUSET OnlineCpus;
            RTMpGetOnlineSet(&OnlineCpus);

            for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
                if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
                {
                    RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
                    if (idCurCpu != idGipMaster)
                    {
                        idNewGipMaster = idCurCpu;
                        break;
                    }
                }

            Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
            ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
            NOREF(fIgnored);
        }
    }
}


/**
 * On CPU initialization callback for RTMpOnAll.
 *
 * @param idCpu The CPU ID.
 * @param pvUser1 The device extension.
 * @param pvUser2 The GIP.
 */
static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    /* This is good enough, even though it will update some of the globals a
       bit too much. */
    supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
    NOREF(pvUser2);
}


/**
 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
 *
 * @param idCpu Ignored.
 * @param pvUser1 Where to put the TSC.
 * @param pvUser2 Ignored.
 */
static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    Assert(RTMpCpuIdToSetIndex(idCpu) == (intptr_t)pvUser2);
    ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
    RT_NOREF2(idCpu, pvUser2);
}


/**
 * Determine if Async GIP mode is required because of TSC drift.
 *
1724 * When using the default/normal timer code it is essential that the time stamp counter
1725 * (TSC) never runs backwards, that is, a read operation of the counter must return
1726 * a bigger value than any previous read operation. This is guaranteed by the latest
1727 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
1728 * case we have to choose the asynchronous timer mode.
1729 *
1730 * @param poffMin Pointer to the determined difference between different
1731 * cores (optional, can be NULL).
1732 * @return false if the time stamp counters appear to be synchronized, true otherwise.
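 *
 * In essence, the loop below does this (a sketch; the real code also gathers
 * min/max statistics and copes with RTMpOnSpecific failures):
 * @code
 *     uint64_t uPrev = ASMReadTSC();
 *     for (int cLoops = 8; cLoops-- > 0;)
 *         for (int iCpu = 0; iCpu < iEndCpu; iCpu++)
 *         {
 *             uint64_t uCur;
 *             RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker,
 *                            &uCur, (void *)(uintptr_t)iCpu);
 *             if (uCur <= uPrev)
 *                 return true; // TSC went backwards across CPUs => ASYNC mode.
 *             uPrev = uCur;
 *         }
 *     return false;
 * @endcode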
1733 */
1734static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
1735{
1736 /*
1737 * Just iterate all the CPUs 8 times and make sure that the TSC is
1738 * strictly increasing. We don't bother taking TSC rollover into account.
1739 */
1740 int iEndCpu = RTMpGetArraySize();
1741 int iCpu;
1742 int cLoops = 8;
1743 bool fAsync = false;
1744 int rc = VINF_SUCCESS;
1745 uint64_t offMax = 0;
1746 uint64_t offMin = ~(uint64_t)0;
1747 uint64_t PrevTsc = ASMReadTSC();
1748
1749 while (cLoops-- > 0)
1750 {
1751 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
1752 {
1753 uint64_t CurTsc;
1754 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker,
1755 &CurTsc, (void *)(uintptr_t)iCpu);
1756 if (RT_SUCCESS(rc))
1757 {
1758 if (CurTsc <= PrevTsc)
1759 {
1760 fAsync = true;
1761 offMin = offMax = PrevTsc - CurTsc;
1762 Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
1763 iCpu, cLoops, CurTsc, PrevTsc));
1764 break;
1765 }
1766
1767 /* Gather statistics (except the first time). */
1768 if (iCpu != 0 || cLoops != 7)
1769 {
1770 uint64_t off = CurTsc - PrevTsc;
1771 if (off < offMin)
1772 offMin = off;
1773 if (off > offMax)
1774 offMax = off;
1775 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
1776 }
1777
1778 /* Next */
1779 PrevTsc = CurTsc;
1780 }
1781 else if (rc == VERR_NOT_SUPPORTED)
1782 break;
1783 else
1784 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
1785 }
1786
1787 /* If we broke out of the inner loop, break out of the outer one too. */
1788 if (iCpu < iEndCpu)
1789 break;
1790 }
1791
1792 if (poffMin)
1793 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
1794 Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
1795 fAsync, iEndCpu, rc, offMin, offMax));
1796#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
1797 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
1798#endif
1799 return fAsync;
1800}
1801
1802
1803/**
1804 * supdrvGipInit() worker that determines the GIP TSC mode.
1805 *
1806 * @returns The most suitable TSC mode.
1807 * @param pDevExt Pointer to the device instance data.
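 *
 * The invariant-TSC probe used below boils down to checking the TscInvariant
 * bit in CPUID leaf 0x80000007, e.g.:
 * @code
 *     if (ASMCpuId_EAX(0x80000000) >= 0x80000007)
 *         fInvariantTsc = RT_BOOL(ASMCpuId_EDX(0x80000007) & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR);
 * @endcode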
1808 */
1809static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
1810{
1811#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1812 uint64_t u64DiffCoresIgnored;
1813 uint32_t uEAX, uEBX, uECX, uEDX;
1814
1815 /*
1816 * Establish whether the CPU advertises TSC as invariant, we need that in
1817 * a couple of places below.
1818 */
1819 bool fInvariantTsc = false;
1820 if (ASMHasCpuId())
1821 {
1822 uEAX = ASMCpuId_EAX(0x80000000);
1823 if (RTX86IsValidExtRange(uEAX) && uEAX >= 0x80000007)
1824 {
1825 uEDX = ASMCpuId_EDX(0x80000007);
1826 if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
1827 fInvariantTsc = true;
1828 }
1829 }
1830
1831 /*
1832 * On single CPU systems, we don't need to consider ASYNC mode.
1833 */
1834 if (RTMpGetCount() <= 1)
1835 return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
1836
1837 /*
1838 * Allow the user and/or OS specific bits to force async mode.
1839 */
1840 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
1841 return SUPGIPMODE_ASYNC_TSC;
1842
1843 /*
1844 * Use invariant mode if the CPU says TSC is invariant.
1845 */
1846 if (fInvariantTsc)
1847 return SUPGIPMODE_INVARIANT_TSC;
1848
1849 /*
1850 * TSC is not invariant and we're on SMP, this presents two problems:
1851 *
1852 * (1) There might be a skew between the CPUs, so that cpu0
1853 * returns a TSC that is slightly different from cpu1's.
1854 * This skew may be due to (2), bad TSC initialization,
1855 * or slightly different TSC rates.
1856 *
1857 * (2) Power management (and other things) may cause the TSC
1858 * to run at a non-constant speed, and cause the speed
1859 * to be different on the cpus. This will result in (1).
1860 *
1861 * If any of the above is detected, we will have to use ASYNC mode.
1862 */
1863 /* (1) Try to check for current differences between the CPUs. */
1864 if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
1865 return SUPGIPMODE_ASYNC_TSC;
1866
1867 /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
1868 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1869 if ( RTX86IsValidStdRange(uEAX)
1870 && (RTX86IsAmdCpu(uEBX, uECX, uEDX) || RTX86IsHygonCpu(uEBX, uECX, uEDX)) )
1871 {
1872 /* Check for APM support. */
1873 uEAX = ASMCpuId_EAX(0x80000000);
1874 if (RTX86IsValidExtRange(uEAX) && uEAX >= 0x80000007)
1875 {
1876 uEDX = ASMCpuId_EDX(0x80000007);
1877 if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
1878 return SUPGIPMODE_ASYNC_TSC;
1879 }
1880 }
1881
1882 return SUPGIPMODE_SYNC_TSC;
1883
1884#elif defined(RT_ARCH_ARM64)
1885 RT_NOREF(pDevExt);
1886 return SUPGIPMODE_INVARIANT_TSC;
1887
1888#else
1889# error "Port me"
1890#endif
1891}
1892
1893
1894/**
1895 * Initializes per-CPU GIP information.
1896 *
1897 * @param pGip Pointer to the GIP.
1898 * @param pCpu Pointer to which GIP CPU to initialize.
1899 * @param u64NanoTS The current nanosecond timestamp.
1900 * @param uCpuHz The CPU frequency to set, 0 if the caller doesn't know.
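 *
 * Interval seeding example (matches the code below): with the 4 GHz
 * placeholder frequency and e.g. a 100 Hz update rate, the per-update
 * interval comes out as (4294967295 / 100) = ~43 million TSC ticks, which
 * is also replicated into all eight au32TSCHistory entries.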
1901 */
1902static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz)
1903{
1904 pCpu->u32TransactionId = 2;
1905 pCpu->u64NanoTS = u64NanoTS;
1906 pCpu->u64TSC = ASMReadTSC();
1907 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
1908 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
1909
1910 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
1911 ASMAtomicWriteU32(&pCpu->idCpu, NIL_RTCPUID);
1912 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
1913 ASMAtomicWriteU16(&pCpu->iCpuGroup, 0);
1914 ASMAtomicWriteU16(&pCpu->iCpuGroupMember, UINT16_MAX);
1915 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
1916 ASMAtomicWriteU32(&pCpu->iReservedForNumaNode, 0);
1917
1918 /*
1919 * The first time we're called, we don't have a CPU frequency handy,
1920 * so pretend it's a 4 GHz CPU. On CPUs that are online, we'll get
1921 * called again and at that point we have a more plausible CPU frequency
1922 * value handy. The frequency history will also be adjusted again on
1923 * the 2nd timer callout (maybe we can skip that now?).
1924 */
1925 if (!uCpuHz)
1926 {
1927 pCpu->u64CpuHz = _4G - 1;
1928 pCpu->u32UpdateIntervalTSC = (uint32_t)((_4G - 1) / pGip->u32UpdateHz);
1929 }
1930 else
1931 {
1932 pCpu->u64CpuHz = uCpuHz;
1933 pCpu->u32UpdateIntervalTSC = (uint32_t)(uCpuHz / pGip->u32UpdateHz);
1934 }
1935 pCpu->au32TSCHistory[0]
1936 = pCpu->au32TSCHistory[1]
1937 = pCpu->au32TSCHistory[2]
1938 = pCpu->au32TSCHistory[3]
1939 = pCpu->au32TSCHistory[4]
1940 = pCpu->au32TSCHistory[5]
1941 = pCpu->au32TSCHistory[6]
1942 = pCpu->au32TSCHistory[7]
1943 = pCpu->u32UpdateIntervalTSC;
1944}
1945
1946
1947/**
1948 * Initializes the GIP data.
1949 *
1950 * @returns VBox status code.
1951 * @param pDevExt Pointer to the device instance data.
1952 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1953 * @param HCPhys The physical address of the GIP.
1954 * @param u64NanoTS The current nanosecond timestamp.
1955 * @param uUpdateHz The update frequency.
1956 * @param uUpdateIntervalNS The update interval in nanoseconds.
1957 * @param cCpus The CPU count.
1958 * @param cbGipCpuGroups The supdrvOSGipGetGroupTableSize return value we
1959 * used when allocating the GIP structure.
1960 */
1961static int supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
1962 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS,
1963 unsigned cCpus, size_t cbGipCpuGroups)
1964{
1965 size_t const cbGip = RT_ALIGN_Z(RT_UOFFSETOF_DYN(SUPGLOBALINFOPAGE, aCPUs[cCpus]) + cbGipCpuGroups, PAGE_SIZE);
1966 unsigned i;
1967#ifdef DEBUG_DARWIN_GIP
1968 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1969#else
1970 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1971#endif
1972
1973 /*
1974 * Initialize the structure.
1975 */
1976 memset(pGip, 0, cbGip);
1977
1978 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
1979 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
1980 pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
1981 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
1982 /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
1983 pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
1984 ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
1985 else
1986 pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
1987 pGip->cCpus = (uint16_t)cCpus;
1988 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
1989 pGip->u32UpdateHz = uUpdateHz;
1990 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
1991 pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
1992 RTCpuSetEmpty(&pGip->OnlineCpuSet);
1993 RTCpuSetEmpty(&pGip->PresentCpuSet);
1994 RTMpGetSet(&pGip->PossibleCpuSet);
1995 pGip->cOnlineCpus = RTMpGetOnlineCount();
1996 pGip->cPresentCpus = RTMpGetPresentCount();
1997 pGip->cPossibleCpus = RTMpGetCount();
1998 pGip->cPossibleCpuGroups = 1;
1999 pGip->idCpuMax = RTMpGetMaxCpuId();
2000 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
2001 pGip->aiCpuFromApicId[i] = UINT16_MAX;
2002 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
2003 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
2004 for (i = 0; i < RT_ELEMENTS(pGip->aoffCpuGroup); i++)
2005 pGip->aoffCpuGroup[i] = UINT32_MAX;
2006 for (i = 0; i < cCpus; i++)
2007 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, 0 /*uCpuHz*/);
2008#ifdef RT_OS_WINDOWS
2009 int rc = supdrvOSInitGipGroupTable(pDevExt, pGip, cbGipCpuGroups);
2010 AssertRCReturn(rc, rc);
2011#endif
2012
2013 /*
2014 * Link it to the device extension.
2015 */
2016 pDevExt->pGip = pGip;
2017 pDevExt->HCPhysGip = HCPhys;
2018 pDevExt->cGipUsers = 0;
2019
2020 return VINF_SUCCESS;
2021}
2022
2023
2024/**
2025 * Creates the GIP.
2026 *
2027 * @returns VBox status code.
2028 * @param pDevExt Instance data. GIP stuff may be updated.
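 *
 * Update interval selection sketch (mirrors the code below; the 4 ms
 * granularity is a made-up example value):
 * @code
 *     u32Interval = u32MinInterval = RT_NS_10MS;     // 10 000 000 ns
 *     uMod = u32MinInterval % u32SystemResolution;   // e.g. % 4 000 000 = 2 000 000
 *     if (uMod)
 *         u32Interval += u32SystemResolution - uMod; // => 12 000 000 ns (12 ms)
 * @endcode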
2029 */
2030int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
2031{
2032 PSUPGLOBALINFOPAGE pGip;
2033 size_t cbGip;
2034 size_t cbGipCpuGroups;
2035 RTHCPHYS HCPhysGip;
2036 uint32_t u32SystemResolution;
2037 uint32_t u32Interval;
2038 uint32_t u32MinInterval;
2039 uint32_t uMod;
2040 unsigned cCpus;
2041 int rc;
2042
2043 LogFlow(("supdrvGipCreate:\n"));
2044
2045 /*
2046 * Assert order.
2047 */
2048 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
2049 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
2050 Assert(!pDevExt->pGipTimer);
2051#ifdef SUPDRV_USE_MUTEX_FOR_GIP
2052 Assert(pDevExt->mtxGip != NIL_RTSEMMUTEX);
2053 Assert(pDevExt->mtxTscDelta != NIL_RTSEMMUTEX);
2054#else
2055 Assert(pDevExt->mtxGip != NIL_RTSEMFASTMUTEX);
2056 Assert(pDevExt->mtxTscDelta != NIL_RTSEMFASTMUTEX);
2057#endif
2058
2059 /*
2060 * Check the CPU count.
2061 */
2062 cCpus = RTMpGetArraySize();
2063 if (cCpus > RT_MIN(RTCPUSET_MAX_CPUS, RT_ELEMENTS(pGip->aiCpuFromApicId)))
2064 {
2065 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, RT_ELEMENTS(pGip->aiCpuFromApicId)));
2066 return VERR_TOO_MANY_CPUS;
2067 }
2068
2069 /*
2070 * Allocate a contiguous set of pages with a default kernel mapping.
2071 */
2072#ifdef RT_OS_WINDOWS
2073 cbGipCpuGroups = supdrvOSGipGetGroupTableSize(pDevExt);
2074#else
2075 cbGipCpuGroups = 0;
2076#endif
2077 cbGip = RT_UOFFSETOF_DYN(SUPGLOBALINFOPAGE, aCPUs[cCpus]) + cbGipCpuGroups;
2078 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, cbGip, NIL_RTHCPHYS /*PhysHighest*/, false /*fExecutable*/);
2079 if (RT_FAILURE(rc))
2080 {
2081 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
2082 return rc;
2083 }
2084 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
2085 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
2086
2087 /*
2088 * Find a reasonable update interval and initialize the structure.
2089 */
2090 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
2091 /** @todo figure out why using a 100 ms interval upsets timekeeping in VMs.
2092 * See @bugref{6710}. */
2093 u32MinInterval = RT_NS_10MS;
2094 u32SystemResolution = RTTimerGetSystemGranularity();
2095 u32Interval = u32MinInterval;
2096 uMod = u32MinInterval % u32SystemResolution;
2097 if (uMod)
2098 u32Interval += u32SystemResolution - uMod;
2099
2100 rc = supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval,
2101 cCpus, cbGipCpuGroups);
2102
2103 /*
2104 * Important sanity check... (Sets rc)
2105 */
2106 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
2107 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
2108 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
2109 {
2110 OSDBGPRINT(("supdrvGipCreate: Host-OS/user claims the TSC-deltas are zero but we detected async. TSC! Bad.\n"));
2111 rc = VERR_INTERNAL_ERROR_2;
2112 }
2113
2114 /* It doesn't make sense to do TSC-delta detection on systems we detect as async. */
2115 AssertStmt( pGip->u32Mode != SUPGIPMODE_ASYNC_TSC
2116 || pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED,
2117 rc = VERR_INTERNAL_ERROR_3);
2118
2119 /*
2120 * Do the TSC frequency measurements.
2121 *
2122 * If we're in invariant TSC mode, just do a quick preliminary measurement
2123 * that the TSC-delta measurement code can use to yield cross calls.
2124 *
2125 * If we're in either of the other two modes, neither of which requires MP init,
2126 * notifications or deltas for the job, do the full measurement now so
2127 * that supdrvGipInitOnCpu() can populate the TSC interval and history
2128 * array with more reasonable values.
2129 */
2130 if (RT_SUCCESS(rc))
2131 {
2132 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
2133 {
2134 rc = supdrvGipInitMeasureTscFreq(pGip, true /*fRough*/); /* cannot fail */
2135 supdrvGipInitStartTimerForRefiningInvariantTscFreq(pDevExt);
2136 }
2137 else
2138 rc = supdrvGipInitMeasureTscFreq(pGip, false /*fRough*/);
2139 if (RT_SUCCESS(rc))
2140 {
2141 /*
2142 * Start TSC-delta measurement thread before we start getting MP
2143 * events that will try kick it into action (includes the
2144 * RTMpOnAll/supdrvGipInitOnCpu call below).
2145 */
2146 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
2147 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
2148#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2149 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
2150 rc = supdrvTscDeltaThreadInit(pDevExt);
2151#endif
2152 if (RT_SUCCESS(rc))
2153 {
2154 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
2155 if (RT_SUCCESS(rc))
2156 {
2157 /*
2158 * Do GIP initialization on all online CPUs. Wake up the
2159 * TSC-delta thread afterwards.
2160 */
2161 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
2162 if (RT_SUCCESS(rc))
2163 {
2164#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2165 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
2166#else
2167 uint16_t iCpu;
2168 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
2169 {
2170 /*
2171 * Measure the TSC deltas now that we have MP notifications.
2172 */
2173 int cTries = 5;
2174 do
2175 {
2176 rc = supdrvTscMeasureInitialDeltas(pDevExt);
2177 if ( rc != VERR_TRY_AGAIN
2178 && rc != VERR_CPU_OFFLINE)
2179 break;
2180 } while (--cTries > 0);
2181 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
2182 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
2183 }
2184 else
2185 {
2186 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
2187 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
2188 }
2189 if (RT_SUCCESS(rc))
2190#endif
2191 {
2192 /*
2193 * Create the timer.
2194 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
2195 */
2196 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
2197 {
2198 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
2199 supdrvGipAsyncTimer, pDevExt);
2200 if (rc == VERR_NOT_SUPPORTED)
2201 {
2202 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
2203 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
2204 }
2205 }
2206 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2207 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
2208 supdrvGipSyncAndInvariantTimer, pDevExt);
2209 if (RT_SUCCESS(rc))
2210 {
2211 /*
2212 * We're good.
2213 */
2214 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
2215 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
2216
2217 g_pSUPGlobalInfoPage = pGip;
2218 return VINF_SUCCESS;
2219 }
2220
2221 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
2222 Assert(!pDevExt->pGipTimer);
2223 }
2224 }
2225 else
2226 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
2227 }
2228 else
2229 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notification. rc=%Rrc\n", rc));
2230 }
2231 else
2232 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaThreadInit failed. rc=%Rrc\n", rc));
2233 }
2234 else
2235 OSDBGPRINT(("supdrvGipCreate: supdrvGipInitMeasureTscFreq failed. rc=%Rrc\n", rc));
2236 }
2237
2238 /* Releases timer frequency increase too. */
2239 supdrvGipDestroy(pDevExt);
2240 return rc;
2241}
2242
2243
2244/**
2245 * Invalidates the GIP data upon termination.
2246 *
2247 * @param pGip Pointer to the read-write kernel mapping of the GIP.
2248 */
2249static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
2250{
2251 unsigned i;
2252 pGip->u32Magic = 0;
2253 for (i = 0; i < pGip->cCpus; i++)
2254 {
2255 pGip->aCPUs[i].u64NanoTS = 0;
2256 pGip->aCPUs[i].u64TSC = 0;
2257 pGip->aCPUs[i].iTSCHistoryHead = 0;
2258 pGip->aCPUs[i].u64TSCSample = 0;
2259 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
2260 }
2261}
2262
2263
2264/**
2265 * Terminates the GIP.
2266 *
2267 * @param pDevExt Instance data. GIP stuff may be updated.
2268 */
2269void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
2270{
2271 int rc;
2272#ifdef DEBUG_DARWIN_GIP
2273 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
2274 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
2275 pDevExt->pGipTimer, pDevExt->GipMemObj));
2276#endif
2277
2278 /*
2279 * Stop receiving MP notifications before tearing anything else down.
2280 */
2281 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
2282
2283#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2284 /*
2285 * Terminate the TSC-delta measurement thread and resources.
2286 */
2287 supdrvTscDeltaTerm(pDevExt);
2288#endif
2289
2290 /*
2291 * Destroy the TSC-refinement timer.
2292 */
2293 if (pDevExt->pInvarTscRefineTimer)
2294 {
2295 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
2296 pDevExt->pInvarTscRefineTimer = NULL;
2297 }
2298
2299 /*
2300 * Invalidate the GIP data.
2301 */
2302 if (pDevExt->pGip)
2303 {
2304 supdrvGipTerm(pDevExt->pGip);
2305 pDevExt->pGip = NULL;
2306 }
2307 g_pSUPGlobalInfoPage = NULL;
2308
2309 /*
2310 * Destroy the timer and free the GIP memory object.
2311 */
2312 if (pDevExt->pGipTimer)
2313 {
2314 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
2315 pDevExt->pGipTimer = NULL;
2316 }
2317
2318 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
2319 {
2320 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
2321 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
2322 }
2323
2324 /*
2325 * Finally, make sure we've released the system timer resolution request
2326 * if one actually succeeded and is still pending.
2327 */
2328 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
2329}
2330
2331
2332
2333
2334/*
2335 *
2336 *
2337 * GIP Update Timer Related Code
2338 * GIP Update Timer Related Code
2339 * GIP Update Timer Related Code
2340 *
2341 *
2342 */
2343
2344
2345/**
2346 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
2347 * updates all the per cpu data except the transaction id.
2348 *
2349 * @param pDevExt The device extension.
2350 * @param pGipCpu Pointer to the per cpu data.
2351 * @param u64NanoTS The current time stamp.
2352 * @param u64TSC The current TSC.
2353 * @param iTick The current timer tick.
2354 *
2355 * @remarks Can be called with interrupts disabled!
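 *
 * The frequency math below, in brief (this is the invariant / >= 1000 Hz
 * branch; h[] is the au32TSCHistory ring):
 * @code
 *     u32UpdateIntervalTSC = ((h[0]+h[1]+h[2]+h[3]) / 4 + (h[4]+h[5]+h[6]+h[7]) / 4) / 2;
 *     u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC) / pGip->u32UpdateIntervalNS;
 * @endcode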
2356 */
2357static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
2358{
2359 uint64_t u64TSCDelta;
2360 bool fUpdateCpuHz;
2361 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2362 AssertPtrReturnVoid(pGip);
2363
2364 /* Delta between this and the previous update. */
2365 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
2366
2367 /*
2368 * Update the NanoTS.
2369 */
2370 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
2371
2372 /*
2373 * Calc TSC delta.
2374 */
2375 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
2376 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
2377
2378 /*
2379 * Determine if we need to update the CPU (TSC) frequency calculation.
2380 *
2381 * We don't need to keep recalculating the frequency when it's invariant,
2382 * unless the special tstGIP-2 testing mode is enabled.
2383 */
2384 fUpdateCpuHz = pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC;
2385 if (!(pGip->fFlags & SUPGIP_FLAGS_TESTING))
2386 { /* likely */ }
2387 else
2388 {
2389 uint32_t fGipFlags = pGip->fFlags;
2390 if (fGipFlags & (SUPGIP_FLAGS_TESTING_ENABLE | SUPGIP_FLAGS_TESTING_START))
2391 {
2392 if (fGipFlags & SUPGIP_FLAGS_TESTING_START)
2393 {
2394 /* Cache the TSC frequency before forcing updates due to test mode. */
2395 if (!fUpdateCpuHz)
2396 pDevExt->uGipTestModeInvariantCpuHz = pGip->aCPUs[0].u64CpuHz;
2397 ASMAtomicAndU32(&pGip->fFlags, ~SUPGIP_FLAGS_TESTING_START);
2398 }
2399 fUpdateCpuHz = true;
2400 }
2401 else if (fGipFlags & SUPGIP_FLAGS_TESTING_STOP)
2402 {
2403 /* Restore the cached TSC frequency if any. */
2404 if (!fUpdateCpuHz)
2405 {
2406 Assert(pDevExt->uGipTestModeInvariantCpuHz);
2407 ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, pDevExt->uGipTestModeInvariantCpuHz);
2408 }
2409 ASMAtomicAndU32(&pGip->fFlags, ~(SUPGIP_FLAGS_TESTING_STOP | SUPGIP_FLAGS_TESTING));
2410 }
2411 }
2412
2413 /*
2414 * Calculate the CPU (TSC) frequency if necessary.
2415 */
2416 if (fUpdateCpuHz)
2417 {
2418 uint64_t u64CpuHz;
2419 uint32_t u32UpdateIntervalTSC;
2420 uint32_t u32UpdateIntervalTSCSlack;
2421 uint32_t u32TransactionId;
2422 unsigned iTSCHistoryHead;
2423
2424 if (u64TSCDelta >> 32)
2425 {
2426 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
2427 pGipCpu->cErrors++;
2428 }
2429
2430 /*
2431 * On the 2nd and 3rd callout, reset the history with the current TSC
2432 * interval since the values entered by supdrvGipInit are totally off.
2433 * The interval on the 1st callout is completely unreliable, the 2nd is a bit
2434 * better, while the 3rd should be the most reliable.
2435 */
2436 /** @todo Could we drop this now that we initialize the history
2437 * with nominal TSC frequency values? */
2438 u32TransactionId = pGipCpu->u32TransactionId;
2439 if (RT_UNLIKELY( ( u32TransactionId == 5
2440 || u32TransactionId == 7)
2441 && ( iTick == 2
2442 || iTick == 3) ))
2443 {
2444 unsigned i;
2445 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
2446 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
2447 }
2448
2449 /*
2450 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
2451 * Wait until we have at least one full history since the above history reset. The
2452 * assumption is that the majority of the previous history values will be tolerable.
2453 * See @bugref{6710#c67}.
2454 */
2455 /** @todo Could we drop the fudging here now that we initialize the history
2456 * with nominal TSC frequency values? */
2457 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
2458 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2459 {
2460 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
2461 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
2462 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
2463 {
2464 uint32_t u32;
2465 u32 = pGipCpu->au32TSCHistory[0];
2466 u32 += pGipCpu->au32TSCHistory[1];
2467 u32 += pGipCpu->au32TSCHistory[2];
2468 u32 += pGipCpu->au32TSCHistory[3];
2469 u32 >>= 2;
2470 u64TSCDelta = pGipCpu->au32TSCHistory[4];
2471 u64TSCDelta += pGipCpu->au32TSCHistory[5];
2472 u64TSCDelta += pGipCpu->au32TSCHistory[6];
2473 u64TSCDelta += pGipCpu->au32TSCHistory[7];
2474 u64TSCDelta >>= 2;
2475 u64TSCDelta += u32;
2476 u64TSCDelta >>= 1;
2477 }
2478 }
2479
2480 /*
2481 * TSC History.
2482 */
2483 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
2484 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
2485 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
2486 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
2487
2488 /*
2489 * UpdateIntervalTSC = average of last 8, 2 or 1 interval(s) depending on update Hz.
2490 *
2491 * On Windows, we have an occasional (but recurring) sour value that messes up
2492 * the history, but taking only one interval reduces the overall precision.
2493 */
2494 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
2495 || pGip->u32UpdateHz >= 1000)
2496 {
2497 uint32_t u32;
2498 u32 = pGipCpu->au32TSCHistory[0];
2499 u32 += pGipCpu->au32TSCHistory[1];
2500 u32 += pGipCpu->au32TSCHistory[2];
2501 u32 += pGipCpu->au32TSCHistory[3];
2502 u32 >>= 2;
2503 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
2504 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
2505 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
2506 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
2507 u32UpdateIntervalTSC >>= 2;
2508 u32UpdateIntervalTSC += u32;
2509 u32UpdateIntervalTSC >>= 1;
2510
2511 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
2512 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
2513 }
2514 else if (pGip->u32UpdateHz >= 90)
2515 {
2516 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2517 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
2518 u32UpdateIntervalTSC >>= 1;
2519
2520 /* value chosen on a 2GHz thinkpad running windows */
2521 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
2522 }
2523 else
2524 {
2525 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2526
2527 /* This value hasn't been checked yet... waiting for OS/2 and 33Hz timers... :-) */
2528 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
2529 }
2530 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
2531
2532 /*
2533 * CpuHz.
2534 */
2535 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
2536 u64CpuHz /= pGip->u32UpdateIntervalNS;
2537 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
2538 }
2539}
2540
2541
2542/**
2543 * Updates the GIP.
2544 *
2545 * @param pDevExt The device extension.
2546 * @param u64NanoTS The current nanosecond timestamp.
2547 * @param u64TSC The current TSC timestamp.
2548 * @param idCpu The CPU ID.
2549 * @param iTick The current timer tick.
2550 *
2551 * @remarks Can be called with interrupts disabled!
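 *
 * The odd/even u32TransactionId scheme below works like a seqlock: the
 * updater makes the id odd before touching the data and even again when done.
 * An illustrative reader-side sketch (not the actual ring-3 code):
 * @code
 *     uint32_t u32;
 *     do
 *     {
 *         u32 = ASMAtomicReadU32(&pGipCpu->u32TransactionId);
 *         // ... read u64NanoTS, u64TSC, u64CpuHz, etc. ...
 *     } while (   (u32 & 1) // update in progress
 *              || u32 != ASMAtomicReadU32(&pGipCpu->u32TransactionId));
 * @endcode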
2552 */
2553static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
2554{
2555 /*
2556 * Determine the relevant CPU data.
2557 */
2558 PSUPGIPCPU pGipCpu;
2559 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2560 AssertPtrReturnVoid(pGip);
2561
2562 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2563 pGipCpu = &pGip->aCPUs[0];
2564 else
2565 {
2566 unsigned iCpu;
2567 uint32_t idApic = supdrvGipGetApicId(pGip);
2568 if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
2569 { /* likely */ }
2570 else
2571 return;
2572 iCpu = pGip->aiCpuFromApicId[idApic];
2573 if (RT_LIKELY(iCpu < pGip->cCpus))
2574 { /* likely */ }
2575 else
2576 return;
2577 pGipCpu = &pGip->aCPUs[iCpu];
2578 if (RT_LIKELY(pGipCpu->idCpu == idCpu))
2579 { /* likely */ }
2580 else
2581 return;
2582 }
2583
2584 /*
2585 * Start update transaction.
2586 */
2587 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2588 {
2589 /* this can happen on win32 if we're taking too long and there are more CPUs around. shouldn't happen though. */
2590 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2591 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2592 pGipCpu->cErrors++;
2593 return;
2594 }
2595
2596 /*
2597 * Recalc the update frequency every 0x800th time.
2598 */
2599 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariant hosts. */
2600 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
2601 {
2602 if (pGip->u64NanoTSLastUpdateHz)
2603 {
2604#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
2605 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
2606 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
2607 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
2608 {
2609 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
2610 * calculation on non-invariant hosts if it changes the history decision
2611 * taken in supdrvGipDoUpdateCpu(). */
2612 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
2613 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
2614 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
2615 }
2616#endif
2617 }
2618 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
2619 }
2620
2621 /*
2622 * Update the data.
2623 */
2624 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2625
2626 /*
2627 * Complete transaction.
2628 */
2629 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2630}
2631
2632
2633/**
2634 * Updates the per cpu GIP data for the calling cpu.
2635 *
2636 * @param pDevExt The device extension.
2637 * @param u64NanoTS The current nanosecond timestamp.
2638 * @param u64TSC The current TSC timestamp.
2639 * @param idCpu The CPU ID.
2640 * @param idApic The APIC id for the CPU index.
2641 * @param iTick The current timer tick.
2642 *
2643 * @remarks Can be called with interrupts disabled!
2644 */
2645static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
2646 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
2647{
2648 uint32_t iCpu;
2649 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2650
2651 /*
2652 * Avoid a potential race when a CPU online notification doesn't fire on
2653 * the onlined CPU but the tick creeps in before the event notification is
2654 * run.
2655 */
2656 if (RT_LIKELY(iTick != 1))
2657 { /* likely */ }
2658 else
2659 {
2660 iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
2661 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
2662 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
2663 }
2664
2665 iCpu = pGip->aiCpuFromApicId[idApic];
2666 if (RT_LIKELY(iCpu < pGip->cCpus))
2667 {
2668 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
2669 if (pGipCpu->idCpu == idCpu)
2670 {
2671 /*
2672 * Start update transaction.
2673 */
2674 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2675 {
2676 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2677 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2678 pGipCpu->cErrors++;
2679 return;
2680 }
2681
2682 /*
2683 * Update the data.
2684 */
2685 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2686
2687 /*
2688 * Complete transaction.
2689 */
2690 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2691 }
2692 }
2693}
2694
2695
2696/**
2697 * Timer callback function for the sync and invariant GIP modes.
2698 *
2699 * @param pTimer The timer.
2700 * @param pvUser Opaque pointer to the device extension.
2701 * @param iTick The timer tick.
2702 */
2703static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2704{
2705 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2706 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2707 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2708 uint64_t u64TSC = ASMReadTSC();
2709 uint64_t u64NanoTS = RTTimeSystemNanoTS();
2710 RT_NOREF1(pTimer);
2711
2712 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
2713 {
2714 /*
2715 * The calculations in supdrvGipUpdate() are somewhat timing sensitive;
2716 * missing timer ticks is not an option for GIP because the GIP users
2717 * will end up incrementing the time by 1 ns per time getter call until
2718 * there is a complete timer update. So, if the delta has yet to be
2719 * calculated, we just pretend it is zero for now (the GIP users
2720 * probably won't have it for a wee while either and will do the same).
2721 *
2722 * We could maybe on some platforms try cross calling a CPU with a
2723 * working delta here, but it's not worth the hassle since the
2724 * likelihood of this happening is really low. On Windows, Linux, and
2725 * Solaris timers fire on the CPU they were registered/started on.
2726 * Darwin timers don't necessarily (they are high priority threads).
2727 */
2728 uint32_t iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
2729 uint16_t iGipCpu = RT_LIKELY(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
2730 ? pGip->aiCpuFromCpuSetIdx[iCpuSet] : UINT16_MAX;
2731 Assert(!ASMIntAreEnabled());
2732 if (RT_LIKELY(iGipCpu < pGip->cCpus))
2733 {
2734 int64_t iTscDelta = pGip->aCPUs[iGipCpu].i64TSCDelta;
2735 if (iTscDelta != INT64_MAX)
2736 u64TSC -= iTscDelta;
2737 }
2738 }
2739
2740 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
2741
2742 ASMSetFlags(fEFlags);
2743}
2744
2745
2746/**
2747 * Timer callback function for async GIP mode.
2748 * @param pTimer The timer.
2749 * @param pvUser Opaque pointer to the device extension.
2750 * @param iTick The timer tick.
2751 */
2752static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2753{
2754 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2755 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2756 RTCPUID idCpu = RTMpCpuId();
2757 uint64_t u64TSC = ASMReadTSC();
2758 uint64_t NanoTS = RTTimeSystemNanoTS();
2759 RT_NOREF1(pTimer);
2760
2761 /** @todo reset the transaction number and whatnot when iTick == 1. */
2762 if (pDevExt->idGipMaster == idCpu)
2763 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
2764 else
2765 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, supdrvGipGetApicId(pDevExt->pGip), iTick);
2766
2767 ASMSetFlags(fEFlags);
2768}
2769
2770
2771
2772
2773/*
2774 *
2775 *
2776 * TSC Delta Measurements And Related Code
2777 * TSC Delta Measurements And Related Code
2778 * TSC Delta Measurements And Related Code
2779 *
2780 *
2781 */
2782
2783
2784/*
2785 * Select TSC delta measurement algorithm.
2786 */
2787#if 0
2788# define GIP_TSC_DELTA_METHOD_1
2789#else
2790# define GIP_TSC_DELTA_METHOD_2
2791#endif
2792
2793/** For padding variables to keep them away from other cache lines. Better too
2794 * large than too small!
2795 * @remarks Current AMD64 and x86 CPUs seem to use 64 bytes. There are claims
2796 * that NetBurst had 128-byte cache lines while the 486 thru Pentium
2797 * III had 32-byte cache lines. */
2798#define GIP_TSC_DELTA_CACHE_LINE_SIZE 128
2799
2800
2801/**
2802 * TSC delta measurement algorithm \#2 result entry.
2803 */
2804typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
2805{
2806 uint32_t iSeqMine; /**< The sequence number this CPU had when the sample was taken. */
2807 uint32_t iSeqOther; /**< The sequence number last seen from the other CPU. */
2808 uint64_t uTsc; /**< The TSC value sampled between the sequence operations. */
2809} SUPDRVTSCDELTAMETHOD2ENTRY;
2810
2811/**
2812 * TSC delta measurement algorithm \#2 Data.
2813 */
2814typedef struct SUPDRVTSCDELTAMETHOD2
2815{
2816 /** Padding to make sure the iCurSeqNo is in its own cache line. */
2817 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2818 /** The current sequence number of this worker. */
2819 uint32_t volatile iCurSeqNo;
2820 /** Padding to make sure the iCurSeqNo is in its own cache line. */
2821 uint32_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint32_t) - 1];
2822 /** Result table. */
2823 SUPDRVTSCDELTAMETHOD2ENTRY aResults[64];
2824} SUPDRVTSCDELTAMETHOD2;
2825/** Pointer to the data for TSC delta measurement algorithm \#2. */
2826typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
2827
2828
2829/**
2830 * The TSC delta synchronization struct, version 2.
2831 *
2832 * The synchronization variable is completely isolated in its own cache line
2833 * (provided our max cache line size estimate is correct).
2834 */
2835typedef struct SUPTSCDELTASYNC2
2836{
2837 /** Padding to make sure uSyncVar is in its own cache line. */
2838 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2839
2840 /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
2841 volatile uint32_t uSyncVar;
2842 /** Sequence synchronizing variable used for post 'GO' synchronization. */
2843 volatile uint32_t uSyncSeq;
2844
2845 /** Padding to make sure uSyncVar and uSyncSeq are in their own cache line. */
2846 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 2];
2847
2848 /** Start RDTSC value. Put here mainly to save stack space. */
2849 uint64_t uTscStart;
2850 /** Copy of SUPDRVGIPTSCDELTARGS::cMaxTscTicks. */
2851 uint64_t cMaxTscTicks;
2852} SUPTSCDELTASYNC2;
2853AssertCompileSize(SUPTSCDELTASYNC2, GIP_TSC_DELTA_CACHE_LINE_SIZE * 2 + sizeof(uint64_t));
2854typedef SUPTSCDELTASYNC2 *PSUPTSCDELTASYNC2;
2855
2856/** Prestart wait. */
2857#define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT UINT32_C(0x0ffe)
2858/** Prestart aborted. */
2859#define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT UINT32_C(0x0fff)
2860/** Ready (on your mark). */
2861#define GIP_TSC_DELTA_SYNC2_READY UINT32_C(0x1000)
2862/** Steady (get set). */
2863#define GIP_TSC_DELTA_SYNC2_STEADY UINT32_C(0x1001)
2864/** Go! */
2865#define GIP_TSC_DELTA_SYNC2_GO UINT32_C(0x1002)
2866/** Used by the verification test. */
2867#define GIP_TSC_DELTA_SYNC2_GO_GO UINT32_C(0x1003)
2868
2869/** We reached the time limit. */
2870#define GIP_TSC_DELTA_SYNC2_TIMEOUT UINT32_C(0x1ffe)
2871/** The other party won't touch the sync struct ever again. */
2872#define GIP_TSC_DELTA_SYNC2_FINAL UINT32_C(0x1fff)
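
/* Rough state flow (a sketch of how the handshake code below drives uSyncVar):
 * READY -> STEADY -> GO (-> GO_GO for the verification test) and back to
 * READY for the next round; TIMEOUT and FINAL terminate the exchange, while
 * the PRESTART values are only used while setting the structures up. */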
2873
2874
2875/**
2876 * Argument package/state passed by supdrvTscMeasureDeltaOne() to the RTMpOn
2877 * callback worker.
2878 * @todo add
2879 */
2880typedef struct SUPDRVGIPTSCDELTARGS
2881{
2882 /** The device extension. */
2883 PSUPDRVDEVEXT pDevExt;
2884 /** Pointer to the GIP CPU array entry for the worker. */
2885 PSUPGIPCPU pWorker;
2886 /** Pointer to the GIP CPU array entry for the master. */
2887 PSUPGIPCPU pMaster;
2888 /** The maximum number of ticks to spend in supdrvTscMeasureDeltaCallback.
2889 * (This is what we need a rough TSC frequency for.) */
2890 uint64_t cMaxTscTicks;
2891 /** Used to abort synchronization setup. */
2892 bool volatile fAbortSetup;
2893
2894 /** Padding to make sure the master variables live in their own cache lines. */
2895 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2896
2897 /** @name Master
2898 * @{ */
2899 /** The time the master spent in the MP worker. */
2900 uint64_t cElapsedMasterTscTicks;
2901 /** The iTry value when stopped at. */
2902 uint32_t iTry;
2903 /** Set if the run timed out. */
2904 bool volatile fTimedOut;
2905 /** Pointer to the master's synchronization struct (on stack). */
2906 PSUPTSCDELTASYNC2 volatile pSyncMaster;
2907 /** Master data union. */
2908 union
2909 {
2910 /** Data (master) for delta verification. */
2911 struct
2912 {
2913 /** Verification test TSC values for the master. */
2914 uint64_t volatile auTscs[32];
2915 } Verify;
2916 /** Data (master) for measurement method \#2. */
2917 struct
2918 {
2919 /** Data and sequence number. */
2920 SUPDRVTSCDELTAMETHOD2 Data;
2921 /** The lag setting for the next run. */
2922 bool fLag;
2923 /** Number of hits. */
2924 uint32_t cHits;
2925 } M2;
2926 } uMaster;
2927 /** The verifier verdict, VINF_SUCCESS if ok, VERR_OUT_OF_RANGE if not,
2928 * VERR_TRY_AGAIN on timeout. */
2929 int32_t rcVerify;
2930#ifdef TSCDELTA_VERIFY_WITH_STATS
2931 /** The maximum difference between TSC read during delta verification. */
2932 int64_t cMaxVerifyTscTicks;
2933 /** The minimum difference between two TSC reads during verification. */
2934 int64_t cMinVerifyTscTicks;
2935 /** The bad TSC diff, worker relative to master (= worker - master).
2936 * Negative value means the worker is behind the master. */
2937 int64_t iVerifyBadTscDiff;
2938#endif
2939 /** @} */
2940
2941 /** Padding to make sure the worker variables live in their own cache lines. */
2942 uint64_t au64CacheLinePaddingBetween[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2943
2944 /** @name Proletarian
2945 * @{ */
2946 /** Pointer to the worker's synchronization struct (on stack). */
2947 PSUPTSCDELTASYNC2 volatile pSyncWorker;
2948 /** The time the worker spent in the MP worker. */
2949 uint64_t cElapsedWorkerTscTicks;
2950 /** Worker data union. */
2951 union
2952 {
2953 /** Data (worker) for delta verification. */
2954 struct
2955 {
2956 /** Verification test TSC values for the worker. */
2957 uint64_t volatile auTscs[32];
2958 } Verify;
2959 /** Data (worker) for measurement method \#2. */
2960 struct
2961 {
2962 /** Data and sequence number. */
2963 SUPDRVTSCDELTAMETHOD2 Data;
2964 /** The lag setting for the next run (set by master). */
2965 bool fLag;
2966 } M2;
2967 } uWorker;
2968 /** @} */
2969
2970 /** Padding to make sure the above is in its own cache line. */
2971 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2972} SUPDRVGIPTSCDELTARGS;
2973typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
2974
2975
2976/** @name Macros that implement the basic synchronization steps common to
2977 * the algorithms.
2978 *
2979 * Must be used from a loop as the timeouts are implemented via 'break' statements
2980 * at the moment.
2981 *
2982 * @{
2983 */
2984#if defined(DEBUG_bird) /* || defined(VBOX_STRICT) */
2985# define TSCDELTA_DBG_VARS() uint32_t iDbgCounter
2986# define TSCDELTA_DBG_START_LOOP() do { iDbgCounter = 0; } while (0)
2987# define TSCDELTA_DBG_CHECK_LOOP() \
2988 do { iDbgCounter++; if ((iDbgCounter & UINT32_C(0x01ffffff)) == 0) RT_BREAKPOINT(); } while (0)
2989#else
2990# define TSCDELTA_DBG_VARS() ((void)0)
2991# define TSCDELTA_DBG_START_LOOP() ((void)0)
2992# define TSCDELTA_DBG_CHECK_LOOP() ((void)0)
2993#endif
2994#if 0
2995# define TSCDELTA_DBG_SYNC_MSG(a_Args) SUPR0Printf a_Args
2996#else
2997# define TSCDELTA_DBG_SYNC_MSG(a_Args) ((void)0)
2998#endif
2999#if 0
3000# define TSCDELTA_DBG_SYNC_MSG2(a_Args) SUPR0Printf a_Args
3001#else
3002# define TSCDELTA_DBG_SYNC_MSG2(a_Args) ((void)0)
3003#endif
3004#if 0
3005# define TSCDELTA_DBG_SYNC_MSG9(a_Args) SUPR0Printf a_Args
3006#else
3007# define TSCDELTA_DBG_SYNC_MSG9(a_Args) ((void)0)
3008#endif
3009
3010
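/**
 * Performs the pre-measurement handshake between master and worker.
 *
 * Drives the two SUPTSCDELTASYNC2 structures through READY -> STEADY -> GO,
 * leaving interrupts disabled from the point the STEADY handshake completes
 * (they stay off until the corresponding SYNC_AFTER step), and finally
 * attempts a sequence number exchange to get the two CPUs into near-lockstep
 * execution.
 *
 * @returns true on success, false on timeout or abort (the caller is expected
 *          to break out of its loop).
 * @param   pMySync     My synchronization structure.
 * @param   pOtherSync  My partner's synchronization structure.
 * @param   fIsMaster   Set if master, clear if worker.
 * @param   pfEFlags    Where to return the interrupt flags to restore later.
 * @param   pArgs       The argument/state data.
 */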
3011static bool supdrvTscDeltaSync2_Before(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3012 bool fIsMaster, PRTCCUINTREG pfEFlags, PSUPDRVGIPTSCDELTARGS pArgs)
3013{
3014 uint32_t iMySeq = fIsMaster ? 0 : 256;
3015 uint32_t const iMaxSeq = iMySeq + 16; /* For the last loop, darn linux/freebsd C-ishness. */
3016 uint32_t u32Tmp;
3017 uint32_t iSync2Loops = 0;
3018 RTCCUINTREG fEFlags;
3019 TSCDELTA_DBG_VARS();
3020
3021#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3022 *pfEFlags = X86_EFL_IF | X86_EFL_1; /* should shut up most nagging compilers. */
3023#else
3024 *pfEFlags = 0;
3025#endif
3026
3027 /*
3028 * The master tells the worker to get on its mark.
3029 */
3030 if (fIsMaster)
3031 {
3032 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
3033 { /* likely */ }
3034 else
3035 {
3036 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #1 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3037 return false;
3038 }
3039 }
3040
3041 /*
3042 * Wait for the 'on your mark' signal (ack in the master case). We process timeouts here.
3043 */
3044 ASMAtomicWriteU32(&(pMySync)->uSyncSeq, 0);
3045 for (;;)
3046 {
3047 fEFlags = ASMIntDisableFlags();
3048 u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3049 if (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY)
3050 break;
3051 ASMSetFlags(fEFlags);
3052 ASMNopPause();
3053
3054 /* Abort? */
3055 if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY)
3056 {
3057 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #2 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
3058 return false;
3059 }
3060
3061 /* Check for timeouts every so often (not every loop in case RDTSC is
3062 trapping or something). Must check the first time around. */
3063#if 0 /* For debugging the timeout paths. */
3064 static uint32_t volatile xxx;
3065#endif
3066 if ( ( (iSync2Loops & 0x3ff) == 0
3067 && ASMReadTSC() - pMySync->uTscStart > pMySync->cMaxTscTicks)
3068#if 0 /* This is crazy, I know, but enable this code and the results are markedly better when enabled on the 1.4GHz AMD (debug). */
3069 || (!fIsMaster && (++xxx & 0xf) == 0)
3070#endif
3071 )
3072 {
3073 /* Try to switch our own state into timeout mode so the master cannot tell us to 'GO';
3074 ignore the timeout if we've got the go ahead already (simpler). */
3075 if (ASMAtomicCmpXchgU32(&pMySync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_READY))
3076 {
3077 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: timeout\n", fIsMaster ? "master" : "worker"));
3078 ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_STEADY);
3079 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3080 return false;
3081 }
3082 }
3083 iSync2Loops++;
3084 }
3085
3086 /*
3087 * Interrupts are now disabled and will remain disabled until we do
3088 * TSCDELTA_MASTER_SYNC_AFTER / TSCDELTA_OTHER_SYNC_AFTER.
3089 */
3090 *pfEFlags = fEFlags;
3091
3092 /*
3093 * The worker tells the master that it is on its mark and that the master
3094 * needs to get into position as well.
3095 */
3096 if (!fIsMaster)
3097 {
3098 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
3099 { /* likely */ }
3100 else
3101 {
3102 ASMSetFlags(fEFlags);
3103 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #3 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3104 return false;
3105 }
3106 }
3107
3108 /*
3109 * The master sends the 'go' to the worker and waits for the ACK.
3110 */
3111 if (fIsMaster)
3112 {
3113 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
3114 { /* likely */ }
3115 else
3116 {
3117 ASMSetFlags(fEFlags);
3118 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #4 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3119 return false;
3120 }
3121 }
3122
3123 /*
3124 * Wait for the 'go' signal (ack in the master case).
3125 */
3126 TSCDELTA_DBG_START_LOOP();
3127 for (;;)
3128 {
3129 u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3130 if (u32Tmp == GIP_TSC_DELTA_SYNC2_GO)
3131 break;
3132 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY))
3133 { /* likely */ }
3134 else
3135 {
3136 ASMSetFlags(fEFlags);
3137 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #5 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
3138 return false;
3139 }
3140
3141 TSCDELTA_DBG_CHECK_LOOP();
3142 ASMNopPause();
3143 }
3144
3145 /*
3146 * The worker acks the 'go' (shouldn't fail).
3147 */
3148 if (!fIsMaster)
3149 {
3150 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
3151 { /* likely */ }
3152 else
3153 {
3154 ASMSetFlags(fEFlags);
3155 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #6 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3156 return false;
3157 }
3158 }
3159
3160 /*
3161 * Try to enter mostly lockstep execution with the other CPU.
3162 */
3163 for (;;)
3164 {
3165 uint32_t iOtherSeq1, iOtherSeq2;
3166 ASMCompilerBarrier();
3167 ASMSerializeInstruction();
3168
3169 ASMAtomicWriteU32(&pMySync->uSyncSeq, iMySeq);
3170 ASMNopPause();
3171 iOtherSeq1 = ASMAtomicXchgU32(&pOtherSync->uSyncSeq, iMySeq);
3172 ASMNopPause();
3173 iOtherSeq2 = ASMAtomicReadU32(&pMySync->uSyncSeq);
3174
3175 ASMCompilerBarrier();
3176 if (iOtherSeq1 == iOtherSeq2)
3177 return true;
3178
3179 /* Did the other guy give up? Should we give up? */
3180 if ( iOtherSeq1 == UINT32_MAX
3181 || iOtherSeq2 == UINT32_MAX)
3182 return true;
3183 if (++iMySeq >= iMaxSeq)
3184 {
3185 ASMAtomicWriteU32(&pMySync->uSyncSeq, UINT32_MAX);
3186 return true;
3187 }
3188 ASMNopPause();
3189 }
3190}
3191
3192#define TSCDELTA_MASTER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
3193 if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
3194 { /*likely*/ } \
3195 else if (true) \
3196 { \
3197 TSCDELTA_DBG_SYNC_MSG9(("sync/before/master: #89\n")); \
3198 break; \
3199 } else do {} while (0)
3200#define TSCDELTA_OTHER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
3201 if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
3202 { /*likely*/ } \
3203 else if (true) \
3204 { \
3205 TSCDELTA_DBG_SYNC_MSG9(("sync/before/other: #89\n")); \
3206 break; \
3207 } else do {} while (0)
3208
3209
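/**
 * Performs the post-measurement synchronization step, re-enabling interrupts.
 *
 * @returns true on success, false if the sync variable is in an unexpected
 *          state (shouldn't ever happen).
 * @param   pMySync     My synchronization structure.
 * @param   pOtherSync  My partner's synchronization structure (unused here).
 * @param   fIsMaster   Set if master, clear if worker.
 * @param   fEFlags     The interrupt flags to restore.
 */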
3210static bool supdrvTscDeltaSync2_After(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3211 bool fIsMaster, RTCCUINTREG fEFlags)
3212{
3213 TSCDELTA_DBG_VARS();
3214 RT_NOREF1(pOtherSync);
3215
3216 /*
3217 * Wait for the 'ready' signal. In the master's case, this means the
3218 * worker has completed its data collection, while in the worker's case it
3219 * means the master is done processing the data and it's time for the next
3220 * loop iteration (or whatever).
3221 */
3222 ASMSetFlags(fEFlags);
3223 TSCDELTA_DBG_START_LOOP();
3224 for (;;)
3225 {
3226 uint32_t u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3227 if ( u32Tmp == GIP_TSC_DELTA_SYNC2_READY
3228 || (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY && !fIsMaster) /* kicked twice => race */ )
3229 return true;
3230 ASMNopPause();
3231 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_GO))
3232 { /* likely */}
3233 else
3234 {
3235 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #1 u32Tmp=%#x\n", u32Tmp));
3236 return false; /* shouldn't ever happen! */
3237 }
3238 TSCDELTA_DBG_CHECK_LOOP();
3239 ASMNopPause();
3240 }
3241}
3242
3243#define TSCDELTA_MASTER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
3244 if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_fEFlags))) \
3245 { /* likely */ } \
3246 else if (true) \
3247 { \
3248 TSCDELTA_DBG_SYNC_MSG9(("sync/after/master: #97\n")); \
3249 break; \
3250 } else do {} while (0)
3251
3252#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pMySync, a_pOtherSync) \
3253 /* \
3254 * Tell the worker that we're done processing the data and ready for the next round. \
3255 */ \
3256 if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
3257 { /* likely */ } \
3258 else if (true)\
3259 { \
3260 TSCDELTA_DBG_SYNC_MSG(("sync/after/master: #99 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
3261 break; \
3262 } else do {} while (0)
3263
3264#define TSCDELTA_OTHER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
3265 if (true) { \
3266 /* \
3267 * Tell the master that we're done collecting data and wait for the next round to start. \
3268 */ \
3269 if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
3270 { /* likely */ } \
3271 else \
3272 { \
3273 ASMSetFlags(a_fEFlags); \
3274 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #0 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
3275 break; \
3276 } \
3277 if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_fEFlags))) \
3278 { /* likely */ } \
3279 else \
3280 { \
3281 TSCDELTA_DBG_SYNC_MSG9(("sync/after/other: #98\n")); \
3282 break; \
3283 } \
3284 } else do {} while (0)
3285/** @} */
3286
3287
3288#ifdef GIP_TSC_DELTA_METHOD_1
3289/**
3290 * TSC delta measurement algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
3291 *
3292 *
3293 * We ignore the first few runs of the loop in order to prime the
3294 * cache. Also, we need to be careful about using 'pause' instruction
3295 * in critical busy-wait loops in this code - it can cause undesired
3296 * behaviour with hyperthreading.
3297 *
3298 * We try to minimize the measurement error by computing the minimum
3299 * read time of the compare statement in the worker by taking TSC
3300 * measurements across it.
3301 *
3302 * It must be noted that the computed minimum read time is mostly to
3303 * eliminate huge deltas when the worker is too early and doesn't by
3304 * itself help produce more accurate deltas. We allow two times the
3305 * computed minimum as an arbitrary acceptable threshold. Therefore,
3306 * it is still possible to get negative deltas where there are none
3307 * when the worker is earlier. As long as these occasional negative
3308 * deltas are lower than the time it takes to exit guest-context and
3309 * the OS to reschedule EMT on a different CPU, we won't expose a TSC
3310 * that jumped backwards. It is due to the existence of the negative
3311 * deltas that we don't recompute the delta with the master and
3312 * worker interchanged to eliminate the remaining measurement error.
3313 *
3314 *
3315 * @param pArgs The argument/state data.
3316 * @param pMySync My synchronization structure.
3317 * @param pOtherSync My partner's synchronization structure.
3318 * @param fIsMaster Set if master, clear if worker.
3319 * @param iTry The attempt number.
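 *
 * The delta kept below is essentially (a sketch; the master's TSC is first
 * adjusted by its own already-known delta):
 * @code
 *     iDelta = uTscWorker - (uTscMaster - iMasterTscDelta);
 * @endcode
 * so a negative value means the worker's TSC is behind the master's.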
3320 */
3321static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3322 bool fIsMaster, uint32_t iTry)
3323{
3324 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3325 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3326 uint64_t uMinCmpReadTime = UINT64_MAX;
3327 unsigned iLoop;
3328 NOREF(iTry);
3329
3330 for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
3331 {
3332 RTCCUINTREG fEFlags;
3333 if (fIsMaster)
3334 {
3335 /*
3336 * The master.
3337 */
3338 AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
3339 ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
3340 pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
3341 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3342
3343 do
3344 {
3345 ASMSerializeInstruction();
3346 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
3347 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
3348
3349 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3350
3351 /* Process the data. */
3352 if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
3353 {
3354 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
3355 {
3356 int64_t iDelta = pGipCpuWorker->u64TSCSample
3357 - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
3358 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3359 ? iDelta < pGipCpuWorker->i64TSCDelta
3360 : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
3361 pGipCpuWorker->i64TSCDelta = iDelta;
3362 }
3363 }
3364
3365 /* Reset our TSC sample and tell the worker to move on. */
3366 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
3367 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3368 }
3369 else
3370 {
3371 /*
3372 * The worker.
3373 */
3374 uint64_t uTscWorker;
3375 uint64_t uTscWorkerFlushed;
3376 uint64_t uCmpReadTime;
3377
3378 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
3379 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3380
3381 /*
3382 * Keep reading the TSC until we notice that the master has read his. Reading
3383 * the TSC -after- the master has updated the memory is way too late. We thus
3384 * compensate by trying to measure how long it took for the worker to notice
3385 * the memory flushed from the master.
3386 */
3387 do
3388 {
3389 ASMSerializeInstruction();
3390 uTscWorker = ASMReadTSC();
3391 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
3392 ASMSerializeInstruction();
3393 uTscWorkerFlushed = ASMReadTSC();
3394
3395 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
3396 if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
3397 {
3398                /* This is totally arbitrary, a.k.a. I don't like it, but I have no better ideas for now. */
3399 if (uCmpReadTime < (uMinCmpReadTime << 1))
3400 {
3401 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
3402 if (uCmpReadTime < uMinCmpReadTime)
3403 uMinCmpReadTime = uCmpReadTime;
3404 }
3405 else
3406 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
3407 }
3408 else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
3409 {
3410 if (uCmpReadTime < uMinCmpReadTime)
3411 uMinCmpReadTime = uCmpReadTime;
3412 }
3413
3414 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3415 }
3416 }
3417
3418 TSCDELTA_DBG_SYNC_MSG9(("sync/method1loop/%s: #92 iLoop=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iLoop,
3419 pMySync->uSyncVar));
3420
3421 /*
3422 * We must reset the worker TSC sample value in case it gets picked as a
3423 * GIP master later on (it's trashed above, naturally).
3424 */
3425 if (!fIsMaster)
3426 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
3427}
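/*
 * Editorial sketch (not part of the driver build): the delta update rule in
 * the master branch above, written out as a standalone helper so the intent
 * is easier to see.  It relies on GIP_TSC_DELTA_INITIAL_MASTER_VALUE being
 * the zero delta (the master's own delta) and on INT64_MAX meaning 'no delta
 * yet': positive candidates only ever shrink the stored delta, and negative
 * candidates only ever raise it towards zero.  The function name is made up
 * for illustration.
 */
#if 0 /* example only */
static int64_t supdrvTscDeltaExampleUpdate(int64_t iBestDelta, int64_t iCandidate)
{
    if (  iCandidate >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
        ? iCandidate < iBestDelta
        : iCandidate > iBestDelta || iBestDelta == INT64_MAX)
        return iCandidate; /* Candidate is closer to zero, or it's the first sample. */
    return iBestDelta;
}
#endif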
3428#endif /* GIP_TSC_DELTA_METHOD_1 */
3429
3430
3431#ifdef GIP_TSC_DELTA_METHOD_2
3432/*
3433 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
3434 */
3435
3436# define GIP_TSC_DELTA_M2_LOOPS (7 + GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3437# define GIP_TSC_DELTA_M2_PRIMER_LOOPS 0
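/* Editorial note: GIP_TSC_DELTA_M2_LOOPS references GIP_TSC_DELTA_M2_PRIMER_LOOPS
   before the latter's #define.  This is fine: object-like macros are expanded
   at the point of use, not at the point of definition. */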
3438
3439
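/**
 * Processes the data collected by supdrvTscDeltaMethod2CollectData on the
 * master, updating the best delta found so far.  (Editorial comment, added
 * for clarity; the pairing rule is inferred from the collection code below.)
 *
 * A master entry pairs up with a worker entry when the snooped iSeqOther
 * value is odd, i.e. it was sampled while the worker was between the two
 * sequence number increments bracketing the TSC read of its entry
 * iSeqOther >> 1, and that worker entry snooped the master's in-progress
 * sequence number in return.  Such a pair of TSC samples were taken close
 * enough together to be usable for a delta calculation.
 *
 * @param   pArgs               The argument/state data.
 */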
3440static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs)
3441{
3442 int64_t iMasterTscDelta = pArgs->pMaster->i64TSCDelta;
3443 int64_t iBestDelta = pArgs->pWorker->i64TSCDelta;
3444 uint32_t idxResult;
3445 uint32_t cHits = 0;
3446
3447 /*
3448 * Look for matching entries in the master and worker tables.
3449 */
3450 for (idxResult = 0; idxResult < RT_ELEMENTS(pArgs->uMaster.M2.Data.aResults); idxResult++)
3451 {
3452 uint32_t idxOther = pArgs->uMaster.M2.Data.aResults[idxResult].iSeqOther;
3453 if (idxOther & 1)
3454 {
3455 idxOther >>= 1;
3456 if (idxOther < RT_ELEMENTS(pArgs->uWorker.M2.Data.aResults))
3457 {
3458 if (pArgs->uWorker.M2.Data.aResults[idxOther].iSeqOther == pArgs->uMaster.M2.Data.aResults[idxResult].iSeqMine)
3459 {
3460 int64_t iDelta;
3461 iDelta = pArgs->uWorker.M2.Data.aResults[idxOther].uTsc
3462 - (pArgs->uMaster.M2.Data.aResults[idxResult].uTsc - iMasterTscDelta);
3463 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3464 ? iDelta < iBestDelta
3465 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
3466 iBestDelta = iDelta;
3467 cHits++;
3468 }
3469 }
3470 }
3471 }
3472
3473 /*
3474 * Save the results.
3475 */
3476 if (cHits > 2)
3477 pArgs->pWorker->i64TSCDelta = iBestDelta;
3478 pArgs->uMaster.M2.cHits += cHits;
3479}
3480
3481
3482/**
3483 * The core function of the 2nd TSC delta measurement algorithm.
3484 *
3485 * The idea here is that we have the two CPUs execute the exact same code
3486 * collecting a largish set of TSC samples. The code has one data dependency on
3487 * the other CPU, the intention being to synchronize the execution as well as
3488 * to help cross-reference the two sets of TSC samples (the sequence numbers).
3489 *
3490 * The @a fLag parameter is used to modify the execution a tiny bit on one or
3491 * both of the CPUs. When @a fLag differs between the CPUs, the hope is that
3492 * it will occasionally nudge them into lock-step execution.
3493 *
3494 */
3495static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
3496{
3497 SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
3498 uint32_t cLeft = RT_ELEMENTS(pMyData->aResults);
3499
3500 ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
3501 ASMSerializeInstruction();
3502 while (cLeft-- > 0)
3503 {
3504 uint64_t uTsc;
3505 uint32_t iSeqMine = ASMAtomicIncU32(&pMyData->iCurSeqNo);
3506 uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
3507 ASMCompilerBarrier();
3508 ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
3509 uTsc = ASMReadTSC();
3510 ASMAtomicIncU32(&pMyData->iCurSeqNo);
3511 ASMCompilerBarrier();
3512 ASMSerializeInstruction();
3513 pEntry->iSeqMine = iSeqMine;
3514 pEntry->iSeqOther = iSeqOther;
3515 pEntry->uTsc = uTsc;
3516 pEntry++;
3517 ASMSerializeInstruction();
3518 if (fLag)
3519 ASMNopPause();
3520 }
3521}
3522
3523
3524/**
3525 * TSC delta measurement algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
3526 *
3527 * See supdrvTscDeltaMethod2CollectData for algorithm details.
3528 *
3529 * @param pArgs The argument/state data.
3530 * @param pMySync My synchronization structure.
3531 * @param pOtherSync My partner's synchronization structure.
3532 * @param fIsMaster Set if master, clear if worker.
3533 * @param iTry The attempt number.
3534 */
3535static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3536 bool fIsMaster, uint32_t iTry)
3537{
3538 unsigned iLoop;
3539 RT_NOREF1(iTry);
3540
3541 for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++)
3542 {
3543 RTCCUINTREG fEFlags;
3544 if (fIsMaster)
3545 {
3546 /*
3547 * Adjust the loop lag fudge.
3548 */
3549# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3550 if (iLoop < GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3551 {
3552                /* Lag during the priming to be nice to everyone. */
3553 pArgs->uMaster.M2.fLag = true;
3554 pArgs->uWorker.M2.fLag = true;
3555 }
3556 else
3557# endif
3558 if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4)
3559 {
3560 /* 25 % of the body without lagging. */
3561 pArgs->uMaster.M2.fLag = false;
3562 pArgs->uWorker.M2.fLag = false;
3563 }
3564 else if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4 * 2)
3565 {
3566 /* 25 % of the body with both lagging. */
3567 pArgs->uMaster.M2.fLag = true;
3568 pArgs->uWorker.M2.fLag = true;
3569 }
3570 else
3571 {
3572 /* 50% of the body with alternating lag. */
3573 pArgs->uMaster.M2.fLag = (iLoop & 1) == 0;
3574                pArgs->uWorker.M2.fLag = (iLoop & 1) == 1;
3575 }
3576
3577 /*
3578 * Sync up with the worker and collect data.
3579 */
3580 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3581 supdrvTscDeltaMethod2CollectData(&pArgs->uMaster.M2.Data, &pArgs->uWorker.M2.Data.iCurSeqNo, pArgs->uMaster.M2.fLag);
3582 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3583
3584 /*
3585 * Process the data.
3586 */
3587# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3588 if (iLoop >= GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3589# endif
3590 supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs);
3591
3592 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3593 }
3594 else
3595 {
3596 /*
3597 * The worker.
3598 */
3599 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3600 supdrvTscDeltaMethod2CollectData(&pArgs->uWorker.M2.Data, &pArgs->uMaster.M2.Data.iCurSeqNo, pArgs->uWorker.M2.fLag);
3601 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3602 }
3603 }
3604}
3605
3606#endif /* GIP_TSC_DELTA_METHOD_2 */
3607
3608
3609
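/**
 * Verifies the given worker TSC delta by having the master and worker take
 * turns reading their TSCs, checking that the readings never appear to go
 * backwards once the deltas are applied.  (Editorial comment, added for
 * clarity; it describes the code below.)
 *
 * @returns VINF_SUCCESS if the delta holds up, VERR_OUT_OF_RANGE if a
 *          backwards jump was observed (also stored in pArgs->rcVerify), or
 *          VERR_TIMEOUT if the synchronization timed out (pArgs->rcVerify is
 *          then set to VERR_TRY_AGAIN).
 * @param   pArgs               The argument/state data.
 * @param   pMySync             My synchronization structure.
 * @param   pOtherSync          My partner's synchronization structure.
 * @param   fIsMaster           Set if master, clear if worker.
 * @param   iWorkerTscDelta     The worker TSC delta to verify.
 */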
3610static int supdrvTscDeltaVerify(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync,
3611 PSUPTSCDELTASYNC2 pOtherSync, bool fIsMaster, int64_t iWorkerTscDelta)
3612{
3613 /*PSUPGIPCPU pGipCpuWorker = pArgs->pWorker; - unused */
3614 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3615 uint32_t i;
3616 TSCDELTA_DBG_VARS();
3617
3618 for (;;)
3619 {
3620 RTCCUINTREG fEFlags;
3621 AssertCompile((RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) & 1) == 0);
3622 AssertCompile(RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) == RT_ELEMENTS(pArgs->uWorker.Verify.auTscs));
3623
3624 if (fIsMaster)
3625 {
3626 uint64_t uTscWorker;
3627 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3628
3629 /*
3630 * Collect TSC, master goes first.
3631 */
3632 for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i += 2)
3633 {
3634 /* Read, kick & wait #1. */
3635 uint64_t uTsc = ASMReadTSC();
3636 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
3637 ASMSerializeInstruction();
3638 pArgs->uMaster.Verify.auTscs[i] = uTsc;
3639 TSCDELTA_DBG_START_LOOP();
3640 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
3641 {
3642 TSCDELTA_DBG_CHECK_LOOP();
3643 ASMNopPause();
3644 }
3645
3646 /* Read, kick & wait #2. */
3647 uTsc = ASMReadTSC();
3648 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
3649 ASMSerializeInstruction();
3650 pArgs->uMaster.Verify.auTscs[i + 1] = uTsc;
3651 TSCDELTA_DBG_START_LOOP();
3652 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
3653 {
3654 TSCDELTA_DBG_CHECK_LOOP();
3655 ASMNopPause();
3656 }
3657 }
3658
3659 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3660
3661 /*
3662 * Process the data.
3663 */
3664#ifdef TSCDELTA_VERIFY_WITH_STATS
3665 pArgs->cMaxVerifyTscTicks = INT64_MIN;
3666 pArgs->cMinVerifyTscTicks = INT64_MAX;
3667 pArgs->iVerifyBadTscDiff = 0;
3668#endif
3669 ASMAtomicWriteS32(&pArgs->rcVerify, VINF_SUCCESS);
3670 uTscWorker = 0;
3671 for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i++)
3672 {
3673 /* Master vs previous worker entry. */
3674 uint64_t uTscMaster = pArgs->uMaster.Verify.auTscs[i] - pGipCpuMaster->i64TSCDelta;
3675 int64_t iDiff;
3676 if (i > 0)
3677 {
3678 iDiff = uTscMaster - uTscWorker;
3679#ifdef TSCDELTA_VERIFY_WITH_STATS
3680 if (iDiff > pArgs->cMaxVerifyTscTicks)
3681 pArgs->cMaxVerifyTscTicks = iDiff;
3682 if (iDiff < pArgs->cMinVerifyTscTicks)
3683 pArgs->cMinVerifyTscTicks = iDiff;
3684#endif
3685 if (iDiff < 0)
3686 {
3687#ifdef TSCDELTA_VERIFY_WITH_STATS
3688 pArgs->iVerifyBadTscDiff = -iDiff;
3689#endif
3690 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
3691 break;
3692 }
3693 }
3694
3695 /* Worker vs master. */
3696 uTscWorker = pArgs->uWorker.Verify.auTscs[i] - iWorkerTscDelta;
3697 iDiff = uTscWorker - uTscMaster;
3698#ifdef TSCDELTA_VERIFY_WITH_STATS
3699 if (iDiff > pArgs->cMaxVerifyTscTicks)
3700 pArgs->cMaxVerifyTscTicks = iDiff;
3701 if (iDiff < pArgs->cMinVerifyTscTicks)
3702 pArgs->cMinVerifyTscTicks = iDiff;
3703#endif
3704 if (iDiff < 0)
3705 {
3706#ifdef TSCDELTA_VERIFY_WITH_STATS
3707 pArgs->iVerifyBadTscDiff = iDiff;
3708#endif
3709 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
3710 break;
3711 }
3712 }
3713
3714 /* Done. */
3715 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3716 }
3717 else
3718 {
3719 /*
3720 * The worker, master leads.
3721 */
3722 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3723
3724 for (i = 0; i < RT_ELEMENTS(pArgs->uWorker.Verify.auTscs); i += 2)
3725 {
3726 uint64_t uTsc;
3727
3728 /* Wait, Read and Kick #1. */
3729 TSCDELTA_DBG_START_LOOP();
3730 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
3731 {
3732 TSCDELTA_DBG_CHECK_LOOP();
3733 ASMNopPause();
3734 }
3735 uTsc = ASMReadTSC();
3736 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
3737 ASMSerializeInstruction();
3738 pArgs->uWorker.Verify.auTscs[i] = uTsc;
3739
3740 /* Wait, Read and Kick #2. */
3741 TSCDELTA_DBG_START_LOOP();
3742 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
3743 {
3744 TSCDELTA_DBG_CHECK_LOOP();
3745 ASMNopPause();
3746 }
3747 uTsc = ASMReadTSC();
3748 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
3749 ASMSerializeInstruction();
3750 pArgs->uWorker.Verify.auTscs[i + 1] = uTsc;
3751 }
3752
3753 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3754 }
3755 return pArgs->rcVerify;
3756 }
3757
3758 /*
3759 * Timed out, please retry.
3760 */
3761 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_TRY_AGAIN);
3762 return VERR_TIMEOUT;
3763}
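/*
 * Editorial sketch of the verification ping-pong above: the two CPUs
 * alternate TSC reads, handing the turn over by flipping the partner's
 * uSyncVar between GIP_TSC_DELTA_SYNC2_GO and GIP_TSC_DELTA_SYNC2_GO_GO,
 * which yields the interleaving
 *
 *      M[0]  W[0]  M[1]  W[1]  ...  M[n]  W[n]
 *
 * The master then checks that each delta-adjusted reading is >= the
 * partner's previous one; any negative difference means the proposed worker
 * delta would let the delta-adjusted TSC appear to jump backwards.
 */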
3764
3765
3766
3767/**
3768 * Handles the special abort procedure during synchronization setup in
3769 * supdrvTscMeasureDeltaCallbackUnwrapped().
3770 *
3771 * @returns 0 (dummy, ignored)
3772 * @param pArgs Pointer to argument/state data.
3773 * @param pMySync Pointer to my sync structure.
3774 * @param fIsMaster Set if we're the master, clear if worker.
3775 * @param fTimeout Set if it's a timeout.
3776 */
3777DECL_NO_INLINE(static, int)
3778supdrvTscMeasureDeltaCallbackAbortSyncSetup(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, bool fIsMaster, bool fTimeout)
3779{
3780 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3781 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3782 TSCDELTA_DBG_VARS();
3783 RT_NOREF1(pMySync);
3784
3785 /*
3786 * Clear our sync pointer and make sure the abort flag is set.
3787 */
3788 ASMAtomicWriteNullPtr(ppMySync);
3789 ASMAtomicWriteBool(&pArgs->fAbortSetup, true);
3790 if (fTimeout)
3791 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3792
3793 /*
3794 * Make sure the other party is out of there and won't be touching our
3795 * sync state again (would cause stack corruption).
3796 */
3797 TSCDELTA_DBG_START_LOOP();
3798 while (ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2) != NULL)
3799 {
3800 ASMNopPause();
3801 ASMNopPause();
3802 ASMNopPause();
3803 TSCDELTA_DBG_CHECK_LOOP();
3804 }
3805
3806 return 0;
3807}
3808
3809
3810/**
3811 * This is used by supdrvTscMeasureInitialDeltas() to read the TSC on two CPUs
3812 * and compute the delta between them.
3813 *
3814 * To reduce code size a good deal when timeout handling was added, a dummy
3815 * return value had to be added (saves 1-3 lines per timeout case), thus this
3816 * 'Unwrapped' function and the dummy 0 return value.
3817 *
3818 * @returns 0 (dummy, ignored)
3819 * @param   idCpu       The CPU we are currently scheduled on.
3820 * @param pArgs Pointer to a parameter package.
3821 *
3822 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
3823 * read the TSC at exactly the same time on both the master and the
3824 * worker CPUs. Due to DMA, bus arbitration, cache locality,
3825 * contention, SMI, pipelining etc. there is no guaranteed way of
3826 * doing this on x86 CPUs.
3827 */
3828static int supdrvTscMeasureDeltaCallbackUnwrapped(RTCPUID idCpu, PSUPDRVGIPTSCDELTARGS pArgs)
3829{
3830 PSUPDRVDEVEXT pDevExt = pArgs->pDevExt;
3831 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3832 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3833 bool const fIsMaster = idCpu == pGipCpuMaster->idCpu;
3834 uint32_t iTry;
3835 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3836 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3837 SUPTSCDELTASYNC2 MySync;
3838 PSUPTSCDELTASYNC2 pOtherSync;
3839 int rc;
3840 TSCDELTA_DBG_VARS();
3841
3842 /* A bit of paranoia first. */
3843 if (!pGipCpuMaster || !pGipCpuWorker)
3844 return 0;
3845
3846 /*
3847 * If the CPU isn't part of the measurement, return immediately.
3848 */
3849 if ( !fIsMaster
3850 && idCpu != pGipCpuWorker->idCpu)
3851 return 0;
3852
3853 /*
3854 * Set up my synchronization stuff and wait for the other party to show up.
3855 *
3856 * We don't wait forever since the other party may be off fishing (offline,
3857     * spinning with interrupts disabled, whatever); we must play nice with the
3858     * rest of the system as this context generally isn't one in which we will
3859     * get preempted and we may hold up a number of lower priority interrupts.
3860 */
3861 ASMAtomicWriteU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT);
3862 ASMAtomicWritePtr(ppMySync, &MySync);
3863 MySync.uTscStart = ASMReadTSC();
3864 MySync.cMaxTscTicks = pArgs->cMaxTscTicks;
3865
3866 /* Look for the partner, might not be here yet... Special abort considerations. */
3867 iTry = 0;
3868 TSCDELTA_DBG_START_LOOP();
3869 while ((pOtherSync = ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2)) == NULL)
3870 {
3871 ASMNopPause();
3872 if ( ASMAtomicReadBool(&pArgs->fAbortSetup)
3873 || !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuMaster->idCpu) )
3874 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3875 if ( (iTry++ & 0xff) == 0
3876 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3877 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3878 TSCDELTA_DBG_CHECK_LOOP();
3879 ASMNopPause();
3880 }
3881
3882 /* I found my partner, waiting to be found... Special abort considerations. */
3883 if (fIsMaster)
3884        if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* paranoia */
3885 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3886
3887 iTry = 0;
3888 TSCDELTA_DBG_START_LOOP();
3889 while (ASMAtomicReadU32(&MySync.uSyncVar) == GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)
3890 {
3891 ASMNopPause();
3892 if (ASMAtomicReadBool(&pArgs->fAbortSetup))
3893 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3894 if ( (iTry++ & 0xff) == 0
3895 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3896 {
3897 if ( fIsMaster
3898 && !ASMAtomicCmpXchgU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_ABORT, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT))
3899                break; /* race #1: worker has moved on, handle timeout in loop instead. */
3900 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3901 }
3902 TSCDELTA_DBG_CHECK_LOOP();
3903 }
3904
3905 if (!fIsMaster)
3906 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */
3907 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3908
3909/** @todo Add a resumable state to pArgs so we don't waste time if we time
3910 *        out or something. Timeouts are legit, either of the two CPUs may get
3911 * interrupted. */
3912
3913 /*
3914 * Start by seeing if we have a zero delta between the two CPUs.
3915 * This should normally be the case.
3916 */
3917 rc = supdrvTscDeltaVerify(pArgs, &MySync, pOtherSync, fIsMaster, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3918 if (RT_SUCCESS(rc))
3919 {
3920 if (fIsMaster)
3921 {
3922 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3923 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3924 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3925 }
3926 }
3927 /*
3928 * If the verification didn't time out, do regular delta measurements.
3929 * We retry this until we get a reasonable value.
3930 */
3931 else if (rc != VERR_TIMEOUT)
3932 {
3933 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
3934 for (iTry = 0; iTry < 12; iTry++)
3935 {
3936 /*
3937 * Check the state before we start.
3938 */
3939 uint32_t u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3940 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3941 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3942 {
3943 TSCDELTA_DBG_SYNC_MSG(("sync/loop/%s: #0 iTry=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iTry, u32Tmp));
3944 break;
3945 }
3946
3947 /*
3948 * Do the measurements.
3949 */
3950#ifdef GIP_TSC_DELTA_METHOD_1
3951 supdrvTscDeltaMethod1Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3952#elif defined(GIP_TSC_DELTA_METHOD_2)
3953 supdrvTscDeltaMethod2Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3954#else
3955# error "huh??"
3956#endif
3957
3958 /*
3959 * Check the state.
3960 */
3961 u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3962 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3963 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3964 {
3965 if (fIsMaster)
3966 TSCDELTA_DBG_SYNC_MSG(("sync/loop/master: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3967 else
3968 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/worker: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3969 break;
3970 }
3971
3972 /*
3973 * Success? If so, stop trying. Master decides.
3974 */
3975 if (fIsMaster)
3976 {
3977 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
3978 {
3979 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3980 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3981 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/master: #9 iTry=%u MyState=%#x\n", iTry, MySync.uSyncVar));
3982 break;
3983 }
3984 }
3985 }
3986 if (fIsMaster)
3987 pArgs->iTry = iTry;
3988 }
3989
3990 /*
3991 * End the synchronization dance. We tell the other that we're done,
3992 * then wait for the same kind of reply.
3993 */
3994 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_FINAL);
3995 ASMAtomicWriteNullPtr(ppMySync);
3996 iTry = 0;
3997 TSCDELTA_DBG_START_LOOP();
3998 while (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_FINAL)
3999 {
4000 iTry++;
4001 if ( iTry == 0
4002 && !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuMaster->idCpu))
4003 break; /* this really shouldn't happen. */
4004 TSCDELTA_DBG_CHECK_LOOP();
4005 ASMNopPause();
4006 }
4007
4008 /*
4009 * Collect some runtime stats.
4010 */
4011 if (fIsMaster)
4012 pArgs->cElapsedMasterTscTicks = ASMReadTSC() - MySync.uTscStart;
4013 else
4014 pArgs->cElapsedWorkerTscTicks = ASMReadTSC() - MySync.uTscStart;
4015 return 0;
4016}
4017
4018/**
4019 * Callback used by supdrvTscMeasureInitialDeltas() to read the TSC on two CPUs
4020 * and compute the delta between them.
4021 *
4022 * @param   idCpu       The CPU we are currently scheduled on.
4023 * @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
4024 * @param pvUser2 Unused.
4025 */
4026static DECLCALLBACK(void) supdrvTscMeasureDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4027{
4028 supdrvTscMeasureDeltaCallbackUnwrapped(idCpu, (PSUPDRVGIPTSCDELTARGS)pvUser1);
4029 RT_NOREF1(pvUser2);
4030}
4031
4032
4033/**
4034 * Measures the TSC delta between the master GIP CPU and one specified worker
4035 * CPU.
4036 *
4037 * @returns VBox status code.
4038 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED on pure measurement
4039 * failure.
4040 * @param pDevExt Pointer to the device instance data.
4041 * @param idxWorker The index of the worker CPU from the GIP's array of
4042 * CPUs.
4043 *
4044 * @remarks This must be called with preemption enabled!
4045 */
4046static int supdrvTscMeasureDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
4047{
4048 int rc;
4049 int rc2;
4050 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4051 RTCPUID idMaster = pDevExt->idGipMaster;
4052 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
4053 PSUPGIPCPU pGipCpuMaster;
4054 uint32_t iGipCpuMaster;
4055#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4056 uint32_t u32Tmp;
4057#endif
4058
4059 /* Validate input a bit. */
4060 AssertReturn(pGip, VERR_INVALID_PARAMETER);
4061 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4062 Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4063
4064 /*
4065 * Don't attempt measuring the delta for the GIP master.
4066 */
4067 if (pGipCpuWorker->idCpu == idMaster)
4068 {
4069 if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
4070 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
4071 return VINF_SUCCESS;
4072 }
4073
4074 /*
4075 * One measurement at a time, at least for now. We might be using
4076 * broadcast IPIs, so be nice to the rest of the system.
4077 */
4078#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4079 rc = RTSemMutexRequest(pDevExt->mtxTscDelta, RT_INDEFINITE_WAIT);
4080#else
4081 rc = RTSemFastMutexRequest(pDevExt->mtxTscDelta);
4082#endif
4083 if (RT_FAILURE(rc))
4084 return rc;
4085
4086 /*
4087 * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
4088 * try to pick a different master. (This fudge only works with multi-core systems.)
4089 * ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core.
4090 *
4091 * We skip this on AMDs for now as their HTT is different from Intel's and
4092 * it doesn't seem to have any favorable effect on the results.
4093 *
4094 * If the master is offline, we need a new master too, so share the code.
4095 */
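    /* Editorial example: with the two-threads-per-core numbering assumed above,
       APIC IDs 6 and 7 are hyper-threads of the same core since
       (6 & ~1) == (7 & ~1) == 6, so we would rather measure against a CPU from
       a different core. */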
4096 iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
4097 AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
4098 pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
4099#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4100 if ( ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
4101 && pGip->cOnlineCpus > 2
4102 && ASMHasCpuId()
4103 && RTX86IsValidStdRange(ASMCpuId_EAX(0))
4104 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
4105 && ( !ASMIsAmdCpu()
4106 || RTX86GetCpuFamily(u32Tmp = ASMCpuId_EAX(1)) > 0x15
4107 || ( RTX86GetCpuFamily(u32Tmp) == 0x15 /* Piledriver+, not bulldozer (FX-4150 didn't like it). */
4108 && RTX86GetCpuModelAMD(u32Tmp) >= 0x02) ) )
4109 || !RTMpIsCpuOnline(idMaster) )
4110 {
4111 uint32_t i;
4112 for (i = 0; i < pGip->cCpus; i++)
4113 if ( i != iGipCpuMaster
4114 && i != idxWorker
4115 && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
4116 && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
4117 && pGip->aCPUs[i].idCpu != NIL_RTCPUID
4118 && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
4119 && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
4120 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
4121 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic
4122 && RTMpIsCpuOnline(pGip->aCPUs[i].idCpu))
4123 {
4124 iGipCpuMaster = i;
4125 pGipCpuMaster = &pGip->aCPUs[i];
4126 idMaster = pGipCpuMaster->idCpu;
4127 break;
4128 }
4129 }
4130#endif /* defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) */
4131
4132 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
4133 {
4134 /*
4135 * Initialize data package for the RTMpOnPair callback.
4136 */
4137 PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)RTMemAllocZ(sizeof(*pArgs));
4138 if (pArgs)
4139 {
4140 pArgs->pWorker = pGipCpuWorker;
4141 pArgs->pMaster = pGipCpuMaster;
4142 pArgs->pDevExt = pDevExt;
4143 pArgs->pSyncMaster = NULL;
4144 pArgs->pSyncWorker = NULL;
4145 pArgs->cMaxTscTicks = ASMAtomicReadU64(&pGip->u64CpuHz) / 512; /* 1953 us */
4146
4147 /*
4148 * Do the RTMpOnPair call. We reset i64TSCDelta first so we
4149 * and supdrvTscMeasureDeltaCallback can use it as a success check.
4150 */
4151            /** @todo Store the i64TSCDelta result in pArgs first? Perhaps deal with
4152 * that when doing the restart loop reorg. */
4153 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
4154 rc = RTMpOnPair(pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, RTMPON_F_CONCURRENT_EXEC,
4155 supdrvTscMeasureDeltaCallback, pArgs, NULL);
4156 if (RT_SUCCESS(rc))
4157 {
4158#if 0
4159 SUPR0Printf("mponpair ticks: %9llu %9llu max: %9llu iTry: %u%s\n", pArgs->cElapsedMasterTscTicks,
4160 pArgs->cElapsedWorkerTscTicks, pArgs->cMaxTscTicks, pArgs->iTry,
4161 pArgs->fTimedOut ? " timed out" :"");
4162#endif
4163#if 0
4164 SUPR0Printf("rcVerify=%d iVerifyBadTscDiff=%lld cMinVerifyTscTicks=%lld cMaxVerifyTscTicks=%lld\n",
4165 pArgs->rcVerify, pArgs->iVerifyBadTscDiff, pArgs->cMinVerifyTscTicks, pArgs->cMaxVerifyTscTicks);
4166#endif
4167 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
4168 {
4169 /*
4170 * Work the TSC delta applicability rating. It starts
4171 * optimistic in supdrvGipInit, we downgrade it here.
4172 */
4173 SUPGIPUSETSCDELTA enmRating;
4174 if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
4175 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
4176 enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
4177 else if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
4178 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
4179 enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
4180 else
4181 enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
4182 if (pGip->enmUseTscDelta < enmRating)
4183 {
4184 AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
4185 ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
4186 }
4187 }
4188 else
4189 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
4190 }
4191 /** @todo return try-again if we get an offline CPU error. */
4192
4193 RTMemFree(pArgs);
4194 }
4195 else
4196 rc = VERR_NO_MEMORY;
4197 }
4198 else
4199 rc = VERR_CPU_OFFLINE;
4200
4201 /*
4202 * We're done now.
4203 */
4204#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4205 rc2 = RTSemMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
4206#else
4207 rc2 = RTSemFastMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
4208#endif
4209 return rc;
4210}
4211
4212
4213/**
4214 * Resets the TSC-delta related TSC samples and optionally the deltas
4215 * themselves.
4216 *
4217 * @param pDevExt Pointer to the device instance data.
4218 * @param fResetTscDeltas Whether the TSC-deltas are also to be reset.
4219 *
4220 * @remarks This might be called while holding a spinlock!
4221 */
4222static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fResetTscDeltas)
4223{
4224 unsigned iCpu;
4225 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4226 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4227 {
4228 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
4229 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
4230 if (fResetTscDeltas)
4231 {
4232 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpu->iCpuSet);
4233 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
4234 }
4235 }
4236}
4237
4238
4239/**
4240 * Picks an online CPU as the master TSC for TSC-delta computations.
4241 *
4242 * @returns VBox status code.
4243 * @param pDevExt Pointer to the device instance data.
4244 * @param pidxMaster Where to store the CPU array index of the chosen
4245 * master. Optional, can be NULL.
4246 */
4247static int supdrvTscPickMaster(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
4248{
4249 /*
4250 * Pick the first CPU online as the master TSC and make it the new GIP master based
4251 * on the APIC ID.
4252 *
4253 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
4254 * in most cases, making it nicer/easier for comparisons. It is safe to update the GIP
4255 * master at this point since the sync/async timer isn't created yet.
4256 */
4257 unsigned iCpu;
4258 uint32_t idxMaster = UINT32_MAX;
4259 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4260 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
4261 {
4262 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
4263 if (idxCpu != UINT16_MAX)
4264 {
4265 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
4266 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
4267 {
4268 idxMaster = idxCpu;
4269 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
4270 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpu->idCpu);
4271 if (pidxMaster)
4272 *pidxMaster = idxMaster;
4273 return VINF_SUCCESS;
4274 }
4275 }
4276 }
4277 return VERR_CPU_OFFLINE;
4278}
4279
4280
4281/**
4282 * Performs the initial measurements of the TSC deltas between CPUs.
4283 *
4284 * This is called by supdrvGipCreate(), supdrvGipPowerNotificationCallback() or
4285 * triggered by it if threaded.
4286 *
4287 * @returns VBox status code.
4288 * @param pDevExt Pointer to the device instance data.
4289 *
4290 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
4291 *          idCpu and GIP's online CPU set, both of which are populated in
4292 * supdrvGipInitOnCpu().
4293 */
4294static int supdrvTscMeasureInitialDeltas(PSUPDRVDEVEXT pDevExt)
4295{
4296 PSUPGIPCPU pGipCpuMaster;
4297 unsigned iCpu;
4298 unsigned iOddEven;
4299 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4300 uint32_t idxMaster = UINT32_MAX;
4301 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
4302
4303 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4304    supdrvTscResetSamples(pDevExt, true /* fResetTscDeltas */);
4305 int rc = supdrvTscPickMaster(pDevExt, &idxMaster);
4306 if (RT_FAILURE(rc))
4307 {
4308 SUPR0Printf("Failed to pick a CPU master for TSC-delta measurements rc=%Rrc\n", rc);
4309 return rc;
4310 }
4311 AssertReturn(idxMaster < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4312 pGipCpuMaster = &pGip->aCPUs[idxMaster];
4313 Assert(pDevExt->idGipMaster == pGipCpuMaster->idCpu);
4314
4315 /*
4316 * If there is only a single CPU online we have nothing to do.
4317 */
4318 if (pGip->cOnlineCpus <= 1)
4319 {
4320 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
4321 return VINF_SUCCESS;
4322 }
4323
4324 /*
4325 * Loop thru the GIP CPU array and get deltas for each CPU (except the
4326 * master). We do the CPUs with the even numbered APIC IDs first so that
4327 * we've got alternative master CPUs to pick from on hyper-threaded systems.
4328 */
4329 for (iOddEven = 0; iOddEven < 2; iOddEven++)
4330 {
4331 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4332 {
4333 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4334 if ( iCpu != idxMaster
4335 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
4336 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4337 {
4338 rc = supdrvTscMeasureDeltaOne(pDevExt, iCpu);
4339 if (RT_FAILURE(rc))
4340 {
4341                    SUPR0Printf("supdrvTscMeasureDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u (idGipMaster=%u)\n",
4342                                rc, iCpu, pGipCpuWorker->idCpu, idxMaster, pGipCpuMaster->idCpu, pDevExt->idGipMaster);
4343 break;
4344 }
4345
4346 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
4347 {
4348 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
4349 rc = VERR_TRY_AGAIN;
4350 break;
4351 }
4352 }
4353 }
4354 }
4355
4356 return rc;
4357}
4358
4359
4360#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4361
4362/**
4363 * Switches the TSC-delta measurement thread into the butchered state.
4364 *
4365 * @returns VBox status code.
4366 * @param pDevExt Pointer to the device instance data.
4367 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
4368 * @param pszFailed An error message to log.
4369 * @param rcFailed The error code to exit the thread with.
4370 */
4371static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
4372{
4373 if (!fSpinlockHeld)
4374 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4375
4376 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
4377 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4378 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", pszFailed, rcFailed));
4379 return rcFailed;
4380}
4381
4382
4383/**
4384 * The TSC-delta measurement thread.
4385 *
4386 * @returns VBox status code.
4387 * @param hThread The thread handle.
4388 * @param pvUser Opaque pointer to the device instance data.
4389 */
4390static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
4391{
4392 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
4393 int rc = VERR_INTERNAL_ERROR_2;
4394 for (;;)
4395 {
4396 /*
4397 * Switch on the current state.
4398 */
4399 SUPDRVTSCDELTATHREADSTATE enmState;
4400 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4401 enmState = pDevExt->enmTscDeltaThreadState;
4402 switch (enmState)
4403 {
4404 case kTscDeltaThreadState_Creating:
4405 {
4406 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4407 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
4408 if (RT_FAILURE(rc))
4409 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4410 RT_FALL_THRU();
4411 }
4412
4413 case kTscDeltaThreadState_Listening:
4414 {
4415 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4416
4417 /*
4418 * Linux counts uninterruptible sleeps as load, hence we shall do a
4419 * regular, interruptible sleep here and ignore wake ups due to signals.
4420 * See task_contributes_to_load() in include/linux/sched.h in the Linux sources.
4421 */
4422 rc = RTThreadUserWaitNoResume(hThread, pDevExt->cMsTscDeltaTimeout);
4423 if ( RT_FAILURE(rc)
4424 && rc != VERR_TIMEOUT
4425 && rc != VERR_INTERRUPTED)
4426 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
4427 RTThreadUserReset(hThread);
4428 break;
4429 }
4430
4431 case kTscDeltaThreadState_WaitAndMeasure:
4432 {
4433 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
4434 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
4435 if (RT_FAILURE(rc))
4436 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4437 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4438 RTThreadSleep(1);
4439 RT_FALL_THRU();
4440 }
4441
4442 case kTscDeltaThreadState_Measuring:
4443 {
4444 if (pDevExt->fTscThreadRecomputeAllDeltas)
4445 {
4446 int cTries = 8;
4447 int cMsWaitPerTry = 10;
4448 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4449 Assert(pGip);
4450 do
4451 {
4452 RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
4453 rc = supdrvTscMeasureInitialDeltas(pDevExt);
4454 if ( RT_SUCCESS(rc)
4455 || ( RT_FAILURE(rc)
4456 && rc != VERR_TRY_AGAIN
4457 && rc != VERR_CPU_OFFLINE))
4458 {
4459 break;
4460 }
4461 RTThreadSleep(cMsWaitPerTry);
4462 } while (cTries-- > 0);
4463 pDevExt->fTscThreadRecomputeAllDeltas = false;
4464 }
4465 else
4466 {
4467 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4468 unsigned iCpu;
4469
4470 /* Measure TSC-deltas only for the CPUs that are in the set. */
4471 rc = VINF_SUCCESS;
4472 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4473 {
4474 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4475 if (RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4476 {
4477 if (pGipCpuWorker->i64TSCDelta == INT64_MAX)
4478 {
4479 int rc2 = supdrvTscMeasureDeltaOne(pDevExt, iCpu);
4480 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4481 rc = rc2;
4482 }
4483 else
4484 {
4485 /*
4486 * The thread/someone must've called SUPR0TscDeltaMeasureBySetIndex(),
4487                                 * so mark the delta as fine to get the timer thread off our back.
4488 */
4489 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
4490 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
4491 }
4492 }
4493 }
4494 }
4495 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4496 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4497 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4498 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4499 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as init value, see supdrvTscDeltaThreadInit(). */
4500 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
4501 break;
4502 }
4503
4504 case kTscDeltaThreadState_Terminating:
4505 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
4506 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4507 return VINF_SUCCESS;
4508
4509 case kTscDeltaThreadState_Butchered:
4510 default:
4511 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
4512 }
4513 }
4514 /* not reached */
4515}
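/*
 * Editorial summary of the state machine implemented above (transitions
 * initiated elsewhere, e.g. by supdrvTscDeltaThreadStartMeasurement(), are
 * marked with '*'):
 *
 *      Creating -> Listening -*> WaitAndMeasure -> Measuring -> Listening
 *      any state -*> Terminating -> Destroyed
 *      on internal errors: -> Butchered (thread exits with failure)
 */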
4516
4517
4518/**
4519 * Waits for the TSC-delta measurement thread to respond to a state change.
4520 *
4521 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
4522 * other error code on internal error.
4523 *
4524 * @param pDevExt The device instance data.
4525 * @param enmCurState The current state.
4526 * @param enmNewState The new state we're waiting for it to enter.
4527 */
4528static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
4529 SUPDRVTSCDELTATHREADSTATE enmNewState)
4530{
4531 SUPDRVTSCDELTATHREADSTATE enmActualState;
4532 int rc;
4533
4534 /*
4535 * Wait a short while for the expected state transition.
4536 */
4537 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
4538 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4539 enmActualState = pDevExt->enmTscDeltaThreadState;
4540 if (enmActualState == enmNewState)
4541 {
4542 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4543 rc = VINF_SUCCESS;
4544 }
4545 else if (enmActualState == enmCurState)
4546 {
4547 /*
4548 * Wait longer if the state has not yet transitioned to the one we want.
4549 */
4550 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4551 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
4552 if ( RT_SUCCESS(rc)
4553 || rc == VERR_TIMEOUT)
4554 {
4555 /*
4556 * Check the state whether we've succeeded.
4557 */
4558 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4559 enmActualState = pDevExt->enmTscDeltaThreadState;
4560 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4561 if (enmActualState == enmNewState)
4562 rc = VINF_SUCCESS;
4563 else if (enmActualState == enmCurState)
4564 {
4565 rc = VERR_TIMEOUT;
4566 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmActualState=%d enmNewState=%d\n",
4567 enmActualState, enmNewState));
4568 }
4569 else
4570 {
4571 rc = VERR_INTERNAL_ERROR;
4572 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
4573 enmActualState, enmNewState));
4574 }
4575 }
4576 else
4577 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
4578 }
4579 else
4580 {
4581 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4582 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state %d when transitioning from %d to %d\n",
4583 enmActualState, enmCurState, enmNewState));
4584 rc = VERR_INTERNAL_ERROR;
4585 }
4586
4587 return rc;
4588}
4589
4590
4591/**
4592 * Signals the TSC-delta thread to start measuring TSC-deltas.
4593 *
4594 * @param pDevExt Pointer to the device instance data.
4595 * @param fForceAll Force re-calculating TSC-deltas on all CPUs.
4596 */
4597static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll)
4598{
4599 if (pDevExt->hTscDeltaThread != NIL_RTTHREAD)
4600 {
4601 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4602 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4603 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4604 {
4605 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4606 if (fForceAll)
4607 pDevExt->fTscThreadRecomputeAllDeltas = true;
4608 }
4609 else if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_WaitAndMeasure
4610 && fForceAll)
4611 pDevExt->fTscThreadRecomputeAllDeltas = true;
4612 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4613 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4614 }
4615}
4616
4617
4618/**
4619 * Terminates the actual thread running supdrvTscDeltaThread().
4620 *
4621 * This is an internal worker function for supdrvTscDeltaThreadInit() and
4622 * supdrvTscDeltaTerm().
4623 *
4624 * @param pDevExt Pointer to the device instance data.
4625 */
4626static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
4627{
4628 int rc;
4629 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4630 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
4631 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4632 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4633 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
4634 if (RT_FAILURE(rc))
4635 {
4636 /* Signal a few more times before giving up. */
4637 int cTriesLeft = 5;
4638 while (--cTriesLeft > 0)
4639 {
4640 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4641 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
4642 if (rc != VERR_TIMEOUT)
4643 break;
4644 }
4645 }
4646}
4647
4648
4649/**
4650 * Initializes and spawns the TSC-delta measurement thread.
4651 *
4652 * A thread is required for servicing re-measurement requests from events like
4653 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
4654 * under all contexts on all OSs.
4655 *
4656 * @returns VBox status code.
4657 * @param pDevExt Pointer to the device instance data.
4658 *
4659 * @remarks Must only be called -after- initializing GIP and setting up MP
4660 * notifications!
4661 */
4662static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
4663{
4664 int rc;
4665 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4666 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
4667 if (RT_SUCCESS(rc))
4668 {
4669 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
4670 if (RT_SUCCESS(rc))
4671 {
4672 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
4673 pDevExt->cMsTscDeltaTimeout = 60000;
4674 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
4675 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
4676 if (RT_SUCCESS(rc))
4677 {
4678 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
4679 if (RT_SUCCESS(rc))
4680 {
4681 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4682 return rc;
4683 }
4684
4685 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
4686 supdrvTscDeltaThreadTerminate(pDevExt);
4687 }
4688 else
4689 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
4690 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4691 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4692 }
4693 else
4694 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
4695 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4696 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4697 }
4698 else
4699 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
4700
4701 return rc;
4702}
4703
4704
4705/**
4706 * Terminates the TSC-delta measurement thread and cleanup.
4707 *
4708 * @param pDevExt Pointer to the device instance data.
4709 */
4710static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
4711{
4712 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
4713 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4714 {
4715 supdrvTscDeltaThreadTerminate(pDevExt);
4716 }
4717
4718 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
4719 {
4720 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4721 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4722 }
4723
4724 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4725 {
4726 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4727 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4728 }
4729
4730 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4731}
4732
4733#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4734
4735/**
4736 * Measure the TSC delta for the CPU given by its CPU set index.
4737 *
4738 * @returns VBox status code.
4739 * @retval VERR_INTERRUPTED if interrupted while waiting.
4740 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if we were unable to get a
4741 * measurement.
4742 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
4743 *
4744 * @param pSession The caller's session. GIP must've been mapped.
4745 * @param iCpuSet The CPU set index of the CPU to measure.
4746 * @param fFlags Flags, SUP_TSCDELTA_MEASURE_F_XXX.
4747 * @param cMsWaitRetry Number of milliseconds to wait between each retry.
4748 * @param cMsWaitThread Number of milliseconds to wait for the thread to get
4749 * ready.
4750 * @param cTries Number of times to try, pass 0 for the default.
4751 */
4752SUPR0DECL(int) SUPR0TscDeltaMeasureBySetIndex(PSUPDRVSESSION pSession, uint32_t iCpuSet, uint32_t fFlags,
4753 RTMSINTERVAL cMsWaitRetry, RTMSINTERVAL cMsWaitThread, uint32_t cTries)
4754{
4755 PSUPDRVDEVEXT pDevExt;
4756 PSUPGLOBALINFOPAGE pGip;
4757 uint16_t iGipCpu;
4758 int rc;
4759#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4760 uint64_t msTsStartWait;
4761 uint32_t iWaitLoop;
4762#endif
4763
4764 /*
4765 * Validate and adjust the input.
4766 */
4767 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4768 if (!pSession->fGipReferenced)
4769 return VERR_WRONG_ORDER;
4770
4771 pDevExt = pSession->pDevExt;
4772 AssertReturn(SUP_IS_DEVEXT_VALID(pDevExt), VERR_INVALID_PARAMETER);
4773
4774 pGip = pDevExt->pGip;
4775 AssertPtrReturn(pGip, VERR_INTERNAL_ERROR_2);
4776
4777 AssertReturn(iCpuSet < RTCPUSET_MAX_CPUS, VERR_INVALID_CPU_INDEX);
4778 AssertReturn(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), VERR_INVALID_CPU_INDEX);
4779 iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
4780 AssertReturn(iGipCpu < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4781
4782 if (fFlags & ~SUP_TSCDELTA_MEASURE_F_VALID_MASK)
4783 return VERR_INVALID_FLAGS;
4784
4785 /*
4786 * The request is a noop if the TSC delta isn't being used.
4787 */
4788 if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4789 return VINF_SUCCESS;
4790
4791 if (cTries == 0)
4792 cTries = 12;
4793 else if (cTries > 256)
4794 cTries = 256;
4795
4796 if (cMsWaitRetry == 0)
4797 cMsWaitRetry = 2;
4798 else if (cMsWaitRetry > 1000)
4799 cMsWaitRetry = 1000;
4800
4801#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4802 /*
4803 * Has the TSC already been measured and we're not forced to redo it?
4804 */
4805 if ( pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX
4806 && !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE))
4807 return VINF_SUCCESS;
4808
4809 /*
4810 * Asynchronous request? Forward it to the thread, no waiting.
4811 */
4812 if (fFlags & SUP_TSCDELTA_MEASURE_F_ASYNC)
4813 {
4814 /** @todo Async. doesn't implement options like retries, waiting. We'll need
4815 * to pass those options to the thread somehow and implement it in the
4816 * thread. Check if anyone uses/needs fAsync before implementing this. */
4817 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4818 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
4819 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4820 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4821        {
4822            pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4823            rc = VINF_SUCCESS;
4824        }
4825        else /* Queued already or the thread is dead; only the latter is an error. */
4826            rc = pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_WaitAndMeasure ? VINF_SUCCESS : VERR_THREAD_IS_DEAD;
4827        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4828        RTThreadUserSignal(pDevExt->hTscDeltaThread);
4829        return rc;
4830 }
4831
4832 /*
4833 * If a TSC-delta measurement request is already being serviced by the thread,
4834     * wait up to cMsWaitThread ms for it to finish, otherwise bail out as busy.
4835 */
4836 msTsStartWait = RTTimeSystemMilliTS();
4837 for (iWaitLoop = 0;; iWaitLoop++)
4838 {
4839 uint64_t cMsElapsed;
4840 SUPDRVTSCDELTATHREADSTATE enmState;
4841 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4842 enmState = pDevExt->enmTscDeltaThreadState;
4843 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4844
4845 if (enmState == kTscDeltaThreadState_Measuring)
4846 { /* Must wait, the thread is busy. */ }
4847 else if (enmState == kTscDeltaThreadState_WaitAndMeasure)
4848 { /* Must wait, this state only says what will happen next. */ }
4849 else if (enmState == kTscDeltaThreadState_Terminating)
4850 { /* Must wait, this state only says what should happen next. */ }
4851 else
4852 break; /* All other states, the thread is either idly listening or dead. */
4853
4854 /* Wait or fail. */
4855 if (cMsWaitThread == 0)
4856 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4857 cMsElapsed = RTTimeSystemMilliTS() - msTsStartWait;
4858 if (cMsElapsed >= cMsWaitThread)
4859 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4860
4861 rc = RTThreadSleep(RT_MIN((RTMSINTERVAL)(cMsWaitThread - cMsElapsed), RT_MIN(iWaitLoop + 1, 10)));
4862 if (rc == VERR_INTERRUPTED)
4863 return rc;
4864 }
4865#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4866
4867 /*
4868 * Try measure the TSC delta the given number of times.
4869 */
4870 for (;;)
4871 {
4872 /* Unless we're forced to measure the delta, check whether it's done already. */
4873 if ( !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE)
4874 && pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX)
4875 {
4876 rc = VINF_SUCCESS;
4877 break;
4878 }
4879
4880 /* Measure it. */
4881 rc = supdrvTscMeasureDeltaOne(pDevExt, iGipCpu);
4882 if (rc != VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED)
4883 {
4884 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
4885 break;
4886 }
4887
4888 /* Retry? */
4889 if (cTries <= 1)
4890 break;
4891 cTries--;
4892
4893 /* Always delay between retries (be nice to the rest of the system
4894 and avoid the BSOD hounds). */
4895 rc = RTThreadSleep(cMsWaitRetry);
4896 if (rc == VERR_INTERRUPTED)
4897 break;
4898 }
4899
4900 return rc;
4901}
4902SUPR0_EXPORT_SYMBOL(SUPR0TscDeltaMeasureBySetIndex);
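/*
 * Editorial usage sketch (not part of the driver build): synchronously force
 * a re-measurement of the current CPU's TSC delta, accepting the default
 * retry count.  Assumes a valid session with the GIP referenced; the helper
 * name is made up for illustration.
 */
#if 0 /* example only */
static int supdrvExampleRemeasureCurrentCpu(PSUPDRVSESSION pSession)
{
    uint32_t const iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
    return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, SUP_TSCDELTA_MEASURE_F_FORCE,
                                          2 /*cMsWaitRetry*/, 5 * RT_MS_1SEC /*cMsWaitThread*/,
                                          0 /*cTries (0 = default)*/);
}
#endif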
4903
4904
4905/**
4906 * Service a TSC-delta measurement request.
4907 *
4908 * @returns VBox status code.
4909 * @param pDevExt Pointer to the device instance data.
4910 * @param pSession The support driver session.
4911 * @param pReq Pointer to the TSC-delta measurement request.
4912 */
4913int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
4914{
4915 uint32_t cTries;
4916 uint32_t iCpuSet;
4917 uint32_t fFlags;
4918 RTMSINTERVAL cMsWaitRetry;
4919 RT_NOREF1(pDevExt);
4920
4921 /*
4922 * Validate and adjust/resolve the input so they can be passed onto SUPR0TscDeltaMeasureBySetIndex.
4923 */
4924 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
4925
4926 if (pReq->u.In.idCpu == NIL_RTCPUID)
4927 return VERR_INVALID_CPU_ID;
4928 iCpuSet = RTMpCpuIdToSetIndex(pReq->u.In.idCpu);
4929 if (iCpuSet >= RTCPUSET_MAX_CPUS)
4930 return VERR_INVALID_CPU_ID;
4931
4932 cTries = pReq->u.In.cRetries == 0 ? 0 : (uint32_t)pReq->u.In.cRetries + 1;
4933
4934 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
4935
4936 fFlags = 0;
4937 if (pReq->u.In.fAsync)
4938 fFlags |= SUP_TSCDELTA_MEASURE_F_ASYNC;
4939 if (pReq->u.In.fForce)
4940 fFlags |= SUP_TSCDELTA_MEASURE_F_FORCE;
4941
4942 return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, fFlags, cMsWaitRetry,
4943 cTries == 0 ? 5 * RT_MS_1SEC : cMsWaitRetry * cTries /*cMsWaitThread*/,
4944 cTries);
4945}
4946
4947
4948/**
4949 * Reads TSC with delta applied.
4950 *
4951 * Will try to resolve a delta value of INT64_MAX (i.e. not yet measured)
4952 * before applying it; handling the case where the delta still needs to be
4953 * determined is the main purpose of this function.
4954 *
4955 * @returns VBox status code.
4956 * @param pDevExt Pointer to the device instance data.
4957 * @param pSession The support driver session.
4958 * @param pReq Pointer to the TSC-read request.
4959 */
4960int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
4961{
4962 PSUPGLOBALINFOPAGE pGip;
4963 int rc;
4964
4965 /*
4966 * Validate. We require the client to have mapped GIP (no asserting on
4967 * ring-3 preconditions).
4968 */
4969 AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
4970 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4971 return VERR_WRONG_ORDER;
4972 pGip = pDevExt->pGip;
4973 AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
4974
4975 /*
4976 * We're usually here because we need to apply delta, but we shouldn't be
4977 * upset if the GIP is some different mode.
4978 */
4979 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4980 {
4981 uint32_t cTries = 0;
4982 for (;;)
4983 {
4984 /*
4985 * Start by gathering the data, using CLI for disabling preemption
4986 * while we do that.
4987 */
4988 RTCCUINTREG fEFlags = ASMIntDisableFlags();
4989 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
4990            int iGipCpu = 0; /* gcc may be used uninitialized */
4991 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
4992 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
4993 {
4994 int64_t i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
4995 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
4996 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
4997 ASMSetFlags(fEFlags);
4998
4999 /*
5000 * If we're lucky we've got a delta, but no predictions here
5001 * as this I/O control is normally only used when the TSC delta
5002 * is set to INT64_MAX.
5003 */
5004 if (i64Delta != INT64_MAX)
5005 {
5006 pReq->u.Out.u64AdjustedTsc -= i64Delta;
5007 rc = VINF_SUCCESS;
5008 break;
5009 }
5010
5011 /* Give up after a few times. */
5012 if (cTries >= 4)
5013 {
5014 rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
5015 break;
5016 }
5017
5018                /* Need to measure the delta and try again. */
                rc = supdrvTscMeasureDeltaOne(pDevExt, iGipCpu);
                Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
                /** @todo should probably delay on failure... dpc watchdogs */
            }
            else
            {
                /* This really shouldn't happen. */
                AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
                pReq->u.Out.idApic = supdrvGipGetApicIdSlow();
                pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
                ASMSetFlags(fEFlags);
                rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
                break;
            }
        }
    }
    else
    {
        /*
         * No delta to apply. Easy. Deal with preemption the lazy way.
         */
        RTCCUINTREG fEFlags = ASMIntDisableFlags();
        int         iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
        int         iGipCpu = 0; /* silence gcc's maybe-used-uninitialized warning */
        if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                      && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
            pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
        else
            pReq->u.Out.idApic = supdrvGipGetApicIdSlow();
        pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
        ASMSetFlags(fEFlags);
        rc = VINF_SUCCESS;
    }

    return rc;
}
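
#if 0 /* Illustrative sketch only -- not part of the driver build. */
/**
 * Minimal sketch (helper name assumed) of the happy path above: read the TSC
 * on the current CPU and subtract that CPU's GIP delta, assuming the delta
 * has already been measured (i.e. is not INT64_MAX).  The retry/measure loop,
 * the bounds checking and the slow APIC-ID fallback are omitted; see
 * supdrvIOCtl_TscRead for the real logic.
 */
static uint64_t supdrvExampleReadAdjustedTsc(PSUPGLOBALINFOPAGE pGip)
{
    RTCCUINTREG const fEFlags  = ASMIntDisableFlags(); /* stay on one CPU */
    int const         iCpuSet  = RTMpCpuIdToSetIndex(RTMpCpuId());
    unsigned const    iGipCpu  = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    int64_t const     i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
    uint64_t const    uTsc     = ASMReadTSC();
    ASMSetFlags(fEFlags);
    Assert(i64Delta != INT64_MAX);
    return uTsc - i64Delta;
}
#endif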


/**
 * Worker for supdrvIOCtl_GipSetFlags.
 *
 * @returns VBox status code.
 * @retval  VERR_WRONG_ORDER if an enable-once-per-session flag is set again
 *          for a session.
 *
 * @param   pDevExt         Pointer to the device instance data.
 * @param   pSession        The support driver session.
 * @param   fOrMask         The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
 * @param   fAndMask        The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
 *
 * @remarks Caller must own the GIP mutex.
 *
 * @remarks This function doesn't validate any of the flags.
 */
static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
{
    uint32_t cRefs;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertMsg((fOrMask & fAndMask) == fOrMask, ("%#x & %#x\n", fOrMask, fAndMask)); /* ASSUMED by code below */

    /*
     * Compute GIP test-mode flags.
     */
    if (fOrMask & SUPGIP_FLAGS_TESTING_ENABLE)
    {
        if (!pSession->fGipTestMode)
        {
            Assert(pDevExt->cGipTestModeRefs < _64K);
            pSession->fGipTestMode = true;
            cRefs = ++pDevExt->cGipTestModeRefs;
            if (cRefs == 1)
            {
                fOrMask |= SUPGIP_FLAGS_TESTING | SUPGIP_FLAGS_TESTING_START;
                fAndMask &= ~SUPGIP_FLAGS_TESTING_STOP;
            }
        }
        else
        {
            LogRelMax(10, ("supdrvGipSetFlags: SUPGIP_FLAGS_TESTING_ENABLE already set for this session\n"));
            return VERR_WRONG_ORDER;
        }
    }
    else if (   !(fAndMask & SUPGIP_FLAGS_TESTING_ENABLE)
             && pSession->fGipTestMode)
    {
        Assert(pDevExt->cGipTestModeRefs > 0);
        Assert(pDevExt->cGipTestModeRefs < _64K);
        pSession->fGipTestMode = false;
        cRefs = --pDevExt->cGipTestModeRefs;
        if (!cRefs)
            fOrMask |= SUPGIP_FLAGS_TESTING_STOP;
        else
            fAndMask |= SUPGIP_FLAGS_TESTING_ENABLE;
    }

    /*
     * Commit the flags. This should be done as atomically as possible
     * since the flag consumers won't be holding the GIP mutex.
     */
    ASMAtomicOrU32(&pGip->fFlags, fOrMask);
    ASMAtomicAndU32(&pGip->fFlags, fAndMask);

    return VINF_SUCCESS;
}
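
/*
 * Worked example of the test-mode reference counting above, for two sessions
 * (the sequence is illustrative; the flags are the SUPGIP_FLAGS_XXX values
 * used by the code):
 *
 *   Session A sets SUPGIP_FLAGS_TESTING_ENABLE:
 *       cGipTestModeRefs 0 -> 1, so TESTING and TESTING_START are OR'ed in
 *       and TESTING_STOP is masked out.
 *   Session B sets SUPGIP_FLAGS_TESTING_ENABLE:
 *       cGipTestModeRefs 1 -> 2, no further GIP flag changes.
 *   Session B clears SUPGIP_FLAGS_TESTING_ENABLE (via fAndMask):
 *       cGipTestModeRefs 2 -> 1, TESTING_ENABLE is added back to fAndMask so
 *       the flag stays set for the remaining session.
 *   Session A clears SUPGIP_FLAGS_TESTING_ENABLE:
 *       cGipTestModeRefs 1 -> 0, TESTING_STOP is OR'ed in to signal consumers.
 */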


/**
 * Sets GIP test mode parameters.
 *
 * @returns VBox status code.
 * @param   pDevExt         Pointer to the device instance data.
 * @param   pSession        The support driver session.
 * @param   fOrMask         The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
 * @param   fAndMask        The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
 */
int VBOXCALL supdrvIOCtl_GipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
{
    PSUPGLOBALINFOPAGE pGip;
    int rc;

    /*
     * Validate. We require the client to have mapped GIP (no asserting on
     * ring-3 preconditions).
     */
    AssertPtr(pDevExt); AssertPtr(pSession); /* paranoia^2 */
    if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
        return VERR_WRONG_ORDER;
    pGip = pDevExt->pGip;
    AssertReturn(pGip, VERR_INTERNAL_ERROR_3);

    if (fOrMask & ~SUPGIP_FLAGS_VALID_MASK)
        return VERR_INVALID_PARAMETER;
    if ((fAndMask & ~SUPGIP_FLAGS_VALID_MASK) != ~SUPGIP_FLAGS_VALID_MASK)
        return VERR_INVALID_PARAMETER;

    /*
     * Don't confuse supdrvGipSetFlags or anyone else by both setting
     * and clearing the same flags. AND takes precedence.
     */
    fOrMask &= fAndMask;

    /*
     * Take the GIP mutex to avoid having to think about races between two
     * clients changing the flags at the same time (state is not simple).
     */
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    rc = supdrvGipSetFlags(pDevExt, pSession, fOrMask, fAndMask);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif
    return rc;
}
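
#if 0 /* Illustrative sketch only -- not part of the driver build. */
/**
 * Hypothetical call sequence (helper name assumed) against the I/O control
 * entry point above, showing the mask conventions it enforces: fAndMask must
 * keep all bits outside SUPGIP_FLAGS_VALID_MASK set, and a flag cleared by
 * fAndMask wins over the same flag in fOrMask.
 */
static int supdrvExampleToggleGipTestMode(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    /* Enable test mode: OR in the flag, clear nothing. */
    int rc = supdrvIOCtl_GipSetFlags(pDevExt, pSession, SUPGIP_FLAGS_TESTING_ENABLE, UINT32_MAX);
    if (RT_SUCCESS(rc))
    {
        /* Disable it again: OR in nothing, clear only TESTING_ENABLE. */
        rc = supdrvIOCtl_GipSetFlags(pDevExt, pSession, 0, ~SUPGIP_FLAGS_TESTING_ENABLE);
    }
    return rc;
}
#endif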